[Feature] Add modules for remote sensing detection

Liu Yi 3 years ago committed by GitHub
commit 16c85bf3c2
Files changed (number of changed lines in parentheses):
  1. paddlers/__init__.py (6)
  2. paddlers/datasets/__init__.py (1)
  3. paddlers/datasets/voc.py (445)
  4. paddlers/models/__init__.py (15)
  5. paddlers/models/ppcd/__init__.py (0)
  6. paddlers/models/ppcls/__init__.py (0)
  7. paddlers/models/ppdet/__init__.py (16)
  8. paddlers/models/ppdet/core/__init__.py (15)
  9. paddlers/models/ppdet/core/config/__init__.py (13)
  10. paddlers/models/ppdet/core/config/schema.py (248)
  11. paddlers/models/ppdet/core/config/yaml_helpers.py (118)
  12. paddlers/models/ppdet/core/workspace.py (278)
  13. paddlers/models/ppdet/data/__init__.py (21)
  14. paddlers/models/ppdet/data/crop_utils/__init__.py (13)
  15. paddlers/models/ppdet/data/crop_utils/annotation_cropper.py (585)
  16. paddlers/models/ppdet/data/crop_utils/chip_box_utils.py (170)
  17. paddlers/models/ppdet/data/reader.py (302)
  18. paddlers/models/ppdet/data/shm_utils.py (67)
  19. paddlers/models/ppdet/data/source/__init__.py (29)
  20. paddlers/models/ppdet/data/source/category.py (904)
  21. paddlers/models/ppdet/data/source/coco.py (251)
  22. paddlers/models/ppdet/data/source/dataset.py (197)
  23. paddlers/models/ppdet/data/source/keypoint_coco.py (669)
  24. paddlers/models/ppdet/data/source/mot.py (636)
  25. paddlers/models/ppdet/data/source/sniper_coco.py (191)
  26. paddlers/models/ppdet/data/source/voc.py (231)
  27. paddlers/models/ppdet/data/source/widerface.py (180)
  28. paddlers/models/ppdet/data/transform/__init__.py (28)
  29. paddlers/models/ppdet/data/transform/atss_assigner.py (270)
  30. paddlers/models/ppdet/data/transform/autoaugment_utils.py (1591)
  31. paddlers/models/ppdet/data/transform/batch_operators.py (1080)
  32. paddlers/models/ppdet/data/transform/gridmask_utils.py (86)
  33. paddlers/models/ppdet/data/transform/keypoint_operators.py (868)
  34. paddlers/models/ppdet/data/transform/mot_operators.py (628)
  35. paddlers/models/ppdet/data/transform/op_helper.py (498)
  36. paddlers/models/ppdet/data/transform/operators.py (3025)
  37. paddlers/models/ppdet/engine/__init__.py (30)
  38. paddlers/models/ppdet/engine/callbacks.py (340)
  39. paddlers/models/ppdet/engine/env.py (50)
  40. paddlers/models/ppdet/engine/export_utils.py (177)
  41. paddlers/models/ppdet/engine/tracker.py (538)
  42. paddlers/models/ppdet/engine/trainer.py (742)
  43. paddlers/models/ppdet/metrics/__init__.py (29)
  44. paddlers/models/ppdet/metrics/coco_utils.py (184)
  45. paddlers/models/ppdet/metrics/json_results.py (149)
  46. paddlers/models/ppdet/metrics/keypoint_metrics.py (401)
  47. paddlers/models/ppdet/metrics/map_utils.py (444)
  48. paddlers/models/ppdet/metrics/mcmot_metrics.py (470)
  49. paddlers/models/ppdet/metrics/metrics.py (434)
  50. paddlers/models/ppdet/metrics/mot_metrics.py (1236)
  51. paddlers/models/ppdet/metrics/munkres.py (428)
  52. paddlers/models/ppdet/metrics/widerface_utils.py (393)
  53. paddlers/models/ppdet/model_zoo/__init__.py (18)
  54. paddlers/models/ppdet/model_zoo/model_zoo.py (84)
  55. paddlers/models/ppdet/modeling/__init__.py (45)
  56. paddlers/models/ppdet/modeling/architectures/__init__.py (51)
  57. paddlers/models/ppdet/modeling/architectures/blazeface.py (91)
  58. paddlers/models/ppdet/modeling/architectures/cascade_rcnn.py (144)
  59. paddlers/models/ppdet/modeling/architectures/centernet.py (108)
  60. paddlers/models/ppdet/modeling/architectures/deepsort.py (69)
  61. paddlers/models/ppdet/modeling/architectures/detr.py (93)
  62. paddlers/models/ppdet/modeling/architectures/fairmot.py (100)
  63. paddlers/models/ppdet/modeling/architectures/faster_rcnn.py (106)
  64. paddlers/models/ppdet/modeling/architectures/fcos.py (105)
  65. paddlers/models/ppdet/modeling/architectures/gfl.py (87)
  66. paddlers/models/ppdet/modeling/architectures/jde.py (111)
  67. paddlers/models/ppdet/modeling/architectures/keypoint_hrhrnet.py (287)
  68. paddlers/models/ppdet/modeling/architectures/keypoint_hrnet.py (267)
  69. paddlers/models/ppdet/modeling/architectures/mask_rcnn.py (135)
  70. paddlers/models/ppdet/modeling/architectures/meta_arch.py (141)
  71. paddlers/models/ppdet/modeling/architectures/picodet.py (91)
  72. paddlers/models/ppdet/modeling/architectures/s2anet.py (102)
  73. paddlers/models/ppdet/modeling/architectures/solov2.py (110)
  74. paddlers/models/ppdet/modeling/architectures/sparse_rcnn.py (99)
  75. paddlers/models/ppdet/modeling/architectures/ssd.py (93)
  76. paddlers/models/ppdet/modeling/architectures/tood.py (78)
  77. paddlers/models/ppdet/modeling/architectures/ttfnet.py (98)
  78. paddlers/models/ppdet/modeling/architectures/yolo.py (124)
  79. paddlers/models/ppdet/modeling/assigners/__init__.py (23)
  80. paddlers/models/ppdet/modeling/assigners/atss_assigner.py (211)
  81. paddlers/models/ppdet/modeling/assigners/simota_assigner.py (262)
  82. paddlers/models/ppdet/modeling/assigners/task_aligned_assigner.py (158)
  83. paddlers/models/ppdet/modeling/assigners/utils.py (195)
  84. paddlers/models/ppdet/modeling/backbones/__init__.py (49)
  85. paddlers/models/ppdet/modeling/backbones/blazenet.py (320)
  86. paddlers/models/ppdet/modeling/backbones/darknet.py (340)
  87. paddlers/models/ppdet/modeling/backbones/dla.py (244)
  88. paddlers/models/ppdet/modeling/backbones/esnet.py (290)
  89. paddlers/models/ppdet/modeling/backbones/ghostnet.py (470)
  90. paddlers/models/ppdet/modeling/backbones/hardnet.py (224)
  91. paddlers/models/ppdet/modeling/backbones/hrnet.py (727)
  92. paddlers/models/ppdet/modeling/backbones/lcnet.py (259)
  93. paddlers/models/ppdet/modeling/backbones/lite_hrnet.py (886)
  94. paddlers/models/ppdet/modeling/backbones/mobilenet_v1.py (411)
  95. paddlers/models/ppdet/modeling/backbones/mobilenet_v3.py (479)
  96. paddlers/models/ppdet/modeling/backbones/name_adapter.py (69)
  97. paddlers/models/ppdet/modeling/backbones/res2net.py (358)
  98. paddlers/models/ppdet/modeling/backbones/resnet.py (609)
  99. paddlers/models/ppdet/modeling/backbones/senet.py (139)
  100. paddlers/models/ppdet/modeling/backbones/shufflenet_v2.py (251)
Some files were not shown because too many files have changed in this diff.

paddlers/__init__.py
@@ -1 +1,5 @@
from . import datasets, transforms, utils, tools
from . import tasks, datasets, transforms, utils, tools, models
# TODO: add this info during installation
env_info = {'place': 'gpu', 'num': 1}
__version__ = 0.1

paddlers/datasets/__init__.py
@@ -0,0 +1 @@
from .voc import VOCDetection

paddlers/datasets/voc.py
@@ -0,0 +1,445 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
import copy
import os
import os.path as osp
import random
import re
import numpy as np
from collections import OrderedDict
import xml.etree.ElementTree as ET
from paddle.io import Dataset
from paddlers.utils import logging, get_num_workers, get_encoding, path_normalization, is_pic
from paddlers.transforms import Decode, MixupImage
from paddlers.tools import YOLOAnchorCluster
class VOCDetection(Dataset):
"""读取PascalVOC格式的检测数据集,并对样本进行相应的处理。
Args:
data_dir (str): 数据集所在的目录路径
file_list (str): 描述数据集图片文件和对应标注文件的文件路径文本内每行路径为相对data_dir的相对路
label_list (str): 描述数据集包含的类别信息文件路径
transforms (paddlers.det.transforms): 数据集中每个样本的预处理/增强算子
num_workers (int|str): 数据集中样本在预处理过程中的线程或进程数默认为'auto'当设为'auto'根据
系统的实际CPU核数设置`num_workers`: 如果CPU核数的一半大于8`num_workers`为8否则为CPU核数的
一半
shuffle (bool): 是否需要对数据集中样本打乱顺序默认为False
allow_empty (bool): 是否加载负样本默认为False
empty_ratio (float): 用于指定负样本占总样本数的比例如果小于0或大于等于1则保留全部的负样本默认为1
"""
def __init__(self,
data_dir,
file_list,
label_list,
transforms=None,
num_workers='auto',
shuffle=False,
allow_empty=False,
empty_ratio=1.):
# matplotlib.use() must be called *before* pylab, matplotlib.pyplot,
# or matplotlib.backends is imported for the first time
# pycocotools import matplotlib
import matplotlib
matplotlib.use('Agg')
from pycocotools.coco import COCO
super(VOCDetection, self).__init__()
self.data_dir = data_dir
self.data_fields = None
self.transforms = copy.deepcopy(transforms)
self.num_max_boxes = 50
self.use_mix = False
if self.transforms is not None:
for op in self.transforms.transforms:
if isinstance(op, MixupImage):
self.mixup_op = copy.deepcopy(op)
self.use_mix = True
self.num_max_boxes *= 2
break
self.batch_transforms = None
self.num_workers = get_num_workers(num_workers)
self.shuffle = shuffle
self.allow_empty = allow_empty
self.empty_ratio = empty_ratio
self.file_list = list()
neg_file_list = list()
self.labels = list()
annotations = dict()
annotations['images'] = list()
annotations['categories'] = list()
annotations['annotations'] = list()
cname2cid = OrderedDict()
label_id = 0
with open(label_list, 'r', encoding=get_encoding(label_list)) as f:
for line in f.readlines():
cname2cid[line.strip()] = label_id
label_id += 1
self.labels.append(line.strip())
logging.info("Starting to read file list from dataset...")
for k, v in cname2cid.items():
annotations['categories'].append({
'supercategory': 'component',
'id': v + 1,
'name': k
})
ct = 0
ann_ct = 0
with open(file_list, 'r', encoding=get_encoding(file_list)) as f:
while True:
line = f.readline()
if not line:
break
if len(line.strip().split()) > 2:
raise Exception("A space is defined as the separator, "
"but it exists in image or label name {}."
.format(line))
img_file, xml_file = [
osp.join(data_dir, x) for x in line.strip().split()[:2]
]
img_file = path_normalization(img_file)
xml_file = path_normalization(xml_file)
if not is_pic(img_file):
continue
if not osp.isfile(xml_file):
continue
if not osp.exists(img_file):
logging.warning('The image file {} does not exist!'.format(
img_file))
continue
if not osp.exists(xml_file):
logging.warning('The annotation file {} does not exist!'.
format(xml_file))
continue
tree = ET.parse(xml_file)
if tree.find('id') is None:
im_id = np.asarray([ct])
else:
ct = int(tree.find('id').text)
im_id = np.asarray([int(tree.find('id').text)])
pattern = re.compile('<size>', re.IGNORECASE)
size_tag = pattern.findall(
str(ET.tostringlist(tree.getroot())))
if len(size_tag) > 0:
size_tag = size_tag[0][1:-1]
size_element = tree.find(size_tag)
pattern = re.compile('<width>', re.IGNORECASE)
width_tag = pattern.findall(
str(ET.tostringlist(size_element)))[0][1:-1]
im_w = float(size_element.find(width_tag).text)
pattern = re.compile('<height>', re.IGNORECASE)
height_tag = pattern.findall(
str(ET.tostringlist(size_element)))[0][1:-1]
im_h = float(size_element.find(height_tag).text)
else:
im_w = 0
im_h = 0
pattern = re.compile('<object>', re.IGNORECASE)
obj_match = pattern.findall(
str(ET.tostringlist(tree.getroot())))
if len(obj_match) > 0:
obj_tag = obj_match[0][1:-1]
objs = tree.findall(obj_tag)
else:
objs = list()
num_bbox, i = len(objs), 0
gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32)
gt_class = np.zeros((num_bbox, 1), dtype=np.int32)
gt_score = np.zeros((num_bbox, 1), dtype=np.float32)
is_crowd = np.zeros((num_bbox, 1), dtype=np.int32)
difficult = np.zeros((num_bbox, 1), dtype=np.int32)
for obj in objs:
pattern = re.compile('<name>', re.IGNORECASE)
name_tag = pattern.findall(str(ET.tostringlist(obj)))[0][
1:-1]
cname = obj.find(name_tag).text.strip()
pattern = re.compile('<difficult>', re.IGNORECASE)
diff_tag = pattern.findall(str(ET.tostringlist(obj)))
if len(diff_tag) == 0:
_difficult = 0
else:
diff_tag = diff_tag[0][1:-1]
try:
_difficult = int(obj.find(diff_tag).text)
except Exception:
_difficult = 0
pattern = re.compile('<bndbox>', re.IGNORECASE)
box_tag = pattern.findall(str(ET.tostringlist(obj)))
if len(box_tag) == 0:
logging.warning(
"There's no field '<bndbox>' in one of object, "
"so this object will be ignored. xml file: {}".
format(xml_file))
continue
box_tag = box_tag[0][1:-1]
box_element = obj.find(box_tag)
pattern = re.compile('<xmin>', re.IGNORECASE)
xmin_tag = pattern.findall(
str(ET.tostringlist(box_element)))[0][1:-1]
x1 = float(box_element.find(xmin_tag).text)
pattern = re.compile('<ymin>', re.IGNORECASE)
ymin_tag = pattern.findall(
str(ET.tostringlist(box_element)))[0][1:-1]
y1 = float(box_element.find(ymin_tag).text)
pattern = re.compile('<xmax>', re.IGNORECASE)
xmax_tag = pattern.findall(
str(ET.tostringlist(box_element)))[0][1:-1]
x2 = float(box_element.find(xmax_tag).text)
pattern = re.compile('<ymax>', re.IGNORECASE)
ymax_tag = pattern.findall(
str(ET.tostringlist(box_element)))[0][1:-1]
y2 = float(box_element.find(ymax_tag).text)
x1 = max(0, x1)
y1 = max(0, y1)
if im_w > 0.5 and im_h > 0.5:
x2 = min(im_w - 1, x2)
y2 = min(im_h - 1, y2)
if not (x2 >= x1 and y2 >= y1):
logging.warning(
"Bounding box for object {} does not satisfy xmin {} <= xmax {} and ymin {} <= ymax {}, "
"so this object is skipped. xml file: {}".format(i, x1, x2, y1, y2, xml_file))
continue
gt_bbox[i, :] = [x1, y1, x2, y2]
gt_class[i, 0] = cname2cid[cname]
gt_score[i, 0] = 1.
is_crowd[i, 0] = 0
difficult[i, 0] = _difficult
i += 1
annotations['annotations'].append({
'iscrowd': 0,
'image_id': int(im_id[0]),
'bbox': [x1, y1, x2 - x1, y2 - y1],
'area': float((x2 - x1) * (y2 - y1)),
'category_id': cname2cid[cname] + 1,
'id': ann_ct,
'difficult': _difficult
})
ann_ct += 1
gt_bbox = gt_bbox[:i, :]
gt_class = gt_class[:i, :]
gt_score = gt_score[:i, :]
is_crowd = is_crowd[:i, :]
difficult = difficult[:i, :]
im_info = {
'im_id': im_id,
'image_shape': np.array(
[im_h, im_w], dtype=np.int32)
}
label_info = {
'is_crowd': is_crowd,
'gt_class': gt_class,
'gt_bbox': gt_bbox,
'gt_score': gt_score,
'difficult': difficult
}
if gt_bbox.size > 0:
self.file_list.append({
'image': img_file,
**
im_info,
**
label_info
})
annotations['images'].append({
'height': im_h,
'width': im_w,
'id': int(im_id[0]),
'file_name': osp.split(img_file)[1]
})
else:
neg_file_list.append({
'image': img_file,
**
im_info,
**
label_info
})
ct += 1
if self.use_mix:
self.num_max_boxes = max(self.num_max_boxes, 2 * len(objs))
else:
self.num_max_boxes = max(self.num_max_boxes, len(objs))
if not ct:
logging.error(
"No VOC record found in %s" % file_list, exit=True)
self.pos_num = len(self.file_list)
if self.allow_empty and neg_file_list:
self.file_list += self._sample_empty(neg_file_list)
logging.info(
"{} samples in file {}, including {} positive samples and {} negative samples.".
format(
len(self.file_list), file_list, self.pos_num,
len(self.file_list) - self.pos_num))
self.num_samples = len(self.file_list)
self.coco_gt = COCO()
self.coco_gt.dataset = annotations
self.coco_gt.createIndex()
self._epoch = 0
def __getitem__(self, idx):
sample = copy.deepcopy(self.file_list[idx])
if self.data_fields is not None:
sample = {k: sample[k] for k in self.data_fields}
if self.use_mix and (self.mixup_op.mixup_epoch == -1 or
self._epoch < self.mixup_op.mixup_epoch):
if self.num_samples > 1:
mix_idx = random.randint(1, self.num_samples - 1)
mix_pos = (mix_idx + idx) % self.num_samples
else:
mix_pos = 0
sample_mix = copy.deepcopy(self.file_list[mix_pos])
if self.data_fields is not None:
sample_mix = {k: sample_mix[k] for k in self.data_fields}
sample = self.mixup_op(sample=[
Decode(to_rgb=False)(sample), Decode(to_rgb=False)(sample_mix)
])
sample = self.transforms(sample)
return sample
def __len__(self):
return self.num_samples
def set_epoch(self, epoch_id):
self._epoch = epoch_id
def cluster_yolo_anchor(self,
num_anchors,
image_size,
cache=True,
cache_path=None,
iters=300,
gen_iters=1000,
thresh=.25):
"""
Cluster YOLO anchors.
Reference:
https://github.com/ultralytics/yolov5/blob/master/utils/autoanchor.py
Args:
num_anchors (int): number of clusters
image_size (list or int): [h, w], being an int means image height and image width are the same.
cache (bool): whether to use the cache
cache_path (str or None, optional): cache directory path. If None, use `data_dir` of the dataset.
iters (int, optional): number of iterations of the k-means algorithm
gen_iters (int, optional): number of iterations of the genetic algorithm
thresh (float, optional): anchor scale threshold
"""
if cache_path is None:
cache_path = self.data_dir
cluster = YOLOAnchorCluster(
num_anchors=num_anchors,
dataset=self,
image_size=image_size,
cache=cache,
cache_path=cache_path,
iters=iters,
gen_iters=gen_iters,
thresh=thresh)
anchors = cluster()
return anchors
def add_negative_samples(self, image_dir, empty_ratio=1):
"""将背景图片加入训练
Args:
image_dir (str)背景图片所在的文件夹目录
empty_ratio (float or None): 用于指定负样本占总样本数的比例如果为None保留数据集初始化是设置的`empty_ratio`
否则更新原有`empty_ratio`如果小于0或大于等于1则保留全部的负样本默认为1
"""
import cv2
if not osp.isdir(image_dir):
raise Exception("{} is not a valid image directory.".format(
image_dir))
if empty_ratio is not None:
self.empty_ratio = empty_ratio
image_list = os.listdir(image_dir)
max_img_id = max(
len(self.file_list) - 1, max(self.coco_gt.getImgIds()))
neg_file_list = list()
for image in image_list:
if not is_pic(image):
continue
gt_bbox = np.zeros((0, 4), dtype=np.float32)
gt_class = np.zeros((0, 1), dtype=np.int32)
gt_score = np.zeros((0, 1), dtype=np.float32)
is_crowd = np.zeros((0, 1), dtype=np.int32)
difficult = np.zeros((0, 1), dtype=np.int32)
max_img_id += 1
im_fname = osp.join(image_dir, image)
img_data = cv2.imread(im_fname, cv2.IMREAD_UNCHANGED)
im_h, im_w, im_c = img_data.shape
im_info = {
'im_id': np.asarray([max_img_id]),
'image_shape': np.array(
[im_h, im_w], dtype=np.int32)
}
label_info = {
'is_crowd': is_crowd,
'gt_class': gt_class,
'gt_bbox': gt_bbox,
'gt_score': gt_score,
'difficult': difficult
}
if 'gt_poly' in self.file_list[0]:
label_info['gt_poly'] = []
neg_file_list.append({
'image': im_fname,
**
im_info,
**
label_info
})
if neg_file_list:
self.allow_empty = True
self.file_list += self._sample_empty(neg_file_list)
logging.info(
"{} negative samples added. Dataset contains {} positive samples and {} negative samples.".
format(
len(self.file_list) - self.num_samples, self.pos_num,
len(self.file_list) - self.pos_num))
self.num_samples = len(self.file_list)
def _sample_empty(self, neg_file_list):
if 0. <= self.empty_ratio < 1.:
import random
total_num = len(self.file_list)
neg_num = total_num - self.pos_num
sample_num = int(
min((total_num * self.empty_ratio - neg_num) //
(1 - self.empty_ratio), len(neg_file_list)))
return random.sample(neg_file_list, sample_num)
else:
return neg_file_list
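
As a rough usage sketch (not part of this diff), a dataset like the one above might be built as follows; the directory layout, file names, and transform operators are illustrative assumptions rather than APIs confirmed by this commit.

import paddlers as pdrs

# Hypothetical preprocessing pipeline; the operator names are assumptions.
train_transforms = pdrs.transforms.Compose([
    pdrs.transforms.Resize(target_size=608),
    pdrs.transforms.Normalize(
        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

train_dataset = pdrs.datasets.VOCDetection(
    data_dir='dataset/',                 # assumed layout
    file_list='dataset/train_list.txt',  # each line: "<image path> <xml path>", relative to data_dir
    label_list='dataset/labels.txt',     # one class name per line
    transforms=train_transforms,
    num_workers='auto',
    shuffle=True)

# Optionally cluster anchors for YOLO-style detectors via the method defined above.
anchors = train_dataset.cluster_yolo_anchor(num_anchors=9, image_size=608)
print(len(train_dataset), anchors)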

paddlers/models/__init__.py
@@ -0,0 +1,15 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import ppcd, ppcls, ppdet, ppseg

paddlers/models/ppdet/__init__.py
@@ -0,0 +1,16 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import (core, data, engine, modeling, model_zoo, optimizer, metrics,
utils, slim)

paddlers/models/ppdet/core/__init__.py
@@ -0,0 +1,15 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import config

paddlers/models/ppdet/core/config/__init__.py
@@ -0,0 +1,13 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

paddlers/models/ppdet/core/config/schema.py
@@ -0,0 +1,248 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
import inspect
import importlib
import re
try:
from docstring_parser import parse as doc_parse
except Exception:
def doc_parse(*args):
pass
try:
from typeguard import check_type
except Exception:
def check_type(*args):
pass
__all__ = ['SchemaValue', 'SchemaDict', 'SharedConfig', 'extract_schema']
class SchemaValue(object):
def __init__(self, name, doc='', type=None):
super(SchemaValue, self).__init__()
self.name = name
self.doc = doc
self.type = type
def set_default(self, value):
self.default = value
def has_default(self):
return hasattr(self, 'default')
class SchemaDict(dict):
def __init__(self, **kwargs):
super(SchemaDict, self).__init__()
self.schema = {}
self.strict = False
self.doc = ""
self.update(kwargs)
def __setitem__(self, key, value):
# XXX also update regular dict to SchemaDict??
if isinstance(value, dict) and key in self and isinstance(self[key],
SchemaDict):
self[key].update(value)
else:
super(SchemaDict, self).__setitem__(key, value)
def __missing__(self, key):
if self.has_default(key):
return self.schema[key].default
elif key in self.schema:
return self.schema[key]
else:
raise KeyError(key)
def copy(self):
newone = SchemaDict()
newone.__dict__.update(self.__dict__)
newone.update(self)
return newone
def set_schema(self, key, value):
assert isinstance(value, SchemaValue)
self.schema[key] = value
def set_strict(self, strict):
self.strict = strict
def has_default(self, key):
return key in self.schema and self.schema[key].has_default()
def is_default(self, key):
if not self.has_default(key):
return False
if hasattr(self[key], '__dict__'):
return True
else:
return key not in self or self[key] == self.schema[key].default
def find_default_keys(self):
return [
k for k in list(self.keys()) + list(self.schema.keys())
if self.is_default(k)
]
def mandatory(self):
return any([k for k in self.schema.keys() if not self.has_default(k)])
def find_missing_keys(self):
missing = [
k for k in self.schema.keys()
if k not in self and not self.has_default(k)
]
placeholders = [k for k in self if self[k] in ('<missing>', '<value>')]
return missing + placeholders
def find_extra_keys(self):
return list(set(self.keys()) - set(self.schema.keys()))
def find_mismatch_keys(self):
mismatch_keys = []
for arg in self.schema.values():
if arg.type is not None:
try:
check_type("{}.{}".format(self.name, arg.name),
self[arg.name], arg.type)
except Exception:
mismatch_keys.append(arg.name)
return mismatch_keys
def validate(self):
missing_keys = self.find_missing_keys()
if missing_keys:
raise ValueError("Missing param for class<{}>: {}".format(
self.name, ", ".join(missing_keys)))
extra_keys = self.find_extra_keys()
if extra_keys and self.strict:
raise ValueError("Extraneous param for class<{}>: {}".format(
self.name, ", ".join(extra_keys)))
mismatch_keys = self.find_mismatch_keys()
if mismatch_keys:
raise TypeError("Wrong param type for class<{}>: {}".format(
self.name, ", ".join(mismatch_keys)))
class SharedConfig(object):
"""
Representation class for `__shared__` annotations, which work as follows:
- if `key` is set for the module in config file, its value will take
precedence
- if `key` is not set for the module but present in the config file, its
value will be used
- otherwise, use the provided `default_value` as fallback
Args:
key: config[key] will be injected
default_value: fallback value
"""
def __init__(self, key, default_value=None):
super(SharedConfig, self).__init__()
self.key = key
self.default_value = default_value
def extract_schema(cls):
"""
Extract schema from a given class
Args:
cls (type): Class from which to extract.
Returns:
schema (SchemaDict): Extracted schema.
"""
ctor = cls.__init__
# python 2 compatibility
if hasattr(inspect, 'getfullargspec'):
argspec = inspect.getfullargspec(ctor)
annotations = argspec.annotations
has_kwargs = argspec.varkw is not None
else:
argspec = inspect.getfullargspec(ctor)
# python 2 type hinting workaround, see pep-3107
# however, since `typeguard` does not support python 2, type checking
# is still python 3 only for now
annotations = getattr(ctor, '__annotations__', {})
has_kwargs = argspec.varkw is not None
names = [arg for arg in argspec.args if arg != 'self']
defaults = argspec.defaults
num_defaults = argspec.defaults is not None and len(argspec.defaults) or 0
num_required = len(names) - num_defaults
docs = cls.__doc__
if docs is None and getattr(cls, '__category__', None) == 'op':
docs = cls.__call__.__doc__
try:
docstring = doc_parse(docs)
except Exception:
docstring = None
if docstring is None:
comments = {}
else:
comments = {}
for p in docstring.params:
match_obj = re.match('^([a-zA-Z_]+[a-zA-Z_0-9]*).*', p.arg_name)
if match_obj is not None:
comments[match_obj.group(1)] = p.description
schema = SchemaDict()
schema.name = cls.__name__
schema.doc = ""
if docs is not None:
start_pos = docs[0] == '\n' and 1 or 0
schema.doc = docs[start_pos:].split("\n")[0].strip()
# XXX handle paddle's weird doc convention
if '**' == schema.doc[:2] and '**' == schema.doc[-2:]:
schema.doc = schema.doc[2:-2].strip()
schema.category = hasattr(cls, '__category__') and getattr(
cls, '__category__') or 'module'
schema.strict = not has_kwargs
schema.pymodule = importlib.import_module(cls.__module__)
schema.inject = getattr(cls, '__inject__', [])
schema.shared = getattr(cls, '__shared__', [])
for idx, name in enumerate(names):
comment = name in comments and comments[name] or name
if name in schema.inject:
type_ = None
else:
type_ = name in annotations and annotations[name] or None
value_schema = SchemaValue(name, comment, type_)
if name in schema.shared:
assert idx >= num_required, "shared config must have default value"
default = defaults[idx - num_required]
value_schema.set_default(SharedConfig(name, default))
elif idx >= num_required:
default = defaults[idx - num_required]
value_schema.set_default(default)
schema.set_schema(name, value_schema)
return schema
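
To make the extraction above concrete, here is a small sketch (not part of this diff) showing how extract_schema picks up defaults, type annotations, and __shared__ entries from a toy class defined only for illustration.

from paddlers.models.ppdet.core.config.schema import SharedConfig, extract_schema

class ToyHead(object):
    __shared__ = ['num_classes']

    def __init__(self, num_classes=80, in_channels: int = 256):
        self.num_classes = num_classes
        self.in_channels = in_channels

schema = extract_schema(ToyHead)
print(schema.name)                          # ToyHead
print(schema.schema['in_channels'].type)    # <class 'int'>, read from the annotation
shared_default = schema.schema['num_classes'].default
print(isinstance(shared_default, SharedConfig),
      shared_default.key, shared_default.default_value)    # True num_classes 80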

paddlers/models/ppdet/core/config/yaml_helpers.py
@@ -0,0 +1,118 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import importlib
import inspect
import yaml
from .schema import SharedConfig
__all__ = ['serializable', 'Callable']
def represent_dictionary_order(self, dict_data):
return self.represent_mapping('tag:yaml.org,2002:map', dict_data.items())
def setup_orderdict():
from collections import OrderedDict
yaml.add_representer(OrderedDict, represent_dictionary_order)
def _make_python_constructor(cls):
def python_constructor(loader, node):
if isinstance(node, yaml.SequenceNode):
args = loader.construct_sequence(node, deep=True)
return cls(*args)
else:
kwargs = loader.construct_mapping(node, deep=True)
try:
return cls(**kwargs)
except Exception as ex:
print("Error when construct {} instance from yaml config".
format(cls.__name__))
raise ex
return python_constructor
def _make_python_representer(cls):
# python 2 compatibility
if hasattr(inspect, 'getfullargspec'):
argspec = inspect.getfullargspec(cls)
else:
argspec = inspect.getfullargspec(cls.__init__)
argnames = [arg for arg in argspec.args if arg != 'self']
def python_representer(dumper, obj):
if argnames:
data = {name: getattr(obj, name) for name in argnames}
else:
data = obj.__dict__
if '_id' in data:
del data['_id']
return dumper.represent_mapping(u'!{}'.format(cls.__name__), data)
return python_representer
def serializable(cls):
"""
Add loader and dumper for given class, which must be
"trivially serializable"
Args:
cls: class to be serialized
Returns: cls
"""
yaml.add_constructor(u'!{}'.format(cls.__name__),
_make_python_constructor(cls))
yaml.add_representer(cls, _make_python_representer(cls))
return cls
yaml.add_representer(SharedConfig,
lambda d, o: d.represent_data(o.default_value))
@serializable
class Callable(object):
"""
Helper to be used in Yaml for creating arbitrary class objects
Args:
full_type (str): the full module path to target function
"""
def __init__(self, full_type, args=[], kwargs={}):
super(Callable, self).__init__()
self.full_type = full_type
self.args = args
self.kwargs = kwargs
def __call__(self):
if '.' in self.full_type:
idx = self.full_type.rfind('.')
module = importlib.import_module(self.full_type[:idx])
func_name = self.full_type[idx + 1:]
else:
try:
module = importlib.import_module('builtins')
except Exception:
module = importlib.import_module('__builtin__')
func_name = self.full_type
func = getattr(module, func_name)
return func(*self.args, **self.kwargs)
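
A brief sketch (not part of this diff) of what the serializable decorator enables: a trivially serializable toy class, made up for illustration, can be dumped to and loaded from YAML through the registered !ClassName tag.

import yaml
from paddlers.models.ppdet.core.config.yaml_helpers import serializable

@serializable
class Anchor(object):
    def __init__(self, sizes=[32, 64], ratios=[0.5, 1.0, 2.0]):
        self.sizes = sizes
        self.ratios = ratios

a = Anchor(sizes=[16, 32])
text = yaml.dump(a)                        # roughly "!Anchor" followed by the sizes/ratios mapping
b = yaml.load(text, Loader=yaml.Loader)    # the registered constructor rebuilds an Anchor
print(isinstance(b, Anchor), b.sizes)      # True [16, 32]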

paddlers/models/ppdet/core/workspace.py
@@ -0,0 +1,278 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
import importlib
import os
import sys
import yaml
import collections
try:
collectionsAbc = collections.abc
except AttributeError:
collectionsAbc = collections
from .config.schema import SchemaDict, SharedConfig, extract_schema
from .config.yaml_helpers import serializable
__all__ = [
'global_config',
'load_config',
'merge_config',
'get_registered_modules',
'create',
'register',
'serializable',
'dump_value',
]
def dump_value(value):
# XXX this is hackish, but collections.abc is not available in python 2
if hasattr(value, '__dict__') or isinstance(value, (dict, tuple, list)):
value = yaml.dump(value, default_flow_style=True)
value = value.replace('\n', '')
value = value.replace('...', '')
return "'{}'".format(value)
else:
# primitive types
return str(value)
class AttrDict(dict):
"""Single level attribute dict, NOT recursive"""
def __init__(self, **kwargs):
super(AttrDict, self).__init__()
super(AttrDict, self).update(kwargs)
def __getattr__(self, key):
if key in self:
return self[key]
raise AttributeError("object has no attribute '{}'".format(key))
global_config = AttrDict()
BASE_KEY = '_BASE_'
# parse and load _BASE_ recursively
def _load_config_with_base(file_path):
with open(file_path) as f:
file_cfg = yaml.load(f, Loader=yaml.Loader)
# NOTE: cfgs outside have higher priority than cfgs in _BASE_
if BASE_KEY in file_cfg:
all_base_cfg = AttrDict()
base_ymls = list(file_cfg[BASE_KEY])
for base_yml in base_ymls:
if base_yml.startswith("~"):
base_yml = os.path.expanduser(base_yml)
if not base_yml.startswith('/'):
base_yml = os.path.join(os.path.dirname(file_path), base_yml)
with open(base_yml) as f:
base_cfg = _load_config_with_base(base_yml)
all_base_cfg = merge_config(base_cfg, all_base_cfg)
del file_cfg[BASE_KEY]
return merge_config(file_cfg, all_base_cfg)
return file_cfg
def load_config(file_path):
"""
Load config from file.
Args:
file_path (str): Path of the config file to be loaded.
Returns: global config
"""
_, ext = os.path.splitext(file_path)
assert ext in ['.yml', '.yaml'], "only support yaml files for now"
# load config from file and merge into global config
cfg = _load_config_with_base(file_path)
cfg['filename'] = os.path.splitext(os.path.split(file_path)[-1])[0]
merge_config(cfg)
return global_config
def dict_merge(dct, merge_dct):
""" Recursive dict merge. Inspired by :meth:``dict.update()``, instead of
updating only top-level keys, dict_merge recurses down into dicts nested
to an arbitrary depth, updating keys. The ``merge_dct`` is merged into
``dct``.
Args:
dct: dict onto which the merge is executed
merge_dct: dct merged into dct
Returns: dct
"""
for k, v in merge_dct.items():
if (k in dct and isinstance(dct[k], dict) and
isinstance(merge_dct[k], collectionsAbc.Mapping)):
dict_merge(dct[k], merge_dct[k])
else:
dct[k] = merge_dct[k]
return dct
def merge_config(config, another_cfg=None):
"""
Merge config into global config or another_cfg.
Args:
config (dict): Config to be merged.
Returns: global config
"""
global global_config
dct = another_cfg or global_config
return dict_merge(dct, config)
def get_registered_modules():
return {
k: v
for k, v in global_config.items() if isinstance(v, SchemaDict)
}
def make_partial(cls):
op_module = importlib.import_module(cls.__op__.__module__)
op = getattr(op_module, cls.__op__.__name__)
cls.__category__ = getattr(cls, '__category__', None) or 'op'
def partial_apply(self, *args, **kwargs):
kwargs_ = self.__dict__.copy()
kwargs_.update(kwargs)
return op(*args, **kwargs_)
if getattr(cls, '__append_doc__', True): # XXX should default to True?
if sys.version_info[0] > 2:
cls.__doc__ = "Wrapper for `{}` OP".format(op.__name__)
cls.__init__.__doc__ = op.__doc__
cls.__call__ = partial_apply
cls.__call__.__doc__ = op.__doc__
else:
# XXX work around for python 2
partial_apply.__doc__ = op.__doc__
cls.__call__ = partial_apply
return cls
def register(cls):
"""
Register a given module class.
Args:
cls (type): Module class to be registered.
Returns: cls
"""
if cls.__name__ in global_config:
raise ValueError("Module class already registered: {}".format(
cls.__name__))
if hasattr(cls, '__op__'):
cls = make_partial(cls)
global_config[cls.__name__] = extract_schema(cls)
return cls
def create(cls_or_name, **kwargs):
"""
Create an instance of given module class.
Args:
cls_or_name (type or str): Class of which to create instance.
Returns: instance of type `cls_or_name`
"""
assert type(cls_or_name) in [type, str
], "should be a class or name of a class"
name = type(cls_or_name) == str and cls_or_name or cls_or_name.__name__
assert name in global_config and \
isinstance(global_config[name], SchemaDict), \
"the module {} is not registered".format(name)
config = global_config[name]
cls = getattr(config.pymodule, name)
cls_kwargs = {}
cls_kwargs.update(global_config[name])
# parse the `shared` annotation of registered modules
if getattr(config, 'shared', None):
for k in config.shared:
target_key = config[k]
shared_conf = config.schema[k].default
assert isinstance(shared_conf, SharedConfig)
if target_key is not None and not isinstance(target_key,
SharedConfig):
continue # value is given for the module
elif shared_conf.key in global_config:
# `key` is present in config
cls_kwargs[k] = global_config[shared_conf.key]
else:
cls_kwargs[k] = shared_conf.default_value
# parse the `inject` annotation of registered modules
if getattr(cls, 'from_config', None):
cls_kwargs.update(cls.from_config(config, **kwargs))
if getattr(config, 'inject', None):
for k in config.inject:
target_key = config[k]
# optional dependency
if target_key is None:
continue
if isinstance(target_key, dict) or hasattr(target_key, '__dict__'):
if 'name' not in target_key.keys():
continue
inject_name = str(target_key['name'])
if inject_name not in global_config:
raise ValueError(
"Missing injection name {}; check its name in the config file".
format(k))
target = global_config[inject_name]
for i, v in target_key.items():
if i == 'name':
continue
target[i] = v
if isinstance(target, SchemaDict):
cls_kwargs[k] = create(inject_name)
elif isinstance(target_key, str):
if target_key not in global_config:
raise ValueError("Missing injection config:", target_key)
target = global_config[target_key]
if isinstance(target, SchemaDict):
cls_kwargs[k] = create(target_key)
elif hasattr(target, '__dict__'): # serialized object
cls_kwargs[k] = target
else:
raise ValueError("Unsupported injection type:", target_key)
# prevent modification of global config values of reference types
# (e.g., list, dict) from within the created module instances
#kwargs = copy.deepcopy(kwargs)
return cls(**cls_kwargs)
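
The register/merge_config/create flow above can be illustrated with a toy module class; the class and the config values in this sketch are made up and not part of this diff.

from paddlers.models.ppdet.core.workspace import register, create, merge_config

@register
class ToyNeck(object):
    __shared__ = ['num_classes']

    def __init__(self, num_classes=80, out_channels=256):
        self.num_classes = num_classes
        self.out_channels = out_channels

# load_config() would normally merge values parsed from a YAML file;
# merging a plain dict here affects the global config in the same way.
merge_config({'num_classes': 15, 'ToyNeck': {'out_channels': 128}})

neck = create('ToyNeck')
print(neck.num_classes, neck.out_channels)   # 15 128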

paddlers/models/ppdet/data/__init__.py
@@ -0,0 +1,21 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import source
from . import transform
from . import reader
from .source import *
from .transform import *
from .reader import *

paddlers/models/ppdet/data/crop_utils/__init__.py
@@ -0,0 +1,13 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

paddlers/models/ppdet/data/crop_utils/annotation_cropper.py
@@ -0,0 +1,585 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
import math
import random
import numpy as np
from copy import deepcopy
from typing import List, Tuple
from collections import defaultdict
from .chip_box_utils import nms, transform_chip_boxes2image_boxes
from .chip_box_utils import find_chips_to_cover_overlaped_boxes
from .chip_box_utils import transform_chip_box
from .chip_box_utils import intersection_over_box
class AnnoCropper(object):
def __init__(self,
image_target_sizes: List[int],
valid_box_ratio_ranges: List[List[float]],
chip_target_size: int,
chip_target_stride: int,
use_neg_chip: bool=False,
max_neg_num_per_im: int=8,
max_per_img: int=-1,
nms_thresh: float=0.5):
"""
Generate chips by chip_target_size and chip_target_stride.
These two parameters just like kernel_size and stride in cnn.
Each image has its raw size. After resizing, then get its target size.
The resizing scale = target_size / raw_size.
So are chips of the image.
box_ratio = box_raw_size / image_raw_size = box_target_size / image_target_size
The 'size' above mentioned is the size of long-side of image, box or chip.
:param image_target_sizes: [2000, 1000]
:param valid_box_ratio_ranges: [[-1, 0.1],[0.08, -1]]
:param chip_target_size: 500
:param chip_target_stride: 200
"""
self.target_sizes = image_target_sizes
self.valid_box_ratio_ranges = valid_box_ratio_ranges
assert len(self.target_sizes) == len(self.valid_box_ratio_ranges)
self.scale_num = len(self.target_sizes)
self.chip_target_size = chip_target_size # is target size
self.chip_target_stride = chip_target_stride # is target stride
self.use_neg_chip = use_neg_chip
self.max_neg_num_per_im = max_neg_num_per_im
self.max_per_img = max_per_img
self.nms_thresh = nms_thresh
def crop_anno_records(self, records: List[dict]):
"""
The main logic:
# foreach record(image):
# foreach scale:
# 1 generate chips by chip size and stride for each scale
# 2 get pos chips
# - validate boxes: current scale; h,w >= 1
# - find pos chips greedily by valid gt boxes in each scale
# - for every valid gt box, find its corresponding pos chips in each scale
# 3 get neg chips
# - If given proposals, find neg boxes in them which are not in pos chips
# - If got neg boxes in last step, we find neg chips and assign neg boxes to neg chips such as 2.
# 4 sample neg chips if too much each image
# transform this image-scale annotations to chips(pos chips&neg chips) annotations
:param records, standard coco_record but with extra key `proposals`(Px4), which are predicted by stage1
model and maybe have neg boxes in them.
:return: new_records, list of dict like
{
'im_file': 'fake_image1.jpg',
'im_id': np.array([1]), # new _global_chip_id as im_id
'h': h, # chip height
'w': w, # chip width
'is_crowd': is_crowd, # Nx1 -> Mx1
'gt_class': gt_class, # Nx1 -> Mx1
'gt_bbox': gt_bbox, # Nx4 -> Mx4, 4 represents [x1,y1,x2,y2]
'gt_poly': gt_poly, # [None]xN -> [None]xM
'chip': [x1, y1, x2, y2] # added
}
Attention:
------------------------------>x
|
| (x1,y1)------
| | |
| | |
| | |
| | |
| | |
| ----------
| (x2,y2)
|
y
If we use [x1, y1, x2, y2] to represent boxes or chips,
(x1,y1) is the left-top point which is in the box,
but (x2,y2) is the right-bottom point which is not in the box.
So x1 in [0, w-1], x2 in [1, w], y1 in [0, h-1], y2 in [1,h].
And you can use x2-x1 to get width, and you can use image[y1:y2, x1:x2] to get the box area.
"""
self.chip_records = []
self._global_chip_id = 1
for r in records:
self._cur_im_pos_chips = [
] # element: (chip, boxes_idx), chip is [x1, y1, x2, y2], boxes_ids is List[int]
self._cur_im_neg_chips = [] # element: (chip, neg_box_num)
for scale_i in range(self.scale_num):
self._get_current_scale_parameters(scale_i, r)
# Cx4
chips = self._create_chips(r['h'], r['w'], self._cur_scale)
# # dict: chipid->[box_id, ...]
pos_chip2boxes_idx = self._get_valid_boxes_and_pos_chips(
r['gt_bbox'], chips)
# dict: chipid->neg_box_num
neg_chip2box_num = self._get_neg_boxes_and_chips(
chips,
list(pos_chip2boxes_idx.keys()), r.get('proposals', None))
self._add_to_cur_im_chips(chips, pos_chip2boxes_idx,
neg_chip2box_num)
cur_image_records = self._trans_all_chips2annotations(r)
self.chip_records.extend(cur_image_records)
return self.chip_records
def _add_to_cur_im_chips(self, chips, pos_chip2boxes_idx,
neg_chip2box_num):
for pos_chipid, boxes_idx in pos_chip2boxes_idx.items():
chip = np.array(chips[pos_chipid]) # copy chips slice
self._cur_im_pos_chips.append((chip, boxes_idx))
if neg_chip2box_num is None:
return
for neg_chipid, neg_box_num in neg_chip2box_num.items():
chip = np.array(chips[neg_chipid])
self._cur_im_neg_chips.append((chip, neg_box_num))
def _trans_all_chips2annotations(self, r):
gt_bbox = r['gt_bbox']
im_file = r['im_file']
is_crowd = r['is_crowd']
gt_class = r['gt_class']
# gt_poly = r['gt_poly'] # [None]xN
# remaining keys: im_id, h, w
chip_records = self._trans_pos_chips2annotations(im_file, gt_bbox,
is_crowd, gt_class)
if not self.use_neg_chip:
return chip_records
sampled_neg_chips = self._sample_neg_chips()
neg_chip_records = self._trans_neg_chips2annotations(im_file,
sampled_neg_chips)
chip_records.extend(neg_chip_records)
return chip_records
def _trans_pos_chips2annotations(self, im_file, gt_bbox, is_crowd,
gt_class):
chip_records = []
for chip, boxes_idx in self._cur_im_pos_chips:
chip_bbox, final_boxes_idx = transform_chip_box(gt_bbox, boxes_idx,
chip)
x1, y1, x2, y2 = chip
chip_h = y2 - y1
chip_w = x2 - x1
rec = {
'im_file': im_file,
'im_id': np.array([self._global_chip_id]),
'h': chip_h,
'w': chip_w,
'gt_bbox': chip_bbox,
'is_crowd': is_crowd[final_boxes_idx].copy(),
'gt_class': gt_class[final_boxes_idx].copy(),
# 'gt_poly': [None] * len(final_boxes_idx),
'chip': chip
}
self._global_chip_id += 1
chip_records.append(rec)
return chip_records
def _sample_neg_chips(self):
pos_num = len(self._cur_im_pos_chips)
neg_num = len(self._cur_im_neg_chips)
sample_num = min(pos_num + 2, self.max_neg_num_per_im)
assert sample_num >= 1
if neg_num <= sample_num:
return self._cur_im_neg_chips
candidate_num = int(sample_num * 1.5)
candidate_neg_chips = sorted(
self._cur_im_neg_chips, key=lambda x: -x[1])[:candidate_num]
random.shuffle(candidate_neg_chips)
sampled_neg_chips = candidate_neg_chips[:sample_num]
return sampled_neg_chips
def _trans_neg_chips2annotations(self,
im_file: str,
sampled_neg_chips: List[Tuple]):
chip_records = []
for chip, neg_box_num in sampled_neg_chips:
x1, y1, x2, y2 = chip
chip_h = y2 - y1
chip_w = x2 - x1
rec = {
'im_file': im_file,
'im_id': np.array([self._global_chip_id]),
'h': chip_h,
'w': chip_w,
'gt_bbox': np.zeros(
(0, 4), dtype=np.float32),
'is_crowd': np.zeros(
(0, 1), dtype=np.int32),
'gt_class': np.zeros(
(0, 1), dtype=np.int32),
# 'gt_poly': [],
'chip': chip
}
self._global_chip_id += 1
chip_records.append(rec)
return chip_records
def _get_current_scale_parameters(self, scale_i, r):
im_size = max(r['h'], r['w'])
im_target_size = self.target_sizes[scale_i]
self._cur_im_size, self._cur_im_target_size = im_size, im_target_size
self._cur_scale = self._get_current_scale(im_target_size, im_size)
self._cur_valid_ratio_range = self.valid_box_ratio_ranges[scale_i]
def _get_current_scale(self, im_target_size, im_size):
return im_target_size / im_size
def _create_chips(self, h: int, w: int, scale: float):
"""
Generate chips by chip_target_size and chip_target_stride.
These two parameters just like kernel_size and stride in cnn.
:return: chips, Cx4, xy in raw size dimension
"""
chip_size = self.chip_target_size # omit target for simplicity
stride = self.chip_target_stride
width = int(scale * w)
height = int(scale * h)
min_chip_location_diff = 20 # in target size
assert chip_size >= stride
chip_overlap = chip_size - stride
if (width - chip_overlap
) % stride > min_chip_location_diff: # keep the leftover that is not divisible by stride if it is large enough
w_steps = max(1, int(math.ceil((width - chip_overlap) / stride)))
else: # drop the leftover if it is too small
w_steps = max(1, int(math.floor((width - chip_overlap) / stride)))
if (height - chip_overlap) % stride > min_chip_location_diff:
h_steps = max(1, int(math.ceil((height - chip_overlap) / stride)))
else:
h_steps = max(1, int(math.floor((height - chip_overlap) / stride)))
chips = list()
for j in range(h_steps):
for i in range(w_steps):
x1 = i * stride
y1 = j * stride
x2 = min(x1 + chip_size, width)
y2 = min(y1 + chip_size, height)
chips.append([x1, y1, x2, y2])
# check chip size
for item in chips:
if item[2] - item[0] > chip_size * 1.1 or item[3] - item[
1] > chip_size * 1.1:
raise ValueError(item)
chips = np.array(chips, dtype=np.float64)
raw_size_chips = chips / scale
return raw_size_chips
def _get_valid_boxes_and_pos_chips(self, gt_bbox, chips):
valid_ratio_range = self._cur_valid_ratio_range
im_size = self._cur_im_size
scale = self._cur_scale
# Nx4 N
valid_boxes, valid_boxes_idx = self._validate_boxes(
valid_ratio_range, im_size, gt_bbox, scale)
# dict: chipid->[box_id, ...]
pos_chip2boxes_idx = self._find_pos_chips(chips, valid_boxes,
valid_boxes_idx)
return pos_chip2boxes_idx
def _validate_boxes(self,
valid_ratio_range: List[float],
im_size: int,
gt_boxes: 'np.array of Nx4',
scale: float):
"""
:return: valid_boxes: Nx4, valid_boxes_idx: N
"""
ws = (gt_boxes[:, 2] - gt_boxes[:, 0]).astype(np.int32)
hs = (gt_boxes[:, 3] - gt_boxes[:, 1]).astype(np.int32)
maxs = np.maximum(ws, hs)
box_ratio = maxs / im_size
mins = np.minimum(ws, hs)
target_mins = mins * scale
low = valid_ratio_range[0] if valid_ratio_range[0] > 0 else 0
high = valid_ratio_range[1] if valid_ratio_range[1] > 0 else np.finfo(
np.float64).max
valid_boxes_idx = np.nonzero((low <= box_ratio) & (box_ratio < high) &
(target_mins >= 2))[0]
valid_boxes = gt_boxes[valid_boxes_idx]
return valid_boxes, valid_boxes_idx
def _find_pos_chips(self,
chips: 'Cx4',
valid_boxes: 'Bx4',
valid_boxes_idx: 'B'):
"""
:return: pos_chip2boxes_idx, dict: chipid->[box_id, ...]
"""
iob = intersection_over_box(chips, valid_boxes) # overlap, CxB
iob_threshold_to_find_chips = 1.
pos_chip_ids, _ = self._find_chips_to_cover_overlaped_boxes(
iob, iob_threshold_to_find_chips)
pos_chip_ids = set(pos_chip_ids)
iob_threshold_to_assign_box = 0.5
pos_chip2boxes_idx = self._assign_boxes_to_pos_chips(
iob, iob_threshold_to_assign_box, pos_chip_ids, valid_boxes_idx)
return pos_chip2boxes_idx
def _find_chips_to_cover_overlaped_boxes(self, iob, overlap_threshold):
return find_chips_to_cover_overlaped_boxes(iob, overlap_threshold)
def _assign_boxes_to_pos_chips(self, iob, overlap_threshold, pos_chip_ids,
valid_boxes_idx):
chip_ids, box_ids = np.nonzero(iob >= overlap_threshold)
pos_chip2boxes_idx = defaultdict(list)
for chip_id, box_id in zip(chip_ids, box_ids):
if chip_id not in pos_chip_ids:
continue
raw_gt_box_idx = valid_boxes_idx[box_id]
pos_chip2boxes_idx[chip_id].append(raw_gt_box_idx)
return pos_chip2boxes_idx
def _get_neg_boxes_and_chips(self,
chips: 'Cx4',
pos_chip_ids: 'D',
proposals: 'Px4'):
"""
:param chips:
:param pos_chip_ids:
:param proposals:
:return: neg_chip2box_num, None or dict: chipid->neg_box_num
"""
if not self.use_neg_chip:
return None
# train proposals maybe None
if proposals is None or len(proposals) < 1:
return None
valid_ratio_range = self._cur_valid_ratio_range
im_size = self._cur_im_size
scale = self._cur_scale
valid_props, _ = self._validate_boxes(valid_ratio_range, im_size,
proposals, scale)
neg_boxes = self._find_neg_boxes(chips, pos_chip_ids, valid_props)
neg_chip2box_num = self._find_neg_chips(chips, pos_chip_ids, neg_boxes)
return neg_chip2box_num
def _find_neg_boxes(self,
chips: 'Cx4',
pos_chip_ids: 'D',
valid_props: 'Px4'):
"""
:return: neg_boxes: Nx4
"""
if len(pos_chip_ids) == 0:
return valid_props
pos_chips = chips[pos_chip_ids]
iob = intersection_over_box(pos_chips, valid_props)
overlap_per_prop = np.max(iob, axis=0)
non_overlap_props_idx = overlap_per_prop < 0.5
neg_boxes = valid_props[non_overlap_props_idx]
return neg_boxes
def _find_neg_chips(self,
chips: 'Cx4',
pos_chip_ids: 'D',
neg_boxes: 'Nx4'):
"""
:return: neg_chip2box_num, dict: chipid->neg_box_num
"""
neg_chip_ids = np.setdiff1d(np.arange(len(chips)), pos_chip_ids)
neg_chips = chips[neg_chip_ids]
iob = intersection_over_box(neg_chips, neg_boxes)
iob_threshold_to_find_chips = 0.7
chosen_neg_chip_ids, chip_id2overlap_box_num = \
self._find_chips_to_cover_overlaped_boxes(iob, iob_threshold_to_find_chips)
neg_chipid2box_num = {}
for cid in chosen_neg_chip_ids:
box_num = chip_id2overlap_box_num[cid]
raw_chip_id = neg_chip_ids[cid]
neg_chipid2box_num[raw_chip_id] = box_num
return neg_chipid2box_num
def crop_infer_anno_records(self, records: List[dict]):
"""
transform image record to chips record
:param records:
:return: new_records, list of dict like
{
'im_file': 'fake_image1.jpg',
'im_id': np.array([1]), # new _global_chip_id as im_id
'h': h, # chip height
'w': w, # chip width
'chip': [x1, y1, x2, y2] # added
'ori_im_h': ori_im_h # added, origin image height
'ori_im_w': ori_im_w # added, origin image width
'scale_i': 0 # added,
}
"""
self.chip_records = []
self._global_chip_id = 1 # im_id start from 1
self._global_chip_id2img_id = {}
for r in records:
for scale_i in range(self.scale_num):
self._get_current_scale_parameters(scale_i, r)
# Cx4
chips = self._create_chips(r['h'], r['w'], self._cur_scale)
cur_img_chip_record = self._get_chips_records(r, chips,
scale_i)
self.chip_records.extend(cur_img_chip_record)
return self.chip_records
def _get_chips_records(self, rec, chips, scale_i):
cur_img_chip_records = []
ori_im_h = rec["h"]
ori_im_w = rec["w"]
im_file = rec["im_file"]
ori_im_id = rec["im_id"]
for id, chip in enumerate(chips):
chip_rec = {}
x1, y1, x2, y2 = chip
chip_h = y2 - y1
chip_w = x2 - x1
chip_rec["im_file"] = im_file
chip_rec["im_id"] = self._global_chip_id
chip_rec["h"] = chip_h
chip_rec["w"] = chip_w
chip_rec["chip"] = chip
chip_rec["ori_im_h"] = ori_im_h
chip_rec["ori_im_w"] = ori_im_w
chip_rec["scale_i"] = scale_i
self._global_chip_id2img_id[self._global_chip_id] = int(ori_im_id)
self._global_chip_id += 1
cur_img_chip_records.append(chip_rec)
return cur_img_chip_records
def aggregate_chips_detections(self, results, records=None):
"""
# 1. transform chip dets to image dets
# 2. nms boxes per image;
# 3. format output results
:param results:
:param roidb:
:return:
"""
results = deepcopy(results)
records = records if records else self.chip_records
img_id2bbox = self._transform_chip2image_bboxes(results, records)
nms_img_id2bbox = self._nms_dets(img_id2bbox)
aggregate_results = self._reformat_results(nms_img_id2bbox)
return aggregate_results
def _transform_chip2image_bboxes(self, results, records):
# 1. Transform chip dets to image dets;
# 2. Filter valid range;
# 3. Reformat and Aggregate chip dets to Get scale_cls_dets
img_id2bbox = defaultdict(list)
for result in results:
bbox_locs = result['bbox']
bbox_nums = result['bbox_num']
if len(bbox_locs) == 1 and bbox_locs[0][
0] == -1: # current batch has no detections
# bbox_locs = array([[-1.]], dtype=float32); bbox_nums = [[1]]
# MultiClassNMS output: If there is no detected boxes for all images, lod will be set to {1} and Out only contains one value which is -1.
continue
im_ids = result['im_id'] # replace with range(len(bbox_nums))
last_bbox_num = 0
for idx, im_id in enumerate(im_ids):
cur_bbox_len = bbox_nums[idx]
bboxes = bbox_locs[last_bbox_num:last_bbox_num + cur_bbox_len]
last_bbox_num += cur_bbox_len
# box: [num_id, score, xmin, ymin, xmax, ymax]
if len(bboxes) == 0: # current image has no detections
continue
chip_rec = records[int(im_id) -
1] # im_id starts from 1, type is np.int64
image_size = max(chip_rec["ori_im_h"], chip_rec["ori_im_w"])
bboxes = transform_chip_boxes2image_boxes(
bboxes, chip_rec["chip"], chip_rec["ori_im_h"],
chip_rec["ori_im_w"])
scale_i = chip_rec["scale_i"]
cur_scale = self._get_current_scale(self.target_sizes[scale_i],
image_size)
_, valid_boxes_idx = self._validate_boxes(
self.valid_box_ratio_ranges[scale_i], image_size,
bboxes[:, 2:], cur_scale)
ori_img_id = self._global_chip_id2img_id[int(im_id)]
img_id2bbox[ori_img_id].append(bboxes[valid_boxes_idx])
return img_id2bbox
def _nms_dets(self, img_id2bbox):
# 1. NMS on each image-class
# 2. Limit number of detections to MAX_PER_IMAGE if requested
max_per_img = self.max_per_img
nms_thresh = self.nms_thresh
for img_id in img_id2bbox:
box = img_id2bbox[
img_id] # list of np.array of shape [N, 6], 6 is [label, score, x1, y1, x2, y2]
box = np.concatenate(box, axis=0)
nms_dets = nms(box, nms_thresh)
if max_per_img > 0:
if len(nms_dets) > max_per_img:
keep = np.argsort(-nms_dets[:, 1])[:max_per_img]
nms_dets = nms_dets[keep]
img_id2bbox[img_id] = nms_dets
return img_id2bbox
def _reformat_results(self, img_id2bbox):
"""reformat results"""
im_ids = img_id2bbox.keys()
results = []
for img_id in im_ids: # output by original im_id order
if len(img_id2bbox[img_id]) == 0:
bbox = np.array(
[[-1., 0., 0., 0., 0., 0.]]) # edge case: no detections
bbox_num = np.array([0])
else:
# np.array of shape [N, 6], 6 is [label, score, x1, y1, x2, y2]
bbox = img_id2bbox[img_id]
bbox_num = np.array([len(bbox)])
res = dict(
im_id=np.array([[img_id]]), bbox=bbox, bbox_num=bbox_num)
results.append(res)
return results
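# Output format sketch for the aggregation above (illustrative; the numbers are
# assumed values, not taken from this repository). Each entry of the returned
# list describes one original image, with chip detections already mapped back
# to image coordinates and NMS-ed:
#
#   {'im_id': np.array([[3]]),
#    'bbox': np.array([[label, score, x1, y1, x2, y2], ...]),
#    'bbox_num': np.array([N])}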

@@ -0,0 +1,170 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
def bbox_area(boxes):
return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
def intersection_over_box(chips, boxes):
"""
intersection area over box area
:param chips: C
:param boxes: B
:return: iob, CxB
"""
M = chips.shape[0]
N = boxes.shape[0]
if M * N == 0:
return np.zeros([M, N], dtype='float32')
box_area = bbox_area(boxes) # B
inter_x2y2 = np.minimum(np.expand_dims(chips, 1)[:, :, 2:],
boxes[:, 2:]) # CxBX2
inter_x1y1 = np.maximum(np.expand_dims(chips, 1)[:, :, :2],
boxes[:, :2]) # CxBx2
inter_wh = inter_x2y2 - inter_x1y1
inter_wh = np.clip(inter_wh, a_min=0, a_max=None)
inter_area = inter_wh[:, :, 0] * inter_wh[:, :, 1] # CxB
iob = inter_area / np.expand_dims(box_area, 0)
return iob
def clip_boxes(boxes, im_shape):
"""
Clip boxes to image boundaries.
:param boxes: [N, 4]
:param im_shape: tuple of 2, [h, w]
:return: [N, 4]
"""
# x1 >= 0
boxes[:, 0] = np.clip(boxes[:, 0], 0, im_shape[1] - 1)
# y1 >= 0
boxes[:, 1] = np.clip(boxes[:, 1], 0, im_shape[0] - 1)
# x2 < im_shape[1]
boxes[:, 2] = np.clip(boxes[:, 2], 1, im_shape[1])
# y2 < im_shape[0]
boxes[:, 3] = np.clip(boxes[:, 3], 1, im_shape[0])
return boxes
def transform_chip_box(gt_bbox: 'Gx4', boxes_idx: 'B', chip: '4'):
boxes_idx = np.array(boxes_idx)
cur_gt_bbox = gt_bbox[boxes_idx].copy() # Bx4
x1, y1, x2, y2 = chip
cur_gt_bbox[:, 0] -= x1
cur_gt_bbox[:, 1] -= y1
cur_gt_bbox[:, 2] -= x1
cur_gt_bbox[:, 3] -= y1
h = y2 - y1
w = x2 - x1
cur_gt_bbox = clip_boxes(cur_gt_bbox, (h, w))
ws = (cur_gt_bbox[:, 2] - cur_gt_bbox[:, 0]).astype(np.int32)
hs = (cur_gt_bbox[:, 3] - cur_gt_bbox[:, 1]).astype(np.int32)
valid_idx = (ws >= 2) & (hs >= 2)
return cur_gt_bbox[valid_idx], boxes_idx[valid_idx]
def find_chips_to_cover_overlaped_boxes(iob, overlap_threshold):
chip_ids, box_ids = np.nonzero(iob >= overlap_threshold)
chip_id2overlap_box_num = np.bincount(chip_ids) # 1d array
chip_id2overlap_box_num = np.pad(
chip_id2overlap_box_num, (0, len(iob) - len(chip_id2overlap_box_num)),
constant_values=0)
chosen_chip_ids = []
while len(box_ids) > 0:
value_counts = np.bincount(chip_ids) # 1d array
max_count_chip_id = np.argmax(value_counts)
assert max_count_chip_id not in chosen_chip_ids
chosen_chip_ids.append(max_count_chip_id)
box_ids_in_cur_chip = box_ids[chip_ids == max_count_chip_id]
ids_not_in_cur_boxes_mask = np.logical_not(
np.isin(box_ids, box_ids_in_cur_chip))
chip_ids = chip_ids[ids_not_in_cur_boxes_mask]
box_ids = box_ids[ids_not_in_cur_boxes_mask]
return chosen_chip_ids, chip_id2overlap_box_num
def transform_chip_boxes2image_boxes(chip_boxes, chip, img_h, img_w):
chip_boxes = np.array(sorted(chip_boxes, key=lambda item: -item[1]))
xmin, ymin, _, _ = chip
# Transform to origin image loc
chip_boxes[:, 2] += xmin
chip_boxes[:, 4] += xmin
chip_boxes[:, 3] += ymin
chip_boxes[:, 5] += ymin
chip_boxes = clip_boxes(chip_boxes, (img_h, img_w))
return chip_boxes
def nms(dets, thresh):
"""Apply classic DPM-style greedy NMS."""
if dets.shape[0] == 0:
return dets[[], :]
scores = dets[:, 1]
x1 = dets[:, 2]
y1 = dets[:, 3]
x2 = dets[:, 4]
y2 = dets[:, 5]
areas = (x2 - x1 + 1) * (y2 - y1 + 1)
order = scores.argsort()[::-1]
ndets = dets.shape[0]
    suppressed = np.zeros((ndets), dtype=int)  # builtin int; np.int is removed in recent NumPy
# nominal indices
# _i, _j
# sorted indices
# i, j
# temp variables for box i's (the box currently under consideration)
# ix1, iy1, ix2, iy2, iarea
# variables for computing overlap with box j (lower scoring box)
# xx1, yy1, xx2, yy2
# w, h
# inter, ovr
for _i in range(ndets):
i = order[_i]
if suppressed[i] == 1:
continue
ix1 = x1[i]
iy1 = y1[i]
ix2 = x2[i]
iy2 = y2[i]
iarea = areas[i]
for _j in range(_i + 1, ndets):
j = order[_j]
if suppressed[j] == 1:
continue
xx1 = max(ix1, x1[j])
yy1 = max(iy1, y1[j])
xx2 = min(ix2, x2[j])
yy2 = min(iy2, y2[j])
w = max(0.0, xx2 - xx1 + 1)
h = max(0.0, yy2 - yy1 + 1)
inter = w * h
ovr = inter / (iarea + areas[j] - inter)
if ovr >= thresh:
suppressed[j] = 1
keep = np.where(suppressed == 0)[0]
dets = dets[keep, :]
return dets
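# Minimal usage sketch for the helpers above (toy values, assumed for
# illustration only; the training pipeline never runs this module directly):
if __name__ == '__main__':
    chips = np.array([[0., 0., 100., 100.], [50., 50., 150., 150.]])
    boxes = np.array([[10., 10., 40., 40.], [120., 120., 140., 140.]])
    # fraction of each box covered by each chip, shape CxB
    print(intersection_over_box(chips, boxes))
    # detections are rows of [label, score, x1, y1, x2, y2]
    dets = np.array([[0., 0.9, 10., 10., 40., 40.],
                     [0., 0.8, 12., 12., 42., 42.],
                     [1., 0.7, 60., 60., 90., 90.]])
    # greedy NMS keeps the 0.9 and 0.7 boxes and suppresses the 0.8 one
    print(nms(dets, thresh=0.5))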

@@ -0,0 +1,302 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import traceback
import six
import sys
if sys.version_info >= (3, 0):
pass
else:
pass
import numpy as np
from paddle.io import DataLoader, DistributedBatchSampler
from paddle.fluid.dataloader.collate import default_collate_fn
from paddlers.models.ppdet.core.workspace import register
from . import transform
from .shm_utils import _get_shared_memory_size_in_M
from paddlers.models.ppdet.utils.logger import setup_logger
logger = setup_logger('reader')
MAIN_PID = os.getpid()
class Compose(object):
def __init__(self, transforms, num_classes=80):
self.transforms = transforms
self.transforms_cls = []
for t in self.transforms:
for k, v in t.items():
op_cls = getattr(transform, k)
f = op_cls(**v)
if hasattr(f, 'num_classes'):
f.num_classes = num_classes
self.transforms_cls.append(f)
def __call__(self, data):
for f in self.transforms_cls:
try:
data = f(data)
except Exception as e:
stack_info = traceback.format_exc()
logger.warning("fail to map sample transform [{}] "
"with error: {} and stack:\n{}".format(
f, e, str(stack_info)))
raise e
return data
class BatchCompose(Compose):
def __init__(self, transforms, num_classes=80, collate_batch=True):
super(BatchCompose, self).__init__(transforms, num_classes)
self.collate_batch = collate_batch
def __call__(self, data):
for f in self.transforms_cls:
try:
data = f(data)
except Exception as e:
stack_info = traceback.format_exc()
logger.warning("fail to map batch transform [{}] "
"with error: {} and stack:\n{}".format(
f, e, str(stack_info)))
raise e
        # remove keys which are not needed by the model
extra_key = ['h', 'w', 'flipped']
for k in extra_key:
for sample in data:
if k in sample:
sample.pop(k)
        # batch data; if a user-defined batch function is needed,
        # use it here
if self.collate_batch:
batch_data = default_collate_fn(data)
else:
batch_data = {}
for k in data[0].keys():
tmp_data = []
for i in range(len(data)):
tmp_data.append(data[i][k])
                if 'gt_' not in k and 'is_crowd' not in k and 'difficult' not in k:
tmp_data = np.stack(tmp_data, axis=0)
batch_data[k] = tmp_data
return batch_data
class BaseDataLoader(object):
"""
Base DataLoader implementation for detection models
Args:
sample_transforms (list): a list of transforms to perform
on each sample
batch_transforms (list): a list of transforms to perform
on batch
batch_size (int): batch size for batch collating, default 1.
shuffle (bool): whether to shuffle samples
        drop_last (bool): whether to drop the last incomplete batch,
            default False
        num_classes (int): class number of dataset, default 80
        collate_batch (bool): whether to collate batch in dataloader.
            If set to True, the samples will be collated into batches
            according to the batch size. Otherwise, the ground-truths will
            not be collated, which is used when the number of ground-truths
            differs between samples.
        use_shared_memory (bool): whether to use shared memory to
            accelerate data loading, enable this only if you
            are sure that the shared memory size of your OS
            is larger than the memory cost of the model's input data.
            Note that shared memory will be automatically
            disabled if the shared memory of the OS is less than
            1G, which is not enough for detection models.
            Default False.
"""
def __init__(self,
sample_transforms=[],
batch_transforms=[],
batch_size=1,
shuffle=False,
drop_last=False,
num_classes=80,
collate_batch=True,
use_shared_memory=False,
**kwargs):
# sample transform
self._sample_transforms = Compose(
sample_transforms, num_classes=num_classes)
        # batch transform
self._batch_transforms = BatchCompose(batch_transforms, num_classes,
collate_batch)
self.batch_size = batch_size
self.shuffle = shuffle
self.drop_last = drop_last
self.use_shared_memory = use_shared_memory
self.kwargs = kwargs
def __call__(self,
dataset,
worker_num,
batch_sampler=None,
return_list=False):
self.dataset = dataset
self.dataset.check_or_download_dataset()
self.dataset.parse_dataset()
# get data
self.dataset.set_transform(self._sample_transforms)
# set kwargs
self.dataset.set_kwargs(**self.kwargs)
# batch sampler
if batch_sampler is None:
self._batch_sampler = DistributedBatchSampler(
self.dataset,
batch_size=self.batch_size,
shuffle=self.shuffle,
drop_last=self.drop_last)
else:
self._batch_sampler = batch_sampler
        # DataLoader does not start sub-processes on Windows and macOS,
        # so shared memory is not needed on those systems
use_shared_memory = self.use_shared_memory and \
sys.platform not in ['win32', 'darwin']
# check whether shared memory size is bigger than 1G(1024M)
if use_shared_memory:
shm_size = _get_shared_memory_size_in_M()
if shm_size is not None and shm_size < 1024.:
logger.warning("Shared memory size is less than 1G, "
"disable shared_memory in DataLoader")
use_shared_memory = False
self.dataloader = DataLoader(
dataset=self.dataset,
batch_sampler=self._batch_sampler,
collate_fn=self._batch_transforms,
num_workers=worker_num,
return_list=return_list,
use_shared_memory=use_shared_memory)
self.loader = iter(self.dataloader)
return self
def __len__(self):
return len(self._batch_sampler)
def __iter__(self):
return self
def __next__(self):
try:
return next(self.loader)
except StopIteration:
self.loader = iter(self.dataloader)
six.reraise(*sys.exc_info())
def next(self):
# python2 compatibility
return self.__next__()
@register
class TrainReader(BaseDataLoader):
__shared__ = ['num_classes']
def __init__(self,
sample_transforms=[],
batch_transforms=[],
batch_size=1,
shuffle=True,
drop_last=True,
num_classes=80,
collate_batch=True,
**kwargs):
super(TrainReader, self).__init__(sample_transforms, batch_transforms,
batch_size, shuffle, drop_last,
num_classes, collate_batch, **kwargs)
@register
class EvalReader(BaseDataLoader):
__shared__ = ['num_classes']
def __init__(self,
sample_transforms=[],
batch_transforms=[],
batch_size=1,
shuffle=False,
drop_last=True,
num_classes=80,
**kwargs):
super(EvalReader, self).__init__(sample_transforms, batch_transforms,
batch_size, shuffle, drop_last,
num_classes, **kwargs)
@register
class TestReader(BaseDataLoader):
__shared__ = ['num_classes']
def __init__(self,
sample_transforms=[],
batch_transforms=[],
batch_size=1,
shuffle=False,
drop_last=False,
num_classes=80,
**kwargs):
super(TestReader, self).__init__(sample_transforms, batch_transforms,
batch_size, shuffle, drop_last,
num_classes, **kwargs)
@register
class EvalMOTReader(BaseDataLoader):
__shared__ = ['num_classes']
def __init__(self,
sample_transforms=[],
batch_transforms=[],
batch_size=1,
shuffle=False,
drop_last=False,
num_classes=1,
**kwargs):
super(EvalMOTReader, self).__init__(
sample_transforms, batch_transforms, batch_size, shuffle,
drop_last, num_classes, **kwargs)
@register
class TestMOTReader(BaseDataLoader):
__shared__ = ['num_classes']
def __init__(self,
sample_transforms=[],
batch_transforms=[],
batch_size=1,
shuffle=False,
drop_last=False,
num_classes=1,
**kwargs):
super(TestMOTReader, self).__init__(
sample_transforms, batch_transforms, batch_size, shuffle,
drop_last, num_classes, **kwargs)
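# Minimal configuration sketch (illustrative; the transform names below are
# assumptions about what the accompanying `transform` package registers, and
# `my_dataset` stands in for any DetDataset subclass such as COCODataSet):
#
#   reader = TrainReader(
#       sample_transforms=[{'Decode': {}},
#                          {'Resize': {'target_size': [608, 608], 'keep_ratio': False}}],
#       batch_transforms=[{'PadBatch': {'pad_to_stride': 32}}],
#       batch_size=2, shuffle=True)
#   loader = reader(my_dataset, worker_num=2)
#   for batch in loader:
#       ...  # batch is the dict produced by BatchCompose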

@@ -0,0 +1,67 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
SIZE_UNIT = ['K', 'M', 'G', 'T']
SHM_QUERY_CMD = 'df -h'
SHM_KEY = 'shm'
SHM_DEFAULT_MOUNT = '/dev/shm'
# [ shared memory size check ]
# In detection models, image/target data occupies a lot of memory, and
# will occupy lots of shared memory in multi-process DataLoader, we use
# following code to get shared memory size and perform a size check to
# disable shared memory use if shared memory size is not enough.
# Shared memory getting process as follows:
# 1. use `df -h` to get all mount info
# 2. pick up spaces whose mount info contains 'shm'
# 3. if there is only one 'shm' space, return its size
# 4. if there are multiple 'shm' spaces, try to find the default mount
#    directory '/dev/shm' used on Linux-like systems; otherwise return the
#    biggest space size.
def _parse_size_in_M(size_str):
num, unit = size_str[:-1], size_str[-1]
assert unit in SIZE_UNIT, \
"unknown shm size unit {}".format(unit)
return float(num) * \
(1024 ** (SIZE_UNIT.index(unit) - 1))
def _get_shared_memory_size_in_M():
try:
df_infos = os.popen(SHM_QUERY_CMD).readlines()
except:
return None
else:
shm_infos = []
for df_info in df_infos:
info = df_info.strip()
if info.find(SHM_KEY) >= 0:
shm_infos.append(info.split())
if len(shm_infos) == 0:
return None
elif len(shm_infos) == 1:
return _parse_size_in_M(shm_infos[0][3])
else:
default_mount_infos = [
si for si in shm_infos if si[-1] == SHM_DEFAULT_MOUNT
]
if default_mount_infos:
return _parse_size_in_M(default_mount_infos[0][3])
else:
return max([_parse_size_in_M(si[3]) for si in shm_infos])
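# Usage sketch (illustrative; the returned value depends on the host system
# and is None when `df -h` is unavailable or no 'shm' mount is found):
if __name__ == '__main__':
    shm_size = _get_shared_memory_size_in_M()
    if shm_size is None or shm_size < 1024.:
        print('shared memory missing or below 1G, better to disable it')
    else:
        print('shared memory size: {:.0f}M'.format(shm_size))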

@@ -0,0 +1,29 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import coco
from . import voc
from . import widerface
from . import category
from . import keypoint_coco
from . import mot
from . import sniper_coco
from .coco import *
from .voc import *
from .widerface import *
from .category import *
from .keypoint_coco import *
from .mot import *
from .sniper_coco import SniperCOCODataSet

@@ -0,0 +1,904 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
from paddlers.models.ppdet.data.source.voc import pascalvoc_label
from paddlers.models.ppdet.data.source.widerface import widerface_label
from paddlers.models.ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
__all__ = ['get_categories']
def get_categories(metric_type, anno_file=None, arch=None):
"""
Get class id to category id map and category id
to category name map from annotation file.
Args:
metric_type (str): metric type, currently support 'coco', 'voc', 'oid'
and 'widerface'.
anno_file (str): annotation file path
"""
if arch == 'keypoint_arch':
return (None, {'id': 'keypoint'})
if metric_type.lower() == 'coco' or metric_type.lower(
) == 'rbox' or metric_type.lower() == 'snipercoco':
if anno_file and os.path.isfile(anno_file):
# lazy import pycocotools here
from pycocotools.coco import COCO
coco = COCO(anno_file)
cats = coco.loadCats(coco.getCatIds())
clsid2catid = {i: cat['id'] for i, cat in enumerate(cats)}
catid2name = {cat['id']: cat['name'] for cat in cats}
return clsid2catid, catid2name
# anno file not exist, load default categories of COCO17
else:
if metric_type.lower() == 'rbox':
return _dota_category()
return _coco17_category()
elif metric_type.lower() == 'voc':
if anno_file and os.path.isfile(anno_file):
cats = []
with open(anno_file) as f:
for line in f.readlines():
cats.append(line.strip())
if cats[0] == 'background':
cats = cats[1:]
clsid2catid = {i: i for i in range(len(cats))}
catid2name = {i: name for i, name in enumerate(cats)}
return clsid2catid, catid2name
# anno file not exist, load default categories of
# VOC all 20 categories
else:
return _vocall_category()
elif metric_type.lower() == 'oid':
if anno_file and os.path.isfile(anno_file):
logger.warning("only default categories support for OID19")
return _oid19_category()
elif metric_type.lower() == 'widerface':
return _widerface_category()
elif metric_type.lower() == 'keypointtopdowncocoeval' or metric_type.lower(
) == 'keypointtopdownmpiieval':
return (None, {'id': 'keypoint'})
elif metric_type.lower() in ['mot', 'motdet', 'reid']:
if anno_file and os.path.isfile(anno_file):
cats = []
with open(anno_file) as f:
for line in f.readlines():
cats.append(line.strip())
if cats[0] == 'background':
cats = cats[1:]
clsid2catid = {i: i for i in range(len(cats))}
catid2name = {i: name for i, name in enumerate(cats)}
return clsid2catid, catid2name
# anno file not exist, load default category 'pedestrian'.
else:
return _mot_category(category='pedestrian')
elif metric_type.lower() in ['kitti', 'bdd100kmot']:
return _mot_category(category='vehicle')
elif metric_type.lower() in ['mcmot']:
if anno_file and os.path.isfile(anno_file):
cats = []
with open(anno_file) as f:
for line in f.readlines():
cats.append(line.strip())
if cats[0] == 'background':
cats = cats[1:]
clsid2catid = {i: i for i in range(len(cats))}
catid2name = {i: name for i, name in enumerate(cats)}
return clsid2catid, catid2name
# anno file not exist, load default categories of visdrone all 10 categories
else:
return _visdrone_category()
else:
raise ValueError("unknown metric type {}".format(metric_type))
def _mot_category(category='pedestrian'):
"""
Get class id to category id map and category id
to category name map of mot dataset
"""
label_map = {category: 0}
label_map = sorted(label_map.items(), key=lambda x: x[1])
cats = [l[0] for l in label_map]
clsid2catid = {i: i for i in range(len(cats))}
catid2name = {i: name for i, name in enumerate(cats)}
return clsid2catid, catid2name
def _coco17_category():
"""
Get class id to category id map and category id
to category name map of COCO2017 dataset
"""
clsid2catid = {
1: 1,
2: 2,
3: 3,
4: 4,
5: 5,
6: 6,
7: 7,
8: 8,
9: 9,
10: 10,
11: 11,
12: 13,
13: 14,
14: 15,
15: 16,
16: 17,
17: 18,
18: 19,
19: 20,
20: 21,
21: 22,
22: 23,
23: 24,
24: 25,
25: 27,
26: 28,
27: 31,
28: 32,
29: 33,
30: 34,
31: 35,
32: 36,
33: 37,
34: 38,
35: 39,
36: 40,
37: 41,
38: 42,
39: 43,
40: 44,
41: 46,
42: 47,
43: 48,
44: 49,
45: 50,
46: 51,
47: 52,
48: 53,
49: 54,
50: 55,
51: 56,
52: 57,
53: 58,
54: 59,
55: 60,
56: 61,
57: 62,
58: 63,
59: 64,
60: 65,
61: 67,
62: 70,
63: 72,
64: 73,
65: 74,
66: 75,
67: 76,
68: 77,
69: 78,
70: 79,
71: 80,
72: 81,
73: 82,
74: 84,
75: 85,
76: 86,
77: 87,
78: 88,
79: 89,
80: 90
}
catid2name = {
0: 'background',
1: 'person',
2: 'bicycle',
3: 'car',
4: 'motorcycle',
5: 'airplane',
6: 'bus',
7: 'train',
8: 'truck',
9: 'boat',
10: 'traffic light',
11: 'fire hydrant',
13: 'stop sign',
14: 'parking meter',
15: 'bench',
16: 'bird',
17: 'cat',
18: 'dog',
19: 'horse',
20: 'sheep',
21: 'cow',
22: 'elephant',
23: 'bear',
24: 'zebra',
25: 'giraffe',
27: 'backpack',
28: 'umbrella',
31: 'handbag',
32: 'tie',
33: 'suitcase',
34: 'frisbee',
35: 'skis',
36: 'snowboard',
37: 'sports ball',
38: 'kite',
39: 'baseball bat',
40: 'baseball glove',
41: 'skateboard',
42: 'surfboard',
43: 'tennis racket',
44: 'bottle',
46: 'wine glass',
47: 'cup',
48: 'fork',
49: 'knife',
50: 'spoon',
51: 'bowl',
52: 'banana',
53: 'apple',
54: 'sandwich',
55: 'orange',
56: 'broccoli',
57: 'carrot',
58: 'hot dog',
59: 'pizza',
60: 'donut',
61: 'cake',
62: 'chair',
63: 'couch',
64: 'potted plant',
65: 'bed',
67: 'dining table',
70: 'toilet',
72: 'tv',
73: 'laptop',
74: 'mouse',
75: 'remote',
76: 'keyboard',
77: 'cell phone',
78: 'microwave',
79: 'oven',
80: 'toaster',
81: 'sink',
82: 'refrigerator',
84: 'book',
85: 'clock',
86: 'vase',
87: 'scissors',
88: 'teddy bear',
89: 'hair drier',
90: 'toothbrush'
}
clsid2catid = {k - 1: v for k, v in clsid2catid.items()}
catid2name.pop(0)
return clsid2catid, catid2name
def _dota_category():
"""
Get class id to category id map and category id
to category name map of dota dataset
"""
catid2name = {
0: 'background',
1: 'plane',
2: 'baseball-diamond',
3: 'bridge',
4: 'ground-track-field',
5: 'small-vehicle',
6: 'large-vehicle',
7: 'ship',
8: 'tennis-court',
9: 'basketball-court',
10: 'storage-tank',
11: 'soccer-ball-field',
12: 'roundabout',
13: 'harbor',
14: 'swimming-pool',
15: 'helicopter'
}
catid2name.pop(0)
clsid2catid = {i: i + 1 for i in range(len(catid2name))}
return clsid2catid, catid2name
def _vocall_category():
"""
Get class id to category id map and category id
to category name map of mixup voc dataset
"""
label_map = pascalvoc_label()
label_map = sorted(label_map.items(), key=lambda x: x[1])
cats = [l[0] for l in label_map]
clsid2catid = {i: i for i in range(len(cats))}
catid2name = {i: name for i, name in enumerate(cats)}
return clsid2catid, catid2name
def _widerface_category():
label_map = widerface_label()
label_map = sorted(label_map.items(), key=lambda x: x[1])
cats = [l[0] for l in label_map]
clsid2catid = {i: i for i in range(len(cats))}
catid2name = {i: name for i, name in enumerate(cats)}
return clsid2catid, catid2name
def _oid19_category():
clsid2catid = {k: k + 1 for k in range(500)}
catid2name = {
0: "background",
1: "Infant bed",
2: "Rose",
3: "Flag",
4: "Flashlight",
5: "Sea turtle",
6: "Camera",
7: "Animal",
8: "Glove",
9: "Crocodile",
10: "Cattle",
11: "House",
12: "Guacamole",
13: "Penguin",
14: "Vehicle registration plate",
15: "Bench",
16: "Ladybug",
17: "Human nose",
18: "Watermelon",
19: "Flute",
20: "Butterfly",
21: "Washing machine",
22: "Raccoon",
23: "Segway",
24: "Taco",
25: "Jellyfish",
26: "Cake",
27: "Pen",
28: "Cannon",
29: "Bread",
30: "Tree",
31: "Shellfish",
32: "Bed",
33: "Hamster",
34: "Hat",
35: "Toaster",
36: "Sombrero",
37: "Tiara",
38: "Bowl",
39: "Dragonfly",
40: "Moths and butterflies",
41: "Antelope",
42: "Vegetable",
43: "Torch",
44: "Building",
45: "Power plugs and sockets",
46: "Blender",
47: "Billiard table",
48: "Cutting board",
49: "Bronze sculpture",
50: "Turtle",
51: "Broccoli",
52: "Tiger",
53: "Mirror",
54: "Bear",
55: "Zucchini",
56: "Dress",
57: "Volleyball",
58: "Guitar",
59: "Reptile",
60: "Golf cart",
61: "Tart",
62: "Fedora",
63: "Carnivore",
64: "Car",
65: "Lighthouse",
66: "Coffeemaker",
67: "Food processor",
68: "Truck",
69: "Bookcase",
70: "Surfboard",
71: "Footwear",
72: "Bench",
73: "Necklace",
74: "Flower",
75: "Radish",
76: "Marine mammal",
77: "Frying pan",
78: "Tap",
79: "Peach",
80: "Knife",
81: "Handbag",
82: "Laptop",
83: "Tent",
84: "Ambulance",
85: "Christmas tree",
86: "Eagle",
87: "Limousine",
88: "Kitchen & dining room table",
89: "Polar bear",
90: "Tower",
91: "Football",
92: "Willow",
93: "Human head",
94: "Stop sign",
95: "Banana",
96: "Mixer",
97: "Binoculars",
98: "Dessert",
99: "Bee",
100: "Chair",
101: "Wood-burning stove",
102: "Flowerpot",
103: "Beaker",
104: "Oyster",
105: "Woodpecker",
106: "Harp",
107: "Bathtub",
108: "Wall clock",
109: "Sports uniform",
110: "Rhinoceros",
111: "Beehive",
112: "Cupboard",
113: "Chicken",
114: "Man",
115: "Blue jay",
116: "Cucumber",
117: "Balloon",
118: "Kite",
119: "Fireplace",
120: "Lantern",
121: "Missile",
122: "Book",
123: "Spoon",
124: "Grapefruit",
125: "Squirrel",
126: "Orange",
127: "Coat",
128: "Punching bag",
129: "Zebra",
130: "Billboard",
131: "Bicycle",
132: "Door handle",
133: "Mechanical fan",
134: "Ring binder",
135: "Table",
136: "Parrot",
137: "Sock",
138: "Vase",
139: "Weapon",
140: "Shotgun",
141: "Glasses",
142: "Seahorse",
143: "Belt",
144: "Watercraft",
145: "Window",
146: "Giraffe",
147: "Lion",
148: "Tire",
149: "Vehicle",
150: "Canoe",
151: "Tie",
152: "Shelf",
153: "Picture frame",
154: "Printer",
155: "Human leg",
156: "Boat",
157: "Slow cooker",
158: "Croissant",
159: "Candle",
160: "Pancake",
161: "Pillow",
162: "Coin",
163: "Stretcher",
164: "Sandal",
165: "Woman",
166: "Stairs",
167: "Harpsichord",
168: "Stool",
169: "Bus",
170: "Suitcase",
171: "Human mouth",
172: "Juice",
173: "Skull",
174: "Door",
175: "Violin",
176: "Chopsticks",
177: "Digital clock",
178: "Sunflower",
179: "Leopard",
180: "Bell pepper",
181: "Harbor seal",
182: "Snake",
183: "Sewing machine",
184: "Goose",
185: "Helicopter",
186: "Seat belt",
187: "Coffee cup",
188: "Microwave oven",
189: "Hot dog",
190: "Countertop",
191: "Serving tray",
192: "Dog bed",
193: "Beer",
194: "Sunglasses",
195: "Golf ball",
196: "Waffle",
197: "Palm tree",
198: "Trumpet",
199: "Ruler",
200: "Helmet",
201: "Ladder",
202: "Office building",
203: "Tablet computer",
204: "Toilet paper",
205: "Pomegranate",
206: "Skirt",
207: "Gas stove",
208: "Cookie",
209: "Cart",
210: "Raven",
211: "Egg",
212: "Burrito",
213: "Goat",
214: "Kitchen knife",
215: "Skateboard",
216: "Salt and pepper shakers",
217: "Lynx",
218: "Boot",
219: "Platter",
220: "Ski",
221: "Swimwear",
222: "Swimming pool",
223: "Drinking straw",
224: "Wrench",
225: "Drum",
226: "Ant",
227: "Human ear",
228: "Headphones",
229: "Fountain",
230: "Bird",
231: "Jeans",
232: "Television",
233: "Crab",
234: "Microphone",
235: "Home appliance",
236: "Snowplow",
237: "Beetle",
238: "Artichoke",
239: "Jet ski",
240: "Stationary bicycle",
241: "Human hair",
242: "Brown bear",
243: "Starfish",
244: "Fork",
245: "Lobster",
246: "Corded phone",
247: "Drink",
248: "Saucer",
249: "Carrot",
250: "Insect",
251: "Clock",
252: "Castle",
253: "Tennis racket",
254: "Ceiling fan",
255: "Asparagus",
256: "Jaguar",
257: "Musical instrument",
258: "Train",
259: "Cat",
260: "Rifle",
261: "Dumbbell",
262: "Mobile phone",
263: "Taxi",
264: "Shower",
265: "Pitcher",
266: "Lemon",
267: "Invertebrate",
268: "Turkey",
269: "High heels",
270: "Bust",
271: "Elephant",
272: "Scarf",
273: "Barrel",
274: "Trombone",
275: "Pumpkin",
276: "Box",
277: "Tomato",
278: "Frog",
279: "Bidet",
280: "Human face",
281: "Houseplant",
282: "Van",
283: "Shark",
284: "Ice cream",
285: "Swim cap",
286: "Falcon",
287: "Ostrich",
288: "Handgun",
289: "Whiteboard",
290: "Lizard",
291: "Pasta",
292: "Snowmobile",
293: "Light bulb",
294: "Window blind",
295: "Muffin",
296: "Pretzel",
297: "Computer monitor",
298: "Horn",
299: "Furniture",
300: "Sandwich",
301: "Fox",
302: "Convenience store",
303: "Fish",
304: "Fruit",
305: "Earrings",
306: "Curtain",
307: "Grape",
308: "Sofa bed",
309: "Horse",
310: "Luggage and bags",
311: "Desk",
312: "Crutch",
313: "Bicycle helmet",
314: "Tick",
315: "Airplane",
316: "Canary",
317: "Spatula",
318: "Watch",
319: "Lily",
320: "Kitchen appliance",
321: "Filing cabinet",
322: "Aircraft",
323: "Cake stand",
324: "Candy",
325: "Sink",
326: "Mouse",
327: "Wine",
328: "Wheelchair",
329: "Goldfish",
330: "Refrigerator",
331: "French fries",
332: "Drawer",
333: "Treadmill",
334: "Picnic basket",
335: "Dice",
336: "Cabbage",
337: "Football helmet",
338: "Pig",
339: "Person",
340: "Shorts",
341: "Gondola",
342: "Honeycomb",
343: "Doughnut",
344: "Chest of drawers",
345: "Land vehicle",
346: "Bat",
347: "Monkey",
348: "Dagger",
349: "Tableware",
350: "Human foot",
351: "Mug",
352: "Alarm clock",
353: "Pressure cooker",
354: "Human hand",
355: "Tortoise",
356: "Baseball glove",
357: "Sword",
358: "Pear",
359: "Miniskirt",
360: "Traffic sign",
361: "Girl",
362: "Roller skates",
363: "Dinosaur",
364: "Porch",
365: "Human beard",
366: "Submarine sandwich",
367: "Screwdriver",
368: "Strawberry",
369: "Wine glass",
370: "Seafood",
371: "Racket",
372: "Wheel",
373: "Sea lion",
374: "Toy",
375: "Tea",
376: "Tennis ball",
377: "Waste container",
378: "Mule",
379: "Cricket ball",
380: "Pineapple",
381: "Coconut",
382: "Doll",
383: "Coffee table",
384: "Snowman",
385: "Lavender",
386: "Shrimp",
387: "Maple",
388: "Cowboy hat",
389: "Goggles",
390: "Rugby ball",
391: "Caterpillar",
392: "Poster",
393: "Rocket",
394: "Organ",
395: "Saxophone",
396: "Traffic light",
397: "Cocktail",
398: "Plastic bag",
399: "Squash",
400: "Mushroom",
401: "Hamburger",
402: "Light switch",
403: "Parachute",
404: "Teddy bear",
405: "Winter melon",
406: "Deer",
407: "Musical keyboard",
408: "Plumbing fixture",
409: "Scoreboard",
410: "Baseball bat",
411: "Envelope",
412: "Adhesive tape",
413: "Briefcase",
414: "Paddle",
415: "Bow and arrow",
416: "Telephone",
417: "Sheep",
418: "Jacket",
419: "Boy",
420: "Pizza",
421: "Otter",
422: "Office supplies",
423: "Couch",
424: "Cello",
425: "Bull",
426: "Camel",
427: "Ball",
428: "Duck",
429: "Whale",
430: "Shirt",
431: "Tank",
432: "Motorcycle",
433: "Accordion",
434: "Owl",
435: "Porcupine",
436: "Sun hat",
437: "Nail",
438: "Scissors",
439: "Swan",
440: "Lamp",
441: "Crown",
442: "Piano",
443: "Sculpture",
444: "Cheetah",
445: "Oboe",
446: "Tin can",
447: "Mango",
448: "Tripod",
449: "Oven",
450: "Mouse",
451: "Barge",
452: "Coffee",
453: "Snowboard",
454: "Common fig",
455: "Salad",
456: "Marine invertebrates",
457: "Umbrella",
458: "Kangaroo",
459: "Human arm",
460: "Measuring cup",
461: "Snail",
462: "Loveseat",
463: "Suit",
464: "Teapot",
465: "Bottle",
466: "Alpaca",
467: "Kettle",
468: "Trousers",
469: "Popcorn",
470: "Centipede",
471: "Spider",
472: "Sparrow",
473: "Plate",
474: "Bagel",
475: "Personal care",
476: "Apple",
477: "Brassiere",
478: "Bathroom cabinet",
479: "studio couch",
480: "Computer keyboard",
481: "Table tennis racket",
482: "Sushi",
483: "Cabinetry",
484: "Street light",
485: "Towel",
486: "Nightstand",
487: "Rabbit",
488: "Dolphin",
489: "Dog",
490: "Jug",
491: "Wok",
492: "Fire hydrant",
493: "Human eye",
494: "Skyscraper",
495: "Backpack",
496: "Potato",
497: "Paper towel",
498: "Lifejacket",
499: "Bicycle wheel",
500: "Toilet",
}
return clsid2catid, catid2name
def _visdrone_category():
clsid2catid = {i: i for i in range(10)}
catid2name = {
0: 'pedestrian',
1: 'people',
2: 'bicycle',
3: 'car',
4: 'van',
5: 'truck',
6: 'tricycle',
7: 'awning-tricycle',
8: 'bus',
9: 'motor'
}
return clsid2catid, catid2name
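# Usage sketch (illustrative): with no annotation file, the built-in default
# label list for the requested metric is returned.
if __name__ == '__main__':
    clsid2catid, catid2name = get_categories('voc')
    print(len(catid2name), 'classes, e.g.', catid2name[0])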

@@ -0,0 +1,251 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
from paddlers.models.ppdet.core.workspace import register, serializable
from .dataset import DetDataset
from paddlers.models.ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
@register
@serializable
class COCODataSet(DetDataset):
"""
Load dataset with COCO format.
Args:
dataset_dir (str): root directory for dataset.
image_dir (str): directory for images.
anno_path (str): coco annotation file path.
data_fields (list): key name of data dictionary, at least have 'image'.
sample_num (int): number of samples to load, -1 means all.
load_crowd (bool): whether to load crowded ground-truth.
False as default
allow_empty (bool): whether to load empty entry. False as default
        empty_ratio (float): the ratio of empty records to the total number
            of records; if empty_ratio is outside [0., 1.), the empty
            records are not sampled and all of them are kept. 1. as default
"""
def __init__(self,
dataset_dir=None,
image_dir=None,
anno_path=None,
data_fields=['image'],
sample_num=-1,
load_crowd=False,
allow_empty=False,
empty_ratio=1.):
super(COCODataSet, self).__init__(dataset_dir, image_dir, anno_path,
data_fields, sample_num)
self.load_image_only = False
self.load_semantic = False
self.load_crowd = load_crowd
self.allow_empty = allow_empty
self.empty_ratio = empty_ratio
def _sample_empty(self, records, num):
# if empty_ratio is out of [0. ,1.), do not sample the records
if self.empty_ratio < 0. or self.empty_ratio >= 1.:
return records
import random
sample_num = min(
int(num * self.empty_ratio / (1 - self.empty_ratio)), len(records))
records = random.sample(records, sample_num)
return records
def parse_dataset(self):
anno_path = os.path.join(self.dataset_dir, self.anno_path)
image_dir = os.path.join(self.dataset_dir, self.image_dir)
assert anno_path.endswith('.json'), \
'invalid coco annotation file: ' + anno_path
from pycocotools.coco import COCO
coco = COCO(anno_path)
img_ids = coco.getImgIds()
img_ids.sort()
cat_ids = coco.getCatIds()
records = []
empty_records = []
ct = 0
self.catid2clsid = dict({catid: i for i, catid in enumerate(cat_ids)})
self.cname2cid = dict({
coco.loadCats(catid)[0]['name']: clsid
for catid, clsid in self.catid2clsid.items()
})
if 'annotations' not in coco.dataset:
self.load_image_only = True
            logger.warning(
                'Annotation file: {} does not contain ground truth, '
                'only image information will be loaded.'.format(anno_path))
for img_id in img_ids:
img_anno = coco.loadImgs([img_id])[0]
im_fname = img_anno['file_name']
im_w = float(img_anno['width'])
im_h = float(img_anno['height'])
im_path = os.path.join(image_dir,
im_fname) if image_dir else im_fname
is_empty = False
if not os.path.exists(im_path):
logger.warning('Illegal image file: {}, and it will be '
'ignored'.format(im_path))
continue
if im_w < 0 or im_h < 0:
logger.warning(
'Illegal width: {} or height: {} in annotation, '
'and im_id: {} will be ignored'.format(im_w, im_h, img_id))
continue
coco_rec = {
'im_file': im_path,
'im_id': np.array([img_id]),
'h': im_h,
'w': im_w,
} if 'image' in self.data_fields else {}
if not self.load_image_only:
ins_anno_ids = coco.getAnnIds(
imgIds=[img_id],
iscrowd=None if self.load_crowd else False)
instances = coco.loadAnns(ins_anno_ids)
bboxes = []
is_rbox_anno = False
for inst in instances:
# check gt bbox
if inst.get('ignore', False):
continue
if 'bbox' not in inst.keys():
continue
else:
if not any(np.array(inst['bbox'])):
continue
# read rbox anno or not
is_rbox_anno = True if len(inst['bbox']) == 5 else False
if is_rbox_anno:
xc, yc, box_w, box_h, angle = inst['bbox']
x1 = xc - box_w / 2.0
y1 = yc - box_h / 2.0
x2 = x1 + box_w
y2 = y1 + box_h
else:
x1, y1, box_w, box_h = inst['bbox']
x2 = x1 + box_w
y2 = y1 + box_h
eps = 1e-5
if inst['area'] > 0 and x2 - x1 > eps and y2 - y1 > eps:
inst['clean_bbox'] = [
round(float(x), 3) for x in [x1, y1, x2, y2]
]
if is_rbox_anno:
inst['clean_rbox'] = [xc, yc, box_w, box_h, angle]
bboxes.append(inst)
else:
logger.warning(
'Found an invalid bbox in annotations: im_id: {}, '
'area: {} x1: {}, y1: {}, x2: {}, y2: {}.'.format(
img_id, float(inst['area']), x1, y1, x2, y2))
num_bbox = len(bboxes)
if num_bbox <= 0 and not self.allow_empty:
continue
elif num_bbox <= 0:
is_empty = True
gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32)
if is_rbox_anno:
gt_rbox = np.zeros((num_bbox, 5), dtype=np.float32)
gt_theta = np.zeros((num_bbox, 1), dtype=np.int32)
gt_class = np.zeros((num_bbox, 1), dtype=np.int32)
is_crowd = np.zeros((num_bbox, 1), dtype=np.int32)
gt_poly = [None] * num_bbox
has_segmentation = False
for i, box in enumerate(bboxes):
catid = box['category_id']
gt_class[i][0] = self.catid2clsid[catid]
gt_bbox[i, :] = box['clean_bbox']
# xc, yc, w, h, theta
if is_rbox_anno:
gt_rbox[i, :] = box['clean_rbox']
is_crowd[i][0] = box['iscrowd']
# check RLE format
if 'segmentation' in box and box['iscrowd'] == 1:
gt_poly[i] = [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]
elif 'segmentation' in box and box['segmentation']:
if not np.array(box['segmentation']
).size > 0 and not self.allow_empty:
bboxes.pop(i)
gt_poly.pop(i)
np.delete(is_crowd, i)
np.delete(gt_class, i)
np.delete(gt_bbox, i)
else:
gt_poly[i] = box['segmentation']
has_segmentation = True
if has_segmentation and not any(
gt_poly) and not self.allow_empty:
continue
if is_rbox_anno:
gt_rec = {
'is_crowd': is_crowd,
'gt_class': gt_class,
'gt_bbox': gt_bbox,
'gt_rbox': gt_rbox,
'gt_poly': gt_poly,
}
else:
gt_rec = {
'is_crowd': is_crowd,
'gt_class': gt_class,
'gt_bbox': gt_bbox,
'gt_poly': gt_poly,
}
for k, v in gt_rec.items():
if k in self.data_fields:
coco_rec[k] = v
# TODO: remove load_semantic
if self.load_semantic and 'semantic' in self.data_fields:
seg_path = os.path.join(self.dataset_dir, 'stuffthingmaps',
'train2017', im_fname[:-3] + 'png')
coco_rec.update({'semantic': seg_path})
logger.debug('Load file: {}, im_id: {}, h: {}, w: {}.'.format(
im_path, img_id, im_h, im_w))
if is_empty:
empty_records.append(coco_rec)
else:
records.append(coco_rec)
ct += 1
if self.sample_num > 0 and ct >= self.sample_num:
break
        assert ct > 0, 'no coco record found in %s' % (anno_path)
logger.debug('{} samples in file {}'.format(ct, anno_path))
if self.allow_empty and len(empty_records) > 0:
empty_records = self._sample_empty(empty_records, len(records))
records += empty_records
self.roidbs = records
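# Usage sketch (illustrative; the paths are placeholders for a COCO-format
# dataset laid out under dataset_dir):
#
#   dataset = COCODataSet(
#       dataset_dir='dataset/coco',
#       image_dir='train2017',
#       anno_path='annotations/instances_train2017.json',
#       data_fields=['image', 'gt_bbox', 'gt_class', 'is_crowd'])
#   dataset.parse_dataset()
#   print(len(dataset.roidbs), 'usable records')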

@@ -0,0 +1,197 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
try:
from collections.abc import Sequence
except Exception:
from collections import Sequence
from paddle.io import Dataset
from paddlers.models.ppdet.core.workspace import register, serializable
from paddlers.models.ppdet.utils.download import get_dataset_path
import copy
@serializable
class DetDataset(Dataset):
"""
Load detection dataset.
Args:
dataset_dir (str): root directory for dataset.
image_dir (str): directory for images.
anno_path (str): annotation file path.
data_fields (list): key name of data dictionary, at least have 'image'.
sample_num (int): number of samples to load, -1 means all.
use_default_label (bool): whether to load default label list.
"""
def __init__(self,
dataset_dir=None,
image_dir=None,
anno_path=None,
data_fields=['image'],
sample_num=-1,
use_default_label=None,
**kwargs):
super(DetDataset, self).__init__()
self.dataset_dir = dataset_dir if dataset_dir is not None else ''
self.anno_path = anno_path
self.image_dir = image_dir if image_dir is not None else ''
self.data_fields = data_fields
self.sample_num = sample_num
self.use_default_label = use_default_label
self._epoch = 0
self._curr_iter = 0
def __len__(self, ):
return len(self.roidbs)
def __getitem__(self, idx):
# data batch
roidb = copy.deepcopy(self.roidbs[idx])
if self.mixup_epoch == 0 or self._epoch < self.mixup_epoch:
n = len(self.roidbs)
idx = np.random.randint(n)
roidb = [roidb, copy.deepcopy(self.roidbs[idx])]
elif self.cutmix_epoch == 0 or self._epoch < self.cutmix_epoch:
n = len(self.roidbs)
idx = np.random.randint(n)
roidb = [roidb, copy.deepcopy(self.roidbs[idx])]
elif self.mosaic_epoch == 0 or self._epoch < self.mosaic_epoch:
n = len(self.roidbs)
roidb = [roidb, ] + [
copy.deepcopy(self.roidbs[np.random.randint(n)])
for _ in range(3)
]
if isinstance(roidb, Sequence):
for r in roidb:
r['curr_iter'] = self._curr_iter
else:
roidb['curr_iter'] = self._curr_iter
self._curr_iter += 1
return self.transform(roidb)
def check_or_download_dataset(self):
self.dataset_dir = get_dataset_path(self.dataset_dir, self.anno_path,
self.image_dir)
def set_kwargs(self, **kwargs):
self.mixup_epoch = kwargs.get('mixup_epoch', -1)
self.cutmix_epoch = kwargs.get('cutmix_epoch', -1)
self.mosaic_epoch = kwargs.get('mosaic_epoch', -1)
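        # -1 (the default) disables the corresponding augmentation; 0 keeps it
        # enabled for the whole run; a positive value N enables it only while
        # self._epoch < N (see the checks in __getitem__ above).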
def set_transform(self, transform):
self.transform = transform
def set_epoch(self, epoch_id):
self._epoch = epoch_id
def parse_dataset(self, ):
raise NotImplementedError(
"Need to implement parse_dataset method of Dataset")
def get_anno(self):
if self.anno_path is None:
return
return os.path.join(self.dataset_dir, self.anno_path)
def _is_valid_file(f, extensions=('.jpg', '.jpeg', '.png', '.bmp')):
return f.lower().endswith(extensions)
def _make_dataset(dir):
dir = os.path.expanduser(dir)
if not os.path.isdir(dir):
        raise ValueError('{} should be a dir'.format(dir))
images = []
for root, _, fnames in sorted(os.walk(dir, followlinks=True)):
for fname in sorted(fnames):
path = os.path.join(root, fname)
if _is_valid_file(path):
images.append(path)
return images
@register
@serializable
class ImageFolder(DetDataset):
def __init__(self,
dataset_dir=None,
image_dir=None,
anno_path=None,
sample_num=-1,
use_default_label=None,
**kwargs):
super(ImageFolder, self).__init__(
dataset_dir,
image_dir,
anno_path,
sample_num=sample_num,
use_default_label=use_default_label)
self._imid2path = {}
self.roidbs = None
self.sample_num = sample_num
def check_or_download_dataset(self):
if self.dataset_dir:
# NOTE: ImageFolder is only used for prediction, in
# infer mode, image_dir is set by set_images
# so we only check anno_path here
self.dataset_dir = get_dataset_path(self.dataset_dir,
self.anno_path, None)
def parse_dataset(self, ):
if not self.roidbs:
self.roidbs = self._load_images()
def _parse(self):
image_dir = self.image_dir
if not isinstance(image_dir, Sequence):
image_dir = [image_dir]
images = []
for im_dir in image_dir:
if os.path.isdir(im_dir):
im_dir = os.path.join(self.dataset_dir, im_dir)
images.extend(_make_dataset(im_dir))
elif os.path.isfile(im_dir) and _is_valid_file(im_dir):
images.append(im_dir)
return images
def _load_images(self):
images = self._parse()
ct = 0
records = []
for image in images:
assert image != '' and os.path.isfile(image), \
"Image {} not found".format(image)
if self.sample_num > 0 and ct >= self.sample_num:
break
rec = {'im_id': np.array([ct]), 'im_file': image}
self._imid2path[ct] = image
ct += 1
records.append(rec)
assert len(records) > 0, "No image file found"
return records
def get_imid2path(self):
return self._imid2path
def set_images(self, images):
self.image_dir = images
self.roidbs = self._load_images()
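# Usage sketch for inference (illustrative; the image paths are placeholders):
#
#   infer_dataset = ImageFolder()
#   infer_dataset.set_images(['demo/xxx1.jpg', 'demo/xxx2.jpg'])  # builds roidbs
#   print(infer_dataset.get_imid2path())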

@@ -0,0 +1,669 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
this code is based on https://github.com/open-mmlab/mmpose
"""
import os
import cv2
import numpy as np
import json
import copy
import pycocotools
from pycocotools.coco import COCO
from .dataset import DetDataset
from paddlers.models.ppdet.core.workspace import register, serializable
@serializable
class KeypointBottomUpBaseDataset(DetDataset):
"""Base class for bottom-up datasets.
All datasets should subclass it.
All subclasses should overwrite:
Methods:`_get_imganno`
Args:
dataset_dir (str): Root path to the dataset.
anno_path (str): Relative path to the annotation file.
image_dir (str): Path to a directory where images are held.
Default: None.
num_joints (int): keypoint numbers
transform (composed(operators)): A sequence of data transforms.
shard (list): [rank, worldsize], the distributed env params
test_mode (bool): Store True when building test or
validation dataset. Default: False.
"""
def __init__(self,
dataset_dir,
image_dir,
anno_path,
num_joints,
transform=[],
shard=[0, 1],
test_mode=False):
super().__init__(dataset_dir, image_dir, anno_path)
self.image_info = {}
self.ann_info = {}
self.img_prefix = os.path.join(dataset_dir, image_dir)
self.transform = transform
self.test_mode = test_mode
self.ann_info['num_joints'] = num_joints
self.img_ids = []
def parse_dataset(self):
pass
def __len__(self):
"""Get dataset length."""
return len(self.img_ids)
def _get_imganno(self, idx):
"""Get anno for a single image."""
raise NotImplementedError
def __getitem__(self, idx):
"""Prepare image for training given the index."""
records = copy.deepcopy(self._get_imganno(idx))
records['image'] = cv2.imread(records['image_file'])
records['image'] = cv2.cvtColor(records['image'], cv2.COLOR_BGR2RGB)
records['mask'] = (records['mask'] + 0).astype('uint8')
records = self.transform(records)
return records
@register
@serializable
class KeypointBottomUpCocoDataset(KeypointBottomUpBaseDataset):
"""COCO dataset for bottom-up pose estimation.
    The dataset loads raw features and applies the specified transforms
to return a dict containing the image tensors and other information.
COCO keypoint indexes::
0: 'nose',
1: 'left_eye',
2: 'right_eye',
3: 'left_ear',
4: 'right_ear',
5: 'left_shoulder',
6: 'right_shoulder',
7: 'left_elbow',
8: 'right_elbow',
9: 'left_wrist',
10: 'right_wrist',
11: 'left_hip',
12: 'right_hip',
13: 'left_knee',
14: 'right_knee',
15: 'left_ankle',
16: 'right_ankle'
Args:
dataset_dir (str): Root path to the dataset.
anno_path (str): Relative path to the annotation file.
image_dir (str): Path to a directory where images are held.
Default: None.
num_joints (int): keypoint numbers
transform (composed(operators)): A sequence of data transforms.
shard (list): [rank, worldsize], the distributed env params
test_mode (bool): Store True when building test or
validation dataset. Default: False.
"""
def __init__(self,
dataset_dir,
image_dir,
anno_path,
num_joints,
transform=[],
shard=[0, 1],
test_mode=False):
super().__init__(dataset_dir, image_dir, anno_path, num_joints,
transform, shard, test_mode)
self.ann_file = os.path.join(dataset_dir, anno_path)
self.shard = shard
self.test_mode = test_mode
def parse_dataset(self):
self.coco = COCO(self.ann_file)
self.img_ids = self.coco.getImgIds()
if not self.test_mode:
self.img_ids = [
img_id for img_id in self.img_ids
if len(self.coco.getAnnIds(
imgIds=img_id, iscrowd=None)) > 0
]
blocknum = int(len(self.img_ids) / self.shard[1])
self.img_ids = self.img_ids[(blocknum * self.shard[0]):(blocknum * (
self.shard[0] + 1))]
self.num_images = len(self.img_ids)
self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs)
self.dataset_name = 'coco'
cat_ids = self.coco.getCatIds()
self.catid2clsid = dict({catid: i for i, catid in enumerate(cat_ids)})
print('=> num_images: {}'.format(self.num_images))
@staticmethod
def _get_mapping_id_name(imgs):
"""
Args:
imgs (dict): dict of image info.
Returns:
tuple: Image name & id mapping dicts.
- id2name (dict): Mapping image id to name.
- name2id (dict): Mapping image name to id.
"""
id2name = {}
name2id = {}
for image_id, image in imgs.items():
file_name = image['file_name']
id2name[image_id] = file_name
name2id[file_name] = image_id
return id2name, name2id
def _get_imganno(self, idx):
"""Get anno for a single image.
Args:
idx (int): image idx
Returns:
dict: info for model training
"""
coco = self.coco
img_id = self.img_ids[idx]
ann_ids = coco.getAnnIds(imgIds=img_id)
anno = coco.loadAnns(ann_ids)
mask = self._get_mask(anno, idx)
anno = [
obj for obj in anno
if obj['iscrowd'] == 0 or obj['num_keypoints'] > 0
]
joints, orgsize = self._get_joints(anno, idx)
db_rec = {}
db_rec['im_id'] = img_id
db_rec['image_file'] = os.path.join(self.img_prefix,
self.id2name[img_id])
db_rec['mask'] = mask
db_rec['joints'] = joints
db_rec['im_shape'] = orgsize
return db_rec
def _get_joints(self, anno, idx):
"""Get joints for all people in an image."""
num_people = len(anno)
joints = np.zeros(
(num_people, self.ann_info['num_joints'], 3), dtype=np.float32)
for i, obj in enumerate(anno):
joints[i, :self.ann_info['num_joints'], :3] = \
np.array(obj['keypoints']).reshape([-1, 3])
img_info = self.coco.loadImgs(self.img_ids[idx])[0]
joints[..., 0] /= img_info['width']
joints[..., 1] /= img_info['height']
orgsize = np.array([img_info['height'], img_info['width']])
return joints, orgsize
def _get_mask(self, anno, idx):
"""Get ignore masks to mask out losses."""
coco = self.coco
img_info = coco.loadImgs(self.img_ids[idx])[0]
m = np.zeros((img_info['height'], img_info['width']), dtype=np.float32)
for obj in anno:
if 'segmentation' in obj:
if obj['iscrowd']:
rle = pycocotools.mask.frPyObjects(obj['segmentation'],
img_info['height'],
img_info['width'])
m += pycocotools.mask.decode(rle)
elif obj['num_keypoints'] == 0:
rles = pycocotools.mask.frPyObjects(obj['segmentation'],
img_info['height'],
img_info['width'])
for rle in rles:
m += pycocotools.mask.decode(rle)
return m < 0.5
@register
@serializable
class KeypointBottomUpCrowdPoseDataset(KeypointBottomUpCocoDataset):
"""CrowdPose dataset for bottom-up pose estimation.
    The dataset loads raw features and applies the specified transforms
to return a dict containing the image tensors and other information.
CrowdPose keypoint indexes::
0: 'left_shoulder',
1: 'right_shoulder',
2: 'left_elbow',
3: 'right_elbow',
4: 'left_wrist',
5: 'right_wrist',
6: 'left_hip',
7: 'right_hip',
8: 'left_knee',
9: 'right_knee',
10: 'left_ankle',
11: 'right_ankle',
12: 'top_head',
13: 'neck'
Args:
dataset_dir (str): Root path to the dataset.
anno_path (str): Relative path to the annotation file.
image_dir (str): Path to a directory where images are held.
Default: None.
num_joints (int): keypoint numbers
transform (composed(operators)): A sequence of data transforms.
shard (list): [rank, worldsize], the distributed env params
test_mode (bool): Store True when building test or
validation dataset. Default: False.
"""
def __init__(self,
dataset_dir,
image_dir,
anno_path,
num_joints,
transform=[],
shard=[0, 1],
test_mode=False):
super().__init__(dataset_dir, image_dir, anno_path, num_joints,
transform, shard, test_mode)
self.ann_file = os.path.join(dataset_dir, anno_path)
self.shard = shard
self.test_mode = test_mode
def parse_dataset(self):
self.coco = COCO(self.ann_file)
self.img_ids = self.coco.getImgIds()
if not self.test_mode:
self.img_ids = [
img_id for img_id in self.img_ids
if len(self.coco.getAnnIds(
imgIds=img_id, iscrowd=None)) > 0
]
blocknum = int(len(self.img_ids) / self.shard[1])
self.img_ids = self.img_ids[(blocknum * self.shard[0]):(blocknum * (
self.shard[0] + 1))]
self.num_images = len(self.img_ids)
self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs)
self.dataset_name = 'crowdpose'
print('=> num_images: {}'.format(self.num_images))
@serializable
class KeypointTopDownBaseDataset(DetDataset):
"""Base class for top_down datasets.
All datasets should subclass it.
All subclasses should overwrite:
Methods:`_get_db`
Args:
dataset_dir (str): Root path to the dataset.
image_dir (str): Path to a directory where images are held.
anno_path (str): Relative path to the annotation file.
num_joints (int): keypoint numbers
transform (composed(operators)): A sequence of data transforms.
"""
def __init__(self,
dataset_dir,
image_dir,
anno_path,
num_joints,
transform=[]):
super().__init__(dataset_dir, image_dir, anno_path)
self.image_info = {}
self.ann_info = {}
self.img_prefix = os.path.join(dataset_dir, image_dir)
self.transform = transform
self.ann_info['num_joints'] = num_joints
self.db = []
def __len__(self):
"""Get dataset length."""
return len(self.db)
def _get_db(self):
"""Get a sample"""
raise NotImplementedError
def __getitem__(self, idx):
"""Prepare sample for training given the index."""
records = copy.deepcopy(self.db[idx])
records['image'] = cv2.imread(records['image_file'], cv2.IMREAD_COLOR |
cv2.IMREAD_IGNORE_ORIENTATION)
records['image'] = cv2.cvtColor(records['image'], cv2.COLOR_BGR2RGB)
records['score'] = records['score'] if 'score' in records else 1
records = self.transform(records)
# print('records', records)
return records
@register
@serializable
class KeypointTopDownCocoDataset(KeypointTopDownBaseDataset):
"""COCO dataset for top-down pose estimation.
    The dataset loads raw features and applies the specified transforms
to return a dict containing the image tensors and other information.
COCO keypoint indexes:
0: 'nose',
1: 'left_eye',
2: 'right_eye',
3: 'left_ear',
4: 'right_ear',
5: 'left_shoulder',
6: 'right_shoulder',
7: 'left_elbow',
8: 'right_elbow',
9: 'left_wrist',
10: 'right_wrist',
11: 'left_hip',
12: 'right_hip',
13: 'left_knee',
14: 'right_knee',
15: 'left_ankle',
16: 'right_ankle'
Args:
dataset_dir (str): Root path to the dataset.
image_dir (str): Path to a directory where images are held.
anno_path (str): Relative path to the annotation file.
num_joints (int): Keypoint numbers
trainsize (list):[w, h] Image target size
transform (composed(operators)): A sequence of data transforms.
bbox_file (str): Path to a detection bbox file
Default: None.
use_gt_bbox (bool): Whether to use ground truth bbox
Default: True.
pixel_std (int): The pixel std of the scale
Default: 200.
image_thre (float): The threshold to filter the detection box
Default: 0.0.
"""
def __init__(self,
dataset_dir,
image_dir,
anno_path,
num_joints,
trainsize,
transform=[],
bbox_file=None,
use_gt_bbox=True,
pixel_std=200,
image_thre=0.0):
super().__init__(dataset_dir, image_dir, anno_path, num_joints,
transform)
self.bbox_file = bbox_file
self.use_gt_bbox = use_gt_bbox
self.trainsize = trainsize
self.pixel_std = pixel_std
self.image_thre = image_thre
self.dataset_name = 'coco'
def parse_dataset(self):
if self.use_gt_bbox:
self.db = self._load_coco_keypoint_annotations()
else:
self.db = self._load_coco_person_detection_results()
def _load_coco_keypoint_annotations(self):
coco = COCO(self.get_anno())
img_ids = coco.getImgIds()
gt_db = []
for index in img_ids:
im_ann = coco.loadImgs(index)[0]
width = im_ann['width']
height = im_ann['height']
file_name = im_ann['file_name']
im_id = int(im_ann["id"])
annIds = coco.getAnnIds(imgIds=index, iscrowd=False)
objs = coco.loadAnns(annIds)
valid_objs = []
for obj in objs:
x, y, w, h = obj['bbox']
x1 = np.max((0, x))
y1 = np.max((0, y))
x2 = np.min((width - 1, x1 + np.max((0, w - 1))))
y2 = np.min((height - 1, y1 + np.max((0, h - 1))))
if obj['area'] > 0 and x2 >= x1 and y2 >= y1:
obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
valid_objs.append(obj)
objs = valid_objs
rec = []
for obj in objs:
if max(obj['keypoints']) == 0:
continue
                joints = np.zeros(
                    (self.ann_info['num_joints'], 3), dtype=np.float64)
                joints_vis = np.zeros(
                    (self.ann_info['num_joints'], 3), dtype=np.float64)
for ipt in range(self.ann_info['num_joints']):
joints[ipt, 0] = obj['keypoints'][ipt * 3 + 0]
joints[ipt, 1] = obj['keypoints'][ipt * 3 + 1]
joints[ipt, 2] = 0
t_vis = obj['keypoints'][ipt * 3 + 2]
if t_vis > 1:
t_vis = 1
joints_vis[ipt, 0] = t_vis
joints_vis[ipt, 1] = t_vis
joints_vis[ipt, 2] = 0
center, scale = self._box2cs(obj['clean_bbox'][:4])
rec.append({
'image_file': os.path.join(self.img_prefix, file_name),
'center': center,
'scale': scale,
'joints': joints,
'joints_vis': joints_vis,
'im_id': im_id,
})
gt_db.extend(rec)
return gt_db
def _box2cs(self, box):
x, y, w, h = box[:4]
center = np.zeros((2), dtype=np.float32)
center[0] = x + w * 0.5
center[1] = y + h * 0.5
aspect_ratio = self.trainsize[0] * 1.0 / self.trainsize[1]
if w > aspect_ratio * h:
h = w * 1.0 / aspect_ratio
elif w < aspect_ratio * h:
w = h * aspect_ratio
scale = np.array(
[w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std],
dtype=np.float32)
if center[0] != -1:
scale = scale * 1.25
return center, scale
def _load_coco_person_detection_results(self):
all_boxes = None
bbox_file_path = os.path.join(self.dataset_dir, self.bbox_file)
with open(bbox_file_path, 'r') as f:
all_boxes = json.load(f)
if not all_boxes:
print('=> Load %s fail!' % bbox_file_path)
return None
kpt_db = []
for n_img in range(0, len(all_boxes)):
det_res = all_boxes[n_img]
if det_res['category_id'] != 1:
continue
file_name = det_res[
'filename'] if 'filename' in det_res else '%012d.jpg' % det_res[
'image_id']
img_name = os.path.join(self.img_prefix, file_name)
box = det_res['bbox']
score = det_res['score']
im_id = int(det_res['image_id'])
if score < self.image_thre:
continue
center, scale = self._box2cs(box)
joints = np.zeros((self.ann_info['num_joints'], 3), dtype=np.float32)
joints_vis = np.ones(
(self.ann_info['num_joints'], 3), dtype=np.float32)
kpt_db.append({
'image_file': img_name,
'im_id': im_id,
'center': center,
'scale': scale,
'score': score,
'joints': joints,
'joints_vis': joints_vis,
})
return kpt_db
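# Illustrative sketch of how `_box2cs` above maps a COCO bbox to the (center, scale)
# pair stored in each record. The numbers are hypothetical and assume
# trainsize=[288, 384] with the default pixel_std=200:
#   box = [50, 60, 100, 200]                      # x, y, w, h
#   center = [50 + 100 * 0.5, 60 + 200 * 0.5]     # -> [100., 160.]
#   aspect_ratio = 288 / 384 = 0.75; since w < aspect_ratio * h, w is padded to 150
#   scale = [150 / 200, 200 / 200] * 1.25         # -> [0.9375, 1.25]
# When use_gt_bbox=False, bbox_file is expected to be a JSON list in the COCO
# detection-results layout read by _load_coco_person_detection_results above, e.g.
#   [{"image_id": 42, "category_id": 1, "bbox": [50, 60, 100, 200], "score": 0.98}, ...]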
@register
@serializable
class KeypointTopDownMPIIDataset(KeypointTopDownBaseDataset):
"""MPII dataset for topdown pose estimation.
The dataset loads raw features and applies the specified transforms
to return a dict containing the image tensors and other information.
MPII keypoint indexes::
0: 'right_ankle',
1: 'right_knee',
2: 'right_hip',
3: 'left_hip',
4: 'left_knee',
5: 'left_ankle',
6: 'pelvis',
7: 'thorax',
8: 'upper_neck',
9: 'head_top',
10: 'right_wrist',
11: 'right_elbow',
12: 'right_shoulder',
13: 'left_shoulder',
14: 'left_elbow',
15: 'left_wrist',
Args:
dataset_dir (str): Root path to the dataset.
image_dir (str): Path to a directory where images are held.
anno_path (str): Relative path to the annotation file.
num_joints (int): Number of keypoints.
transform (composed(operators)): A sequence of data transforms.
"""
def __init__(self,
dataset_dir,
image_dir,
anno_path,
num_joints,
transform=[]):
super().__init__(dataset_dir, image_dir, anno_path, num_joints,
transform)
self.dataset_name = 'mpii'
def parse_dataset(self):
with open(self.get_anno()) as anno_file:
anno = json.load(anno_file)
gt_db = []
for a in anno:
image_name = a['image']
im_id = a['image_id'] if 'image_id' in a else int(
os.path.splitext(image_name)[0])
c = np.array(a['center'], dtype=np.float32)
s = np.array([a['scale'], a['scale']], dtype=np.float32)
# Adjust center/scale slightly to avoid cropping limbs
if c[0] != -1:
c[1] = c[1] + 15 * s[1]
s = s * 1.25
c = c - 1
joints = np.zeros((self.ann_info['num_joints'], 3), dtype=np.float32)
joints_vis = np.zeros(
(self.ann_info['num_joints'], 3), dtype=np.float32)
if 'joints' in a:
joints_ = np.array(a['joints'])
joints_[:, 0:2] = joints_[:, 0:2] - 1
joints_vis_ = np.array(a['joints_vis'])
assert len(joints_) == self.ann_info[
'num_joints'], 'joint num diff: {} vs {}'.format(
len(joints_), self.ann_info['num_joints'])
joints[:, 0:2] = joints_[:, 0:2]
joints_vis[:, 0] = joints_vis_[:]
joints_vis[:, 1] = joints_vis_[:]
gt_db.append({
'image_file': os.path.join(self.img_prefix, image_name),
'im_id': im_id,
'center': c,
'scale': s,
'joints': joints,
'joints_vis': joints_vis
})
print("number length: {}".format(len(gt_db)))
self.db = gt_db
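# Illustrative sketch of one entry in the MPII-style annotation JSON consumed by
# parse_dataset above (field values are hypothetical):
#   {
#       "image": "005808361.jpg",
#       "center": [594.0, 257.0],          # person center in pixels
#       "scale": 3.02,                     # scalar person scale
#       "joints": [[620.0, 394.0], ...],   # 16 x 2 keypoint coordinates
#       "joints_vis": [1, 1, 0, ...]       # 16 visibility flags
#   }
# The 1-based MPII coordinates are shifted by -1, and center/scale are slightly
# enlarged (c[1] += 15 * s[1], s *= 1.25) so limbs near the box border survive cropping.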

@@ -0,0 +1,636 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import cv2
import glob
import numpy as np
from collections import OrderedDict, defaultdict
try:
from collections.abc import Sequence
except Exception:
from collections import Sequence
from .dataset import DetDataset, _make_dataset, _is_valid_file
from paddlers.models.ppdet.core.workspace import register, serializable
from paddlers.models.ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
@register
@serializable
class MOTDataSet(DetDataset):
"""
Load dataset with MOT format; only single-class MOT is supported.
Args:
dataset_dir (str): root directory for dataset.
image_lists (str|list): MOT data image lists for a multi-source MOT dataset.
data_fields (list): key name of data dictionary, at least have 'image'.
sample_num (int): number of samples to load, -1 means all.
Notes:
The MOT dataset root directory is organized as follows:
dataset/mot
|image_lists
| |caltech.train
| |caltech.val
| |mot16.train
| |mot17.train
| ......
|Caltech
|MOT17
|......
All the MOT datasets have the following structure:
Caltech
|images
| 00001.jpg
| | ...
| 0000N.jpg
labels_with_ids
00001.txt
| ...
0000N.txt
or
MOT17
|images
| train
| test
labels_with_ids
train
"""
def __init__(self,
dataset_dir=None,
image_lists=[],
data_fields=['image'],
sample_num=-1):
super(MOTDataSet, self).__init__(
dataset_dir=dataset_dir,
data_fields=data_fields,
sample_num=sample_num)
self.dataset_dir = dataset_dir
self.image_lists = image_lists
if isinstance(self.image_lists, str):
self.image_lists = [self.image_lists]
self.roidbs = None
self.cname2cid = None
def get_anno(self):
if self.image_lists == []:
return
# only used to get categories and metric
# only check the first data source, but the label_list of all data sources should be the same.
first_mot_data = self.image_lists[0].split('.')[0]
anno_file = os.path.join(self.dataset_dir, first_mot_data,
'label_list.txt')
return anno_file
def parse_dataset(self):
self.img_files = OrderedDict()
self.img_start_index = OrderedDict()
self.label_files = OrderedDict()
self.tid_num = OrderedDict()
self.tid_start_index = OrderedDict()
img_index = 0
for data_name in self.image_lists:
# check every data image list
image_lists_dir = os.path.join(self.dataset_dir, 'image_lists')
assert os.path.isdir(image_lists_dir), \
"The {} is not a directory.".format(image_lists_dir)
list_path = os.path.join(image_lists_dir, data_name)
assert os.path.exists(list_path), \
"The list path {} does not exist.".format(list_path)
# record img_files, filter out empty ones
with open(list_path, 'r') as file:
self.img_files[data_name] = file.readlines()
self.img_files[data_name] = [
os.path.join(self.dataset_dir, x.strip())
for x in self.img_files[data_name]
]
self.img_files[data_name] = list(
filter(lambda x: len(x) > 0, self.img_files[data_name]))
self.img_start_index[data_name] = img_index
img_index += len(self.img_files[data_name])
# record label_files
self.label_files[data_name] = [
x.replace('images', 'labels_with_ids').replace(
'.png', '.txt').replace('.jpg', '.txt')
for x in self.img_files[data_name]
]
for data_name, label_paths in self.label_files.items():
max_index = -1
for lp in label_paths:
lb = np.loadtxt(lp)
if len(lb) < 1:
continue
if len(lb.shape) < 2:
img_max = lb[1]
else:
img_max = np.max(lb[:, 1])
if img_max > max_index:
max_index = img_max
self.tid_num[data_name] = int(max_index + 1)
last_index = 0
for i, (k, v) in enumerate(self.tid_num.items()):
self.tid_start_index[k] = last_index
last_index += v
self.num_identities_dict = defaultdict(int)
self.num_identities_dict[0] = int(last_index + 1) # single class
self.num_imgs_each_data = [len(x) for x in self.img_files.values()]
self.total_imgs = sum(self.num_imgs_each_data)
logger.info('MOT dataset summary: ')
logger.info(self.tid_num)
logger.info('Total images: {}'.format(self.total_imgs))
logger.info('Image start index: {}'.format(self.img_start_index))
logger.info('Total identities: {}'.format(self.num_identities_dict[0]))
logger.info('Identity start index: {}'.format(self.tid_start_index))
records = []
cname2cid = mot_label()
for img_index in range(self.total_imgs):
for i, (k, v) in enumerate(self.img_start_index.items()):
if img_index >= v:
data_name = list(self.label_files.keys())[i]
start_index = v
img_file = self.img_files[data_name][img_index - start_index]
lbl_file = self.label_files[data_name][img_index - start_index]
if not os.path.exists(img_file):
logger.warning(
'Illegal image file: {}, and it will be ignored'.format(
img_file))
continue
if not os.path.isfile(lbl_file):
logger.warning(
'Illegal label file: {}, and it will be ignored'.format(
lbl_file))
continue
labels = np.loadtxt(lbl_file, dtype=np.float32).reshape(-1, 6)
# each row in labels (N, 6) is [gt_class, gt_identity, cx, cy, w, h]
cx, cy = labels[:, 2], labels[:, 3]
w, h = labels[:, 4], labels[:, 5]
gt_bbox = np.stack((cx, cy, w, h)).T.astype('float32')
gt_class = labels[:, 0:1].astype('int32')
gt_score = np.ones((len(labels), 1)).astype('float32')
gt_ide = labels[:, 1:2].astype('int32')
for i, _ in enumerate(gt_ide):
if gt_ide[i] > -1:
gt_ide[i] += self.tid_start_index[data_name]
mot_rec = {
'im_file': img_file,
'im_id': img_index,
} if 'image' in self.data_fields else {}
gt_rec = {
'gt_class': gt_class,
'gt_score': gt_score,
'gt_bbox': gt_bbox,
'gt_ide': gt_ide,
}
for k, v in gt_rec.items():
if k in self.data_fields:
mot_rec[k] = v
records.append(mot_rec)
if self.sample_num > 0 and img_index >= self.sample_num:
break
assert len(records) > 0, 'not found any mot record in %s' % (
self.image_lists)
self.roidbs, self.cname2cid = records, cname2cid
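# Illustrative sketch of one line of a labels_with_ids/*.txt file and how it is
# mapped above (numbers are hypothetical; cx, cy, w, h are the box center and
# size, normalized to [0, 1] in the usual JDE/FairMOT label convention):
#   0 7 0.512 0.430 0.081 0.245
#   -> gt_class = [[0]], gt_ide = [[7 + tid_start_index[data_name]]],
#      gt_bbox = [[0.512, 0.430, 0.081, 0.245]], gt_score = [[1.]]
# The per-dataset tid_start_index offset keeps track ids unique when several
# sources (e.g. caltech.train and mot17.train) are mixed into one epoch.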
@register
@serializable
class MCMOTDataSet(DetDataset):
"""
Load dataset with MOT format; multi-class MOT is supported.
Args:
dataset_dir (str): root directory for dataset.
image_lists (list(str)): MCMOT data image lists for a multi-source MCMOT dataset.
data_fields (list): key name of data dictionary, at least have 'image'.
label_list (str): if use_default_label is False, will load
mapping between category and class index.
sample_num (int): number of samples to load, -1 means all.
Notes:
The MCMOT dataset root directory is organized as follows:
dataset/mot
|image_lists
| |visdrone_mcmot.train
| |visdrone_mcmot.val
visdrone_mcmot
|images
| train
| val
labels_with_ids
train
"""
def __init__(self,
dataset_dir=None,
image_lists=[],
data_fields=['image'],
label_list=None,
sample_num=-1):
super(MCMOTDataSet, self).__init__(
dataset_dir=dataset_dir,
data_fields=data_fields,
sample_num=sample_num)
self.dataset_dir = dataset_dir
self.image_lists = image_lists
if isinstance(self.image_lists, str):
self.image_lists = [self.image_lists]
self.label_list = label_list
self.roidbs = None
self.cname2cid = None
def get_anno(self):
if self.image_lists == []:
return
# only used to get categories and metric
# only check the first data source, but the label_list of all data sources should be the same.
first_mot_data = self.image_lists[0].split('.')[0]
anno_file = os.path.join(self.dataset_dir, first_mot_data,
'label_list.txt')
return anno_file
def parse_dataset(self):
self.img_files = OrderedDict()
self.img_start_index = OrderedDict()
self.label_files = OrderedDict()
self.tid_num = OrderedDict()
self.tid_start_idx_of_cls_ids = defaultdict(dict) # for MCMOT
img_index = 0
for data_name in self.image_lists:
# check every data image list
image_lists_dir = os.path.join(self.dataset_dir, 'image_lists')
assert os.path.isdir(image_lists_dir), \
"The {} is not a directory.".format(image_lists_dir)
list_path = os.path.join(image_lists_dir, data_name)
assert os.path.exists(list_path), \
"The list path {} does not exist.".format(list_path)
# record img_files, filter out empty ones
with open(list_path, 'r') as file:
self.img_files[data_name] = file.readlines()
self.img_files[data_name] = [
os.path.join(self.dataset_dir, x.strip())
for x in self.img_files[data_name]
]
self.img_files[data_name] = list(
filter(lambda x: len(x) > 0, self.img_files[data_name]))
self.img_start_index[data_name] = img_index
img_index += len(self.img_files[data_name])
# record label_files
self.label_files[data_name] = [
x.replace('images', 'labels_with_ids').replace(
'.png', '.txt').replace('.jpg', '.txt')
for x in self.img_files[data_name]
]
for data_name, label_paths in self.label_files.items():
# using max_ids_dict rather than max_index
max_ids_dict = defaultdict(int)
for lp in label_paths:
lb = np.loadtxt(lp)
if len(lb) < 1:
continue
lb = lb.reshape(-1, 6)
for item in lb:
if item[1] > max_ids_dict[int(item[0])]:
# item[0]: cls_id
# item[1]: track id
max_ids_dict[int(item[0])] = int(item[1])
# track id number
self.tid_num[data_name] = max_ids_dict
last_idx_dict = defaultdict(int)
for i, (k, v) in enumerate(self.tid_num.items()): # each sub dataset
for cls_id, id_num in v.items(): # v is a max_ids_dict
self.tid_start_idx_of_cls_ids[k][cls_id] = last_idx_dict[
cls_id]
last_idx_dict[cls_id] += id_num
self.num_identities_dict = defaultdict(int)
for k, v in last_idx_dict.items():
self.num_identities_dict[k] = int(v) # total ids of each category
self.num_imgs_each_data = [len(x) for x in self.img_files.values()]
self.total_imgs = sum(self.num_imgs_each_data)
# cname2cid and cid2cname
cname2cid = {}
if self.label_list is not None:
# if use label_list for multi source mix dataset,
# please make sure label_list in the first sub_dataset at least.
sub_dataset = self.image_lists[0].split('.')[0]
label_path = os.path.join(self.dataset_dir, sub_dataset,
self.label_list)
if not os.path.exists(label_path):
logger.info(
"Note: label_list {} does not exists, use VisDrone 10 classes labels as default.".
format(label_path))
cname2cid = visdrone_mcmot_label()
else:
with open(label_path, 'r') as fr:
label_id = 0
for line in fr.readlines():
cname2cid[line.strip()] = label_id
label_id += 1
else:
cname2cid = visdrone_mcmot_label()
cid2cname = dict([(v, k) for (k, v) in cname2cid.items()])
logger.info('MCMOT dataset summary: ')
logger.info(self.tid_num)
logger.info('Total images: {}'.format(self.total_imgs))
logger.info('Image start index: {}'.format(self.img_start_index))
logger.info('Total identities of each category: ')
num_identities_dict = sorted(
self.num_identities_dict.items(), key=lambda x: x[0])
total_IDs_all_cats = 0
for (k, v) in num_identities_dict:
logger.info('Category {} [{}] has {} IDs.'.format(k, cid2cname[k],
v))
total_IDs_all_cats += v
logger.info('Total identities of all categories: {}'.format(
total_IDs_all_cats))
logger.info('Identity start index of each category: ')
for k, v in self.tid_start_idx_of_cls_ids.items():
sorted_v = sorted(v.items(), key=lambda x: x[0])
for (cls_id, start_idx) in sorted_v:
logger.info('Start index of dataset {} category {:d} is {:d}'
.format(k, cls_id, start_idx))
records = []
for img_index in range(self.total_imgs):
for i, (k, v) in enumerate(self.img_start_index.items()):
if img_index >= v:
data_name = list(self.label_files.keys())[i]
start_index = v
img_file = self.img_files[data_name][img_index - start_index]
lbl_file = self.label_files[data_name][img_index - start_index]
if not os.path.exists(img_file):
logger.warning(
'Illegal image file: {}, and it will be ignored'.format(
img_file))
continue
if not os.path.isfile(lbl_file):
logger.warning(
'Illegal label file: {}, and it will be ignored'.format(
lbl_file))
continue
labels = np.loadtxt(lbl_file, dtype=np.float32).reshape(-1, 6)
# each row in labels (N, 6) is [gt_class, gt_identity, cx, cy, w, h]
cx, cy = labels[:, 2], labels[:, 3]
w, h = labels[:, 4], labels[:, 5]
gt_bbox = np.stack((cx, cy, w, h)).T.astype('float32')
gt_class = labels[:, 0:1].astype('int32')
gt_score = np.ones((len(labels), 1)).astype('float32')
gt_ide = labels[:, 1:2].astype('int32')
for i, _ in enumerate(gt_ide):
if gt_ide[i] > -1:
cls_id = int(gt_class[i])
start_idx = self.tid_start_idx_of_cls_ids[data_name][
cls_id]
gt_ide[i] += start_idx
mot_rec = {
'im_file': img_file,
'im_id': img_index,
} if 'image' in self.data_fields else {}
gt_rec = {
'gt_class': gt_class,
'gt_score': gt_score,
'gt_bbox': gt_bbox,
'gt_ide': gt_ide,
}
for k, v in gt_rec.items():
if k in self.data_fields:
mot_rec[k] = v
records.append(mot_rec)
if self.sample_num > 0 and img_index >= self.sample_num:
break
assert len(records) > 0, 'not found any mot record in %s' % (
self.image_lists)
self.roidbs, self.cname2cid = records, cname2cid
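# Illustrative sketch of the per-class identity offsets computed above
# (hypothetical counts). Suppose sub-datasets A and B both contain class 3 ('car'):
#   tid_num = {'A': {3: 20, ...}, 'B': {3: 15, ...}}
#   tid_start_idx_of_cls_ids = {'A': {3: 0, ...}, 'B': {3: 20, ...}}
# so a 'car' track id 5 read from B becomes 5 + 20 = 25, and
# num_identities_dict[3] = 35 counts all 'car' identities across both sources.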
@register
@serializable
class MOTImageFolder(DetDataset):
"""
Load a MOT-format dataset from an image folder or a video.
Args:
video_file (str): path of the video file, default None.
frame_rate (int): frame rate of the video; if not set, it is read with cv2.VideoCapture.
dataset_dir (str): root directory for dataset.
keep_ori_im (bool): whether to keep the original image, default False.
Set to True during MOT model inference when saving
images or video, or when used in DeepSORT.
"""
def __init__(self,
video_file=None,
frame_rate=-1,
dataset_dir=None,
data_root=None,
image_dir=None,
sample_num=-1,
keep_ori_im=False,
**kwargs):
super(MOTImageFolder, self).__init__(
dataset_dir, image_dir, sample_num=sample_num)
self.video_file = video_file
self.data_root = data_root
self.keep_ori_im = keep_ori_im
self._imid2path = {}
self.roidbs = None
self.frame_rate = frame_rate
def check_or_download_dataset(self):
return
def parse_dataset(self, ):
if not self.roidbs:
if self.video_file is None:
self.frame_rate = 30 # set as default if infer image folder
self.roidbs = self._load_images()
else:
self.roidbs = self._load_video_images()
def _load_video_images(self):
if self.frame_rate == -1:
# if frame_rate is not set for video, use cv2.VideoCapture
cap = cv2.VideoCapture(self.video_file)
self.frame_rate = int(cap.get(cv2.CAP_PROP_FPS))
extension = self.video_file.split('.')[-1]
output_path = self.video_file.replace('.{}'.format(extension), '')
frames_path = video2frames(self.video_file, output_path,
self.frame_rate)
self.video_frames = sorted(
glob.glob(os.path.join(frames_path, '*.png')))
self.video_length = len(self.video_frames)
logger.info('Length of the video: {:d} frames.'.format(
self.video_length))
ct = 0
records = []
for image in self.video_frames:
assert image != '' and os.path.isfile(image), \
"Image {} not found".format(image)
if self.sample_num > 0 and ct >= self.sample_num:
break
rec = {'im_id': np.array([ct]), 'im_file': image}
if self.keep_ori_im:
rec.update({'keep_ori_im': 1})
self._imid2path[ct] = image
ct += 1
records.append(rec)
assert len(records) > 0, "No image file found"
return records
def _find_images(self):
image_dir = self.image_dir
if not isinstance(image_dir, Sequence):
image_dir = [image_dir]
images = []
for im_dir in image_dir:
if os.path.isdir(im_dir):
im_dir = os.path.join(self.dataset_dir, im_dir)
images.extend(_make_dataset(im_dir))
elif os.path.isfile(im_dir) and _is_valid_file(im_dir):
images.append(im_dir)
return images
def _load_images(self):
images = self._find_images()
ct = 0
records = []
for image in images:
assert image != '' and os.path.isfile(image), \
"Image {} not found".format(image)
if self.sample_num > 0 and ct >= self.sample_num:
break
rec = {'im_id': np.array([ct]), 'im_file': image}
if self.keep_ori_im:
rec.update({'keep_ori_im': 1})
self._imid2path[ct] = image
ct += 1
records.append(rec)
assert len(records) > 0, "No image file found"
return records
def get_imid2path(self):
return self._imid2path
def set_images(self, images):
self.image_dir = images
self.roidbs = self._load_images()
def set_video(self, video_file, frame_rate):
# update video_file and frame_rate by command line of tools/infer_mot.py
self.video_file = video_file
self.frame_rate = frame_rate
assert os.path.isfile(self.video_file) and _is_valid_video(self.video_file), \
"wrong or unsupported file format: {}".format(self.video_file)
self.roidbs = self._load_video_images()
def _is_valid_video(f, extensions=('.mp4', '.avi', '.mov', '.rmvb', '.flv')):
return f.lower().endswith(extensions)
def video2frames(video_path, outpath, frame_rate, **kargs):
def _dict2str(kargs):
cmd_str = ''
for k, v in kargs.items():
cmd_str += (' ' + str(k) + ' ' + str(v))
return cmd_str
ffmpeg = ['ffmpeg ', ' -y -loglevel ', ' error ']
vid_name = os.path.basename(video_path).split('.')[0]
out_full_path = os.path.join(outpath, vid_name)
if not os.path.exists(out_full_path):
os.makedirs(out_full_path)
# video file name
outformat = os.path.join(out_full_path, '%08d.png')
cmd = ffmpeg + [
' -i ', video_path, ' -r ', str(frame_rate), ' -f image2 ', outformat
]
cmd = ''.join(cmd) + _dict2str(kargs)
if os.system(cmd) != 0:
raise RuntimeError('ffmpeg process video: {} error'.format(video_path))
sys.stdout.flush()
return out_full_path
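# Illustrative sketch of what video2frames builds (paths and rate hypothetical):
#   video2frames('demo.mp4', 'output', 30)
#   runs roughly: ffmpeg -y -loglevel error -i demo.mp4 -r 30 -f image2 output/demo/%08d.png
#   and returns 'output/demo', the directory that _load_video_images then globs for *.png.
# Any extra keyword arguments are appended verbatim to the command string as "key value" pairs.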
def mot_label():
labels_map = {'person': 0}
return labels_map
def visdrone_mcmot_label():
labels_map = {
'pedestrian': 0,
'people': 1,
'bicycle': 2,
'car': 3,
'van': 4,
'truck': 5,
'tricycle': 6,
'awning-tricycle': 7,
'bus': 8,
'motor': 9,
}
return labels_map

@@ -0,0 +1,191 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import cv2
import json
import copy
import numpy as np
try:
from collections.abc import Sequence
except Exception:
from collections import Sequence
from paddlers.models.ppdet.core.workspace import register, serializable
from paddlers.models.ppdet.data.crop_utils.annotation_cropper import AnnoCropper
from .coco import COCODataSet
from .dataset import _make_dataset, _is_valid_file
from paddlers.models.ppdet.utils.logger import setup_logger
logger = setup_logger('sniper_coco_dataset')
@register
@serializable
class SniperCOCODataSet(COCODataSet):
"""SniperCOCODataSet"""
def __init__(self,
dataset_dir=None,
image_dir=None,
anno_path=None,
proposals_file=None,
data_fields=['image'],
sample_num=-1,
load_crowd=False,
allow_empty=True,
empty_ratio=1.,
is_trainset=True,
image_target_sizes=[2000, 1000],
valid_box_ratio_ranges=[[-1, 0.1], [0.08, -1]],
chip_target_size=500,
chip_target_stride=200,
use_neg_chip=False,
max_neg_num_per_im=8,
max_per_img=-1,
nms_thresh=0.5):
super(SniperCOCODataSet, self).__init__(
dataset_dir=dataset_dir,
image_dir=image_dir,
anno_path=anno_path,
data_fields=data_fields,
sample_num=sample_num,
load_crowd=load_crowd,
allow_empty=allow_empty,
empty_ratio=empty_ratio)
self.proposals_file = proposals_file
self.proposals = None
self.anno_cropper = None
self.is_trainset = is_trainset
self.image_target_sizes = image_target_sizes
self.valid_box_ratio_ranges = valid_box_ratio_ranges
self.chip_target_size = chip_target_size
self.chip_target_stride = chip_target_stride
self.use_neg_chip = use_neg_chip
self.max_neg_num_per_im = max_neg_num_per_im
self.max_per_img = max_per_img
self.nms_thresh = nms_thresh
def parse_dataset(self):
if not hasattr(self, "roidbs"):
super(SniperCOCODataSet, self).parse_dataset()
if self.is_trainset:
self._parse_proposals()
self._merge_anno_proposals()
self.ori_roidbs = copy.deepcopy(self.roidbs)
self.init_anno_cropper()
self.roidbs = self.generate_chips_roidbs(self.roidbs, self.is_trainset)
def set_proposals_file(self, file_path):
self.proposals_file = file_path
def init_anno_cropper(self):
logger.info("Init AnnoCropper...")
self.anno_cropper = AnnoCropper(
image_target_sizes=self.image_target_sizes,
valid_box_ratio_ranges=self.valid_box_ratio_ranges,
chip_target_size=self.chip_target_size,
chip_target_stride=self.chip_target_stride,
use_neg_chip=self.use_neg_chip,
max_neg_num_per_im=self.max_neg_num_per_im,
max_per_img=self.max_per_img,
nms_thresh=self.nms_thresh)
def generate_chips_roidbs(self, roidbs, is_trainset):
if is_trainset:
roidbs = self.anno_cropper.crop_anno_records(roidbs)
else:
roidbs = self.anno_cropper.crop_infer_anno_records(roidbs)
return roidbs
def _parse_proposals(self):
if self.proposals_file:
self.proposals = {}
logger.info("Parse proposals file:{}".format(self.proposals_file))
with open(self.proposals_file, 'r') as f:
proposals = json.load(f)
for prop in proposals:
image_id = prop["image_id"]
if image_id not in self.proposals:
self.proposals[image_id] = []
x, y, w, h = prop["bbox"]
self.proposals[image_id].append([x, y, x + w, y + h])
def _merge_anno_proposals(self):
assert self.roidbs
if self.proposals and len(self.proposals.keys()) > 0:
logger.info("merge proposals to annos")
for idx, record in enumerate(self.roidbs):
image_id = int(record["im_id"])
if image_id not in self.proposals.keys():
logger.info("image id: {} has no proposals".format(image_id))
record["proposals"] = np.array(
self.proposals.get(image_id, []), dtype=np.float32)
self.roidbs[idx] = record
def get_ori_roidbs(self):
if not hasattr(self, "ori_roidbs"):
return None
return self.ori_roidbs
def get_roidbs(self):
if not hasattr(self, "roidbs"):
self.parse_dataset()
return self.roidbs
def set_roidbs(self, roidbs):
self.roidbs = roidbs
def check_or_download_dataset(self):
return
def _parse(self):
image_dir = self.image_dir
if not isinstance(image_dir, Sequence):
image_dir = [image_dir]
images = []
for im_dir in image_dir:
if os.path.isdir(im_dir):
im_dir = os.path.join(self.dataset_dir, im_dir)
images.extend(_make_dataset(im_dir))
elif os.path.isfile(im_dir) and _is_valid_file(im_dir):
images.append(im_dir)
return images
def _load_images(self):
images = self._parse()
ct = 0
records = []
for image in images:
assert image != '' and os.path.isfile(image), \
"Image {} not found".format(image)
if self.sample_num > 0 and ct >= self.sample_num:
break
im = cv2.imread(image)
h, w, c = im.shape
rec = {'im_id': np.array([ct]), 'im_file': image, "h": h, "w": w}
self._imid2path[ct] = image
ct += 1
records.append(rec)
assert len(records) > 0, "No image file found"
return records
def get_imid2path(self):
return self._imid2path
def set_images(self, images):
self._imid2path = {}
self.image_dir = images
self.roidbs = self._load_images()
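# Illustrative sketch of the proposals_file consumed by _parse_proposals above
# (values hypothetical). It follows the COCO detection-results layout; only
# "image_id" and "bbox" are read:
#   [{"image_id": 42, "bbox": [100.0, 50.0, 80.0, 60.0], "score": 0.9}, ...]
# Each [x, y, w, h] box is converted to [x1, y1, x2, y2] = [100, 50, 180, 110]
# and attached to the matching record as record["proposals"] before chip cropping.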

@@ -0,0 +1,231 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import xml.etree.ElementTree as ET
from paddlers.models.ppdet.core.workspace import register, serializable
from .dataset import DetDataset
from paddlers.models.ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
@register
@serializable
class VOCDataSet(DetDataset):
"""
Load dataset with PascalVOC format.
Notes:
`anno_path` must point to a file that lists the image file and xml annotation file path pairs.
Args:
dataset_dir (str): root directory for dataset.
image_dir (str): directory for images.
anno_path (str): voc annotation file path.
data_fields (list): key name of data dictionary, at least have 'image'.
sample_num (int): number of samples to load, -1 means all.
label_list (str): if use_default_label is False, will load
mapping between category and class index.
allow_empty (bool): whether to load empty entries. False as default.
empty_ratio (float): the ratio of empty records to the total number of
records; if empty_ratio is out of [0., 1.), the empty records are not
sampled and all of them are used. 1. as default.
"""
def __init__(self,
dataset_dir=None,
image_dir=None,
anno_path=None,
data_fields=['image'],
sample_num=-1,
label_list=None,
allow_empty=False,
empty_ratio=1.):
super(VOCDataSet, self).__init__(
dataset_dir=dataset_dir,
image_dir=image_dir,
anno_path=anno_path,
data_fields=data_fields,
sample_num=sample_num)
self.label_list = label_list
self.allow_empty = allow_empty
self.empty_ratio = empty_ratio
def _sample_empty(self, records, num):
# if empty_ratio is out of [0. ,1.), do not sample the records
if self.empty_ratio < 0. or self.empty_ratio >= 1.:
return records
import random
sample_num = min(
int(num * self.empty_ratio / (1 - self.empty_ratio)), len(records))
records = random.sample(records, sample_num)
return records
def parse_dataset(self, ):
anno_path = os.path.join(self.dataset_dir, self.anno_path)
image_dir = os.path.join(self.dataset_dir, self.image_dir)
# mapping category name to class id
# first_class:0, second_class:1, ...
records = []
empty_records = []
ct = 0
cname2cid = {}
if self.label_list:
label_path = os.path.join(self.dataset_dir, self.label_list)
if not os.path.exists(label_path):
raise ValueError("label_list {} does not exists".format(
label_path))
with open(label_path, 'r') as fr:
label_id = 0
for line in fr.readlines():
cname2cid[line.strip()] = label_id
label_id += 1
else:
cname2cid = pascalvoc_label()
with open(anno_path, 'r') as fr:
while True:
line = fr.readline()
if not line:
break
img_file, xml_file = [os.path.join(image_dir, x) \
for x in line.strip().split()[:2]]
if not os.path.exists(img_file):
logger.warning(
'Illegal image file: {}, and it will be ignored'.
format(img_file))
continue
if not os.path.isfile(xml_file):
logger.warning(
'Illegal xml file: {}, and it will be ignored'.format(
xml_file))
continue
tree = ET.parse(xml_file)
if tree.find('id') is None:
im_id = np.array([ct])
else:
im_id = np.array([int(tree.find('id').text)])
objs = tree.findall('object')
im_w = float(tree.find('size').find('width').text)
im_h = float(tree.find('size').find('height').text)
if im_w < 0 or im_h < 0:
logger.warning(
'Illegal width: {} or height: {} in annotation, '
'and {} will be ignored'.format(im_w, im_h, xml_file))
continue
num_bbox, i = len(objs), 0
gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32)
gt_class = np.zeros((num_bbox, 1), dtype=np.int32)
gt_score = np.zeros((num_bbox, 1), dtype=np.float32)
difficult = np.zeros((num_bbox, 1), dtype=np.int32)
for obj in objs:
cname = obj.find('name').text
# user dataset may not contain difficult field
_difficult = obj.find('difficult')
_difficult = int(
_difficult.text) if _difficult is not None else 0
x1 = float(obj.find('bndbox').find('xmin').text)
y1 = float(obj.find('bndbox').find('ymin').text)
x2 = float(obj.find('bndbox').find('xmax').text)
y2 = float(obj.find('bndbox').find('ymax').text)
x1 = max(0, x1)
y1 = max(0, y1)
x2 = min(im_w - 1, x2)
y2 = min(im_h - 1, y2)
if x2 > x1 and y2 > y1:
gt_bbox[i, :] = [x1, y1, x2, y2]
gt_class[i, 0] = cname2cid[cname]
gt_score[i, 0] = 1.
difficult[i, 0] = _difficult
i += 1
else:
logger.warning(
'Found an invalid bbox in annotations: xml_file: {}'
', x1: {}, y1: {}, x2: {}, y2: {}.'.format(
xml_file, x1, y1, x2, y2))
gt_bbox = gt_bbox[:i, :]
gt_class = gt_class[:i, :]
gt_score = gt_score[:i, :]
difficult = difficult[:i, :]
voc_rec = {
'im_file': img_file,
'im_id': im_id,
'h': im_h,
'w': im_w
} if 'image' in self.data_fields else {}
gt_rec = {
'gt_class': gt_class,
'gt_score': gt_score,
'gt_bbox': gt_bbox,
'difficult': difficult
}
for k, v in gt_rec.items():
if k in self.data_fields:
voc_rec[k] = v
if len(objs) == 0:
empty_records.append(voc_rec)
else:
records.append(voc_rec)
ct += 1
if self.sample_num > 0 and ct >= self.sample_num:
break
assert ct > 0, 'not found any voc record in %s' % (self.anno_path)
logger.debug('{} samples in file {}'.format(ct, anno_path))
if self.allow_empty and len(empty_records) > 0:
empty_records = self._sample_empty(empty_records, len(records))
records += empty_records
self.roidbs, self.cname2cid = records, cname2cid
def get_label_list(self):
return os.path.join(self.dataset_dir, self.label_list)
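# Illustrative arithmetic for _sample_empty above (hypothetical counts): with
# empty_ratio=0.1 and 900 non-empty records, at most
#   int(900 * 0.1 / (1 - 0.1)) = 100
# empty records are kept, so empty images make up about 10% of the merged roidbs.
# With the default empty_ratio=1. (outside [0., 1.)) all empty records are kept,
# and with allow_empty=False they are discarded entirely.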
def pascalvoc_label():
labels_map = {
'aeroplane': 0,
'bicycle': 1,
'bird': 2,
'boat': 3,
'bottle': 4,
'bus': 5,
'car': 6,
'cat': 7,
'chair': 8,
'cow': 9,
'diningtable': 10,
'dog': 11,
'horse': 12,
'motorbike': 13,
'person': 14,
'pottedplant': 15,
'sheep': 16,
'sofa': 17,
'train': 18,
'tvmonitor': 19
}
return labels_map

@@ -0,0 +1,180 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
from paddlers.models.ppdet.core.workspace import register, serializable
from .dataset import DetDataset
from paddlers.models.ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
@register
@serializable
class WIDERFaceDataSet(DetDataset):
"""
Load WiderFace records with 'anno_path'
Args:
dataset_dir (str): root directory for dataset.
image_dir (str): directory for images.
anno_path (str): WiderFace annotation data.
data_fields (list): key name of data dictionary, at least have 'image'.
sample_num (int): number of samples to load, -1 means all.
with_lmk (bool): whether to load face landmark keypoint labels.
"""
def __init__(self,
dataset_dir=None,
image_dir=None,
anno_path=None,
data_fields=['image'],
sample_num=-1,
with_lmk=False):
super(WIDERFaceDataSet, self).__init__(
dataset_dir=dataset_dir,
image_dir=image_dir,
anno_path=anno_path,
data_fields=data_fields,
sample_num=sample_num,
with_lmk=with_lmk)
self.anno_path = anno_path
self.sample_num = sample_num
self.roidbs = None
self.cname2cid = None
self.with_lmk = with_lmk
def parse_dataset(self):
anno_path = os.path.join(self.dataset_dir, self.anno_path)
image_dir = os.path.join(self.dataset_dir, self.image_dir)
txt_file = anno_path
records = []
ct = 0
file_lists = self._load_file_list(txt_file)
cname2cid = widerface_label()
for item in file_lists:
im_fname = item[0]
im_id = np.array([ct])
gt_bbox = np.zeros((len(item) - 1, 4), dtype=np.float32)
gt_class = np.zeros((len(item) - 1, 1), dtype=np.int32)
gt_lmk_labels = np.zeros((len(item) - 1, 10), dtype=np.float32)
lmk_ignore_flag = np.zeros((len(item) - 1, 1), dtype=np.int32)
for index_box in range(len(item)):
if index_box < 1:
continue
gt_bbox[index_box - 1] = item[index_box][0]
if self.with_lmk:
gt_lmk_labels[index_box - 1] = item[index_box][1]
lmk_ignore_flag[index_box - 1] = item[index_box][2]
im_fname = os.path.join(image_dir,
im_fname) if image_dir else im_fname
widerface_rec = {
'im_file': im_fname,
'im_id': im_id,
} if 'image' in self.data_fields else {}
gt_rec = {
'gt_bbox': gt_bbox,
'gt_class': gt_class,
}
for k, v in gt_rec.items():
if k in self.data_fields:
widerface_rec[k] = v
if self.with_lmk:
widerface_rec['gt_keypoint'] = gt_lmk_labels
widerface_rec['keypoint_ignore'] = lmk_ignore_flag
if len(item) != 0:
records.append(widerface_rec)
ct += 1
if self.sample_num > 0 and ct >= self.sample_num:
break
assert len(records) > 0, 'not found any widerface in %s' % (anno_path)
logger.debug('{} samples in file {}'.format(ct, anno_path))
self.roidbs, self.cname2cid = records, cname2cid
def _load_file_list(self, input_txt):
with open(input_txt, 'r') as f_dir:
lines_input_txt = f_dir.readlines()
file_dict = {}
num_class = 0
exts = ['jpg', 'jpeg', 'png', 'bmp']
exts += [ext.upper() for ext in exts]
for i in range(len(lines_input_txt)):
line_txt = lines_input_txt[i].strip('\n\t\r')
split_str = line_txt.split(' ')
if len(split_str) == 1:
img_file_name = os.path.split(split_str[0])[1]
split_txt = img_file_name.split('.')
if len(split_txt) < 2:
continue
elif split_txt[-1] in exts:
if i != 0:
num_class += 1
file_dict[num_class] = [line_txt]
else:
if len(line_txt) <= 6:
continue
result_boxs = []
xmin = float(split_str[0])
ymin = float(split_str[1])
w = float(split_str[2])
h = float(split_str[3])
# Filter out wrong labels
if w < 0 or h < 0:
logger.warning('Illegal box with w: {}, h: {} in '
'img: {}, and it will be ignored'.format(
w, h, file_dict[num_class][0]))
continue
xmin = max(0, xmin)
ymin = max(0, ymin)
xmax = xmin + w
ymax = ymin + h
gt_bbox = [xmin, ymin, xmax, ymax]
result_boxs.append(gt_bbox)
if self.with_lmk:
assert len(split_str) > 18, 'When `with_lmk=True`, the number ' \
'of fields per line in the annotation file should ' \
'exceed 18.'
lmk0_x = float(split_str[5])
lmk0_y = float(split_str[6])
lmk1_x = float(split_str[8])
lmk1_y = float(split_str[9])
lmk2_x = float(split_str[11])
lmk2_y = float(split_str[12])
lmk3_x = float(split_str[14])
lmk3_y = float(split_str[15])
lmk4_x = float(split_str[17])
lmk4_y = float(split_str[18])
lmk_ignore_flag = 0 if lmk0_x == -1 else 1
gt_lmk_label = [
lmk0_x, lmk0_y, lmk1_x, lmk1_y, lmk2_x, lmk2_y, lmk3_x,
lmk3_y, lmk4_x, lmk4_y
]
result_boxs.append(gt_lmk_label)
result_boxs.append(lmk_ignore_flag)
file_dict[num_class].append(result_boxs)
return list(file_dict.values())
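# Illustrative sketch of the annotation text parsed by _load_file_list above
# (paths and numbers hypothetical). Image-path lines and box lines are interleaved,
# and bare count lines or too-short lines are skipped:
#   0--Parade/0_Parade_marchingband_1_849.jpg
#   449 330 122 149 ...
#   361 98 263 339 ...
# Each box line starts with "xmin ymin w h"; when with_lmk=True the five landmark
# (x, y) pairs are read from columns 5/6, 8/9, 11/12, 14/15 and 17/18, and
# lmk_ignore_flag is set to 0 when the first landmark x equals -1 (no valid landmarks).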
def widerface_label():
labels_map = {'face': 0}
return labels_map

@@ -0,0 +1,28 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import operators
from . import batch_operators
from . import keypoint_operators
from . import mot_operators
from .operators import *
from .batch_operators import *
from .keypoint_operators import *
from .mot_operators import *
__all__ = []
__all__ += registered_ops
__all__ += keypoint_operators.__all__
__all__ += mot_operators.__all__

@@ -0,0 +1,270 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# The code is based on:
# https://github.com/open-mmlab/mmdetection/blob/master/mmdet/core/bbox/assigners/atss_assigner.py
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from paddlers.models.ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False, eps=1e-6):
"""Calculate overlap between two set of bboxes.
If ``is_aligned`` is ``False``, then calculate the overlaps between each
bbox of bboxes1 and bboxes2, otherwise the overlaps between each aligned
pair of bboxes1 and bboxes2.
Args:
bboxes1 (Tensor): shape (B, m, 4) in <x1, y1, x2, y2> format or empty.
bboxes2 (Tensor): shape (B, n, 4) in <x1, y1, x2, y2> format or empty.
B indicates the batch dim, in shape (B1, B2, ..., Bn).
If ``is_aligned`` is ``True``, then m and n must be equal.
mode (str): "iou" (intersection over union), "iof" (intersection over
foreground) or "giou" (generalized intersection over union).
is_aligned (bool, optional): If True, then m and n must be equal.
Default False.
eps (float, optional): A value added to the denominator for numerical
stability. Default 1e-6.
Returns:
Tensor: shape (m, n) if ``is_aligned`` is False else shape (m,)
"""
assert mode in ['iou', 'iof', 'giou'], 'Unsupported mode {}'.format(mode)
# Either the boxes are empty or the last dimension of the boxes is 4
assert (bboxes1.shape[-1] == 4 or bboxes1.shape[0] == 0)
assert (bboxes2.shape[-1] == 4 or bboxes2.shape[0] == 0)
# Batch dim must be the same
# Batch dim: (B1, B2, ... Bn)
assert bboxes1.shape[:-2] == bboxes2.shape[:-2]
batch_shape = bboxes1.shape[:-2]
rows = bboxes1.shape[-2] if bboxes1.shape[0] > 0 else 0
cols = bboxes2.shape[-2] if bboxes2.shape[0] > 0 else 0
if is_aligned:
assert rows == cols
if rows * cols == 0:
if is_aligned:
return np.random.random(batch_shape + (rows, ))
else:
return np.random.random(batch_shape + (rows, cols))
area1 = (bboxes1[..., 2] - bboxes1[..., 0]) * (
bboxes1[..., 3] - bboxes1[..., 1])
area2 = (bboxes2[..., 2] - bboxes2[..., 0]) * (
bboxes2[..., 3] - bboxes2[..., 1])
if is_aligned:
lt = np.maximum(bboxes1[..., :2], bboxes2[..., :2]) # [B, rows, 2]
rb = np.minimum(bboxes1[..., 2:], bboxes2[..., 2:]) # [B, rows, 2]
wh = (rb - lt).clip(min=0) # [B, rows, 2]
overlap = wh[..., 0] * wh[..., 1]
if mode in ['iou', 'giou']:
union = area1 + area2 - overlap
else:
union = area1
if mode == 'giou':
enclosed_lt = np.minimum(bboxes1[..., :2], bboxes2[..., :2])
enclosed_rb = np.maximum(bboxes1[..., 2:], bboxes2[..., 2:])
else:
lt = np.maximum(bboxes1[..., :, None, :2],
bboxes2[..., None, :, :2]) # [B, rows, cols, 2]
rb = np.minimum(bboxes1[..., :, None, 2:],
bboxes2[..., None, :, 2:]) # [B, rows, cols, 2]
wh = (rb - lt).clip(min=0) # [B, rows, cols, 2]
overlap = wh[..., 0] * wh[..., 1]
if mode in ['iou', 'giou']:
union = area1[..., None] + area2[..., None, :] - overlap
else:
union = area1[..., None]
if mode == 'giou':
enclosed_lt = np.minimum(bboxes1[..., :, None, :2],
bboxes2[..., None, :, :2])
enclosed_rb = np.maximum(bboxes1[..., :, None, 2:],
bboxes2[..., None, :, 2:])
eps = np.array([eps])
union = np.maximum(union, eps)
ious = overlap / union
if mode in ['iou', 'iof']:
return ious
# calculate gious
enclose_wh = (enclosed_rb - enclosed_lt).clip(min=0)
enclose_area = enclose_wh[..., 0] * enclose_wh[..., 1]
enclose_area = np.maximum(enclose_area, eps)
gious = ious - (enclose_area - union) / enclose_area
return gious
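# Illustrative sketch for bbox_overlaps (operating on numpy arrays; values hypothetical):
#   b1 = np.array([[0., 0., 10., 10.]])
#   b2 = np.array([[0., 0., 10., 10.], [5., 5., 15., 15.]])
#   iou = bbox_overlaps(b1, b2)   # shape (1, 2), roughly [[1.0, 0.1429]]
# The second value is 25 / (100 + 100 - 25); with mode='giou' the enclosing-box
# penalty is subtracted, so it drops below its plain IoU.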
def topk_(input, k, axis=1, largest=True):
x = -input if largest else input
if axis == 0:
row_index = np.arange(input.shape[1 - axis])
topk_index = np.argpartition(x, k, axis=axis)[0:k, :]
topk_data = x[topk_index, row_index]
topk_index_sort = np.argsort(topk_data, axis=axis)
topk_data_sort = topk_data[topk_index_sort, row_index]
topk_index_sort = topk_index[0:k, :][topk_index_sort, row_index]
else:
column_index = np.arange(x.shape[1 - axis])[:, None]
topk_index = np.argpartition(x, k, axis=axis)[:, 0:k]
topk_data = x[column_index, topk_index]
topk_data = -topk_data if largest else topk_data
topk_index_sort = np.argsort(topk_data, axis=axis)
topk_data_sort = topk_data[column_index, topk_index_sort]
topk_index_sort = topk_index[:, 0:k][column_index, topk_index_sort]
return topk_data_sort, topk_index_sort
class ATSSAssigner(object):
"""Assign a corresponding gt bbox or background to each bbox.
Each proposal will be assigned `0` or a positive integer
indicating the ground truth index.
- 0: negative sample, no assigned gt
- positive integer: positive sample, index (1-based) of assigned gt
Args:
topk (int): number of bboxes selected on each level
"""
def __init__(self, topk=9):
self.topk = topk
def __call__(self,
bboxes,
num_level_bboxes,
gt_bboxes,
gt_bboxes_ignore=None,
gt_labels=None):
"""Assign gt to bboxes.
The assignment is done in the following steps:
1. compute iou between all bbox (bbox of all pyramid levels) and gt
2. compute center distance between all bbox and gt
3. on each pyramid level, for each gt, select k bbox whose center
are closest to the gt center, so we select k*l bboxes in total as
candidates for each gt
4. get the corresponding iou for these candidates, and compute the
mean and std; set mean + std as the iou threshold
5. select the candidates whose iou is greater than or equal to
the threshold as positive
6. limit the positive sample's center to lie inside the gt
Args:
bboxes (np.array): Bounding boxes to be assigned, shape(n, 4).
num_level_bboxes (List): num of bboxes in each level
gt_bboxes (np.array): Groundtruth boxes, shape (k, 4).
gt_bboxes_ignore (np.array, optional): Ground truth bboxes that are
labelled as `ignored`, e.g., crowd boxes in COCO.
gt_labels (np.array, optional): Label of gt_bboxes, shape (k, ).
"""
bboxes = bboxes[:, :4]
num_gt, num_bboxes = gt_bboxes.shape[0], bboxes.shape[0]
# assign 0 by default
assigned_gt_inds = np.zeros((num_bboxes, ), dtype=np.int64)
if num_gt == 0 or num_bboxes == 0:
# No ground truth or boxes, return empty assignment
max_overlaps = np.zeros((num_bboxes, ))
if num_gt == 0:
# No truth, assign everything to background
assigned_gt_inds[:] = 0
if not np.any(gt_labels):
assigned_labels = None
else:
assigned_labels = -np.ones((num_bboxes, ), dtype=np.int64)
return assigned_gt_inds, max_overlaps
# compute iou between all bbox and gt
overlaps = bbox_overlaps(bboxes, gt_bboxes)
# compute center distance between all bbox and gt
gt_cx = (gt_bboxes[:, 0] + gt_bboxes[:, 2]) / 2.0
gt_cy = (gt_bboxes[:, 1] + gt_bboxes[:, 3]) / 2.0
gt_points = np.stack((gt_cx, gt_cy), axis=1)
bboxes_cx = (bboxes[:, 0] + bboxes[:, 2]) / 2.0
bboxes_cy = (bboxes[:, 1] + bboxes[:, 3]) / 2.0
bboxes_points = np.stack((bboxes_cx, bboxes_cy), axis=1)
distances = np.sqrt(
np.power((bboxes_points[:, None, :] - gt_points[None, :, :]), 2)
.sum(-1))
# Selecting candidates based on the center distance
candidate_idxs = []
start_idx = 0
for bboxes_per_level in num_level_bboxes:
# on each pyramid level, for each gt,
# select k bbox whose center are closest to the gt center
end_idx = start_idx + bboxes_per_level
distances_per_level = distances[start_idx:end_idx, :]
selectable_k = min(self.topk, bboxes_per_level)
_, topk_idxs_per_level = topk_(
distances_per_level, selectable_k, axis=0, largest=False)
candidate_idxs.append(topk_idxs_per_level + start_idx)
start_idx = end_idx
candidate_idxs = np.concatenate(candidate_idxs, axis=0)
# get the corresponding iou for these candidates, and compute the
# mean and std, set mean + std as the iou threshold
candidate_overlaps = overlaps[candidate_idxs, np.arange(num_gt)]
overlaps_mean_per_gt = candidate_overlaps.mean(0)
overlaps_std_per_gt = candidate_overlaps.std(0)
overlaps_thr_per_gt = overlaps_mean_per_gt + overlaps_std_per_gt
is_pos = candidate_overlaps >= overlaps_thr_per_gt[None, :]
# limit the positive sample's center in gt
for gt_idx in range(num_gt):
candidate_idxs[:, gt_idx] += gt_idx * num_bboxes
ep_bboxes_cx = np.broadcast_to(
bboxes_cx.reshape(1, -1), [num_gt, num_bboxes]).reshape(-1)
ep_bboxes_cy = np.broadcast_to(
bboxes_cy.reshape(1, -1), [num_gt, num_bboxes]).reshape(-1)
candidate_idxs = candidate_idxs.reshape(-1)
# calculate the left, top, right, bottom distance between positive
# bbox center and gt side
l_ = ep_bboxes_cx[candidate_idxs].reshape(-1, num_gt) - gt_bboxes[:, 0]
t_ = ep_bboxes_cy[candidate_idxs].reshape(-1, num_gt) - gt_bboxes[:, 1]
r_ = gt_bboxes[:, 2] - ep_bboxes_cx[candidate_idxs].reshape(-1, num_gt)
b_ = gt_bboxes[:, 3] - ep_bboxes_cy[candidate_idxs].reshape(-1, num_gt)
is_in_gts = np.stack([l_, t_, r_, b_], axis=1).min(axis=1) > 0.01
is_pos = is_pos & is_in_gts
# if an anchor box is assigned to multiple gts,
# the one with the highest IoU will be selected.
overlaps_inf = -np.inf * np.ones_like(overlaps).T.reshape(-1)
index = candidate_idxs.reshape(-1)[is_pos.reshape(-1)]
overlaps_inf[index] = overlaps.T.reshape(-1)[index]
overlaps_inf = overlaps_inf.reshape(num_gt, -1).T
max_overlaps = overlaps_inf.max(axis=1)
argmax_overlaps = overlaps_inf.argmax(axis=1)
assigned_gt_inds[max_overlaps !=
-np.inf] = argmax_overlaps[max_overlaps !=
-np.inf] + 1
return assigned_gt_inds, max_overlaps
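# Illustrative usage sketch (variable names and shapes are placeholders): with
# 100 anchors on level 0 and 25 anchors on level 1,
#   assigner = ATSSAssigner(topk=9)
#   assigned_gt_inds, max_overlaps = assigner(
#       anchors, num_level_bboxes=[100, 25], gt_bboxes=gt, gt_labels=labels)
# assigned_gt_inds[i] == 0 marks anchor i as background, and a positive value j
# matches anchor i to gt_bboxes[j - 1]; per gt, the topk closest anchors on each
# level are candidates, kept when their IoU >= mean + std of the candidate IoUs
# and their center lies inside the gt box.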

File diff suppressed because it is too large

File diff suppressed because it is too large

@@ -0,0 +1,86 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# The code is based on:
# https://github.com/dvlab-research/GridMask/blob/master/detection_grid/maskrcnn_benchmark/data/transforms/grid.py
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
import numpy as np
from PIL import Image
class Gridmask(object):
def __init__(self,
use_h=True,
use_w=True,
rotate=1,
offset=False,
ratio=0.5,
mode=1,
prob=0.7,
upper_iter=360000):
super(Gridmask, self).__init__()
self.use_h = use_h
self.use_w = use_w
self.rotate = rotate
self.offset = offset
self.ratio = ratio
self.mode = mode
self.prob = prob
self.st_prob = prob
self.upper_iter = upper_iter
def __call__(self, x, curr_iter):
self.prob = self.st_prob * min(1, 1.0 * curr_iter / self.upper_iter)
if np.random.rand() > self.prob:
return x
h, w, _ = x.shape
hh = int(1.5 * h)
ww = int(1.5 * w)
d = np.random.randint(2, h)
self.l = min(max(int(d * self.ratio + 0.5), 1), d - 1)
mask = np.ones((hh, ww), np.float32)
st_h = np.random.randint(d)
st_w = np.random.randint(d)
if self.use_h:
for i in range(hh // d):
s = d * i + st_h
t = min(s + self.l, hh)
mask[s:t, :] *= 0
if self.use_w:
for i in range(ww // d):
s = d * i + st_w
t = min(s + self.l, ww)
mask[:, s:t] *= 0
r = np.random.randint(self.rotate)
mask = Image.fromarray(np.uint8(mask))
mask = mask.rotate(r)
mask = np.asarray(mask)
mask = mask[(hh - h) // 2:(hh - h) // 2 + h, (ww - w) // 2:(ww - w) //
2 + w].astype(np.float32)
if self.mode == 1:
mask = 1 - mask
mask = np.expand_dims(mask, axis=-1)
if self.offset:
offset = (2 * (np.random.rand(h, w) - 0.5)).astype(np.float32)
x = (x * mask + offset * (1 - mask)).astype(x.dtype)
else:
x = (x * mask).astype(x.dtype)
return x
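# Illustrative usage sketch (hypothetical values). The effective probability is
# warmed up linearly with the iteration counter, so early iterations are rarely masked:
#   gridmask = Gridmask(ratio=0.5, prob=0.7, upper_iter=360000)
#   img = np.random.randint(0, 255, (608, 608, 3), dtype=np.uint8)
#   out = gridmask(img, curr_iter=180000)   # effective prob = 0.7 * 0.5 = 0.35
# `out` keeps the input dtype; with mode=1 the mask is inverted before it is
# applied, and with offset=True the erased pixels are filled with small random
# noise instead of zeros.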

@@ -0,0 +1,868 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# function:
# operators to process sample,
# eg: decode/resize/crop image
from __future__ import absolute_import
try:
from collections.abc import Sequence
except Exception:
from collections import Sequence
import cv2
import numpy as np
import math
import copy
from ...modeling.keypoint_utils import get_affine_mat_kernel, warp_affine_joints, get_affine_transform, affine_transform, get_warp_matrix
from paddlers.models.ppdet.core.workspace import serializable
from paddlers.models.ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
registered_ops = []
__all__ = [
'RandomAffine',
'KeyPointFlip',
'TagGenerate',
'ToHeatmaps',
'NormalizePermute',
'EvalAffine',
'RandomFlipHalfBodyTransform',
'TopDownAffine',
'ToHeatmapsTopDown',
'ToHeatmapsTopDown_DARK',
'ToHeatmapsTopDown_UDP',
'TopDownEvalAffine',
'AugmentationbyInformantionDropping',
]
def register_keypointop(cls):
return serializable(cls)
@register_keypointop
class KeyPointFlip(object):
"""Get the fliped image by flip_prob. flip the coords also
the left coords and right coords should exchange while flip, for the right keypoint will be left keypoint after image fliped
Args:
flip_permutation (list[17]): the left-right exchange order list corresponding to [0,1,2,...,16]
hmsize (list[2]): output heatmap's shape list of different scale outputs of higherhrnet
flip_prob (float): the ratio whether to flip the image
records(dict): the dict contained the image, mask and coords
Returns:
records(dict): contain the image, mask and coords after tranformed
"""
def __init__(self, flip_permutation, hmsize, flip_prob=0.5):
super(KeyPointFlip, self).__init__()
assert isinstance(flip_permutation, Sequence)
self.flip_permutation = flip_permutation
self.flip_prob = flip_prob
self.hmsize = hmsize
def __call__(self, records):
image = records['image']
kpts_lst = records['joints']
mask_lst = records['mask']
flip = np.random.random() < self.flip_prob
if flip:
image = image[:, ::-1]
for idx, hmsize in enumerate(self.hmsize):
if len(mask_lst) > idx:
mask_lst[idx] = mask_lst[idx][:, ::-1]
if kpts_lst[idx].ndim == 3:
kpts_lst[idx] = kpts_lst[idx][:, self.flip_permutation]
else:
kpts_lst[idx] = kpts_lst[idx][self.flip_permutation]
kpts_lst[idx][..., 0] = hmsize - kpts_lst[idx][..., 0]
kpts_lst[idx] = kpts_lst[idx].astype(np.int64)
kpts_lst[idx][kpts_lst[idx][..., 0] >= hmsize, 2] = 0
kpts_lst[idx][kpts_lst[idx][..., 1] >= hmsize, 2] = 0
kpts_lst[idx][kpts_lst[idx][..., 0] < 0, 2] = 0
kpts_lst[idx][kpts_lst[idx][..., 1] < 0, 2] = 0
records['image'] = image
records['joints'] = kpts_lst
records['mask'] = mask_lst
return records
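# Illustrative config sketch, assuming the 17-point COCO keypoint order listed in
# keypoint_coco.py: left/right joints swap indices while the nose keeps index 0,
# and hmsize lists the heatmap resolutions of the two HigherHRNet output scales.
#   KeyPointFlip(
#       flip_permutation=[0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15],
#       hmsize=[128, 256],
#       flip_prob=0.5)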
@register_keypointop
class RandomAffine(object):
"""apply affine transform to image, mask and coords
to achieve the rotate, scale and shift effect for training image
Args:
max_degree (float): the max abslute rotate degree to apply, transform range is [-max_degree, max_degree]
max_scale (list[2]): the scale range to apply, transform range is [min, max]
max_shift (float): the max abslute shift ratio to apply, transform range is [-max_shift*imagesize, max_shift*imagesize]
hmsize (list[2]): output heatmap's shape list of different scale outputs of higherhrnet
trainsize (int): the standard length used to train, the 'scale_type' of [h,w] will be resize to trainsize for standard
scale_type (str): the length of [h,w] to used for trainsize, chosed between 'short' and 'long'
records(dict): the dict contained the image, mask and coords
Returns:
records(dict): contain the image, mask and coords after tranformed
"""
def __init__(self,
max_degree=30,
scale=[0.75, 1.5],
max_shift=0.2,
hmsize=[128, 256],
trainsize=512,
scale_type='short'):
super(RandomAffine, self).__init__()
self.max_degree = max_degree
self.min_scale = scale[0]
self.max_scale = scale[1]
self.max_shift = max_shift
self.hmsize = hmsize
self.trainsize = trainsize
self.scale_type = scale_type
def _get_affine_matrix(self, center, scale, res, rot=0):
"""Generate transformation matrix."""
h = scale
t = np.zeros((3, 3), dtype=np.float32)
t[0, 0] = float(res[1]) / h
t[1, 1] = float(res[0]) / h
t[0, 2] = res[1] * (-float(center[0]) / h + .5)
t[1, 2] = res[0] * (-float(center[1]) / h + .5)
t[2, 2] = 1
if rot != 0:
rot = -rot # To match direction of rotation from cropping
rot_mat = np.zeros((3, 3), dtype=np.float32)
rot_rad = rot * np.pi / 180
sn, cs = np.sin(rot_rad), np.cos(rot_rad)
rot_mat[0, :2] = [cs, -sn]
rot_mat[1, :2] = [sn, cs]
rot_mat[2, 2] = 1
# Need to rotate around center
t_mat = np.eye(3)
t_mat[0, 2] = -res[1] / 2
t_mat[1, 2] = -res[0] / 2
t_inv = t_mat.copy()
t_inv[:2, 2] *= -1
t = np.dot(t_inv, np.dot(rot_mat, np.dot(t_mat, t)))
return t
def __call__(self, records):
image = records['image']
keypoints = records['joints']
heatmap_mask = records['mask']
degree = (np.random.random() * 2 - 1) * self.max_degree
shape = np.array(image.shape[:2][::-1])
center = np.array(shape / 2)
aug_scale = np.random.random() * (self.max_scale - self.min_scale
) + self.min_scale
if self.scale_type == 'long':
scale = max(shape[0], shape[1]) / 1.0
elif self.scale_type == 'short':
scale = min(shape[0], shape[1]) / 1.0
else:
raise ValueError('Unknown scale type: {}'.format(self.scale_type))
roi_size = aug_scale * scale
dx = int(0)
dy = int(0)
if self.max_shift > 0:
dx = np.random.randint(-self.max_shift * roi_size,
self.max_shift * roi_size)
dy = np.random.randint(-self.max_shift * roi_size,
self.max_shift * roi_size)
center += np.array([dx, dy])
input_size = 2 * center
keypoints[..., :2] *= shape
heatmap_mask *= 255
kpts_lst = []
mask_lst = []
image_affine_mat = self._get_affine_matrix(
center, roi_size, (self.trainsize, self.trainsize), degree)[:2]
image = cv2.warpAffine(
image,
image_affine_mat, (self.trainsize, self.trainsize),
flags=cv2.INTER_LINEAR)
for hmsize in self.hmsize:
kpts = copy.deepcopy(keypoints)
mask_affine_mat = self._get_affine_matrix(
center, roi_size, (hmsize, hmsize), degree)[:2]
if heatmap_mask is not None:
mask = cv2.warpAffine(heatmap_mask, mask_affine_mat,
(hmsize, hmsize))
mask = ((mask / 255) > 0.5).astype(np.float32)
kpts[..., 0:2] = warp_affine_joints(kpts[..., 0:2].copy(),
mask_affine_mat)
kpts[np.trunc(kpts[..., 0]) >= hmsize, 2] = 0
kpts[np.trunc(kpts[..., 1]) >= hmsize, 2] = 0
kpts[np.trunc(kpts[..., 0]) < 0, 2] = 0
kpts[np.trunc(kpts[..., 1]) < 0, 2] = 0
kpts_lst.append(kpts)
mask_lst.append(mask)
records['image'] = image
records['joints'] = kpts_lst
records['mask'] = mask_lst
return records
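# Editor's note (illustrative sketch, not part of the original operator): for
# rot=0, _get_affine_matrix simply rescales by res/scale and re-centers, so the
# chosen center maps to the middle of the output. For example, with
# center=(320, 240), scale=480 and res=(512, 512):
#   t[0, 0] = 512 / 480 ~ 1.067,  t[0, 2] = 512 * (-320 / 480 + 0.5) ~ -85.3
#   so [320, 240, 1] is mapped to approximately (256, 256), the output center.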
@register_keypointop
class EvalAffine(object):
"""apply affine transform to image
resize the short of [h,w] to standard size for eval
Args:
size (int): the standard length used to train, the 'short' of [h,w] will be resize to trainsize for standard
records(dict): the dict contained the image, mask and coords
Returns:
records(dict): contain the image, mask and coords after tranformed
"""
def __init__(self, size, stride=64):
super(EvalAffine, self).__init__()
self.size = size
self.stride = stride
def __call__(self, records):
image = records['image']
mask = records['mask'] if 'mask' in records else None
s = self.size
h, w, _ = image.shape
trans, size_resized = get_affine_mat_kernel(h, w, s, inv=False)
image_resized = cv2.warpAffine(image, trans, size_resized)
if mask is not None:
mask = cv2.warpAffine(mask, trans, size_resized)
records['mask'] = mask
if 'joints' in records:
del records['joints']
records['image'] = image_resized
return records
@register_keypointop
class NormalizePermute(object):
def __init__(self,
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.120, 57.375],
is_scale=True):
super(NormalizePermute, self).__init__()
self.mean = mean
self.std = std
self.is_scale = is_scale
def __call__(self, records):
image = records['image']
image = image.astype(np.float32)
if self.is_scale:
image /= 255.
image = image.transpose((2, 0, 1))
mean = np.array(self.mean, dtype=np.float32)
std = np.array(self.std, dtype=np.float32)
invstd = 1. / std
for v, m, s in zip(image, mean, invstd):
v.__isub__(m).__imul__(s)
records['image'] = image
return records
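# Editor's note (illustrative sketch, not part of the original operator): after
# the HWC->CHW transpose, each channel v is normalized in place as
# v = (v - mean) * (1 / std). With is_scale=False (values kept in the 0-255
# range), a red value of 130 becomes (130 - 123.675) / 58.395 ~ 0.108.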
@register_keypointop
class TagGenerate(object):
"""record gt coords for aeloss to sample coords value in tagmaps
Args:
num_joints (int): the keypoint numbers of dataset to train
num_people (int): maxmum people to support for sample aeloss
records(dict): the dict contained the image, mask and coords
Returns:
records(dict): contain the gt coords used in tagmap
"""
def __init__(self, num_joints, max_people=30):
super(TagGenerate, self).__init__()
self.max_people = max_people
self.num_joints = num_joints
def __call__(self, records):
kpts_lst = records['joints']
kpts = kpts_lst[0]
tagmap = np.zeros(
(self.max_people, self.num_joints, 4), dtype=np.int64)
inds = np.where(kpts[..., 2] > 0)
p, j = inds[0], inds[1]
visible = kpts[inds]
# tagmap is [max_people, num_joints, 4]; the last dim stores [joint id, y, x, valid flag]
tagmap[p, j, 0] = j
tagmap[p, j, 1] = visible[..., 1] # y
tagmap[p, j, 2] = visible[..., 0] # x
tagmap[p, j, 3] = 1
records['tagmap'] = tagmap
del records['joints']
return records
@register_keypointop
class ToHeatmaps(object):
"""to generate the gaussin heatmaps of keypoint for heatmap loss
Args:
num_joints (int): the keypoint numbers of dataset to train
hmsize (list[2]): output heatmap's shape list of different scale outputs of higherhrnet
sigma (float): the std of gaussin kernel genereted
records(dict): the dict contained the image, mask and coords
Returns:
records(dict): contain the heatmaps used to heatmaploss
"""
def __init__(self, num_joints, hmsize, sigma=None):
super(ToHeatmaps, self).__init__()
self.num_joints = num_joints
self.hmsize = np.array(hmsize)
if sigma is None:
sigma = hmsize[0] // 64
self.sigma = sigma
r = 6 * sigma + 3
x = np.arange(0, r, 1, np.float32)
y = x[:, None]
x0, y0 = 3 * sigma + 1, 3 * sigma + 1
self.gaussian = np.exp(-((x - x0)**2 + (y - y0)**2) / (2 * sigma**2))
def __call__(self, records):
kpts_lst = records['joints']
mask_lst = records['mask']
for idx, hmsize in enumerate(self.hmsize):
mask = mask_lst[idx]
kpts = kpts_lst[idx]
heatmaps = np.zeros((self.num_joints, hmsize, hmsize))
inds = np.where(kpts[..., 2] > 0)
visible = kpts[inds].astype(np.int64)[..., :2]
ul = np.round(visible - 3 * self.sigma - 1)
br = np.round(visible + 3 * self.sigma + 2)
sul = np.maximum(0, -ul)
sbr = np.minimum(hmsize, br) - ul
dul = np.clip(ul, 0, hmsize - 1)
dbr = np.clip(br, 0, hmsize)
for i in range(len(visible)):
if visible[i][0] < 0 or visible[i][1] < 0 or visible[i][
0] >= hmsize or visible[i][1] >= hmsize:
continue
dx1, dy1 = dul[i]
dx2, dy2 = dbr[i]
sx1, sy1 = sul[i]
sx2, sy2 = sbr[i]
heatmaps[inds[1][i], dy1:dy2, dx1:dx2] = np.maximum(
self.gaussian[sy1:sy2, sx1:sx2],
heatmaps[inds[1][i], dy1:dy2, dx1:dx2])
records['heatmap_gt{}x'.format(idx + 1)] = heatmaps
records['mask_{}x'.format(idx + 1)] = mask
del records['mask']
return records
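# Editor's note (illustrative sketch, not part of the original operator): the
# gaussian kernel above is precomputed once with side length 6*sigma+3 and its
# peak at (3*sigma+1, 3*sigma+1). For sigma=2 that is a 15x15 kernel; a visible
# joint at (20, 30) on a 128x128 heatmap gets ul=(13, 23) and br=(28, 38), so
# the 15x15 window heatmaps[j, 23:38, 13:28] is max-combined with the kernel.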
@register_keypointop
class RandomFlipHalfBodyTransform(object):
"""apply data augment to image and coords
to achieve the flip, scale, rotate and half body transform effect for training image
Args:
trainsize (list): [w, h], image target size
upper_body_ids (list): The upper body joint ids
flip_pairs (list): The left-right joints exchange order list
pixel_std (int): The pixel std of the scale
scale (float): The scale factor to transform the image
rot (int): The rotation factor to transform the image
num_joints_half_body (int): The joints threshold for the half body transform; it is applied only when more than this many joints are visible
prob_half_body (float): The probability of applying the half body transform
flip (bool): Whether to flip the image
rot_prob (float): The probability of applying the random rotation
Returns:
records (dict): contains the image and coords after transformation
"""
def __init__(self,
trainsize,
upper_body_ids,
flip_pairs,
pixel_std,
scale=0.35,
rot=40,
num_joints_half_body=8,
prob_half_body=0.3,
flip=True,
rot_prob=0.6):
super(RandomFlipHalfBodyTransform, self).__init__()
self.trainsize = trainsize
self.upper_body_ids = upper_body_ids
self.flip_pairs = flip_pairs
self.pixel_std = pixel_std
self.scale = scale
self.rot = rot
self.num_joints_half_body = num_joints_half_body
self.prob_half_body = prob_half_body
self.flip = flip
self.aspect_ratio = trainsize[0] * 1.0 / trainsize[1]
self.rot_prob = rot_prob
def halfbody_transform(self, joints, joints_vis):
upper_joints = []
lower_joints = []
for joint_id in range(joints.shape[0]):
if joints_vis[joint_id][0] > 0:
if joint_id in self.upper_body_ids:
upper_joints.append(joints[joint_id])
else:
lower_joints.append(joints[joint_id])
if np.random.randn() < 0.5 and len(upper_joints) > 2:
selected_joints = upper_joints
else:
selected_joints = lower_joints if len(
lower_joints) > 2 else upper_joints
if len(selected_joints) < 2:
return None, None
selected_joints = np.array(selected_joints, dtype=np.float32)
center = selected_joints.mean(axis=0)[:2]
left_top = np.amin(selected_joints, axis=0)
right_bottom = np.amax(selected_joints, axis=0)
w = right_bottom[0] - left_top[0]
h = right_bottom[1] - left_top[1]
if w > self.aspect_ratio * h:
h = w * 1.0 / self.aspect_ratio
elif w < self.aspect_ratio * h:
w = h * self.aspect_ratio
scale = np.array(
[w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std],
dtype=np.float32)
scale = scale * 1.5
return center, scale
def flip_joints(self, joints, joints_vis, width, matched_parts):
joints[:, 0] = width - joints[:, 0] - 1
for pair in matched_parts:
joints[pair[0], :], joints[pair[1], :] = \
joints[pair[1], :], joints[pair[0], :].copy()
joints_vis[pair[0], :], joints_vis[pair[1], :] = \
joints_vis[pair[1], :], joints_vis[pair[0], :].copy()
return joints * joints_vis, joints_vis
def __call__(self, records):
image = records['image']
joints = records['joints']
joints_vis = records['joints_vis']
c = records['center']
s = records['scale']
r = 0
if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body and
np.random.rand() < self.prob_half_body):
c_half_body, s_half_body = self.halfbody_transform(joints,
joints_vis)
if c_half_body is not None and s_half_body is not None:
c, s = c_half_body, s_half_body
sf = self.scale
rf = self.rot
s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
r = np.clip(np.random.randn() * rf, -rf * 2,
rf * 2) if np.random.random() <= self.rot_prob else 0
if self.flip and np.random.random() <= 0.5:
image = image[:, ::-1, :]
joints, joints_vis = self.flip_joints(
joints, joints_vis, image.shape[1], self.flip_pairs)
c[0] = image.shape[1] - c[0] - 1
records['image'] = image
records['joints'] = joints
records['joints_vis'] = joints_vis
records['center'] = c
records['scale'] = s
records['rotate'] = r
return records
@register_keypointop
class AugmentationbyInformantionDropping(object):
"""AID: Augmentation by Informantion Dropping. Please refer
to https://arxiv.org/abs/2008.07139
Args:
prob_cutout (float): The probability of the Cutout augmentation.
offset_factor (float): Offset factor of cutout center.
num_patch (int): Number of patches to be cutout.
records(dict): the dict contained the image and coords
Returns:
records (dict): contain the image and coords after tranformed
"""
def __init__(self,
trainsize,
prob_cutout=0.0,
offset_factor=0.2,
num_patch=1):
self.prob_cutout = prob_cutout
self.offset_factor = offset_factor
self.num_patch = num_patch
self.trainsize = trainsize
def _cutout(self, img, joints, joints_vis):
height, width, _ = img.shape
img = img.reshape((height * width, -1))
feat_x_int = np.arange(0, width)
feat_y_int = np.arange(0, height)
feat_x_int, feat_y_int = np.meshgrid(feat_x_int, feat_y_int)
feat_x_int = feat_x_int.reshape((-1, ))
feat_y_int = feat_y_int.reshape((-1, ))
for _ in range(self.num_patch):
vis_idx, _ = np.where(joints_vis > 0)
occlusion_joint_id = np.random.choice(vis_idx)
center = joints[occlusion_joint_id, 0:2]
offset = np.random.randn(2) * self.trainsize[
0] * self.offset_factor
center = center + offset
radius = np.random.uniform(0.1, 0.2) * self.trainsize[0]
x_offset = (center[0] - feat_x_int) / radius
y_offset = (center[1] - feat_y_int) / radius
dis = x_offset**2 + y_offset**2
keep_pos = np.where((dis <= 1) & (dis >= 0))[0]
img[keep_pos, :] = 0
img = img.reshape((height, width, -1))
return img
def __call__(self, records):
img = records['image']
joints = records['joints']
joints_vis = records['joints_vis']
if np.random.rand() < self.prob_cutout:
img = self._cutout(img, joints, joints_vis)
records['image'] = img
return records
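# Editor's note (illustrative sketch, not part of the original operator):
# _cutout zeroes a circular patch. The patch center is a visible joint plus a
# gaussian offset with std trainsize[0] * offset_factor, and the radius is
# drawn uniformly from [0.1, 0.2] * trainsize[0]; e.g. with trainsize=[256, 256]
# the offset std is ~51 px and the radius lies in [25.6, 51.2] px.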
@register_keypointop
class TopDownAffine(object):
"""apply affine transform to image and coords
Args:
trainsize (list): [w, h], the standard size used to train
use_udp (bool): whether to use Unbiased Data Processing.
records (dict): the dict containing the image and coords
Returns:
records (dict): contains the image and coords after transformation
"""
def __init__(self, trainsize, use_udp=False):
self.trainsize = trainsize
self.use_udp = use_udp
def __call__(self, records):
image = records['image']
joints = records['joints']
joints_vis = records['joints_vis']
rot = records['rotate'] if "rotate" in records else 0
if self.use_udp:
trans = get_warp_matrix(
rot, records['center'] * 2.0,
[self.trainsize[0] - 1.0, self.trainsize[1] - 1.0],
records['scale'] * 200.0)
image = cv2.warpAffine(
image,
trans, (int(self.trainsize[0]), int(self.trainsize[1])),
flags=cv2.INTER_LINEAR)
joints[:, 0:2] = warp_affine_joints(joints[:, 0:2].copy(), trans)
else:
trans = get_affine_transform(records['center'], records['scale'] *
200, rot, self.trainsize)
image = cv2.warpAffine(
image,
trans, (int(self.trainsize[0]), int(self.trainsize[1])),
flags=cv2.INTER_LINEAR)
for i in range(joints.shape[0]):
if joints_vis[i, 0] > 0.0:
joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)
records['image'] = image
records['joints'] = joints
return records
@register_keypointop
class TopDownEvalAffine(object):
"""apply affine transform to image and coords
Args:
trainsize (list): [w, h], the standard size used to train
use_udp (bool): whether to use Unbiased Data Processing.
records (dict): the dict containing the image and coords
Returns:
records (dict): contains the image and coords after transformation
"""
def __init__(self, trainsize, use_udp=False):
self.trainsize = trainsize
self.use_udp = use_udp
def __call__(self, records):
image = records['image']
rot = 0
imshape = records['im_shape'][::-1]
center = imshape / 2.
scale = imshape
if self.use_udp:
trans = get_warp_matrix(
rot, center * 2.0,
[self.trainsize[0] - 1.0, self.trainsize[1] - 1.0], scale)
image = cv2.warpAffine(
image,
trans, (int(self.trainsize[0]), int(self.trainsize[1])),
flags=cv2.INTER_LINEAR)
else:
trans = get_affine_transform(center, scale, rot, self.trainsize)
image = cv2.warpAffine(
image,
trans, (int(self.trainsize[0]), int(self.trainsize[1])),
flags=cv2.INTER_LINEAR)
records['image'] = image
return records
@register_keypointop
class ToHeatmapsTopDown(object):
"""to generate the gaussin heatmaps of keypoint for heatmap loss
Args:
hmsize (list): [w, h] output heatmap's size
sigma (float): the std of gaussin kernel genereted
records(dict): the dict contained the image and coords
Returns:
records (dict): contain the heatmaps used to heatmaploss
"""
def __init__(self, hmsize, sigma):
super(ToHeatmapsTopDown, self).__init__()
self.hmsize = np.array(hmsize)
self.sigma = sigma
def __call__(self, records):
"""refer to
https://github.com/leoxiaobin/deep-high-resolution-net.pytorch
Copyright (c) Microsoft, under the MIT License.
"""
joints = records['joints']
joints_vis = records['joints_vis']
num_joints = joints.shape[0]
image_size = np.array(
[records['image'].shape[1], records['image'].shape[0]])
target_weight = np.ones((num_joints, 1), dtype=np.float32)
target_weight[:, 0] = joints_vis[:, 0]
target = np.zeros(
(num_joints, self.hmsize[1], self.hmsize[0]), dtype=np.float32)
tmp_size = self.sigma * 3
feat_stride = image_size / self.hmsize
for joint_id in range(num_joints):
mu_x = int(joints[joint_id][0] + 0.5) / feat_stride[0]
mu_y = int(joints[joint_id][1] + 0.5) / feat_stride[1]
# Check that any part of the gaussian is in-bounds
ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)]
br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)]
if ul[0] >= self.hmsize[0] or ul[1] >= self.hmsize[1] or br[
0] < 0 or br[1] < 0:
# If not, set the weight to 0 and skip this joint
target_weight[joint_id] = 0
continue
# Generate gaussian
size = 2 * tmp_size + 1
x = np.arange(0, size, 1, np.float32)
y = x[:, np.newaxis]
x0 = y0 = size // 2
# The gaussian is not normalized, we want the center value to equal 1
g = np.exp(-((x - x0)**2 + (y - y0)**2) / (2 * self.sigma**2))
# Usable gaussian range
g_x = max(0, -ul[0]), min(br[0], self.hmsize[0]) - ul[0]
g_y = max(0, -ul[1]), min(br[1], self.hmsize[1]) - ul[1]
# Image range
img_x = max(0, ul[0]), min(br[0], self.hmsize[0])
img_y = max(0, ul[1]), min(br[1], self.hmsize[1])
v = target_weight[joint_id]
if v > 0.5:
target[joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]] = g[g_y[
0]:g_y[1], g_x[0]:g_x[1]]
records['target'] = target
records['target_weight'] = target_weight
del records['joints'], records['joints_vis']
return records
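# Editor's note (illustrative sketch, not part of the original operator): with a
# 256x256 input and hmsize=[64, 64], feat_stride is 4. A visible joint at
# (100, 60) maps to mu=(25, 15); with sigma=2, tmp_size=6, so ul=[19, 9],
# br=[32, 22] and the 13x13 gaussian (peak value 1 at its center) is written
# into target[joint_id, 9:22, 19:32], peaking at heatmap cell (25, 15).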
@register_keypointop
class ToHeatmapsTopDown_DARK(object):
"""to generate the gaussin heatmaps of keypoint for heatmap loss
Args:
hmsize (list): [w, h] output heatmap's size
sigma (float): the std of gaussin kernel genereted
records(dict): the dict contained the image and coords
Returns:
records (dict): contain the heatmaps used to heatmaploss
"""
def __init__(self, hmsize, sigma):
super(ToHeatmapsTopDown_DARK, self).__init__()
self.hmsize = np.array(hmsize)
self.sigma = sigma
def __call__(self, records):
joints = records['joints']
joints_vis = records['joints_vis']
num_joints = joints.shape[0]
image_size = np.array(
[records['image'].shape[1], records['image'].shape[0]])
target_weight = np.ones((num_joints, 1), dtype=np.float32)
target_weight[:, 0] = joints_vis[:, 0]
target = np.zeros(
(num_joints, self.hmsize[1], self.hmsize[0]), dtype=np.float32)
tmp_size = self.sigma * 3
feat_stride = image_size / self.hmsize
for joint_id in range(num_joints):
mu_x = joints[joint_id][0] / feat_stride[0]
mu_y = joints[joint_id][1] / feat_stride[1]
# Check that any part of the gaussian is in-bounds
ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)]
br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)]
if ul[0] >= self.hmsize[0] or ul[1] >= self.hmsize[1] or br[
0] < 0 or br[1] < 0:
# If not, set the weight to 0 and skip this joint
target_weight[joint_id] = 0
continue
x = np.arange(0, self.hmsize[0], 1, np.float32)
y = np.arange(0, self.hmsize[1], 1, np.float32)
y = y[:, np.newaxis]
v = target_weight[joint_id]
if v > 0.5:
target[joint_id] = np.exp(-(
(x - mu_x)**2 + (y - mu_y)**2) / (2 * self.sigma**2))
records['target'] = target
records['target_weight'] = target_weight
del records['joints'], records['joints_vis']
return records
@register_keypointop
class ToHeatmapsTopDown_UDP(object):
"""This code is based on:
https://github.com/HuangJunJie2017/UDP-Pose/blob/master/deep-high-resolution-net.pytorch/lib/dataset/JointsDataset.py
to generate the gaussian heatmaps of keypoint for heatmap loss.
ref: Huang et al. The Devil is in the Details: Delving into Unbiased Data Processing
for Human Pose Estimation (CVPR 2020).
Args:
hmsize (list): [w, h] output heatmap size
sigma (float): the std of the generated gaussian kernel
records (dict): the dict containing the image and coords
Returns:
records (dict): contains the heatmaps used by the heatmap loss
"""
def __init__(self, hmsize, sigma):
super(ToHeatmapsTopDown_UDP, self).__init__()
self.hmsize = np.array(hmsize)
self.sigma = sigma
def __call__(self, records):
joints = records['joints']
joints_vis = records['joints_vis']
num_joints = joints.shape[0]
image_size = np.array(
[records['image'].shape[1], records['image'].shape[0]])
target_weight = np.ones((num_joints, 1), dtype=np.float32)
target_weight[:, 0] = joints_vis[:, 0]
target = np.zeros(
(num_joints, self.hmsize[1], self.hmsize[0]), dtype=np.float32)
tmp_size = self.sigma * 3
size = 2 * tmp_size + 1
x = np.arange(0, size, 1, np.float32)
y = x[:, None]
feat_stride = (image_size - 1.0) / (self.hmsize - 1.0)
for joint_id in range(num_joints):
mu_x = int(joints[joint_id][0] / feat_stride[0] + 0.5)
mu_y = int(joints[joint_id][1] / feat_stride[1] + 0.5)
# Check that any part of the gaussian is in-bounds
ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)]
br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)]
if ul[0] >= self.hmsize[0] or ul[1] >= self.hmsize[1] or br[
0] < 0 or br[1] < 0:
# If not, set the weight to 0 and skip this joint
target_weight[joint_id] = 0
continue
mu_x_ac = joints[joint_id][0] / feat_stride[0]
mu_y_ac = joints[joint_id][1] / feat_stride[1]
x0 = y0 = size // 2
x0 += mu_x_ac - mu_x
y0 += mu_y_ac - mu_y
g = np.exp(-((x - x0)**2 + (y - y0)**2) / (2 * self.sigma**2))
# Usable gaussian range
g_x = max(0, -ul[0]), min(br[0], self.hmsize[0]) - ul[0]
g_y = max(0, -ul[1]), min(br[1], self.hmsize[1]) - ul[1]
# Image range
img_x = max(0, ul[0]), min(br[0], self.hmsize[0])
img_y = max(0, ul[1]), min(br[1], self.hmsize[1])
v = target_weight[joint_id]
if v > 0.5:
target[joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]] = g[g_y[
0]:g_y[1], g_x[0]:g_x[1]]
records['target'] = target
records['target_weight'] = target_weight
del records['joints'], records['joints_vis']
return records
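# Editor's note (illustrative sketch, not part of the original operator): the
# UDP variant differs from ToHeatmapsTopDown mainly in the stride convention.
# The standard op uses feat_stride = image_size / hmsize (256 / 64 = 4), while
# UDP uses (image_size - 1) / (hmsize - 1) (255 / 63 ~ 4.048), so the last
# input pixel (255, 255) maps exactly onto the last heatmap cell (63, 63).
# UDP also shifts the gaussian peak by the sub-pixel residual
# (mu_x_ac - mu_x, mu_y_ac - mu_y).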

@ -0,0 +1,628 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
try:
from collections.abc import Sequence
except Exception:
from collections import Sequence
from numbers import Integral
import cv2
import copy
import numpy as np
import random
import math
from .operators import BaseOperator, register_op
from .batch_operators import Gt2TTFTarget
from paddlers.models.ppdet.modeling.bbox_utils import bbox_iou_np_expand
from paddlers.models.ppdet.utils.logger import setup_logger
from .op_helper import gaussian_radius
logger = setup_logger(__name__)
__all__ = [
'RGBReverse', 'LetterBoxResize', 'MOTRandomAffine', 'Gt2JDETargetThres',
'Gt2JDETargetMax', 'Gt2FairMOTTarget'
]
@register_op
class RGBReverse(BaseOperator):
"""RGB to BGR, or BGR to RGB, sensitive to MOTRandomAffine
"""
def __init__(self):
super(RGBReverse, self).__init__()
def apply(self, sample, context=None):
im = sample['image']
sample['image'] = np.ascontiguousarray(im[:, :, ::-1])
return sample
@register_op
class LetterBoxResize(BaseOperator):
def __init__(self, target_size):
"""
Resize image to target size, convert normalized xywh to pixel xyxy
format ([x_center, y_center, width, height] -> [x0, y0, x1, y1]).
Args:
target_size (int|list): image target size.
"""
super(LetterBoxResize, self).__init__()
if not isinstance(target_size, (Integral, Sequence)):
raise TypeError(
"Type of target_size is invalid. Must be Integer, List or Tuple, but got {}".
format(type(target_size)))
if isinstance(target_size, Integral):
target_size = [target_size, target_size]
self.target_size = target_size
def apply_image(self, img, height, width, color=(127.5, 127.5, 127.5)):
# letterbox: resize a rectangular image to a padded rectangular
shape = img.shape[:2] # [height, width]
ratio_h = float(height) / shape[0]
ratio_w = float(width) / shape[1]
ratio = min(ratio_h, ratio_w)
new_shape = (round(shape[1] * ratio),
round(shape[0] * ratio)) # [width, height]
padw = (width - new_shape[0]) / 2
padh = (height - new_shape[1]) / 2
top, bottom = round(padh - 0.1), round(padh + 0.1)
left, right = round(padw - 0.1), round(padw + 0.1)
img = cv2.resize(
img, new_shape, interpolation=cv2.INTER_AREA) # resized, no border
img = cv2.copyMakeBorder(
img, top, bottom, left, right, cv2.BORDER_CONSTANT,
value=color) # padded rectangular
return img, ratio, padw, padh
def apply_bbox(self, bbox0, h, w, ratio, padw, padh):
bboxes = bbox0.copy()
bboxes[:, 0] = ratio * w * (bbox0[:, 0] - bbox0[:, 2] / 2) + padw
bboxes[:, 1] = ratio * h * (bbox0[:, 1] - bbox0[:, 3] / 2) + padh
bboxes[:, 2] = ratio * w * (bbox0[:, 0] + bbox0[:, 2] / 2) + padw
bboxes[:, 3] = ratio * h * (bbox0[:, 1] + bbox0[:, 3] / 2) + padh
return bboxes
def apply(self, sample, context=None):
""" Resize the image numpy.
"""
im = sample['image']
h, w = sample['im_shape']
if not isinstance(im, np.ndarray):
raise TypeError("{}: image type is not numpy.".format(self))
if len(im.shape) != 3:
from PIL import UnidentifiedImageError
raise UnidentifiedImageError(
'{}: image is not 3-dimensional.'.format(self))
# apply image
height, width = self.target_size
img, ratio, padw, padh = self.apply_image(
im, height=height, width=width)
sample['image'] = img
new_shape = (round(h * ratio), round(w * ratio))
sample['im_shape'] = np.asarray(new_shape, dtype=np.float32)
sample['scale_factor'] = np.asarray([ratio, ratio], dtype=np.float32)
# apply bbox
if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'], h, w, ratio,
padw, padh)
return sample
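# Editor's note (illustrative sketch, not part of the original operator):
# letterboxing a 540x960 (h, w) image to target_size=(608, 1088) gives
# ratio = min(608/540, 1088/960) ~ 1.126, a resized shape of (608, 1081),
# padw = 3.5 and padh = 0, so 3 px are padded on the left and 4 px on the
# right to reach a final width of 1088.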
@register_op
class MOTRandomAffine(BaseOperator):
"""
Affine transform to image and coords to achieve the rotate, scale and
shift effect for training image.
Args:
degrees (list[2]): the rotate range to apply, transform range is [min, max]
translate (list[2]): the translate range to apply, transform range is [min, max]
scale (list[2]): the scale range to apply, transform range is [min, max]
shear (list[2]): the shear range to apply, transform range is [min, max]
borderValue (list[3]): value used for the constant border when applying
the perspective transformation
reject_outside (bool): reject warped bounding boxes outside of the image
Returns:
records (dict): contains the image and coords after transformation
"""
def __init__(self,
degrees=(-5, 5),
translate=(0.10, 0.10),
scale=(0.50, 1.20),
shear=(-2, 2),
borderValue=(127.5, 127.5, 127.5),
reject_outside=True):
super(MOTRandomAffine, self).__init__()
self.degrees = degrees
self.translate = translate
self.scale = scale
self.shear = shear
self.borderValue = borderValue
self.reject_outside = reject_outside
def apply(self, sample, context=None):
# https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4
border = 0 # width of added border (optional)
img = sample['image']
height, width = img.shape[0], img.shape[1]
# Rotation and Scale
R = np.eye(3)
a = random.random() * (self.degrees[1] - self.degrees[0]
) + self.degrees[0]
s = random.random() * (self.scale[1] - self.scale[0]) + self.scale[0]
R[:2] = cv2.getRotationMatrix2D(
angle=a, center=(width / 2, height / 2), scale=s)
# Translation
T = np.eye(3)
T[0, 2] = (
random.random() * 2 - 1
) * self.translate[0] * height + border # x translation (pixels)
T[1, 2] = (
random.random() * 2 - 1
) * self.translate[1] * width + border # y translation (pixels)
# Shear
S = np.eye(3)
S[0, 1] = math.tan((random.random() *
(self.shear[1] - self.shear[0]) + self.shear[0]) *
math.pi / 180) # x shear (deg)
S[1, 0] = math.tan((random.random() *
(self.shear[1] - self.shear[0]) + self.shear[0]) *
math.pi / 180) # y shear (deg)
M = S @ T @ R  # Combined rotation matrix. ORDER IS IMPORTANT HERE!!
imw = cv2.warpPerspective(
img,
M,
dsize=(width, height),
flags=cv2.INTER_LINEAR,
borderValue=self.borderValue) # BGR order borderValue
if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
targets = sample['gt_bbox']
n = targets.shape[0]
points = targets.copy()
area0 = (points[:, 2] - points[:, 0]) * (
points[:, 3] - points[:, 1])
# warp points
xy = np.ones((n * 4, 3))
xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(
n * 4, 2) # x1y1, x2y2, x1y2, x2y1
xy = (xy @ M.T)[:, :2].reshape(n, 8)
# create new boxes
x = xy[:, [0, 2, 4, 6]]
y = xy[:, [1, 3, 5, 7]]
xy = np.concatenate(
(x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
# apply angle-based reduction
radians = a * math.pi / 180
reduction = max(abs(math.sin(radians)),
abs(math.cos(radians)))**0.5
x = (xy[:, 2] + xy[:, 0]) / 2
y = (xy[:, 3] + xy[:, 1]) / 2
w = (xy[:, 2] - xy[:, 0]) * reduction
h = (xy[:, 3] - xy[:, 1]) * reduction
xy = np.concatenate(
(x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T
# reject warped points outside of image
if self.reject_outside:
np.clip(xy[:, 0], 0, width, out=xy[:, 0])
np.clip(xy[:, 2], 0, width, out=xy[:, 2])
np.clip(xy[:, 1], 0, height, out=xy[:, 1])
np.clip(xy[:, 3], 0, height, out=xy[:, 3])
w = xy[:, 2] - xy[:, 0]
h = xy[:, 3] - xy[:, 1]
area = w * h
ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16))
i = (w > 4) & (h > 4) & (area / (area0 + 1e-16) > 0.1) & (ar < 10)
if sum(i) > 0:
sample['gt_bbox'] = xy[i].astype(sample['gt_bbox'].dtype)
sample['gt_class'] = sample['gt_class'][i]
if 'difficult' in sample:
sample['difficult'] = sample['difficult'][i]
if 'gt_ide' in sample:
sample['gt_ide'] = sample['gt_ide'][i]
if 'is_crowd' in sample:
sample['is_crowd'] = sample['is_crowd'][i]
sample['image'] = imw
return sample
else:
return sample
@register_op
class Gt2JDETargetThres(BaseOperator):
__shared__ = ['num_classes']
"""
Generate JDE targets from ground truth data when training
Args:
anchors (list): anchors of JDE model
anchor_masks (list): anchor_masks of JDE model
downsample_ratios (list): downsample ratios of JDE model
ide_thresh (float): IoU threshold above which an anchor is assigned an identity target
fg_thresh (float): IoU threshold above which an anchor is treated as foreground
bg_thresh (float): IoU threshold below which an anchor is treated as background
num_classes (int): number of classes
"""
def __init__(self,
anchors,
anchor_masks,
downsample_ratios,
ide_thresh=0.5,
fg_thresh=0.5,
bg_thresh=0.4,
num_classes=1):
super(Gt2JDETargetThres, self).__init__()
self.anchors = anchors
self.anchor_masks = anchor_masks
self.downsample_ratios = downsample_ratios
self.ide_thresh = ide_thresh
self.fg_thresh = fg_thresh
self.bg_thresh = bg_thresh
self.num_classes = num_classes
def generate_anchor(self, nGh, nGw, anchor_hw):
nA = len(anchor_hw)
yy, xx = np.meshgrid(np.arange(nGh), np.arange(nGw))
mesh = np.stack([xx.T, yy.T], axis=0) # [2, nGh, nGw]
mesh = np.repeat(mesh[None, :], nA, axis=0) # [nA, 2, nGh, nGw]
anchor_offset_mesh = anchor_hw[:, :, None][:, :, :, None]
anchor_offset_mesh = np.repeat(anchor_offset_mesh, nGh, axis=-2)
anchor_offset_mesh = np.repeat(anchor_offset_mesh, nGw, axis=-1)
anchor_mesh = np.concatenate(
[mesh, anchor_offset_mesh], axis=1) # [nA, 4, nGh, nGw]
return anchor_mesh
def encode_delta(self, gt_box_list, fg_anchor_list):
px, py, pw, ph = fg_anchor_list[:, 0], fg_anchor_list[:,1], \
fg_anchor_list[:, 2], fg_anchor_list[:,3]
gx, gy, gw, gh = gt_box_list[:, 0], gt_box_list[:, 1], \
gt_box_list[:, 2], gt_box_list[:, 3]
dx = (gx - px) / pw
dy = (gy - py) / ph
dw = np.log(gw / pw)
dh = np.log(gh / ph)
return np.stack([dx, dy, dw, dh], axis=1)
def pad_box(self, sample, num_max):
assert 'gt_bbox' in sample
bbox = sample['gt_bbox']
gt_num = len(bbox)
pad_bbox = np.zeros((num_max, 4), dtype=np.float32)
if gt_num > 0:
pad_bbox[:gt_num, :] = bbox[:gt_num, :]
sample['gt_bbox'] = pad_bbox
if 'gt_score' in sample:
pad_score = np.zeros((num_max, ), dtype=np.float32)
if gt_num > 0:
pad_score[:gt_num] = sample['gt_score'][:gt_num, 0]
sample['gt_score'] = pad_score
if 'difficult' in sample:
pad_diff = np.zeros((num_max, ), dtype=np.int32)
if gt_num > 0:
pad_diff[:gt_num] = sample['difficult'][:gt_num, 0]
sample['difficult'] = pad_diff
if 'is_crowd' in sample:
pad_crowd = np.zeros((num_max, ), dtype=np.int32)
if gt_num > 0:
pad_crowd[:gt_num] = sample['is_crowd'][:gt_num, 0]
sample['is_crowd'] = pad_crowd
if 'gt_ide' in sample:
pad_ide = np.zeros((num_max, ), dtype=np.int32)
if gt_num > 0:
pad_ide[:gt_num] = sample['gt_ide'][:gt_num, 0]
sample['gt_ide'] = pad_ide
return sample
def __call__(self, samples, context=None):
assert len(self.anchor_masks) == len(self.downsample_ratios), \
"'anchor_masks' and 'downsample_ratios' should have the same length."
h, w = samples[0]['image'].shape[1:3]
num_max = 0
for sample in samples:
num_max = max(num_max, len(sample['gt_bbox']))
for sample in samples:
gt_bbox = sample['gt_bbox']
gt_ide = sample['gt_ide']
for i, (anchor_hw, downsample_ratio
) in enumerate(zip(self.anchors, self.downsample_ratios)):
anchor_hw = np.array(
anchor_hw, dtype=np.float32) / downsample_ratio
nA = len(anchor_hw)
nGh, nGw = int(h / downsample_ratio), int(w / downsample_ratio)
tbox = np.zeros((nA, nGh, nGw, 4), dtype=np.float32)
tconf = np.zeros((nA, nGh, nGw), dtype=np.float32)
tid = -np.ones((nA, nGh, nGw, 1), dtype=np.float32)
gxy, gwh = gt_bbox[:, 0:2].copy(), gt_bbox[:, 2:4].copy()
gxy[:, 0] = gxy[:, 0] * nGw
gxy[:, 1] = gxy[:, 1] * nGh
gwh[:, 0] = gwh[:, 0] * nGw
gwh[:, 1] = gwh[:, 1] * nGh
gxy[:, 0] = np.clip(gxy[:, 0], 0, nGw - 1)
gxy[:, 1] = np.clip(gxy[:, 1], 0, nGh - 1)
tboxes = np.concatenate([gxy, gwh], axis=1)
anchor_mesh = self.generate_anchor(nGh, nGw, anchor_hw)
anchor_list = np.transpose(anchor_mesh,
(0, 2, 3, 1)).reshape(-1, 4)
iou_pdist = bbox_iou_np_expand(
anchor_list, tboxes, x1y1x2y2=False)
iou_max = np.max(iou_pdist, axis=1)
max_gt_index = np.argmax(iou_pdist, axis=1)
iou_map = iou_max.reshape(nA, nGh, nGw)
gt_index_map = max_gt_index.reshape(nA, nGh, nGw)
id_index = iou_map > self.ide_thresh
fg_index = iou_map > self.fg_thresh
bg_index = iou_map < self.bg_thresh
ign_index = (iou_map < self.fg_thresh) * (
iou_map > self.bg_thresh)
tconf[fg_index] = 1
tconf[bg_index] = 0
tconf[ign_index] = -1
gt_index = gt_index_map[fg_index]
gt_box_list = tboxes[gt_index]
gt_id_list = gt_ide[gt_index_map[id_index]]
if np.sum(fg_index) > 0:
tid[id_index] = gt_id_list
fg_anchor_list = anchor_list.reshape(nA, nGh, nGw,
4)[fg_index]
delta_target = self.encode_delta(gt_box_list,
fg_anchor_list)
tbox[fg_index] = delta_target
sample['tbox{}'.format(i)] = tbox
sample['tconf{}'.format(i)] = tconf
sample['tide{}'.format(i)] = tid
sample.pop('gt_class')
sample = self.pad_box(sample, num_max)
return samples
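# Editor's note (illustrative sketch, not part of the original operator):
# encode_delta produces the standard box regression targets. For an anchor
# (px, py, pw, ph) = (10, 10, 4, 8) and a ground-truth box
# (gx, gy, gw, gh) = (12, 9, 6, 4):
#   dx = (12 - 10) / 4 = 0.5,   dy = (9 - 10) / 8 = -0.125,
#   dw = log(6 / 4) ~ 0.405,    dh = log(4 / 8) ~ -0.693.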
@register_op
class Gt2JDETargetMax(BaseOperator):
__shared__ = ['num_classes']
"""
Generate JDE targets from ground truth data when evaluating
Args:
anchors (list): anchors of JDE model
anchor_masks (list): anchor_masks of JDE model
downsample_ratios (list): downsample ratios of JDE model
max_iou_thresh (float): iou thresh for high quality anchor
num_classes (int): number of classes
"""
def __init__(self,
anchors,
anchor_masks,
downsample_ratios,
max_iou_thresh=0.60,
num_classes=1):
super(Gt2JDETargetMax, self).__init__()
self.anchors = anchors
self.anchor_masks = anchor_masks
self.downsample_ratios = downsample_ratios
self.max_iou_thresh = max_iou_thresh
self.num_classes = num_classes
def __call__(self, samples, context=None):
assert len(self.anchor_masks) == len(self.downsample_ratios), \
"'anchor_masks' and 'downsample_ratios' should have the same length."
h, w = samples[0]['image'].shape[1:3]
for sample in samples:
gt_bbox = sample['gt_bbox']
gt_ide = sample['gt_ide']
for i, (anchor_hw, downsample_ratio
) in enumerate(zip(self.anchors, self.downsample_ratios)):
anchor_hw = np.array(
anchor_hw, dtype=np.float32) / downsample_ratio
nA = len(anchor_hw)
nGh, nGw = int(h / downsample_ratio), int(w / downsample_ratio)
tbox = np.zeros((nA, nGh, nGw, 4), dtype=np.float32)
tconf = np.zeros((nA, nGh, nGw), dtype=np.float32)
tid = -np.ones((nA, nGh, nGw, 1), dtype=np.float32)
gxy, gwh = gt_bbox[:, 0:2].copy(), gt_bbox[:, 2:4].copy()
gxy[:, 0] = gxy[:, 0] * nGw
gxy[:, 1] = gxy[:, 1] * nGh
gwh[:, 0] = gwh[:, 0] * nGw
gwh[:, 1] = gwh[:, 1] * nGh
gi = np.clip(gxy[:, 0], 0, nGw - 1).astype(int)
gj = np.clip(gxy[:, 1], 0, nGh - 1).astype(int)
# iou of targets-anchors (using wh only)
box1 = gwh
box2 = anchor_hw[:, None, :]
inter_area = np.minimum(box1, box2).prod(2)
iou = inter_area / (
box1.prod(1) + box2.prod(2) - inter_area + 1e-16)
# Select best iou_pred and anchor
iou_best = iou.max(0) # best anchor [0-2] for each target
a = np.argmax(iou, axis=0)
# Select best unique target-anchor combinations
iou_order = np.argsort(-iou_best) # best to worst
# Unique anchor selection
u = np.stack((gi, gj, a), 0)[:, iou_order]
_, first_unique = np.unique(u, axis=1, return_index=True)
mask = iou_order[first_unique]
# best anchor must share significant commonality (iou) with target
# TODO: examine arbitrary threshold
idx = mask[iou_best[mask] > self.max_iou_thresh]
if len(idx) > 0:
a_i, gj_i, gi_i = a[idx], gj[idx], gi[idx]
t_box = gt_bbox[idx]
t_id = gt_ide[idx]
if len(t_box.shape) == 1:
t_box = t_box.reshape(1, 4)
gxy, gwh = t_box[:, 0:2].copy(), t_box[:, 2:4].copy()
gxy[:, 0] = gxy[:, 0] * nGw
gxy[:, 1] = gxy[:, 1] * nGh
gwh[:, 0] = gwh[:, 0] * nGw
gwh[:, 1] = gwh[:, 1] * nGh
# XY coordinates
tbox[:, :, :, 0:2][a_i, gj_i, gi_i] = gxy - gxy.astype(int)
# Width and height in yolo method
tbox[:, :, :, 2:4][a_i, gj_i, gi_i] = np.log(
gwh / anchor_hw[a_i])
tconf[a_i, gj_i, gi_i] = 1
tid[a_i, gj_i, gi_i] = t_id
sample['tbox{}'.format(i)] = tbox
sample['tconf{}'.format(i)] = tconf
sample['tide{}'.format(i)] = tid
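# Editor's note (illustrative sketch, not part of the original operator): the
# "wh only" IoU above ignores box centers. For a ground-truth box of size
# (4, 6) and an anchor of size (6, 3): inter = min(4, 6) * min(6, 3) = 12 and
# union = 24 + 18 - 12 = 30, so IoU = 0.4; a target is assigned only when the
# best anchor IoU exceeds max_iou_thresh.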
class Gt2FairMOTTarget(Gt2TTFTarget):
__shared__ = ['num_classes']
"""
Generate FairMOT targets by ground truth data.
The differences between Gt2FairMOTTarget and Gt2TTFTarget are:
1. the gaussian kernel radius used to generate a heatmap.
2. the targets needed during training.
Args:
num_classes (int): the number of classes.
down_ratio (int): the downsampling ratio from images to the heatmap, 4 by default.
max_objs (int): the maximum number of ground truth objects in an image, 500 by default.
"""
def __init__(self, num_classes=1, down_ratio=4, max_objs=500):
super(Gt2TTFTarget, self).__init__()
self.down_ratio = down_ratio
self.num_classes = num_classes
self.max_objs = max_objs
def __call__(self, samples, context=None):
for b_id, sample in enumerate(samples):
output_h = sample['image'].shape[1] // self.down_ratio
output_w = sample['image'].shape[2] // self.down_ratio
heatmap = np.zeros(
(self.num_classes, output_h, output_w), dtype='float32')
bbox_size = np.zeros((self.max_objs, 4), dtype=np.float32)
center_offset = np.zeros((self.max_objs, 2), dtype=np.float32)
index = np.zeros((self.max_objs, ), dtype=np.int64)
index_mask = np.zeros((self.max_objs, ), dtype=np.int32)
reid = np.zeros((self.max_objs, ), dtype=np.int64)
bbox_xys = np.zeros((self.max_objs, 4), dtype=np.float32)
if self.num_classes > 1:
# each category corresponds to a set of track ids
cls_tr_ids = np.zeros(
(self.num_classes, output_h, output_w), dtype=np.int64)
cls_id_map = np.full((output_h, output_w), -1, dtype=np.int64)
gt_bbox = sample['gt_bbox']
gt_class = sample['gt_class']
gt_ide = sample['gt_ide']
for k in range(len(gt_bbox)):
cls_id = gt_class[k][0]
bbox = gt_bbox[k]
ide = gt_ide[k][0]
bbox[[0, 2]] = bbox[[0, 2]] * output_w
bbox[[1, 3]] = bbox[[1, 3]] * output_h
bbox_amodal = copy.deepcopy(bbox)
bbox_amodal[0] = bbox_amodal[0] - bbox_amodal[2] / 2.
bbox_amodal[1] = bbox_amodal[1] - bbox_amodal[3] / 2.
bbox_amodal[2] = bbox_amodal[0] + bbox_amodal[2]
bbox_amodal[3] = bbox_amodal[1] + bbox_amodal[3]
bbox[0] = np.clip(bbox[0], 0, output_w - 1)
bbox[1] = np.clip(bbox[1], 0, output_h - 1)
h = bbox[3]
w = bbox[2]
bbox_xy = copy.deepcopy(bbox)
bbox_xy[0] = bbox_xy[0] - bbox_xy[2] / 2
bbox_xy[1] = bbox_xy[1] - bbox_xy[3] / 2
bbox_xy[2] = bbox_xy[0] + bbox_xy[2]
bbox_xy[3] = bbox_xy[1] + bbox_xy[3]
if h > 0 and w > 0:
radius = gaussian_radius((math.ceil(h), math.ceil(w)), 0.7)
radius = max(0, int(radius))
ct = np.array([bbox[0], bbox[1]], dtype=np.float32)
ct_int = ct.astype(np.int32)
self.draw_truncate_gaussian(heatmap[cls_id], ct_int,
radius, radius)
bbox_size[k] = ct[0] - bbox_amodal[0], ct[1] - bbox_amodal[1], \
bbox_amodal[2] - ct[0], bbox_amodal[3] - ct[1]
index[k] = ct_int[1] * output_w + ct_int[0]
center_offset[k] = ct - ct_int
index_mask[k] = 1
reid[k] = ide
bbox_xys[k] = bbox_xy
if self.num_classes > 1:
cls_id_map[ct_int[1], ct_int[0]] = cls_id
cls_tr_ids[cls_id][ct_int[1]][ct_int[0]] = ide - 1
# track id start from 0
sample['heatmap'] = heatmap
sample['index'] = index
sample['offset'] = center_offset
sample['size'] = bbox_size
sample['index_mask'] = index_mask
sample['reid'] = reid
if self.num_classes > 1:
sample['cls_id_map'] = cls_id_map
sample['cls_tr_ids'] = cls_tr_ids
sample['bbox_xys'] = bbox_xys
sample.pop('is_crowd', None)
sample.pop('difficult', None)
sample.pop('gt_class', None)
sample.pop('gt_bbox', None)
sample.pop('gt_score', None)
sample.pop('gt_ide', None)
return samples

@ -0,0 +1,498 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# this file contains helper methods for BBOX processing
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import random
import math
import cv2
def meet_emit_constraint(src_bbox, sample_bbox):
center_x = (src_bbox[2] + src_bbox[0]) / 2
center_y = (src_bbox[3] + src_bbox[1]) / 2
if center_x >= sample_bbox[0] and \
center_x <= sample_bbox[2] and \
center_y >= sample_bbox[1] and \
center_y <= sample_bbox[3]:
return True
return False
def clip_bbox(src_bbox):
src_bbox[0] = max(min(src_bbox[0], 1.0), 0.0)
src_bbox[1] = max(min(src_bbox[1], 1.0), 0.0)
src_bbox[2] = max(min(src_bbox[2], 1.0), 0.0)
src_bbox[3] = max(min(src_bbox[3], 1.0), 0.0)
return src_bbox
def bbox_area(src_bbox):
if src_bbox[2] < src_bbox[0] or src_bbox[3] < src_bbox[1]:
return 0.
else:
width = src_bbox[2] - src_bbox[0]
height = src_bbox[3] - src_bbox[1]
return width * height
def is_overlap(object_bbox, sample_bbox):
if object_bbox[0] >= sample_bbox[2] or \
object_bbox[2] <= sample_bbox[0] or \
object_bbox[1] >= sample_bbox[3] or \
object_bbox[3] <= sample_bbox[1]:
return False
else:
return True
def filter_and_process(sample_bbox,
bboxes,
labels,
scores=None,
keypoints=None):
new_bboxes = []
new_labels = []
new_scores = []
new_keypoints = []
new_kp_ignore = []
for i in range(len(bboxes)):
new_bbox = [0, 0, 0, 0]
obj_bbox = [bboxes[i][0], bboxes[i][1], bboxes[i][2], bboxes[i][3]]
if not meet_emit_constraint(obj_bbox, sample_bbox):
continue
if not is_overlap(obj_bbox, sample_bbox):
continue
sample_width = sample_bbox[2] - sample_bbox[0]
sample_height = sample_bbox[3] - sample_bbox[1]
new_bbox[0] = (obj_bbox[0] - sample_bbox[0]) / sample_width
new_bbox[1] = (obj_bbox[1] - sample_bbox[1]) / sample_height
new_bbox[2] = (obj_bbox[2] - sample_bbox[0]) / sample_width
new_bbox[3] = (obj_bbox[3] - sample_bbox[1]) / sample_height
new_bbox = clip_bbox(new_bbox)
if bbox_area(new_bbox) > 0:
new_bboxes.append(new_bbox)
new_labels.append([labels[i][0]])
if scores is not None:
new_scores.append([scores[i][0]])
if keypoints is not None:
sample_keypoint = keypoints[0][i]
for j in range(len(sample_keypoint)):
kp_len = sample_height if j % 2 else sample_width
sample_coord = sample_bbox[1] if j % 2 else sample_bbox[0]
sample_keypoint[j] = (
sample_keypoint[j] - sample_coord) / kp_len
sample_keypoint[j] = max(min(sample_keypoint[j], 1.0), 0.0)
new_keypoints.append(sample_keypoint)
new_kp_ignore.append(keypoints[1][i])
bboxes = np.array(new_bboxes)
labels = np.array(new_labels)
scores = np.array(new_scores)
if keypoints is not None:
keypoints = np.array(new_keypoints)
new_kp_ignore = np.array(new_kp_ignore)
return bboxes, labels, scores, (keypoints, new_kp_ignore)
return bboxes, labels, scores
def bbox_area_sampling(bboxes, labels, scores, target_size, min_size):
new_bboxes = []
new_labels = []
new_scores = []
for i, bbox in enumerate(bboxes):
w = float((bbox[2] - bbox[0]) * target_size)
h = float((bbox[3] - bbox[1]) * target_size)
if w * h < float(min_size * min_size):
continue
else:
new_bboxes.append(bbox)
new_labels.append(labels[i])
if scores is not None and scores.size != 0:
new_scores.append(scores[i])
bboxes = np.array(new_bboxes)
labels = np.array(new_labels)
scores = np.array(new_scores)
return bboxes, labels, scores
def generate_sample_bbox(sampler):
scale = np.random.uniform(sampler[2], sampler[3])
aspect_ratio = np.random.uniform(sampler[4], sampler[5])
aspect_ratio = max(aspect_ratio, (scale**2.0))
aspect_ratio = min(aspect_ratio, 1 / (scale**2.0))
bbox_width = scale * (aspect_ratio**0.5)
bbox_height = scale / (aspect_ratio**0.5)
xmin_bound = 1 - bbox_width
ymin_bound = 1 - bbox_height
xmin = np.random.uniform(0, xmin_bound)
ymin = np.random.uniform(0, ymin_bound)
xmax = xmin + bbox_width
ymax = ymin + bbox_height
sampled_bbox = [xmin, ymin, xmax, ymax]
return sampled_bbox
def generate_sample_bbox_square(sampler, image_width, image_height):
scale = np.random.uniform(sampler[2], sampler[3])
aspect_ratio = np.random.uniform(sampler[4], sampler[5])
aspect_ratio = max(aspect_ratio, (scale**2.0))
aspect_ratio = min(aspect_ratio, 1 / (scale**2.0))
bbox_width = scale * (aspect_ratio**0.5)
bbox_height = scale / (aspect_ratio**0.5)
if image_height < image_width:
bbox_width = bbox_height * image_height / image_width
else:
bbox_height = bbox_width * image_width / image_height
xmin_bound = 1 - bbox_width
ymin_bound = 1 - bbox_height
xmin = np.random.uniform(0, xmin_bound)
ymin = np.random.uniform(0, ymin_bound)
xmax = xmin + bbox_width
ymax = ymin + bbox_height
sampled_bbox = [xmin, ymin, xmax, ymax]
return sampled_bbox
def data_anchor_sampling(bbox_labels, image_width, image_height, scale_array,
resize_width):
num_gt = len(bbox_labels)
# np.random.randint range: [low, high)
rand_idx = np.random.randint(0, num_gt) if num_gt != 0 else 0
if num_gt != 0:
norm_xmin = bbox_labels[rand_idx][0]
norm_ymin = bbox_labels[rand_idx][1]
norm_xmax = bbox_labels[rand_idx][2]
norm_ymax = bbox_labels[rand_idx][3]
xmin = norm_xmin * image_width
ymin = norm_ymin * image_height
wid = image_width * (norm_xmax - norm_xmin)
hei = image_height * (norm_ymax - norm_ymin)
range_size = 0
area = wid * hei
for scale_ind in range(0, len(scale_array) - 1):
if area > scale_array[scale_ind] ** 2 and area < \
scale_array[scale_ind + 1] ** 2:
range_size = scale_ind + 1
break
if area > scale_array[len(scale_array) - 2]**2:
range_size = len(scale_array) - 2
scale_choose = 0.0
if range_size == 0:
rand_idx_size = 0
else:
# np.random.randint range: [low, high)
rng_rand_size = np.random.randint(0, range_size + 1)
rand_idx_size = rng_rand_size % (range_size + 1)
if rand_idx_size == range_size:
min_resize_val = scale_array[rand_idx_size] / 2.0
max_resize_val = min(2.0 * scale_array[rand_idx_size],
2 * math.sqrt(wid * hei))
scale_choose = random.uniform(min_resize_val, max_resize_val)
else:
min_resize_val = scale_array[rand_idx_size] / 2.0
max_resize_val = 2.0 * scale_array[rand_idx_size]
scale_choose = random.uniform(min_resize_val, max_resize_val)
sample_bbox_size = wid * resize_width / scale_choose
w_off_orig = 0.0
h_off_orig = 0.0
if sample_bbox_size < max(image_height, image_width):
if wid <= sample_bbox_size:
w_off_orig = np.random.uniform(xmin + wid - sample_bbox_size,
xmin)
else:
w_off_orig = np.random.uniform(xmin,
xmin + wid - sample_bbox_size)
if hei <= sample_bbox_size:
h_off_orig = np.random.uniform(ymin + hei - sample_bbox_size,
ymin)
else:
h_off_orig = np.random.uniform(ymin,
ymin + hei - sample_bbox_size)
else:
w_off_orig = np.random.uniform(image_width - sample_bbox_size, 0.0)
h_off_orig = np.random.uniform(image_height - sample_bbox_size,
0.0)
w_off_orig = math.floor(w_off_orig)
h_off_orig = math.floor(h_off_orig)
# Figure out top left coordinates.
w_off = float(w_off_orig / image_width)
h_off = float(h_off_orig / image_height)
sampled_bbox = [
w_off, h_off, w_off + float(sample_bbox_size / image_width),
h_off + float(sample_bbox_size / image_height)
]
return sampled_bbox
else:
return 0
def jaccard_overlap(sample_bbox, object_bbox):
if sample_bbox[0] >= object_bbox[2] or \
sample_bbox[2] <= object_bbox[0] or \
sample_bbox[1] >= object_bbox[3] or \
sample_bbox[3] <= object_bbox[1]:
return 0
intersect_xmin = max(sample_bbox[0], object_bbox[0])
intersect_ymin = max(sample_bbox[1], object_bbox[1])
intersect_xmax = min(sample_bbox[2], object_bbox[2])
intersect_ymax = min(sample_bbox[3], object_bbox[3])
intersect_size = (intersect_xmax - intersect_xmin) * (
intersect_ymax - intersect_ymin)
sample_bbox_size = bbox_area(sample_bbox)
object_bbox_size = bbox_area(object_bbox)
overlap = intersect_size / (
sample_bbox_size + object_bbox_size - intersect_size)
return overlap
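# Editor's note (illustrative sketch, not part of the original helpers):
# jaccard_overlap is a plain IoU on normalized boxes. For [0, 0, 0.5, 0.5] and
# [0.25, 0.25, 0.75, 0.75]: intersection area = 0.0625, union = 0.4375, so the
# overlap is 1/7 ~ 0.143.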
def intersect_bbox(bbox1, bbox2):
if bbox2[0] > bbox1[2] or bbox2[2] < bbox1[0] or \
bbox2[1] > bbox1[3] or bbox2[3] < bbox1[1]:
intersection_box = [0.0, 0.0, 0.0, 0.0]
else:
intersection_box = [
max(bbox1[0], bbox2[0]), max(bbox1[1], bbox2[1]),
min(bbox1[2], bbox2[2]), min(bbox1[3], bbox2[3])
]
return intersection_box
def bbox_coverage(bbox1, bbox2):
inter_box = intersect_bbox(bbox1, bbox2)
intersect_size = bbox_area(inter_box)
if intersect_size > 0:
bbox1_size = bbox_area(bbox1)
return intersect_size / bbox1_size
else:
return 0.
def satisfy_sample_constraint(sampler,
sample_bbox,
gt_bboxes,
satisfy_all=False):
if sampler[6] == 0 and sampler[7] == 0:
return True
satisfied = []
for i in range(len(gt_bboxes)):
object_bbox = [
gt_bboxes[i][0], gt_bboxes[i][1], gt_bboxes[i][2], gt_bboxes[i][3]
]
overlap = jaccard_overlap(sample_bbox, object_bbox)
if sampler[6] != 0 and \
overlap < sampler[6]:
satisfied.append(False)
continue
if sampler[7] != 0 and \
overlap > sampler[7]:
satisfied.append(False)
continue
satisfied.append(True)
if not satisfy_all:
return True
if satisfy_all:
return np.all(satisfied)
else:
return False
def satisfy_sample_constraint_coverage(sampler, sample_bbox, gt_bboxes):
if sampler[6] == 0 and sampler[7] == 0:
has_jaccard_overlap = False
else:
has_jaccard_overlap = True
if sampler[8] == 0 and sampler[9] == 0:
has_object_coverage = False
else:
has_object_coverage = True
if not has_jaccard_overlap and not has_object_coverage:
return True
found = False
for i in range(len(gt_bboxes)):
object_bbox = [
gt_bboxes[i][0], gt_bboxes[i][1], gt_bboxes[i][2], gt_bboxes[i][3]
]
if has_jaccard_overlap:
overlap = jaccard_overlap(sample_bbox, object_bbox)
if sampler[6] != 0 and \
overlap < sampler[6]:
continue
if sampler[7] != 0 and \
overlap > sampler[7]:
continue
found = True
if has_object_coverage:
object_coverage = bbox_coverage(object_bbox, sample_bbox)
if sampler[8] != 0 and \
object_coverage < sampler[8]:
continue
if sampler[9] != 0 and \
object_coverage > sampler[9]:
continue
found = True
if found:
return True
return found
def crop_image_sampling(img, sample_bbox, image_width, image_height,
target_size):
# no clipping here
xmin = int(sample_bbox[0] * image_width)
xmax = int(sample_bbox[2] * image_width)
ymin = int(sample_bbox[1] * image_height)
ymax = int(sample_bbox[3] * image_height)
w_off = xmin
h_off = ymin
width = xmax - xmin
height = ymax - ymin
cross_xmin = max(0.0, float(w_off))
cross_ymin = max(0.0, float(h_off))
cross_xmax = min(float(w_off + width - 1.0), float(image_width))
cross_ymax = min(float(h_off + height - 1.0), float(image_height))
cross_width = cross_xmax - cross_xmin
cross_height = cross_ymax - cross_ymin
roi_xmin = 0 if w_off >= 0 else abs(w_off)
roi_ymin = 0 if h_off >= 0 else abs(h_off)
roi_width = cross_width
roi_height = cross_height
roi_y1 = int(roi_ymin)
roi_y2 = int(roi_ymin + roi_height)
roi_x1 = int(roi_xmin)
roi_x2 = int(roi_xmin + roi_width)
cross_y1 = int(cross_ymin)
cross_y2 = int(cross_ymin + cross_height)
cross_x1 = int(cross_xmin)
cross_x2 = int(cross_xmin + cross_width)
sample_img = np.zeros((height, width, 3))
sample_img[roi_y1: roi_y2, roi_x1: roi_x2] = \
img[cross_y1: cross_y2, cross_x1: cross_x2]
sample_img = cv2.resize(
sample_img, (target_size, target_size), interpolation=cv2.INTER_AREA)
return sample_img
def is_poly(segm):
assert isinstance(segm, (list, dict)), \
"Invalid segm type: {}".format(type(segm))
return isinstance(segm, list)
def gaussian_radius(bbox_size, min_overlap):
height, width = bbox_size
a1 = 1
b1 = (height + width)
c1 = width * height * (1 - min_overlap) / (1 + min_overlap)
sq1 = np.sqrt(b1**2 - 4 * a1 * c1)
radius1 = (b1 + sq1) / (2 * a1)
a2 = 4
b2 = 2 * (height + width)
c2 = (1 - min_overlap) * width * height
sq2 = np.sqrt(b2**2 - 4 * a2 * c2)
radius2 = (b2 + sq2) / 2
a3 = 4 * min_overlap
b3 = -2 * min_overlap * (height + width)
c3 = (min_overlap - 1) * width * height
sq3 = np.sqrt(b3**2 - 4 * a3 * c3)
radius3 = (b3 + sq3) / 2
return min(radius1, radius2, radius3)
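# Editor's note (illustrative sketch, not part of the original helpers):
# gaussian_radius follows the CenterNet/CornerNet formulation as written above.
# For a 10x20 box with min_overlap=0.7 the three candidate radii evaluate to
# roughly 28.8, 55.7 and 3.7, so the function returns ~3.7 (callers such as
# Gt2FairMOTTarget then take max(0, int(radius)) = 3).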
def draw_gaussian(heatmap, center, radius, k=1, delte=6):
diameter = 2 * radius + 1
sigma = diameter / delte
gaussian = gaussian2D((diameter, diameter), sigma_x=sigma, sigma_y=sigma)
x, y = center
height, width = heatmap.shape[0:2]
left, right = min(x, radius), min(width - x, radius + 1)
top, bottom = min(y, radius), min(height - y, radius + 1)
masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:
radius + right]
np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap)
def gaussian2D(shape, sigma_x=1, sigma_y=1):
m, n = [(ss - 1.) / 2. for ss in shape]
y, x = np.ogrid[-m:m + 1, -n:n + 1]
h = np.exp(-(x * x / (2 * sigma_x * sigma_x) + y * y / (2 * sigma_y *
sigma_y)))
h[h < np.finfo(h.dtype).eps * h.max()] = 0
return h
def draw_umich_gaussian(heatmap, center, radius, k=1):
"""
draw_umich_gaussian, refer to https://github.com/xingyizhou/CenterNet/blob/master/src/lib/utils/image.py#L126
"""
diameter = 2 * radius + 1
gaussian = gaussian2D(
(diameter, diameter), sigma_x=diameter / 6, sigma_y=diameter / 6)
x, y = int(center[0]), int(center[1])
height, width = heatmap.shape[0:2]
left, right = min(x, radius), min(width - x, radius + 1)
top, bottom = min(y, radius), min(height - y, radius + 1)
masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:
radius + right]
if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0:
np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap)
return heatmap
def get_border(border, size):
i = 1
while size - border // i <= border // i:
i *= 2
return border // i

File diff suppressed because it is too large

@ -0,0 +1,30 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import trainer
from .trainer import *
from . import callbacks
from .callbacks import *
from . import env
from .env import *
__all__ = trainer.__all__ \
+ callbacks.__all__ \
+ env.__all__
from . import tracker
from .tracker import *
__all__ = __all__ + tracker.__all__

@ -0,0 +1,340 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import datetime
import six
import copy
import json
import paddle
import paddle.distributed as dist
from paddlers.models.ppdet.utils.checkpoint import save_model
from paddlers.models.ppdet.metrics import get_infer_results
from paddlers.models.ppdet.utils.logger import setup_logger
logger = setup_logger('ppdet.engine')
__all__ = [
'Callback', 'ComposeCallback', 'LogPrinter', 'Checkpointer',
'VisualDLWriter', 'SniperProposalsGenerator'
]
class Callback(object):
def __init__(self, model):
self.model = model
def on_step_begin(self, status):
pass
def on_step_end(self, status):
pass
def on_epoch_begin(self, status):
pass
def on_epoch_end(self, status):
pass
def on_train_begin(self, status):
pass
def on_train_end(self, status):
pass
class ComposeCallback(object):
def __init__(self, callbacks):
callbacks = [c for c in list(callbacks) if c is not None]
for c in callbacks:
assert isinstance(
c, Callback), "callback should be subclass of Callback"
self._callbacks = callbacks
def on_step_begin(self, status):
for c in self._callbacks:
c.on_step_begin(status)
def on_step_end(self, status):
for c in self._callbacks:
c.on_step_end(status)
def on_epoch_begin(self, status):
for c in self._callbacks:
c.on_epoch_begin(status)
def on_epoch_end(self, status):
for c in self._callbacks:
c.on_epoch_end(status)
def on_train_begin(self, status):
for c in self._callbacks:
c.on_train_begin(status)
def on_train_end(self, status):
for c in self._callbacks:
c.on_train_end(status)
class LogPrinter(Callback):
def __init__(self, model):
super(LogPrinter, self).__init__(model)
def on_step_end(self, status):
if dist.get_world_size() < 2 or dist.get_rank() == 0:
mode = status['mode']
if mode == 'train':
epoch_id = status['epoch_id']
step_id = status['step_id']
steps_per_epoch = status['steps_per_epoch']
training_staus = status['training_staus']
batch_time = status['batch_time']
data_time = status['data_time']
epoches = self.model.cfg.epoch
batch_size = self.model.cfg['{}Reader'.format(mode.capitalize(
))]['batch_size']
logs = training_staus.log()
space_fmt = ':' + str(len(str(steps_per_epoch))) + 'd'
if step_id % self.model.cfg.log_iter == 0:
eta_steps = (epoches - epoch_id
) * steps_per_epoch - step_id
eta_sec = eta_steps * batch_time.global_avg
eta_str = str(datetime.timedelta(seconds=int(eta_sec)))
ips = float(batch_size) / batch_time.avg
fmt = ' '.join([
'Epoch: [{}]',
'[{' + space_fmt + '}/{}]',
'learning_rate: {lr:.6f}',
'{meters}',
'eta: {eta}',
'batch_cost: {btime}',
'data_cost: {dtime}',
'ips: {ips:.4f} images/s',
])
fmt = fmt.format(
epoch_id,
step_id,
steps_per_epoch,
lr=status['learning_rate'],
meters=logs,
eta=eta_str,
btime=str(batch_time),
dtime=str(data_time),
ips=ips)
logger.info(fmt)
if mode == 'eval':
step_id = status['step_id']
if step_id % 100 == 0:
logger.info("Eval iter: {}".format(step_id))
def on_epoch_end(self, status):
if dist.get_world_size() < 2 or dist.get_rank() == 0:
mode = status['mode']
if mode == 'eval':
sample_num = status['sample_num']
cost_time = status['cost_time']
logger.info('Total sample number: {}, average FPS: {}'.format(
sample_num, sample_num / cost_time))
class Checkpointer(Callback):
def __init__(self, model):
super(Checkpointer, self).__init__(model)
cfg = self.model.cfg
self.best_ap = 0.
self.save_dir = os.path.join(self.model.cfg.save_dir,
self.model.cfg.filename)
if hasattr(self.model.model, 'student_model'):
self.weight = self.model.model.student_model
else:
self.weight = self.model.model
def on_epoch_end(self, status):
# Checkpointer only performed during training
mode = status['mode']
epoch_id = status['epoch_id']
weight = None
save_name = None
if dist.get_world_size() < 2 or dist.get_rank() == 0:
if mode == 'train':
end_epoch = self.model.cfg.epoch
if (
epoch_id + 1
) % self.model.cfg.snapshot_epoch == 0 or epoch_id == end_epoch - 1:
save_name = str(
epoch_id
) if epoch_id != end_epoch - 1 else "model_final"
weight = self.weight
elif mode == 'eval':
if 'save_best_model' in status and status['save_best_model']:
for metric in self.model._metrics:
map_res = metric.get_results()
if 'bbox' in map_res:
key = 'bbox'
elif 'keypoint' in map_res:
key = 'keypoint'
else:
key = 'mask'
if key not in map_res:
logger.warning("Evaluation results empty, this may be due to " \
"training iterations being too few or not " \
"loading the correct weights.")
return
if map_res[key][0] > self.best_ap:
self.best_ap = map_res[key][0]
save_name = 'best_model'
weight = self.weight
logger.info("Best test {} ap is {:0.3f}.".format(
key, self.best_ap))
if weight:
save_model(weight, self.model.optimizer, self.save_dir,
save_name, epoch_id + 1)
class WiferFaceEval(Callback):
def __init__(self, model):
super(WiferFaceEval, self).__init__(model)
def on_epoch_begin(self, status):
assert self.model.mode == 'eval', \
"WiferFaceEval can only be set during evaluation"
for metric in self.model._metrics:
metric.update(self.model.model)
sys.exit()
class VisualDLWriter(Callback):
"""
Use VisualDL to log data or image
"""
def __init__(self, model):
super(VisualDLWriter, self).__init__(model)
assert six.PY3, "VisualDL requires Python >= 3.5"
try:
from visualdl import LogWriter
except Exception as e:
logger.error('visualdl not found, please install visualdl, '
'for example: `pip install visualdl`.')
raise e
self.vdl_writer = LogWriter(
model.cfg.get('vdl_log_dir', 'vdl_log_dir/scalar'))
self.vdl_loss_step = 0
self.vdl_mAP_step = 0
self.vdl_image_step = 0
self.vdl_image_frame = 0
def on_step_end(self, status):
mode = status['mode']
if dist.get_world_size() < 2 or dist.get_rank() == 0:
if mode == 'train':
training_staus = status['training_staus']
for loss_name, loss_value in training_staus.get().items():
self.vdl_writer.add_scalar(loss_name, loss_value,
self.vdl_loss_step)
self.vdl_loss_step += 1
elif mode == 'test':
ori_image = status['original_image']
result_image = status['result_image']
self.vdl_writer.add_image(
"original/frame_{}".format(self.vdl_image_frame),
ori_image, self.vdl_image_step)
self.vdl_writer.add_image(
"result/frame_{}".format(self.vdl_image_frame),
result_image, self.vdl_image_step)
self.vdl_image_step += 1
# each frame can display ten pictures at most.
if self.vdl_image_step % 10 == 0:
self.vdl_image_step = 0
self.vdl_image_frame += 1
def on_epoch_end(self, status):
mode = status['mode']
if dist.get_world_size() < 2 or dist.get_rank() == 0:
if mode == 'eval':
for metric in self.model._metrics:
for key, map_value in metric.get_results().items():
self.vdl_writer.add_scalar("{}-mAP".format(key),
map_value[0],
self.vdl_mAP_step)
self.vdl_mAP_step += 1
class SniperProposalsGenerator(Callback):
def __init__(self, model):
super(SniperProposalsGenerator, self).__init__(model)
ori_dataset = self.model.dataset
self.dataset = self._create_new_dataset(ori_dataset)
self.loader = self.model.loader
self.cfg = self.model.cfg
self.infer_model = self.model.model
def _create_new_dataset(self, ori_dataset):
dataset = copy.deepcopy(ori_dataset)
# init anno_cropper
dataset.init_anno_cropper()
# generate infer roidbs
ori_roidbs = dataset.get_ori_roidbs()
roidbs = dataset.anno_cropper.crop_infer_anno_records(ori_roidbs)
# set new roidbs
dataset.set_roidbs(roidbs)
return dataset
def _eval_with_loader(self, loader):
results = []
with paddle.no_grad():
self.infer_model.eval()
for step_id, data in enumerate(loader):
outs = self.infer_model(data)
for key in ['im_shape', 'scale_factor', 'im_id']:
outs[key] = data[key]
for key, value in outs.items():
if hasattr(value, 'numpy'):
outs[key] = value.numpy()
results.append(outs)
return results
def on_train_end(self, status):
self.loader.dataset = self.dataset
results = self._eval_with_loader(self.loader)
results = self.dataset.anno_cropper.aggregate_chips_detections(results)
# sniper
proposals = []
clsid2catid = {v: k for k, v in self.dataset.catid2clsid.items()}
for outs in results:
batch_res = get_infer_results(outs, clsid2catid)
start = 0
for i, im_id in enumerate(outs['im_id']):
bbox_num = outs['bbox_num']
end = start + bbox_num[i]
bbox_res = batch_res['bbox'][start:end] \
if 'bbox' in batch_res else None
if bbox_res:
proposals += bbox_res
# advance the slice start to the next image in the batch
start = end
logger.info("save proposals in {}".format(self.cfg.proposals_path))
with open(self.cfg.proposals_path, 'w') as f:
json.dump(proposals, f)
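
The hooks above all receive the shared `status` dict that the engine populates each step and epoch. Below is a minimal sketch of a user-defined callback, assuming an already constructed Trainer instance named `trainer`; the TimerCallback name is purely illustrative:

import time
from paddlers.models.ppdet.engine.callbacks import Callback

class TimerCallback(Callback):
    """Report wall-clock time per epoch using the shared status dict."""

    def on_epoch_begin(self, status):
        self._tic = time.time()

    def on_epoch_end(self, status):
        print('epoch {} finished in {:.1f}s'.format(
            status.get('epoch_id', -1), time.time() - self._tic))

# Registered callbacks are dispatched through ComposeCallback alongside
# the defaults (LogPrinter, Checkpointer, ...).
trainer.register_callbacks([TimerCallback(trainer)])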

@ -0,0 +1,50 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import random
import numpy as np
import paddle
from paddle.distributed import fleet
__all__ = ['init_parallel_env', 'set_random_seed', 'init_fleet_env']
def init_fleet_env(find_unused_parameters=False):
strategy = fleet.DistributedStrategy()
strategy.find_unused_parameters = find_unused_parameters
fleet.init(is_collective=True, strategy=strategy)
def init_parallel_env():
env = os.environ
dist = 'PADDLE_TRAINER_ID' in env and 'PADDLE_TRAINERS_NUM' in env
if dist:
trainer_id = int(env['PADDLE_TRAINER_ID'])
local_seed = (99 + trainer_id)
random.seed(local_seed)
np.random.seed(local_seed)
paddle.distributed.init_parallel_env()
def set_random_seed(seed):
paddle.seed(seed)
random.seed(seed)
np.random.seed(seed)
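
A minimal sketch of how these helpers are typically combined at the start of a training script; the seed value is arbitrary:

from paddlers.models.ppdet.engine.env import init_parallel_env, set_random_seed

init_parallel_env()   # only takes effect when PADDLE_TRAINER_ID / PADDLE_TRAINERS_NUM are set
set_random_seed(42)   # seeds paddle, random and numpy in one call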

@ -0,0 +1,177 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import yaml
from collections import OrderedDict
import paddle
from paddlers.models.ppdet.data.source.category import get_categories
from paddlers.models.ppdet.utils.logger import setup_logger
logger = setup_logger('ppdet.engine')
# Global dictionary
TRT_MIN_SUBGRAPH = {
'YOLO': 3,
'SSD': 60,
'RCNN': 40,
'RetinaNet': 40,
'S2ANet': 80,
'EfficientDet': 40,
'Face': 3,
'TTFNet': 60,
'FCOS': 16,
'SOLOv2': 60,
'HigherHRNet': 3,
'HRNet': 3,
'DeepSORT': 3,
'JDE': 10,
'FairMOT': 5,
'GFL': 16,
'PicoDet': 3,
'CenterNet': 5,
'TOOD': 5,
}
KEYPOINT_ARCH = ['HigherHRNet', 'TopDownHRNet']
MOT_ARCH = ['DeepSORT', 'JDE', 'FairMOT']
def _prune_input_spec(input_spec, program, targets):
# try to prune static program to figure out pruned input spec
# so we perform following operations in static mode
paddle.enable_static()
pruned_input_spec = [{}]
program = program.clone()
program = program._prune(targets=targets)
global_block = program.global_block()
for name, spec in input_spec[0].items():
try:
v = global_block.var(name)
pruned_input_spec[0][name] = spec
except Exception:
pass
paddle.disable_static()
return pruned_input_spec
def _parse_reader(reader_cfg, dataset_cfg, metric, arch, image_shape):
preprocess_list = []
anno_file = dataset_cfg.get_anno()
clsid2catid, catid2name = get_categories(metric, anno_file, arch)
label_list = [str(cat) for cat in catid2name.values()]
fuse_normalize = reader_cfg.get('fuse_normalize', False)
sample_transforms = reader_cfg['sample_transforms']
for st in sample_transforms[1:]:
for key, value in st.items():
p = {'type': key}
if key == 'Resize':
if int(image_shape[1]) != -1:
value['target_size'] = image_shape[1:]
if fuse_normalize and key == 'NormalizeImage':
continue
p.update(value)
preprocess_list.append(p)
batch_transforms = reader_cfg.get('batch_transforms', None)
if batch_transforms:
for bt in batch_transforms:
for key, value in bt.items():
# for deploy/infer, use PadStride(stride) instead of PadBatch(pad_to_stride)
if key == 'PadBatch':
preprocess_list.append({
'type': 'PadStride',
'stride': value['pad_to_stride']
})
break
return preprocess_list, label_list
def _parse_tracker(tracker_cfg):
tracker_params = {}
for k, v in tracker_cfg.items():
tracker_params.update({k: v})
return tracker_params
def _dump_infer_config(config, path, image_shape, model):
arch_state = False
from paddlers.models.ppdet.core.config.yaml_helpers import setup_orderdict
setup_orderdict()
use_dynamic_shape = True if image_shape[2] == -1 else False
infer_cfg = OrderedDict({
'mode': 'fluid',
'draw_threshold': 0.5,
'metric': config['metric'],
'use_dynamic_shape': use_dynamic_shape
})
infer_arch = config['architecture']
if infer_arch in MOT_ARCH:
if infer_arch == 'DeepSORT':
tracker_cfg = config['DeepSORTTracker']
else:
tracker_cfg = config['JDETracker']
infer_cfg['tracker'] = _parse_tracker(tracker_cfg)
for arch, min_subgraph_size in TRT_MIN_SUBGRAPH.items():
if arch in infer_arch:
infer_cfg['arch'] = arch
infer_cfg['min_subgraph_size'] = min_subgraph_size
arch_state = True
break
if not arch_state:
logger.error(
'Architecture: {} is not supported for exporting model now.\n'.
format(infer_arch) +
'Please set TRT_MIN_SUBGRAPH in ppdet/engine/export_utils.py')
os._exit(0)
if 'mask_head' in config[config['architecture']] and config[config[
'architecture']]['mask_head']:
infer_cfg['mask'] = True
label_arch = 'detection_arch'
if infer_arch in KEYPOINT_ARCH:
label_arch = 'keypoint_arch'
if infer_arch in MOT_ARCH:
label_arch = 'mot_arch'
reader_cfg = config['TestMOTReader']
dataset_cfg = config['TestMOTDataset']
else:
reader_cfg = config['TestReader']
dataset_cfg = config['TestDataset']
infer_cfg['Preprocess'], infer_cfg['label_list'] = _parse_reader(
reader_cfg, dataset_cfg, config['metric'], label_arch, image_shape[1:])
if infer_arch == 'PicoDet':
infer_cfg['NMS'] = config['PicoHead']['nms']
# In order to speed up the prediction, the threshold of nms
# is adjusted here, which can be changed in infer_cfg.yml
config['PicoHead']['nms']["score_threshold"] = 0.3
config['PicoHead']['nms']["nms_threshold"] = 0.5
infer_cfg['fpn_stride'] = config['PicoHead']['fpn_stride']
yaml.dump(infer_cfg, open(path, 'w'))
logger.info("Export inference config file to {}".format(
os.path.join(path)))
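
The dumped infer_cfg.yml is what the deploy-side predictor reads back. A hedged sketch of inspecting it after export, assuming the model was exported to output_inference/yolov3_model (the directory name is illustrative):

import yaml

with open('output_inference/yolov3_model/infer_cfg.yml') as f:
    infer_cfg = yaml.safe_load(f)

# Keys written by _dump_infer_config: mode, draw_threshold, metric,
# use_dynamic_shape, arch, min_subgraph_size, Preprocess, label_list
# (plus mask / tracker / NMS / fpn_stride for specific architectures).
print(infer_cfg['arch'], infer_cfg['min_subgraph_size'])
print([op['type'] for op in infer_cfg['Preprocess']])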

@ -0,0 +1,538 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import cv2
import glob
import re
import paddle
import numpy as np
import os.path as osp
from collections import defaultdict
from paddlers.models.ppdet.core.workspace import create
from paddlers.models.ppdet.utils.checkpoint import load_weight, load_pretrain_weight
from paddlers.models.ppdet.modeling.mot.utils import Detection, get_crops, scale_coords, clip_box
from paddlers.models.ppdet.modeling.mot.utils import MOTTimer, load_det_results, write_mot_results, save_vis_results
from paddlers.models.ppdet.metrics import Metric, MOTMetric, KITTIMOTMetric
from paddlers.models.ppdet.metrics import MCMOTMetric
from .callbacks import Callback, ComposeCallback
from paddlers.models.ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
__all__ = ['Tracker']
class Tracker(object):
def __init__(self, cfg, mode='eval'):
self.cfg = cfg
assert mode.lower() in ['test', 'eval'], \
"mode should be 'test' or 'eval'"
self.mode = mode.lower()
self.optimizer = None
# build MOT data loader
self.dataset = cfg['{}MOTDataset'.format(self.mode.capitalize())]
# build model
self.model = create(cfg.architecture)
self.status = {}
self.start_epoch = 0
# initial default callbacks
self._init_callbacks()
# initial default metrics
self._init_metrics()
self._reset_metrics()
def _init_callbacks(self):
self._callbacks = []
self._compose_callback = None
def _init_metrics(self):
if self.mode in ['test']:
self._metrics = []
return
if self.cfg.metric == 'MOT':
self._metrics = [MOTMetric(), ]
elif self.cfg.metric == 'MCMOT':
self._metrics = [MCMOTMetric(self.cfg.num_classes), ]
elif self.cfg.metric == 'KITTI':
self._metrics = [KITTIMOTMetric(), ]
else:
logger.warning("Metric type {} is not supported".format(
self.cfg.metric))
self._metrics = []
def _reset_metrics(self):
for metric in self._metrics:
metric.reset()
def register_callbacks(self, callbacks):
callbacks = [h for h in list(callbacks) if h is not None]
for c in callbacks:
assert isinstance(c, Callback), \
"callbacks should be instances of Callback subclasses"
self._callbacks.extend(callbacks)
self._compose_callback = ComposeCallback(self._callbacks)
def register_metrics(self, metrics):
metrics = [m for m in list(metrics) if m is not None]
for m in metrics:
assert isinstance(m, Metric), \
"metrics should be instances of Metric subclasses"
self._metrics.extend(metrics)
def load_weights_jde(self, weights):
load_weight(self.model, weights, self.optimizer)
def load_weights_sde(self, det_weights, reid_weights):
if self.model.detector:
load_weight(self.model.detector, det_weights)
load_weight(self.model.reid, reid_weights)
else:
load_weight(self.model.reid, reid_weights, self.optimizer)
def _eval_seq_jde(self,
dataloader,
save_dir=None,
show_image=False,
frame_rate=30,
draw_threshold=0):
if save_dir:
if not os.path.exists(save_dir): os.makedirs(save_dir)
tracker = self.model.tracker
tracker.max_time_lost = int(frame_rate / 30.0 * tracker.track_buffer)
timer = MOTTimer()
frame_id = 0
self.status['mode'] = 'track'
self.model.eval()
results = defaultdict(list) # support single class and multi classes
for step_id, data in enumerate(dataloader):
self.status['step_id'] = step_id
if frame_id % 40 == 0:
logger.info('Processing frame {} ({:.2f} fps)'.format(
frame_id, 1. / max(1e-5, timer.average_time)))
# forward
timer.tic()
pred_dets, pred_embs = self.model(data)
pred_dets, pred_embs = pred_dets.numpy(), pred_embs.numpy()
online_targets_dict = self.model.tracker.update(pred_dets,
pred_embs)
online_tlwhs = defaultdict(list)
online_scores = defaultdict(list)
online_ids = defaultdict(list)
for cls_id in range(self.cfg.num_classes):
online_targets = online_targets_dict[cls_id]
for t in online_targets:
tlwh = t.tlwh
tid = t.track_id
tscore = t.score
if tlwh[2] * tlwh[3] <= tracker.min_box_area: continue
if tracker.vertical_ratio > 0 and tlwh[2] / tlwh[
3] > tracker.vertical_ratio:
continue
online_tlwhs[cls_id].append(tlwh)
online_ids[cls_id].append(tid)
online_scores[cls_id].append(tscore)
# save results
results[cls_id].append(
(frame_id + 1, online_tlwhs[cls_id], online_scores[cls_id],
online_ids[cls_id]))
timer.toc()
save_vis_results(data, frame_id, online_ids, online_tlwhs,
online_scores, timer.average_time, show_image,
save_dir, self.cfg.num_classes)
frame_id += 1
return results, frame_id, timer.average_time, timer.calls
def _eval_seq_sde(self,
dataloader,
save_dir=None,
show_image=False,
frame_rate=30,
seq_name='',
scaled=False,
det_file='',
draw_threshold=0):
if save_dir:
if not os.path.exists(save_dir): os.makedirs(save_dir)
use_detector = False if not self.model.detector else True
timer = MOTTimer()
results = defaultdict(list)
frame_id = 0
self.status['mode'] = 'track'
self.model.eval()
self.model.reid.eval()
if not use_detector:
dets_list = load_det_results(det_file, len(dataloader))
logger.info('Finished loading detection results file {}.'.format(
det_file))
for step_id, data in enumerate(dataloader):
self.status['step_id'] = step_id
if frame_id % 40 == 0:
logger.info('Processing frame {} ({:.2f} fps)'.format(
frame_id, 1. / max(1e-5, timer.average_time)))
ori_image = data['ori_image'] # [bs, H, W, 3]
ori_image_shape = data['ori_image'].shape[1:3]
# ori_image_shape: [H, W]
input_shape = data['image'].shape[2:]
# input_shape: [h, w], before data transforms, set in model config
im_shape = data['im_shape'][0].numpy()
# im_shape: [new_h, new_w], after data transforms
scale_factor = data['scale_factor'][0].numpy()
empty_detections = False
# when it has no detected bboxes, will not inference reid model
# and if visualize, use original image instead
# forward
timer.tic()
if not use_detector:
dets = dets_list[frame_id]
bbox_tlwh = np.array(dets['bbox'], dtype='float32')
if bbox_tlwh.shape[0] > 0:
# detector outputs: pred_cls_ids, pred_scores, pred_bboxes
pred_cls_ids = np.array(dets['cls_id'], dtype='float32')
pred_scores = np.array(dets['score'], dtype='float32')
pred_bboxes = np.concatenate(
(bbox_tlwh[:, 0:2],
bbox_tlwh[:, 2:4] + bbox_tlwh[:, 0:2]),
axis=1)
else:
logger.warning(
'Frame {} has no detected object; try to modify the score threshold.'.
format(frame_id))
empty_detections = True
else:
outs = self.model.detector(data)
outs['bbox'] = outs['bbox'].numpy()
outs['bbox_num'] = outs['bbox_num'].numpy()
if outs['bbox_num'] > 0 and empty_detections == False:
# detector outputs: pred_cls_ids, pred_scores, pred_bboxes
pred_cls_ids = outs['bbox'][:, 0:1]
pred_scores = outs['bbox'][:, 1:2]
if not scaled:
# Note: scaled=False only in JDE YOLOv3 or other detectors
# with LetterBoxResize and JDEBBoxPostProcess.
#
# 'scaled' means whether the coords after detector outputs
# have been scaled back to the original image, set True
# in general detector, set False in JDE YOLOv3.
pred_bboxes = scale_coords(outs['bbox'][:, 2:],
input_shape, im_shape,
scale_factor)
else:
pred_bboxes = outs['bbox'][:, 2:]
else:
logger.warning(
'Frame {} has no detected object; try to modify the score threshold.'.
format(frame_id))
empty_detections = True
if not empty_detections:
pred_xyxys, keep_idx = clip_box(pred_bboxes, ori_image_shape)
if len(keep_idx[0]) == 0:
logger.warning(
'Frame {} has no detected object left after clip_box.'.
format(frame_id))
empty_detections = True
if empty_detections:
timer.toc()
# if visualize, use original image instead
online_ids, online_tlwhs, online_scores = None, None, None
save_vis_results(data, frame_id, online_ids, online_tlwhs,
online_scores, timer.average_time, show_image,
save_dir, self.cfg.num_classes)
frame_id += 1
# thus will not inference reid model
continue
pred_scores = pred_scores[keep_idx[0]]
pred_cls_ids = pred_cls_ids[keep_idx[0]]
pred_tlwhs = np.concatenate(
(pred_xyxys[:, 0:2],
pred_xyxys[:, 2:4] - pred_xyxys[:, 0:2] + 1),
axis=1)
pred_dets = np.concatenate(
(pred_tlwhs, pred_scores, pred_cls_ids), axis=1)
tracker = self.model.tracker
crops = get_crops(
pred_xyxys,
ori_image,
w=tracker.input_size[0],
h=tracker.input_size[1])
crops = paddle.to_tensor(crops)
data.update({'crops': crops})
pred_embs = self.model(data).numpy()
tracker.predict()
online_targets = tracker.update(pred_dets, pred_embs)
online_tlwhs, online_scores, online_ids = [], [], []
for t in online_targets:
if not t.is_confirmed() or t.time_since_update > 1:
continue
tlwh = t.to_tlwh()
tscore = t.score
tid = t.track_id
if tscore < draw_threshold: continue
if tlwh[2] * tlwh[3] <= tracker.min_box_area: continue
if tracker.vertical_ratio > 0 and tlwh[2] / tlwh[
3] > tracker.vertical_ratio:
continue
online_tlwhs.append(tlwh)
online_scores.append(tscore)
online_ids.append(tid)
timer.toc()
# save results
results[0].append(
(frame_id + 1, online_tlwhs, online_scores, online_ids))
save_vis_results(data, frame_id, online_ids, online_tlwhs,
online_scores, timer.average_time, show_image,
save_dir, self.cfg.num_classes)
frame_id += 1
return results, frame_id, timer.average_time, timer.calls
def mot_evaluate(self,
data_root,
seqs,
output_dir,
data_type='mot',
model_type='JDE',
save_images=False,
save_videos=False,
show_image=False,
scaled=False,
det_results_dir=''):
if not os.path.exists(output_dir): os.makedirs(output_dir)
result_root = os.path.join(output_dir, 'mot_results')
if not os.path.exists(result_root): os.makedirs(result_root)
assert data_type in ['mot', 'mcmot', 'kitti'], \
"data_type should be 'mot', 'mcmot' or 'kitti'"
assert model_type in ['JDE', 'DeepSORT', 'FairMOT'], \
"model_type should be 'JDE', 'DeepSORT' or 'FairMOT'"
# run tracking
n_frame = 0
timer_avgs, timer_calls = [], []
for seq in seqs:
infer_dir = os.path.join(data_root, seq)
if not os.path.exists(infer_dir) or not os.path.isdir(infer_dir):
logger.warning("Seq {} error, {} has no images.".format(
seq, infer_dir))
continue
if os.path.exists(os.path.join(infer_dir, 'img1')):
infer_dir = os.path.join(infer_dir, 'img1')
frame_rate = 30
seqinfo = os.path.join(data_root, seq, 'seqinfo.ini')
if os.path.exists(seqinfo):
meta_info = open(seqinfo).read()
frame_rate = int(meta_info[meta_info.find('frameRate') + 10:
meta_info.find('\nseqLength')])
save_dir = os.path.join(
output_dir, 'mot_outputs',
seq) if save_images or save_videos else None
logger.info('start seq: {}'.format(seq))
self.dataset.set_images(self.get_infer_images(infer_dir))
dataloader = create('EvalMOTReader')(self.dataset, 0)
result_filename = os.path.join(result_root, '{}.txt'.format(seq))
with paddle.no_grad():
if model_type in ['JDE', 'FairMOT']:
results, nf, ta, tc = self._eval_seq_jde(
dataloader,
save_dir=save_dir,
show_image=show_image,
frame_rate=frame_rate)
elif model_type in ['DeepSORT']:
results, nf, ta, tc = self._eval_seq_sde(
dataloader,
save_dir=save_dir,
show_image=show_image,
frame_rate=frame_rate,
seq_name=seq,
scaled=scaled,
det_file=os.path.join(det_results_dir,
'{}.txt'.format(seq)))
else:
raise ValueError(model_type)
write_mot_results(result_filename, results, data_type,
self.cfg.num_classes)
n_frame += nf
timer_avgs.append(ta)
timer_calls.append(tc)
if save_videos:
output_video_path = os.path.join(save_dir, '..',
'{}_vis.mp4'.format(seq))
cmd_str = 'ffmpeg -f image2 -i {}/%05d.jpg {}'.format(
save_dir, output_video_path)
os.system(cmd_str)
logger.info('Save video in {}.'.format(output_video_path))
logger.info('Evaluate seq: {}'.format(seq))
# update metrics
for metric in self._metrics:
metric.update(data_root, seq, data_type, result_root,
result_filename)
timer_avgs = np.asarray(timer_avgs)
timer_calls = np.asarray(timer_calls)
all_time = np.dot(timer_avgs, timer_calls)
avg_time = all_time / np.sum(timer_calls)
logger.info('Time elapsed: {:.2f} seconds, FPS: {:.2f}'.format(
all_time, 1.0 / avg_time))
# accumulate metric to log out
for metric in self._metrics:
metric.accumulate()
metric.log()
# reset metric states since metrics may be computed multiple times
self._reset_metrics()
def get_infer_images(self, infer_dir):
assert infer_dir is None or os.path.isdir(infer_dir), \
"{} is not a directory".format(infer_dir)
images = set()
assert os.path.isdir(infer_dir), \
"infer_dir {} is not a directory".format(infer_dir)
exts = ['jpg', 'jpeg', 'png', 'bmp']
exts += [ext.upper() for ext in exts]
for ext in exts:
images.update(glob.glob('{}/*.{}'.format(infer_dir, ext)))
images = list(images)
images.sort()
assert len(images) > 0, "no image found in {}".format(infer_dir)
logger.info("Found {} inference images in total.".format(len(images)))
return images
def mot_predict_seq(self,
video_file,
frame_rate,
image_dir,
output_dir,
data_type='mot',
model_type='JDE',
save_images=False,
save_videos=True,
show_image=False,
scaled=False,
det_results_dir='',
draw_threshold=0.5):
assert video_file is not None or image_dir is not None, \
"--video_file or --image_dir should be set."
assert video_file is None or os.path.isfile(video_file), \
"{} is not a file".format(video_file)
assert image_dir is None or os.path.isdir(image_dir), \
"{} is not a directory".format(image_dir)
if not os.path.exists(output_dir): os.makedirs(output_dir)
result_root = os.path.join(output_dir, 'mot_results')
if not os.path.exists(result_root): os.makedirs(result_root)
assert data_type in ['mot', 'mcmot', 'kitti'], \
"data_type should be 'mot', 'mcmot' or 'kitti'"
assert model_type in ['JDE', 'DeepSORT', 'FairMOT'], \
"model_type should be 'JDE', 'DeepSORT' or 'FairMOT'"
# run tracking
if video_file:
seq = video_file.split('/')[-1].split('.')[0]
self.dataset.set_video(video_file, frame_rate)
logger.info('Starting tracking video {}'.format(video_file))
elif image_dir:
seq = image_dir.split('/')[-1].split('.')[0]
if os.path.exists(os.path.join(image_dir, 'img1')):
image_dir = os.path.join(image_dir, 'img1')
images = [
'{}/{}'.format(image_dir, x) for x in os.listdir(image_dir)
]
images.sort()
self.dataset.set_images(images)
logger.info('Starting tracking folder {}, found {} images'.format(
image_dir, len(images)))
else:
raise ValueError('--video_file or --image_dir should be set.')
save_dir = os.path.join(output_dir, 'mot_outputs',
seq) if save_images or save_videos else None
dataloader = create('TestMOTReader')(self.dataset, 0)
result_filename = os.path.join(result_root, '{}.txt'.format(seq))
if frame_rate == -1:
frame_rate = self.dataset.frame_rate
with paddle.no_grad():
if model_type in ['JDE', 'FairMOT']:
results, nf, ta, tc = self._eval_seq_jde(
dataloader,
save_dir=save_dir,
show_image=show_image,
frame_rate=frame_rate,
draw_threshold=draw_threshold)
elif model_type in ['DeepSORT']:
results, nf, ta, tc = self._eval_seq_sde(
dataloader,
save_dir=save_dir,
show_image=show_image,
frame_rate=frame_rate,
seq_name=seq,
scaled=scaled,
det_file=os.path.join(det_results_dir,
'{}.txt'.format(seq)),
draw_threshold=draw_threshold)
else:
raise ValueError(model_type)
if save_videos:
output_video_path = os.path.join(save_dir, '..',
'{}_vis.mp4'.format(seq))
cmd_str = 'ffmpeg -f image2 -i {}/%05d.jpg {}'.format(
save_dir, output_video_path)
os.system(cmd_str)
logger.info('Save video in {}'.format(output_video_path))
write_mot_results(result_filename, results, data_type,
self.cfg.num_classes)
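
A hedged sketch of driving the Tracker end to end for MOT evaluation, assuming `cfg` is a parsed ppdet config for a FairMOT model; the dataset and weight paths are illustrative:

from paddlers.models.ppdet.engine import Tracker

tracker = Tracker(cfg, mode='eval')
tracker.load_weights_jde('output/fairmot_dla34/model_final.pdparams')
tracker.mot_evaluate(
    data_root='dataset/mot/MOT16/images/train',
    seqs=['MOT16-02', 'MOT16-04'],
    output_dir='output',
    data_type='mot',
    model_type='FairMOT',
    save_videos=False)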

@ -0,0 +1,742 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import copy
import time
import numpy as np
import typing
from PIL import Image, ImageOps
import paddle
import paddle.distributed as dist
from paddle.distributed import fleet
from paddle import amp
from paddle.static import InputSpec
from paddlers.models.ppdet.optimizer import ModelEMA
from paddlers.models.ppdet.core.workspace import create
from paddlers.models.ppdet.modeling.architectures.meta_arch import BaseArch
from paddlers.models.ppdet.utils.checkpoint import load_weight, load_pretrain_weight
from paddlers.models.ppdet.utils.visualizer import visualize_results, save_result
from paddlers.models.ppdet.metrics import Metric, COCOMetric, VOCMetric, WiderFaceMetric, get_infer_results, KeyPointTopDownCOCOEval, KeyPointTopDownMPIIEval
from paddlers.models.ppdet.metrics import RBoxMetric, JDEDetMetric, SNIPERCOCOMetric
from paddlers.models.ppdet.data.source.sniper_coco import SniperCOCODataSet
from paddlers.models.ppdet.data.source.category import get_categories
from paddlers.models.ppdet.utils import stats
from paddlers.models.ppdet.utils import profiler
from .callbacks import Callback, ComposeCallback, LogPrinter, Checkpointer, WiferFaceEval, VisualDLWriter, SniperProposalsGenerator
from .export_utils import _dump_infer_config, _prune_input_spec
from paddlers.models.ppdet.utils.logger import setup_logger
logger = setup_logger('ppdet.engine')
__all__ = ['Trainer']
MOT_ARCH = ['DeepSORT', 'JDE', 'FairMOT']
class Trainer(object):
def __init__(self, cfg, mode='train'):
self.cfg = cfg
assert mode.lower() in ['train', 'eval', 'test'], \
"mode should be 'train', 'eval' or 'test'"
self.mode = mode.lower()
self.optimizer = None
self.is_loaded_weights = False
# build data loader
if cfg.architecture in MOT_ARCH and self.mode in ['eval', 'test']:
self.dataset = cfg['{}MOTDataset'.format(self.mode.capitalize())]
else:
self.dataset = cfg['{}Dataset'.format(self.mode.capitalize())]
if cfg.architecture == 'DeepSORT' and self.mode == 'train':
logger.error('DeepSORT does not need training on the MOT dataset.')
sys.exit(1)
if self.mode == 'train':
self.loader = create('{}Reader'.format(self.mode.capitalize()))(
self.dataset, cfg.worker_num)
if cfg.architecture == 'JDE' and self.mode == 'train':
cfg['JDEEmbeddingHead'][
'num_identities'] = self.dataset.num_identities_dict[0]
# JDE only supports single-class MOT for now.
if cfg.architecture == 'FairMOT' and self.mode == 'train':
cfg['FairMOTEmbeddingHead'][
'num_identities_dict'] = self.dataset.num_identities_dict
# FairMOT supports both single-class and multi-class MOT.
# build model
if 'model' not in self.cfg:
self.model = create(cfg.architecture)
else:
self.model = self.cfg.model
self.is_loaded_weights = True
#normalize params for deploy
self.model.load_meanstd(cfg['TestReader']['sample_transforms'])
self.use_ema = ('use_ema' in cfg and cfg['use_ema'])
if self.use_ema:
ema_decay = self.cfg.get('ema_decay', 0.9998)
cycle_epoch = self.cfg.get('cycle_epoch', -1)
self.ema = ModelEMA(
self.model,
decay=ema_decay,
use_thres_step=True,
cycle_epoch=cycle_epoch)
# EvalDataset build with BatchSampler to evaluate in single device
# TODO: multi-device evaluate
if self.mode == 'eval':
self._eval_batch_sampler = paddle.io.BatchSampler(
self.dataset, batch_size=self.cfg.EvalReader['batch_size'])
reader_name = '{}Reader'.format(self.mode.capitalize())
# If metric is VOC, need to be set collate_batch=False.
if cfg.metric == 'VOC':
cfg[reader_name]['collate_batch'] = False
self.loader = create(reader_name)(self.dataset, cfg.worker_num,
self._eval_batch_sampler)
# TestDataset is built after the user sets images, so skip loader creation here
# build optimizer in train mode
if self.mode == 'train':
steps_per_epoch = len(self.loader)
self.lr = create('LearningRate')(steps_per_epoch)
self.optimizer = create('OptimizerBuilder')(self.lr, self.model)
if self.cfg.get('unstructured_prune'):
self.pruner = create('UnstructuredPruner')(self.model,
steps_per_epoch)
self._nranks = dist.get_world_size()
self._local_rank = dist.get_rank()
self.status = {}
self.start_epoch = 0
self.end_epoch = 0 if 'epoch' not in cfg else cfg.epoch
# initial default callbacks
self._init_callbacks()
# initial default metrics
self._init_metrics()
self._reset_metrics()
def _init_callbacks(self):
if self.mode == 'train':
self._callbacks = [LogPrinter(self), Checkpointer(self)]
if self.cfg.get('use_vdl', False):
self._callbacks.append(VisualDLWriter(self))
if self.cfg.get('save_proposals', False):
self._callbacks.append(SniperProposalsGenerator(self))
self._compose_callback = ComposeCallback(self._callbacks)
elif self.mode == 'eval':
self._callbacks = [LogPrinter(self)]
if self.cfg.metric == 'WiderFace':
self._callbacks.append(WiferFaceEval(self))
self._compose_callback = ComposeCallback(self._callbacks)
elif self.mode == 'test' and self.cfg.get('use_vdl', False):
self._callbacks = [VisualDLWriter(self)]
self._compose_callback = ComposeCallback(self._callbacks)
else:
self._callbacks = []
self._compose_callback = None
def _init_metrics(self, validate=False):
if self.mode == 'test' or (self.mode == 'train' and not validate):
self._metrics = []
return
classwise = self.cfg['classwise'] if 'classwise' in self.cfg else False
if self.cfg.metric == 'COCO' or self.cfg.metric == "SNIPERCOCO":
# TODO: bias should be unified
bias = self.cfg['bias'] if 'bias' in self.cfg else 0
output_eval = self.cfg['output_eval'] \
if 'output_eval' in self.cfg else None
save_prediction_only = self.cfg.get('save_prediction_only', False)
# pass clsid2catid info to the metric instance to avoid loading the
# annotation file multiple times
clsid2catid = {v: k for k, v in self.dataset.catid2clsid.items()} \
if self.mode == 'eval' else None
# when validating during training, the annotation file should be taken from
# EvalDataset instead of self.dataset (which is the training dataset)
anno_file = self.dataset.get_anno()
dataset = self.dataset
if self.mode == 'train' and validate:
eval_dataset = self.cfg['EvalDataset']
eval_dataset.check_or_download_dataset()
anno_file = eval_dataset.get_anno()
dataset = eval_dataset
IouType = self.cfg['IouType'] if 'IouType' in self.cfg else 'bbox'
if self.cfg.metric == "COCO":
self._metrics = [
COCOMetric(
anno_file=anno_file,
clsid2catid=clsid2catid,
classwise=classwise,
output_eval=output_eval,
bias=bias,
IouType=IouType,
save_prediction_only=save_prediction_only)
]
elif self.cfg.metric == "SNIPERCOCO": # sniper
self._metrics = [
SNIPERCOCOMetric(
anno_file=anno_file,
dataset=dataset,
clsid2catid=clsid2catid,
classwise=classwise,
output_eval=output_eval,
bias=bias,
IouType=IouType,
save_prediction_only=save_prediction_only)
]
elif self.cfg.metric == 'RBOX':
# TODO: bias should be unified
bias = self.cfg['bias'] if 'bias' in self.cfg else 0
output_eval = self.cfg['output_eval'] \
if 'output_eval' in self.cfg else None
save_prediction_only = self.cfg.get('save_prediction_only', False)
# pass clsid2catid info to the metric instance to avoid loading the
# annotation file multiple times
clsid2catid = {v: k for k, v in self.dataset.catid2clsid.items()} \
if self.mode == 'eval' else None
# when validating during training, the annotation file should be taken from
# EvalDataset instead of self.dataset (which is the training dataset)
anno_file = self.dataset.get_anno()
if self.mode == 'train' and validate:
eval_dataset = self.cfg['EvalDataset']
eval_dataset.check_or_download_dataset()
anno_file = eval_dataset.get_anno()
self._metrics = [
RBoxMetric(
anno_file=anno_file,
clsid2catid=clsid2catid,
classwise=classwise,
output_eval=output_eval,
bias=bias,
save_prediction_only=save_prediction_only)
]
elif self.cfg.metric == 'VOC':
self._metrics = [
VOCMetric(
label_list=self.dataset.get_label_list(),
class_num=self.cfg.num_classes,
map_type=self.cfg.map_type,
classwise=classwise)
]
elif self.cfg.metric == 'WiderFace':
multi_scale = self.cfg.multi_scale_eval if 'multi_scale_eval' in self.cfg else True
self._metrics = [
WiderFaceMetric(
image_dir=os.path.join(self.dataset.dataset_dir,
self.dataset.image_dir),
anno_file=self.dataset.get_anno(),
multi_scale=multi_scale)
]
elif self.cfg.metric == 'KeyPointTopDownCOCOEval':
eval_dataset = self.cfg['EvalDataset']
eval_dataset.check_or_download_dataset()
anno_file = eval_dataset.get_anno()
save_prediction_only = self.cfg.get('save_prediction_only', False)
self._metrics = [
KeyPointTopDownCOCOEval(
anno_file,
len(eval_dataset),
self.cfg.num_joints,
self.cfg.save_dir,
save_prediction_only=save_prediction_only)
]
elif self.cfg.metric == 'KeyPointTopDownMPIIEval':
eval_dataset = self.cfg['EvalDataset']
eval_dataset.check_or_download_dataset()
anno_file = eval_dataset.get_anno()
save_prediction_only = self.cfg.get('save_prediction_only', False)
self._metrics = [
KeyPointTopDownMPIIEval(
anno_file,
len(eval_dataset),
self.cfg.num_joints,
self.cfg.save_dir,
save_prediction_only=save_prediction_only)
]
elif self.cfg.metric == 'MOTDet':
self._metrics = [JDEDetMetric(), ]
else:
logger.warning("Metric type {} is not supported".format(
self.cfg.metric))
self._metrics = []
def _reset_metrics(self):
for metric in self._metrics:
metric.reset()
def register_callbacks(self, callbacks):
callbacks = [c for c in list(callbacks) if c is not None]
for c in callbacks:
assert isinstance(c, Callback), \
"callbacks should be instances of Callback subclasses"
self._callbacks.extend(callbacks)
self._compose_callback = ComposeCallback(self._callbacks)
def register_metrics(self, metrics):
metrics = [m for m in list(metrics) if m is not None]
for m in metrics:
assert isinstance(m, Metric), \
"metrics should be instances of Metric subclasses"
self._metrics.extend(metrics)
def load_weights(self, weights):
if self.is_loaded_weights:
return
self.start_epoch = 0
load_pretrain_weight(self.model, weights)
logger.debug("Load weights {} to start training".format(weights))
def load_weights_sde(self, det_weights, reid_weights):
if self.model.detector:
load_weight(self.model.detector, det_weights)
load_weight(self.model.reid, reid_weights)
else:
load_weight(self.model.reid, reid_weights)
def resume_weights(self, weights):
# support Distill resume weights
if hasattr(self.model, 'student_model'):
self.start_epoch = load_weight(self.model.student_model, weights,
self.optimizer)
else:
self.start_epoch = load_weight(self.model, weights, self.optimizer)
logger.debug("Resume weights of epoch {}".format(self.start_epoch))
def train(self, validate=False):
assert self.mode == 'train', "Model not in 'train' mode"
Init_mark = False
sync_bn = (
getattr(self.cfg, 'norm_type', None) in [None, 'sync_bn'] and
self.cfg.use_gpu and self._nranks > 1)
if sync_bn:
self.model = BaseArch.convert_sync_batchnorm(self.model)
model = self.model
if self.cfg.get('fleet', False):
model = fleet.distributed_model(model)
self.optimizer = fleet.distributed_optimizer(self.optimizer)
elif self._nranks > 1:
find_unused_parameters = self.cfg[
'find_unused_parameters'] if 'find_unused_parameters' in self.cfg else False
model = paddle.DataParallel(
self.model, find_unused_parameters=find_unused_parameters)
# initial fp16
if self.cfg.get('fp16', False):
scaler = amp.GradScaler(
enable=self.cfg.use_gpu, init_loss_scaling=1024)
self.status.update({
'epoch_id': self.start_epoch,
'step_id': 0,
'steps_per_epoch': len(self.loader)
})
self.status['batch_time'] = stats.SmoothedValue(
self.cfg.log_iter, fmt='{avg:.4f}')
self.status['data_time'] = stats.SmoothedValue(
self.cfg.log_iter, fmt='{avg:.4f}')
self.status['training_staus'] = stats.TrainingStats(self.cfg.log_iter)
if self.cfg.get('print_flops', False):
flops_loader = create('{}Reader'.format(self.mode.capitalize()))(
self.dataset, self.cfg.worker_num)
self._flops(flops_loader)
profiler_options = self.cfg.get('profiler_options', None)
self._compose_callback.on_train_begin(self.status)
for epoch_id in range(self.start_epoch, self.cfg.epoch):
self.status['mode'] = 'train'
self.status['epoch_id'] = epoch_id
self._compose_callback.on_epoch_begin(self.status)
self.loader.dataset.set_epoch(epoch_id)
model.train()
iter_tic = time.time()
for step_id, data in enumerate(self.loader):
self.status['data_time'].update(time.time() - iter_tic)
self.status['step_id'] = step_id
profiler.add_profiler_step(profiler_options)
self._compose_callback.on_step_begin(self.status)
data['epoch_id'] = epoch_id
if self.cfg.get('fp16', False):
with amp.auto_cast(enable=self.cfg.use_gpu):
# model forward
outputs = model(data)
loss = outputs['loss']
# model backward
scaled_loss = scaler.scale(loss)
scaled_loss.backward()
# in dygraph mode, optimizer.minimize is equal to optimizer.step
scaler.minimize(self.optimizer, scaled_loss)
else:
# model forward
outputs = model(data)
loss = outputs['loss']
# model backward
loss.backward()
self.optimizer.step()
curr_lr = self.optimizer.get_lr()
self.lr.step()
if self.cfg.get('unstructured_prune'):
self.pruner.step()
self.optimizer.clear_grad()
self.status['learning_rate'] = curr_lr
if self._nranks < 2 or self._local_rank == 0:
self.status['training_staus'].update(outputs)
self.status['batch_time'].update(time.time() - iter_tic)
self._compose_callback.on_step_end(self.status)
if self.use_ema:
self.ema.update(self.model)
iter_tic = time.time()
# apply ema weight on model
if self.use_ema:
weight = copy.deepcopy(self.model.state_dict())
self.model.set_dict(self.ema.apply())
if self.cfg.get('unstructured_prune'):
self.pruner.update_params()
self._compose_callback.on_epoch_end(self.status)
if validate and (self._nranks < 2 or self._local_rank == 0) \
and ((epoch_id + 1) % self.cfg.snapshot_epoch == 0 \
or epoch_id == self.end_epoch - 1):
if not hasattr(self, '_eval_loader'):
# build evaluation dataset and loader
self._eval_dataset = self.cfg.EvalDataset
self._eval_batch_sampler = \
paddle.io.BatchSampler(
self._eval_dataset,
batch_size=self.cfg.EvalReader['batch_size'])
# If metric is VOC, need to be set collate_batch=False.
if self.cfg.metric == 'VOC':
self.cfg['EvalReader']['collate_batch'] = False
self._eval_loader = create('EvalReader')(
self._eval_dataset,
self.cfg.worker_num,
batch_sampler=self._eval_batch_sampler)
# if validation during training is enabled, metrics should be re-initialized;
# Init_mark makes sure this block only executes once
if validate and Init_mark == False:
Init_mark = True
self._init_metrics(validate=validate)
self._reset_metrics()
with paddle.no_grad():
self.status['save_best_model'] = True
self._eval_with_loader(self._eval_loader)
# restore origin weight on model
if self.use_ema:
self.model.set_dict(weight)
self._compose_callback.on_train_end(self.status)
def _eval_with_loader(self, loader):
sample_num = 0
tic = time.time()
self._compose_callback.on_epoch_begin(self.status)
self.status['mode'] = 'eval'
self.model.eval()
if self.cfg.get('print_flops', False):
flops_loader = create('{}Reader'.format(self.mode.capitalize()))(
self.dataset, self.cfg.worker_num, self._eval_batch_sampler)
self._flops(flops_loader)
for step_id, data in enumerate(loader):
self.status['step_id'] = step_id
self._compose_callback.on_step_begin(self.status)
# forward
outs = self.model(data)
# update metrics
for metric in self._metrics:
metric.update(data, outs)
# multi-scale inputs: all inputs have same im_id
if isinstance(data, typing.Sequence):
sample_num += data[0]['im_id'].numpy().shape[0]
else:
sample_num += data['im_id'].numpy().shape[0]
self._compose_callback.on_step_end(self.status)
self.status['sample_num'] = sample_num
self.status['cost_time'] = time.time() - tic
# accumulate metric to log out
for metric in self._metrics:
metric.accumulate()
metric.log()
self._compose_callback.on_epoch_end(self.status)
# reset metric states since metrics may be computed multiple times
self._reset_metrics()
def evaluate(self):
with paddle.no_grad():
self._eval_with_loader(self.loader)
def predict(self,
images,
draw_threshold=0.5,
output_dir='output',
save_txt=False):
self.dataset.set_images(images)
loader = create('TestReader')(self.dataset, 0)
imid2path = self.dataset.get_imid2path()
anno_file = self.dataset.get_anno()
clsid2catid, catid2name = get_categories(
self.cfg.metric, anno_file=anno_file)
# Run Infer
self.status['mode'] = 'test'
self.model.eval()
if self.cfg.get('print_flops', False):
flops_loader = create('TestReader')(self.dataset, 0)
self._flops(flops_loader)
results = []
for step_id, data in enumerate(loader):
self.status['step_id'] = step_id
# forward
outs = self.model(data)
for key in ['im_shape', 'scale_factor', 'im_id']:
if isinstance(data, typing.Sequence):
outs[key] = data[0][key]
else:
outs[key] = data[key]
for key, value in outs.items():
if hasattr(value, 'numpy'):
outs[key] = value.numpy()
results.append(outs)
# sniper
if type(self.dataset) == SniperCOCODataSet:
results = self.dataset.anno_cropper.aggregate_chips_detections(
results)
for outs in results:
batch_res = get_infer_results(outs, clsid2catid)
bbox_num = outs['bbox_num']
start = 0
for i, im_id in enumerate(outs['im_id']):
image_path = imid2path[int(im_id)]
image = Image.open(image_path).convert('RGB')
image = ImageOps.exif_transpose(image)
self.status['original_image'] = np.array(image.copy())
end = start + bbox_num[i]
bbox_res = batch_res['bbox'][start:end] \
if 'bbox' in batch_res else None
mask_res = batch_res['mask'][start:end] \
if 'mask' in batch_res else None
segm_res = batch_res['segm'][start:end] \
if 'segm' in batch_res else None
keypoint_res = batch_res['keypoint'][start:end] \
if 'keypoint' in batch_res else None
image = visualize_results(
image, bbox_res, mask_res, segm_res, keypoint_res,
int(im_id), catid2name, draw_threshold)
self.status['result_image'] = np.array(image.copy())
if self._compose_callback:
self._compose_callback.on_step_end(self.status)
# save image with detection
save_name = self._get_save_image_name(output_dir, image_path)
logger.info("Detection bbox results saved in {}".format(
save_name))
image.save(save_name, quality=95)
if save_txt:
save_path = os.path.splitext(save_name)[0] + '.txt'
results = {}
results["im_id"] = im_id
if bbox_res:
results["bbox_res"] = bbox_res
if keypoint_res:
results["keypoint_res"] = keypoint_res
save_result(save_path, results, catid2name, draw_threshold)
start = end
def _get_save_image_name(self, output_dir, image_path):
"""
Get save image name from source image path.
"""
if not os.path.exists(output_dir):
os.makedirs(output_dir)
image_name = os.path.split(image_path)[-1]
name, ext = os.path.splitext(image_name)
return os.path.join(output_dir, "{}".format(name)) + ext
def _get_infer_cfg_and_input_spec(self, save_dir, prune_input=True):
image_shape = None
im_shape = [None, 2]
scale_factor = [None, 2]
if self.cfg.architecture in MOT_ARCH:
test_reader_name = 'TestMOTReader'
else:
test_reader_name = 'TestReader'
if 'inputs_def' in self.cfg[test_reader_name]:
inputs_def = self.cfg[test_reader_name]['inputs_def']
image_shape = inputs_def.get('image_shape', None)
# set image_shape=[None, 3, -1, -1] as default
if image_shape is None:
image_shape = [None, 3, -1, -1]
if len(image_shape) == 3:
image_shape = [None] + image_shape
else:
im_shape = [image_shape[0], 2]
scale_factor = [image_shape[0], 2]
if hasattr(self.model, 'deploy'):
self.model.deploy = True
if hasattr(self.model, 'fuse_norm'):
self.model.fuse_norm = self.cfg['TestReader'].get('fuse_normalize',
False)
# Save infer cfg
_dump_infer_config(self.cfg,
os.path.join(save_dir, 'infer_cfg.yml'),
image_shape, self.model)
input_spec = [{
"image": InputSpec(
shape=image_shape, name='image'),
"im_shape": InputSpec(
shape=im_shape, name='im_shape'),
"scale_factor": InputSpec(
shape=scale_factor, name='scale_factor')
}]
if self.cfg.architecture == 'DeepSORT':
input_spec[0].update({
"crops": InputSpec(
shape=[None, 3, 192, 64], name='crops')
})
if prune_input:
static_model = paddle.jit.to_static(
self.model, input_spec=input_spec)
# NOTE: dy2st does not prune the program, but jit.save prunes it by
# input spec, so prune the input spec here and save with the pruned spec
pruned_input_spec = _prune_input_spec(
input_spec, static_model.forward.main_program,
static_model.forward.outputs)
else:
static_model = None
pruned_input_spec = input_spec
# TODO: Hard code, delete it when support prune input_spec.
if self.cfg.architecture == 'PicoDet':
pruned_input_spec = [{
"image": InputSpec(
shape=image_shape, name='image')
}]
return static_model, pruned_input_spec
def export(self, output_dir='output_inference'):
self.model.eval()
model_name = os.path.splitext(os.path.split(self.cfg.filename)[-1])[0]
save_dir = os.path.join(output_dir, model_name)
if not os.path.exists(save_dir):
os.makedirs(save_dir)
static_model, pruned_input_spec = self._get_infer_cfg_and_input_spec(
save_dir)
# dy2st and save model
if 'slim' not in self.cfg or self.cfg['slim_type'] != 'QAT':
paddle.jit.save(
static_model,
os.path.join(save_dir, 'model'),
input_spec=pruned_input_spec)
else:
self.cfg.slim.save_quantized_model(
self.model,
os.path.join(save_dir, 'model'),
input_spec=pruned_input_spec)
logger.info("Exported model and saved it in {}".format(save_dir))
def post_quant(self, output_dir='output_inference'):
model_name = os.path.splitext(os.path.split(self.cfg.filename)[-1])[0]
save_dir = os.path.join(output_dir, model_name)
if not os.path.exists(save_dir):
os.makedirs(save_dir)
for idx, data in enumerate(self.loader):
self.model(data)
if idx == int(self.cfg.get('quant_batch_num', 10)):
break
# TODO: support prune input_spec
_, pruned_input_spec = self._get_infer_cfg_and_input_spec(
save_dir, prune_input=False)
self.cfg.slim.save_quantized_model(
self.model,
os.path.join(save_dir, 'model'),
input_spec=pruned_input_spec)
logger.info("Exported Post-Quant model and saved it in {}".format(save_dir))
def _flops(self, loader):
self.model.eval()
try:
import paddleslim
except Exception as e:
logger.warning(
'Unable to calculate flops, please install paddleslim, for example: `pip install paddleslim`'
)
return
from paddleslim.analysis import dygraph_flops as flops
input_data = None
for data in loader:
input_data = data
break
input_spec = [{
"image": input_data['image'][0].unsqueeze(0),
"im_shape": input_data['im_shape'][0].unsqueeze(0),
"scale_factor": input_data['scale_factor'][0].unsqueeze(0)
}]
flops = flops(self.model, input_spec) / (1000**3)
logger.info(" Model FLOPs : {:.6f}G. (image shape is {})".format(
flops, input_data['image'][0].unsqueeze(0).shape))
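
A hedged sketch of the full Trainer workflow, assuming `cfg` is a config object loaded through ppdet's workspace utilities and that it defines `pretrain_weights`; the weight paths are illustrative:

from paddlers.models.ppdet.engine import Trainer

# train with periodic validation (frequency controlled by snapshot_epoch in the config)
trainer = Trainer(cfg, mode='train')
trainer.load_weights(cfg.pretrain_weights)
trainer.train(validate=True)

# the same class handles standalone evaluation and export in other modes
evaluator = Trainer(cfg, mode='eval')
evaluator.load_weights('output/model_final.pdparams')
evaluator.evaluate()
evaluator.export(output_dir='output_inference')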

@ -0,0 +1,29 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import metrics
from . import keypoint_metrics
from .metrics import *
from .keypoint_metrics import *
__all__ = metrics.__all__ + keypoint_metrics.__all__
from . import mot_metrics
from .mot_metrics import *
__all__ = __all__ + mot_metrics.__all__
from . import mcmot_metrics
from .mcmot_metrics import *
__all__ = __all__ + mcmot_metrics.__all__

@ -0,0 +1,184 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import numpy as np
import itertools
from paddlers.models.ppdet.metrics.json_results import get_det_res, get_det_poly_res, get_seg_res, get_solov2_segm_res, get_keypoint_res
from paddlers.models.ppdet.metrics.map_utils import draw_pr_curve
from paddlers.models.ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
def get_infer_results(outs, catid, bias=0):
"""
Get result at the stage of inference.
The output format is dictionary containing bbox or mask result.
For example, bbox result is a list and each element contains
image_id, category_id, bbox and score.
"""
if outs is None or len(outs) == 0:
raise ValueError(
'The number of valid detection results is zero. Please use a reasonable model and check the input data.'
)
im_id = outs['im_id']
infer_res = {}
if 'bbox' in outs:
if len(outs['bbox']) > 0 and len(outs['bbox'][0]) > 6:
infer_res['bbox'] = get_det_poly_res(
outs['bbox'], outs['bbox_num'], im_id, catid, bias=bias)
else:
infer_res['bbox'] = get_det_res(
outs['bbox'], outs['bbox_num'], im_id, catid, bias=bias)
if 'mask' in outs:
# mask post process
infer_res['mask'] = get_seg_res(outs['mask'], outs['bbox'],
outs['bbox_num'], im_id, catid)
if 'segm' in outs:
infer_res['segm'] = get_solov2_segm_res(outs, im_id, catid)
if 'keypoint' in outs:
infer_res['keypoint'] = get_keypoint_res(outs, im_id)
outs['bbox_num'] = [len(infer_res['keypoint'])]
return infer_res
def cocoapi_eval(jsonfile,
style,
coco_gt=None,
anno_file=None,
max_dets=(100, 300, 1000),
classwise=False,
sigmas=None,
use_area=True):
"""
Args:
jsonfile (str): Evaluation json file, eg: bbox.json, mask.json.
style (str): COCOeval style, can be `bbox` , `segm` , `proposal`, `keypoints` and `keypoints_crowd`.
coco_gt (COCO): Loaded COCO ground-truth object; if None, it will be
built from anno_file, eg: coco_gt = COCO(anno_file)
anno_file (str): COCO annotations file.
max_dets (tuple): COCO evaluation maxDets.
classwise (bool): Whether per-category AP and draw P-R Curve or not.
sigmas (nparray): keypoint labelling sigmas.
use_area (bool): If gt annotations (eg. CrowdPose, AIC)
do not have 'area', please set use_area=False.
"""
assert coco_gt is not None or anno_file is not None
if style == 'keypoints_crowd':
#please install xtcocotools==1.6
from xtcocotools.coco import COCO
from xtcocotools.cocoeval import COCOeval
else:
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
if coco_gt is None:
coco_gt = COCO(anno_file)
logger.info("Start evaluation...")
coco_dt = coco_gt.loadRes(jsonfile)
if style == 'proposal':
coco_eval = COCOeval(coco_gt, coco_dt, 'bbox')
coco_eval.params.useCats = 0
coco_eval.params.maxDets = list(max_dets)
elif style == 'keypoints_crowd':
coco_eval = COCOeval(coco_gt, coco_dt, style, sigmas, use_area)
else:
coco_eval = COCOeval(coco_gt, coco_dt, style)
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
if classwise:
# Compute per-category AP and PR curve
try:
from terminaltables import AsciiTable
except Exception as e:
logger.error(
'terminaltables not found, please install terminaltables, '
'for example: `pip install terminaltables`.')
raise e
precisions = coco_eval.eval['precision']
cat_ids = coco_gt.getCatIds()
# precision: (iou, recall, cls, area range, max dets)
assert len(cat_ids) == precisions.shape[2]
results_per_category = []
for idx, catId in enumerate(cat_ids):
# area range index 0: all area ranges
# max dets index -1: typically 100 per image
nm = coco_gt.loadCats(catId)[0]
precision = precisions[:, :, idx, 0, -1]
precision = precision[precision > -1]
if precision.size:
ap = np.mean(precision)
else:
ap = float('nan')
results_per_category.append(
(str(nm["name"]), '{:0.3f}'.format(float(ap))))
pr_array = precisions[0, :, idx, 0, 2]
recall_array = np.arange(0.0, 1.01, 0.01)
draw_pr_curve(
pr_array,
recall_array,
out_dir=style + '_pr_curve',
file_name='{}_precision_recall_curve.jpg'.format(nm["name"]))
num_columns = min(6, len(results_per_category) * 2)
results_flatten = list(itertools.chain(*results_per_category))
headers = ['category', 'AP'] * (num_columns // 2)
results_2d = itertools.zip_longest(
*[results_flatten[i::num_columns] for i in range(num_columns)])
table_data = [headers]
table_data += [result for result in results_2d]
table = AsciiTable(table_data)
logger.info('Per-category of {} AP: \n{}'.format(style, table.table))
logger.info("per-category PR curve has output to {} folder.".format(
style + '_pr_curve'))
# flush coco evaluation result
sys.stdout.flush()
return coco_eval.stats
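# A minimal usage sketch (file paths are hypothetical), assuming bbox.json was
# produced from the results of get_infer_results above:
#
# >>> stats = cocoapi_eval('bbox.json', 'bbox',
# ...                      anno_file='annotations/instances_val.json')
# >>> stats[0]   # COCO AP at IoU=0.50:0.95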
def json_eval_results(metric, json_directory, dataset):
"""
COCO API evaluation with existing proposal.json, bbox.json or mask.json files.
"""
assert metric == 'COCO'
anno_file = dataset.get_anno()
json_file_list = ['proposal.json', 'bbox.json', 'mask.json']
if json_directory:
assert os.path.exists(
json_directory), "The json directory:{} does not exist".format(
json_directory)
for k, v in enumerate(json_file_list):
json_file_list[k] = os.path.join(str(json_directory), v)
coco_eval_style = ['proposal', 'bbox', 'segm']
for i, v_json in enumerate(json_file_list):
if os.path.exists(v_json):
cocoapi_eval(v_json, coco_eval_style[i], anno_file=anno_file)
else:
logger.info("{} not exists!".format(v_json))

@ -0,0 +1,149 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import six
import numpy as np
def get_det_res(bboxes, bbox_nums, image_id, label_to_cat_id_map, bias=0):
det_res = []
k = 0
for i in range(len(bbox_nums)):
cur_image_id = int(image_id[i][0])
det_nums = bbox_nums[i]
for j in range(det_nums):
dt = bboxes[k]
k = k + 1
num_id, score, xmin, ymin, xmax, ymax = dt.tolist()
if int(num_id) < 0:
continue
category_id = label_to_cat_id_map[int(num_id)]
w = xmax - xmin + bias
h = ymax - ymin + bias
bbox = [xmin, ymin, w, h]
dt_res = {
'image_id': cur_image_id,
'category_id': category_id,
'bbox': bbox,
'score': score
}
det_res.append(dt_res)
return det_res
def get_det_poly_res(bboxes, bbox_nums, image_id, label_to_cat_id_map, bias=0):
det_res = []
k = 0
for i in range(len(bbox_nums)):
cur_image_id = int(image_id[i][0])
det_nums = bbox_nums[i]
for j in range(det_nums):
dt = bboxes[k]
k = k + 1
num_id, score, x1, y1, x2, y2, x3, y3, x4, y4 = dt.tolist()
if int(num_id) < 0:
continue
category_id = label_to_cat_id_map[int(num_id)]
rbox = [x1, y1, x2, y2, x3, y3, x4, y4]
dt_res = {
'image_id': cur_image_id,
'category_id': category_id,
'bbox': rbox,
'score': score
}
det_res.append(dt_res)
return det_res
def get_seg_res(masks, bboxes, mask_nums, image_id, label_to_cat_id_map):
import pycocotools.mask as mask_util
seg_res = []
k = 0
for i in range(len(mask_nums)):
cur_image_id = int(image_id[i][0])
det_nums = mask_nums[i]
for j in range(det_nums):
mask = masks[k].astype(np.uint8)
score = float(bboxes[k][1])
label = int(bboxes[k][0])
k = k + 1
if label == -1:
continue
cat_id = label_to_cat_id_map[label]
rle = mask_util.encode(
np.array(
mask[:, :, None], order="F", dtype="uint8"))[0]
if six.PY3:
if 'counts' in rle:
rle['counts'] = rle['counts'].decode("utf8")
sg_res = {
'image_id': cur_image_id,
'category_id': cat_id,
'segmentation': rle,
'score': score
}
seg_res.append(sg_res)
return seg_res
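# A minimal sketch of the RLE encoding used above (hypothetical 4x4 mask);
# under Python 3, rle['counts'] is bytes, which is why it is decoded to a
# UTF-8 string before being written to JSON:
#
# >>> import numpy as np
# >>> import pycocotools.mask as mask_util
# >>> m = np.zeros((4, 4), dtype=np.uint8)
# >>> m[1:3, 1:3] = 1
# >>> rle = mask_util.encode(np.array(m[:, :, None], order='F'))[0]
# >>> sorted(rle.keys())
# ['counts', 'size']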
def get_solov2_segm_res(results, image_id, num_id_to_cat_id_map):
import pycocotools.mask as mask_util
segm_res = []
# for each batch
segms = results['segm'].astype(np.uint8)
clsid_labels = results['cate_label']
clsid_scores = results['cate_score']
lengths = segms.shape[0]
im_id = int(image_id[0][0])
if lengths == 0 or segms is None:
return None
# for each sample
for i in range(lengths - 1):
clsid = int(clsid_labels[i])
catid = num_id_to_cat_id_map[clsid]
score = float(clsid_scores[i])
mask = segms[i]
segm = mask_util.encode(np.array(mask[:, :, np.newaxis], order='F'))[0]
segm['counts'] = segm['counts'].decode('utf8')
coco_res = {
'image_id': im_id,
'category_id': catid,
'segmentation': segm,
'score': score
}
segm_res.append(coco_res)
return segm_res
def get_keypoint_res(results, im_id):
anns = []
preds = results['keypoint']
for idx in range(im_id.shape[0]):
image_id = im_id[idx].item()
kpts, scores = preds[idx]
for kpt, score in zip(kpts, scores):
kpt = kpt.flatten()
ann = {
'image_id': image_id,
'category_id': 1, # XXX hard code
'keypoints': kpt.tolist(),
'score': float(score)
}
x = kpt[0::3]
y = kpt[1::3]
x0, x1, y0, y1 = np.min(x).item(), np.max(x).item(), np.min(
y).item(), np.max(y).item()
ann['area'] = (x1 - x0) * (y1 - y0)
ann['bbox'] = [x0, y0, x1 - x0, y1 - y0]
anns.append(ann)
return anns

@ -0,0 +1,401 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import json
from collections import defaultdict, OrderedDict
import numpy as np
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from ..modeling.keypoint_utils import oks_nms
from scipy.io import loadmat, savemat
from paddlers.models.ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
__all__ = ['KeyPointTopDownCOCOEval', 'KeyPointTopDownMPIIEval']
class KeyPointTopDownCOCOEval(object):
"""refer to
https://github.com/leoxiaobin/deep-high-resolution-net.pytorch
Copyright (c) Microsoft, under the MIT License.
"""
def __init__(self,
anno_file,
num_samples,
num_joints,
output_eval,
iou_type='keypoints',
in_vis_thre=0.2,
oks_thre=0.9,
save_prediction_only=False):
super(KeyPointTopDownCOCOEval, self).__init__()
self.coco = COCO(anno_file)
self.num_samples = num_samples
self.num_joints = num_joints
self.iou_type = iou_type
self.in_vis_thre = in_vis_thre
self.oks_thre = oks_thre
self.output_eval = output_eval
self.res_file = os.path.join(output_eval, "keypoints_results.json")
self.save_prediction_only = save_prediction_only
self.reset()
def reset(self):
self.results = {
'all_preds': np.zeros(
(self.num_samples, self.num_joints, 3), dtype=np.float32),
'all_boxes': np.zeros((self.num_samples, 6)),
'image_path': []
}
self.eval_results = {}
self.idx = 0
def update(self, inputs, outputs):
kpts, _ = outputs['keypoint'][0]
num_images = inputs['image'].shape[0]
self.results['all_preds'][self.idx:self.idx + num_images, :, 0:
3] = kpts[:, :, 0:3]
self.results['all_boxes'][self.idx:self.idx + num_images, 0:
2] = inputs['center'].numpy()[:, 0:2]
self.results['all_boxes'][self.idx:self.idx + num_images, 2:
4] = inputs['scale'].numpy()[:, 0:2]
self.results['all_boxes'][self.idx:self.idx + num_images, 4] = np.prod(
inputs['scale'].numpy() * 200, 1)
self.results['all_boxes'][self.idx:self.idx + num_images,
5] = np.squeeze(inputs['score'].numpy())
self.results['image_path'].extend(inputs['im_id'].numpy())
self.idx += num_images
def _write_coco_keypoint_results(self, keypoints):
data_pack = [{
'cat_id': 1,
'cls': 'person',
'ann_type': 'keypoints',
'keypoints': keypoints
}]
results = self._coco_keypoint_results_one_category_kernel(data_pack[0])
if not os.path.exists(self.output_eval):
os.makedirs(self.output_eval)
with open(self.res_file, 'w') as f:
json.dump(results, f, sort_keys=True, indent=4)
logger.info(f'The keypoint result is saved to {self.res_file}.')
try:
json.load(open(self.res_file))
except Exception:
content = []
with open(self.res_file, 'r') as f:
for line in f:
content.append(line)
content[-1] = ']'
with open(self.res_file, 'w') as f:
for c in content:
f.write(c)
def _coco_keypoint_results_one_category_kernel(self, data_pack):
cat_id = data_pack['cat_id']
keypoints = data_pack['keypoints']
cat_results = []
for img_kpts in keypoints:
if len(img_kpts) == 0:
continue
_key_points = np.array(
[img_kpts[k]['keypoints'] for k in range(len(img_kpts))])
_key_points = _key_points.reshape(_key_points.shape[0], -1)
result = [{
'image_id': img_kpts[k]['image'],
'category_id': cat_id,
'keypoints': _key_points[k].tolist(),
'score': img_kpts[k]['score'],
'center': list(img_kpts[k]['center']),
'scale': list(img_kpts[k]['scale'])
} for k in range(len(img_kpts))]
cat_results.extend(result)
return cat_results
def get_final_results(self, preds, all_boxes, img_path):
_kpts = []
for idx, kpt in enumerate(preds):
_kpts.append({
'keypoints': kpt,
'center': all_boxes[idx][0:2],
'scale': all_boxes[idx][2:4],
'area': all_boxes[idx][4],
'score': all_boxes[idx][5],
'image': int(img_path[idx])
})
# image x person x (keypoints)
kpts = defaultdict(list)
for kpt in _kpts:
kpts[kpt['image']].append(kpt)
# rescoring and oks nms
num_joints = preds.shape[1]
in_vis_thre = self.in_vis_thre
oks_thre = self.oks_thre
oks_nmsed_kpts = []
for img in kpts.keys():
img_kpts = kpts[img]
for n_p in img_kpts:
box_score = n_p['score']
kpt_score = 0
valid_num = 0
for n_jt in range(0, num_joints):
t_s = n_p['keypoints'][n_jt][2]
if t_s > in_vis_thre:
kpt_score = kpt_score + t_s
valid_num = valid_num + 1
if valid_num != 0:
kpt_score = kpt_score / valid_num
# rescoring
n_p['score'] = kpt_score * box_score
keep = oks_nms([img_kpts[i] for i in range(len(img_kpts))],
oks_thre)
if len(keep) == 0:
oks_nmsed_kpts.append(img_kpts)
else:
oks_nmsed_kpts.append([img_kpts[_keep] for _keep in keep])
self._write_coco_keypoint_results(oks_nmsed_kpts)
def accumulate(self):
self.get_final_results(self.results['all_preds'],
self.results['all_boxes'],
self.results['image_path'])
if self.save_prediction_only:
logger.info(f'The keypoint result is saved to {self.res_file} '
'and the mAP is not evaluated.')
return
coco_dt = self.coco.loadRes(self.res_file)
coco_eval = COCOeval(self.coco, coco_dt, 'keypoints')
coco_eval.params.useSegm = None
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
keypoint_stats = []
for ind in range(len(coco_eval.stats)):
keypoint_stats.append((coco_eval.stats[ind]))
self.eval_results['keypoint'] = keypoint_stats
def log(self):
if self.save_prediction_only:
return
stats_names = [
'AP', 'Ap .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
'AR .75', 'AR (M)', 'AR (L)'
]
num_values = len(stats_names)
print(' '.join(['| {}'.format(name) for name in stats_names]) + ' |')
print('|---' * (num_values + 1) + '|')
print(' '.join([
'| {:.3f}'.format(value) for value in self.eval_results['keypoint']
]) + ' |')
def get_results(self):
return self.eval_results
class KeyPointTopDownMPIIEval(object):
def __init__(self,
anno_file,
num_samples,
num_joints,
output_eval,
oks_thre=0.9,
save_prediction_only=False):
super(KeyPointTopDownMPIIEval, self).__init__()
self.ann_file = anno_file
self.res_file = os.path.join(output_eval, "keypoints_results.json")
self.save_prediction_only = save_prediction_only
self.reset()
def reset(self):
self.results = []
self.eval_results = {}
self.idx = 0
def update(self, inputs, outputs):
kpts, _ = outputs['keypoint'][0]
num_images = inputs['image'].shape[0]
results = {}
results['preds'] = kpts[:, :, 0:3]
results['boxes'] = np.zeros((num_images, 6))
results['boxes'][:, 0:2] = inputs['center'].numpy()[:, 0:2]
results['boxes'][:, 2:4] = inputs['scale'].numpy()[:, 0:2]
results['boxes'][:, 4] = np.prod(inputs['scale'].numpy() * 200, 1)
results['boxes'][:, 5] = np.squeeze(inputs['score'].numpy())
results['image_path'] = inputs['image_file']
self.results.append(results)
def accumulate(self):
self._mpii_keypoint_results_save()
if self.save_prediction_only:
logger.info(f'The keypoint result is saved to {self.res_file} '
'and the mAP is not evaluated.')
return
self.eval_results = self.evaluate(self.results)
def _mpii_keypoint_results_save(self):
results = []
for res in self.results:
if len(res) == 0:
continue
result = [{
'preds': res['preds'][k].tolist(),
'boxes': res['boxes'][k].tolist(),
'image_path': res['image_path'][k],
} for k in range(len(res))]
results.extend(result)
with open(self.res_file, 'w') as f:
json.dump(results, f, sort_keys=True, indent=4)
logger.info(f'The keypoint result is saved to {self.res_file}.')
def log(self):
if self.save_prediction_only:
return
for item, value in self.eval_results.items():
print("{} : {}".format(item, value))
def get_results(self):
return self.eval_results
def evaluate(self, outputs, savepath=None):
"""Evaluate PCKh for MPII dataset. refer to
https://github.com/leoxiaobin/deep-high-resolution-net.pytorch
Copyright (c) Microsoft, under the MIT License.
Args:
outputs(list(preds, boxes)):
* preds (np.ndarray[N,K,3]): The first two dimensions are
coordinates, score is the third dimension of the array.
* boxes (np.ndarray[N,6]): [center[0], center[1], scale[0]
, scale[1],area, score]
Returns:
dict: PCKh for each joint
"""
kpts = []
for output in outputs:
preds = output['preds']
batch_size = preds.shape[0]
for i in range(batch_size):
kpts.append({'keypoints': preds[i]})
preds = np.stack([kpt['keypoints'] for kpt in kpts])
# convert 0-based index to 1-based index,
# and get the first two dimensions.
preds = preds[..., :2] + 1.0
if savepath is not None:
pred_file = os.path.join(savepath, 'pred.mat')
savemat(pred_file, mdict={'preds': preds})
SC_BIAS = 0.6
threshold = 0.5
gt_file = os.path.join(
os.path.dirname(self.ann_file), 'mpii_gt_val.mat')
gt_dict = loadmat(gt_file)
dataset_joints = gt_dict['dataset_joints']
jnt_missing = gt_dict['jnt_missing']
pos_gt_src = gt_dict['pos_gt_src']
headboxes_src = gt_dict['headboxes_src']
pos_pred_src = np.transpose(preds, [1, 2, 0])
head = np.where(dataset_joints == 'head')[1][0]
lsho = np.where(dataset_joints == 'lsho')[1][0]
lelb = np.where(dataset_joints == 'lelb')[1][0]
lwri = np.where(dataset_joints == 'lwri')[1][0]
lhip = np.where(dataset_joints == 'lhip')[1][0]
lkne = np.where(dataset_joints == 'lkne')[1][0]
lank = np.where(dataset_joints == 'lank')[1][0]
rsho = np.where(dataset_joints == 'rsho')[1][0]
relb = np.where(dataset_joints == 'relb')[1][0]
rwri = np.where(dataset_joints == 'rwri')[1][0]
rkne = np.where(dataset_joints == 'rkne')[1][0]
rank = np.where(dataset_joints == 'rank')[1][0]
rhip = np.where(dataset_joints == 'rhip')[1][0]
jnt_visible = 1 - jnt_missing
uv_error = pos_pred_src - pos_gt_src
uv_err = np.linalg.norm(uv_error, axis=1)
headsizes = headboxes_src[1, :, :] - headboxes_src[0, :, :]
headsizes = np.linalg.norm(headsizes, axis=0)
headsizes *= SC_BIAS
scale = headsizes * np.ones((len(uv_err), 1), dtype=np.float32)
scaled_uv_err = uv_err / scale
scaled_uv_err = scaled_uv_err * jnt_visible
jnt_count = np.sum(jnt_visible, axis=1)
less_than_threshold = (scaled_uv_err <= threshold) * jnt_visible
PCKh = 100. * np.sum(less_than_threshold, axis=1) / jnt_count
# save
rng = np.arange(0, 0.5 + 0.01, 0.01)
pckAll = np.zeros((len(rng), 16), dtype=np.float32)
for r, threshold in enumerate(rng):
less_than_threshold = (scaled_uv_err <= threshold) * jnt_visible
pckAll[r, :] = 100. * np.sum(less_than_threshold,
axis=1) / jnt_count
PCKh = np.ma.array(PCKh, mask=False)
PCKh.mask[6:8] = True
jnt_count = np.ma.array(jnt_count, mask=False)
jnt_count.mask[6:8] = True
jnt_ratio = jnt_count / np.sum(jnt_count).astype(np.float64)
name_value = [ #noqa
('Head', PCKh[head]),
('Shoulder', 0.5 * (PCKh[lsho] + PCKh[rsho])),
('Elbow', 0.5 * (PCKh[lelb] + PCKh[relb])),
('Wrist', 0.5 * (PCKh[lwri] + PCKh[rwri])),
('Hip', 0.5 * (PCKh[lhip] + PCKh[rhip])),
('Knee', 0.5 * (PCKh[lkne] + PCKh[rkne])),
('Ankle', 0.5 * (PCKh[lank] + PCKh[rank])),
('PCKh', np.sum(PCKh * jnt_ratio)),
('PCKh@0.1', np.sum(pckAll[11, :] * jnt_ratio))
]
name_value = OrderedDict(name_value)
return name_value
def _sort_and_unique_bboxes(self, kpts, key='bbox_id'):
"""sort kpts and remove the repeated ones."""
kpts = sorted(kpts, key=lambda x: x[key])
num = len(kpts)
for i in range(num - 1, 0, -1):
if kpts[i][key] == kpts[i - 1][key]:
del kpts[i]
return kpts

@ -0,0 +1,444 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import os
import sys
import numpy as np
import itertools
import paddle
from paddlers.models.ppdet.modeling.bbox_utils import poly2rbox, rbox2poly_np
from paddlers.models.ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
__all__ = [
'draw_pr_curve',
'bbox_area',
'jaccard_overlap',
'prune_zero_padding',
'DetectionMAP',
'ap_per_class',
'compute_ap',
]
def draw_pr_curve(precision,
recall,
iou=0.5,
out_dir='pr_curve',
file_name='precision_recall_curve.jpg'):
if not os.path.exists(out_dir):
os.makedirs(out_dir)
output_path = os.path.join(out_dir, file_name)
try:
import matplotlib.pyplot as plt
except Exception as e:
logger.error('Matplotlib not found, please install matplotlib, '
'for example: `pip install matplotlib`.')
raise e
plt.cla()
plt.figure('P-R Curve')
plt.title('Precision/Recall Curve(IoU={})'.format(iou))
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.grid(True)
plt.plot(recall, precision)
plt.savefig(output_path)
def bbox_area(bbox, is_bbox_normalized):
"""
Calculate area of a bounding box
"""
norm = 1. - float(is_bbox_normalized)
width = bbox[2] - bbox[0] + norm
height = bbox[3] - bbox[1] + norm
return width * height
def jaccard_overlap(pred, gt, is_bbox_normalized=False):
"""
Calculate the Jaccard overlap ratio between two bounding boxes
"""
if pred[0] >= gt[2] or pred[2] <= gt[0] or \
pred[1] >= gt[3] or pred[3] <= gt[1]:
return 0.
inter_xmin = max(pred[0], gt[0])
inter_ymin = max(pred[1], gt[1])
inter_xmax = min(pred[2], gt[2])
inter_ymax = min(pred[3], gt[3])
inter_size = bbox_area([inter_xmin, inter_ymin, inter_xmax, inter_ymax],
is_bbox_normalized)
pred_size = bbox_area(pred, is_bbox_normalized)
gt_size = bbox_area(gt, is_bbox_normalized)
overlap = float(inter_size) / (pred_size + gt_size - inter_size)
return overlap
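# A quick worked example with unnormalized boxes (so bbox_area adds a +1 pixel
# term): pred = [0, 0, 10, 10], gt = [5, 5, 15, 15]
# -> intersection area = 6 * 6 = 36, each box area = 11 * 11 = 121,
#    IoU = 36 / (121 + 121 - 36) ≈ 0.175.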
def calc_rbox_iou(pred, gt_rbox):
"""
Calculate IoU between rotated bounding boxes
"""
# calc iou of bounding box for speedup
pred = np.array(pred, np.float32).reshape(-1, 8)
pred = pred.reshape(-1, 2)
gt_poly = rbox2poly_np(np.array(gt_rbox).reshape(-1, 5))[0]
gt_poly = gt_poly.reshape(-1, 2)
pred_rect = [
np.min(pred[:, 0]), np.min(pred[:, 1]), np.max(pred[:, 0]),
np.max(pred[:, 1])
]
gt_rect = [
np.min(gt_poly[:, 0]), np.min(gt_poly[:, 1]), np.max(gt_poly[:, 0]),
np.max(gt_poly[:, 1])
]
iou = jaccard_overlap(pred_rect, gt_rect, False)
if iou <= 0:
return iou
# calc rbox iou
pred = np.array(pred, np.float32).reshape(-1, 8)
pred_rbox = poly2rbox(pred)
pred_rbox = pred_rbox.reshape(-1, 5)
try:
from rbox_iou_ops import rbox_iou
except Exception as e:
print("import custom_ops error, try install rbox_iou_ops " \
"following ppdet/ext_op/README.md", e)
sys.stdout.flush()
sys.exit(-1)
gt_rbox = np.array(gt_rbox, np.float32).reshape(-1, 5)
pd_gt_rbox = paddle.to_tensor(gt_rbox, dtype='float32')
pd_pred_rbox = paddle.to_tensor(pred_rbox, dtype='float32')
iou = rbox_iou(pd_gt_rbox, pd_pred_rbox)
iou = iou.numpy()
return iou[0][0]
def prune_zero_padding(gt_box, gt_label, difficult=None):
valid_cnt = 0
for i in range(len(gt_box)):
if gt_box[i, 0] == 0 and gt_box[i, 1] == 0 and \
gt_box[i, 2] == 0 and gt_box[i, 3] == 0:
break
valid_cnt += 1
return (gt_box[:valid_cnt], gt_label[:valid_cnt], difficult[:valid_cnt]
if difficult is not None else None)
class DetectionMAP(object):
"""
Calculate detection mean average precision.
Currently supports two types: '11point' and 'integral'
Args:
class_num (int): The class number.
overlap_thresh (float): The threshold of overlap
ratio between prediction bounding box and
ground truth bounding box for deciding
true/false positive. Default 0.5.
map_type (str): Calculation method of mean average
precision, currently support '11point' and
'integral'. Default '11point'.
is_bbox_normalized (bool): Whether bounding boxes
are normalized to the range [0, 1]. Default False.
evaluate_difficult (bool): Whether to evaluate
difficult bounding boxes. Default False.
catid2name (dict): Mapping between category id and category name.
classwise (bool): Whether per-category AP and draw
P-R Curve or not.
"""
def __init__(self,
class_num,
overlap_thresh=0.5,
map_type='11point',
is_bbox_normalized=False,
evaluate_difficult=False,
catid2name=None,
classwise=False):
self.class_num = class_num
self.overlap_thresh = overlap_thresh
assert map_type in ['11point', 'integral'], \
"map_type currently only support '11point' "\
"and 'integral'"
self.map_type = map_type
self.is_bbox_normalized = is_bbox_normalized
self.evaluate_difficult = evaluate_difficult
self.classwise = classwise
self.classes = []
for cname in catid2name.values():
self.classes.append(cname)
self.reset()
def update(self, bbox, score, label, gt_box, gt_label, difficult=None):
"""
Update metric statistics from the given prediction and ground
truth information.
"""
if difficult is None:
difficult = np.zeros_like(gt_label)
# record class gt count
for gtl, diff in zip(gt_label, difficult):
if self.evaluate_difficult or int(diff) == 0:
self.class_gt_counts[int(np.array(gtl))] += 1
# record class score positive
visited = [False] * len(gt_label)
for b, s, l in zip(bbox, score, label):
pred = b.tolist() if isinstance(b, np.ndarray) else b
max_idx = -1
max_overlap = -1.0
for i, gl in enumerate(gt_label):
if int(gl) == int(l):
if len(gt_box[i]) == 5:
overlap = calc_rbox_iou(pred, gt_box[i])
else:
overlap = jaccard_overlap(pred, gt_box[i],
self.is_bbox_normalized)
if overlap > max_overlap:
max_overlap = overlap
max_idx = i
if max_overlap > self.overlap_thresh:
if self.evaluate_difficult or \
int(np.array(difficult[max_idx])) == 0:
if not visited[max_idx]:
self.class_score_poss[int(l)].append([s, 1.0])
visited[max_idx] = True
else:
self.class_score_poss[int(l)].append([s, 0.0])
else:
self.class_score_poss[int(l)].append([s, 0.0])
def reset(self):
"""
Reset metric statistics
"""
self.class_score_poss = [[] for _ in range(self.class_num)]
self.class_gt_counts = [0] * self.class_num
self.mAP = 0.0
def accumulate(self):
"""
Accumulate metric results and calculate mAP
"""
mAP = 0.
valid_cnt = 0
eval_results = []
for score_pos, count in zip(self.class_score_poss,
self.class_gt_counts):
if count == 0: continue
if len(score_pos) == 0:
valid_cnt += 1
continue
accum_tp_list, accum_fp_list = \
self._get_tp_fp_accum(score_pos)
precision = []
recall = []
for ac_tp, ac_fp in zip(accum_tp_list, accum_fp_list):
precision.append(float(ac_tp) / (ac_tp + ac_fp))
recall.append(float(ac_tp) / count)
one_class_ap = 0.0
if self.map_type == '11point':
max_precisions = [0.] * 11
start_idx = len(precision) - 1
for j in range(10, -1, -1):
for i in range(start_idx, -1, -1):
if recall[i] < float(j) / 10.:
start_idx = i
if j > 0:
max_precisions[j - 1] = max_precisions[j]
break
else:
if max_precisions[j] < precision[i]:
max_precisions[j] = precision[i]
one_class_ap = sum(max_precisions) / 11.
mAP += one_class_ap
valid_cnt += 1
elif self.map_type == 'integral':
import math
prev_recall = 0.
for i in range(len(precision)):
recall_gap = math.fabs(recall[i] - prev_recall)
if recall_gap > 1e-6:
one_class_ap += precision[i] * recall_gap
prev_recall = recall[i]
mAP += one_class_ap
valid_cnt += 1
else:
logger.error("Unspported mAP type {}".format(self.map_type))
sys.exit(1)
eval_results.append({
'class': self.classes[valid_cnt - 1],
'ap': one_class_ap,
'precision': precision,
'recall': recall,
})
self.eval_results = eval_results
self.mAP = mAP / float(valid_cnt) if valid_cnt > 0 else mAP
def get_map(self):
"""
Get mAP result
"""
if self.mAP is None:
logger.error("mAP is not calculated.")
if self.classwise:
# Compute per-category AP and PR curve
try:
from terminaltables import AsciiTable
except Exception as e:
logger.error(
'terminaltables not found, please install terminaltables, '
'for example: `pip install terminaltables`.')
raise e
results_per_category = []
for eval_result in self.eval_results:
results_per_category.append(
(str(eval_result['class']),
'{:0.3f}'.format(float(eval_result['ap']))))
draw_pr_curve(
eval_result['precision'],
eval_result['recall'],
out_dir='voc_pr_curve',
file_name='{}_precision_recall_curve.jpg'.format(
eval_result['class']))
num_columns = min(6, len(results_per_category) * 2)
results_flatten = list(itertools.chain(*results_per_category))
headers = ['category', 'AP'] * (num_columns // 2)
results_2d = itertools.zip_longest(*[
results_flatten[i::num_columns] for i in range(num_columns)
])
table_data = [headers]
table_data += [result for result in results_2d]
table = AsciiTable(table_data)
logger.info('Per-category of VOC AP: \n{}'.format(table.table))
logger.info(
"The per-category PR curve has been saved to the voc_pr_curve folder.")
return self.mAP
def _get_tp_fp_accum(self, score_pos_list):
"""
Calculate accumulating true/false positive results from
[score, pos] records
"""
sorted_list = sorted(score_pos_list, key=lambda s: s[0], reverse=True)
accum_tp = 0
accum_fp = 0
accum_tp_list = []
accum_fp_list = []
for (score, pos) in sorted_list:
accum_tp += int(pos)
accum_tp_list.append(accum_tp)
accum_fp += 1 - int(pos)
accum_fp_list.append(accum_fp)
return accum_tp_list, accum_fp_list
def ap_per_class(tp, conf, pred_cls, target_cls):
"""
Computes the average precision, given the recall and precision curves.
Method originally from https://github.com/rafaelpadilla/Object-Detection-Metrics.
Args:
tp (list): True positives.
conf (list): Objectness value from 0-1.
pred_cls (list): Predicted object classes.
target_cls (list): Target object classes.
"""
tp, conf, pred_cls, target_cls = np.array(tp), np.array(conf), np.array(
pred_cls), np.array(target_cls)
# Sort by objectness
i = np.argsort(-conf)
tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]
# Find unique classes
unique_classes = np.unique(np.concatenate((pred_cls, target_cls), 0))
# Create Precision-Recall curve and compute AP for each class
ap, p, r = [], [], []
for c in unique_classes:
i = pred_cls == c
n_gt = sum(target_cls == c) # Number of ground truth objects
n_p = sum(i) # Number of predicted objects
if (n_p == 0) and (n_gt == 0):
continue
elif (n_p == 0) or (n_gt == 0):
ap.append(0)
r.append(0)
p.append(0)
else:
# Accumulate FPs and TPs
fpc = np.cumsum(1 - tp[i])
tpc = np.cumsum(tp[i])
# Recall
recall_curve = tpc / (n_gt + 1e-16)
r.append(tpc[-1] / (n_gt + 1e-16))
# Precision
precision_curve = tpc / (tpc + fpc)
p.append(tpc[-1] / (tpc[-1] + fpc[-1]))
# AP from recall-precision curve
ap.append(compute_ap(recall_curve, precision_curve))
return np.array(ap), unique_classes.astype('int32'), np.array(r), np.array(
p)
def compute_ap(recall, precision):
"""
Computes the average precision, given the recall and precision curves.
Code originally from https://github.com/rbgirshick/py-faster-rcnn.
Args:
recall (list): The recall curve.
precision (list): The precision curve.
Returns:
The average precision as computed in py-faster-rcnn.
"""
# correct AP calculation
# first append sentinel values at the end
mrec = np.concatenate(([0.], recall, [1.]))
mpre = np.concatenate(([0.], precision, [0.]))
# compute the precision envelope
for i in range(mpre.size - 1, 0, -1):
mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
# to calculate area under PR curve, look for points
# where X axis (recall) changes value
i = np.where(mrec[1:] != mrec[:-1])[0]
# and sum (\Delta recall) * prec
ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
return ap
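# Worked example: compute_ap([0.5, 1.0], [1.0, 0.5])
# -> with sentinels, mrec = [0, 0.5, 1, 1] and the precision envelope becomes
#    [1.0, 1.0, 0.5, 0.0], so AP = 0.5 * 1.0 + 0.5 * 0.5 = 0.75.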

@ -0,0 +1,470 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import copy
import sys
import math
from collections import defaultdict
from motmetrics.math_util import quiet_divide
import numpy as np
import pandas as pd
import paddle
import paddle.nn.functional as F
from .metrics import Metric
import motmetrics as mm
import openpyxl
metrics = mm.metrics.motchallenge_metrics
mh = mm.metrics.create()
from paddlers.models.ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
__all__ = ['MCMOTEvaluator', 'MCMOTMetric']
METRICS_LIST = [
'num_frames', 'num_matches', 'num_switches', 'num_transfer', 'num_ascend',
'num_migrate', 'num_false_positives', 'num_misses', 'num_detections',
'num_objects', 'num_predictions', 'num_unique_objects', 'mostly_tracked',
'partially_tracked', 'mostly_lost', 'num_fragmentations', 'motp', 'mota',
'precision', 'recall', 'idfp', 'idfn', 'idtp', 'idp', 'idr', 'idf1'
]
NAME_MAP = {
'num_frames': 'num_frames',
'num_matches': 'num_matches',
'num_switches': 'IDs',
'num_transfer': 'IDt',
'num_ascend': 'IDa',
'num_migrate': 'IDm',
'num_false_positives': 'FP',
'num_misses': 'FN',
'num_detections': 'num_detections',
'num_objects': 'num_objects',
'num_predictions': 'num_predictions',
'num_unique_objects': 'GT',
'mostly_tracked': 'MT',
'partially_tracked': 'partially_tracked',
'mostly_lost': 'ML',
'num_fragmentations': 'FM',
'motp': 'MOTP',
'mota': 'MOTA',
'precision': 'Prcn',
'recall': 'Rcll',
'idfp': 'idfp',
'idfn': 'idfn',
'idtp': 'idtp',
'idp': 'IDP',
'idr': 'IDR',
'idf1': 'IDF1'
}
def parse_accs_metrics(seq_acc, index_name, verbose=False):
"""
Parse the evaluation metrics of multiple MOTAccumulators
"""
mh = mm.metrics.create()
summary = MCMOTEvaluator.get_summary(seq_acc, index_name, METRICS_LIST)
summary.loc['OVERALL', 'motp'] = (summary['motp'] * summary['num_detections']).sum() / \
summary.loc['OVERALL', 'num_detections']
if verbose:
strsummary = mm.io.render_summary(
summary, formatters=mh.formatters, namemap=NAME_MAP)
print(strsummary)
return summary
def seqs_overall_metrics(summary_df, verbose=False):
"""
Calculate overall metrics for multiple sequences
"""
add_col = [
'num_frames', 'num_matches', 'num_switches', 'num_transfer',
'num_ascend', 'num_migrate', 'num_false_positives', 'num_misses',
'num_detections', 'num_objects', 'num_predictions',
'num_unique_objects', 'mostly_tracked', 'partially_tracked',
'mostly_lost', 'num_fragmentations', 'idfp', 'idfn', 'idtp'
]
calc_col = ['motp', 'mota', 'precision', 'recall', 'idp', 'idr', 'idf1']
calc_df = summary_df.copy()
overall_dic = {}
for col in add_col:
overall_dic[col] = calc_df[col].sum()
for col in calc_col:
overall_dic[col] = getattr(MCMOTMetricOverall, col + '_overall')(
calc_df, overall_dic)
overall_df = pd.DataFrame(overall_dic, index=['overall_calc'])
calc_df = pd.concat([calc_df, overall_df])
if verbose:
mh = mm.metrics.create()
str_calc_df = mm.io.render_summary(
calc_df, formatters=mh.formatters, namemap=NAME_MAP)
print(str_calc_df)
return calc_df
class MCMOTMetricOverall(object):
def motp_overall(summary_df, overall_dic):
motp = quiet_divide((summary_df['motp'] *
summary_df['num_detections']).sum(),
overall_dic['num_detections'])
return motp
def mota_overall(summary_df, overall_dic):
del summary_df
mota = 1. - quiet_divide(
(overall_dic['num_misses'] + overall_dic['num_switches'] +
overall_dic['num_false_positives']), overall_dic['num_objects'])
return mota
def precision_overall(summary_df, overall_dic):
del summary_df
precision = quiet_divide(overall_dic['num_detections'], (
overall_dic['num_false_positives'] + overall_dic['num_detections']
))
return precision
def recall_overall(summary_df, overall_dic):
del summary_df
recall = quiet_divide(overall_dic['num_detections'],
overall_dic['num_objects'])
return recall
def idp_overall(summary_df, overall_dic):
del summary_df
idp = quiet_divide(overall_dic['idtp'],
(overall_dic['idtp'] + overall_dic['idfp']))
return idp
def idr_overall(summary_df, overall_dic):
del summary_df
idr = quiet_divide(overall_dic['idtp'],
(overall_dic['idtp'] + overall_dic['idfn']))
return idr
def idf1_overall(summary_df, overall_dic):
del summary_df
idf1 = quiet_divide(2. * overall_dic['idtp'], (
overall_dic['num_objects'] + overall_dic['num_predictions']))
return idf1
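# The overall values computed above follow the usual multi-object tracking
# definitions (an assumption on naming, but directly readable from the code):
#   MOTA = 1 - (FN + IDs + FP) / num_objects
#   IDF1 = 2 * IDTP / (num_objects + num_predictions)
# and MOTP is the detection-count-weighted mean of the per-sequence MOTP values.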
def read_mcmot_results_union(filename, is_gt, is_ignore):
results_dict = dict()
if os.path.isfile(filename):
all_result = np.loadtxt(filename, delimiter=',')
if all_result.shape[0] == 0 or all_result.shape[1] < 7:
return results_dict
if is_ignore:
return results_dict
if is_gt:
# only for test use
all_result = all_result[all_result[:, 7] != 0]
all_result[:, 7] = all_result[:, 7] - 1
if all_result.shape[0] == 0:
return results_dict
class_unique = np.unique(all_result[:, 7])
last_max_id = 0
result_cls_list = []
for cls in class_unique:
result_cls_split = all_result[all_result[:, 7] == cls]
result_cls_split[:, 1] = result_cls_split[:, 1] + last_max_id
# make sure track ids differ between categories
last_max_id = max(np.unique(result_cls_split[:, 1])) + 1
result_cls_list.append(result_cls_split)
results_con = np.concatenate(result_cls_list)
for line in range(len(results_con)):
linelist = results_con[line]
fid = int(linelist[0])
if fid < 1:
continue
results_dict.setdefault(fid, list())
if is_gt:
score = 1
else:
score = float(linelist[6])
tlwh = tuple(map(float, linelist[2:6]))
target_id = int(linelist[1])
cls = int(linelist[7])
results_dict[fid].append((tlwh, target_id, cls, score))
return results_dict
def read_mcmot_results(filename, is_gt, is_ignore):
results_dict = dict()
if os.path.isfile(filename):
with open(filename, 'r') as f:
for line in f.readlines():
linelist = line.strip().split(',')
if len(linelist) < 7:
continue
fid = int(linelist[0])
if fid < 1:
continue
cid = int(linelist[7])
if is_gt:
score = 1
# only for test use
cid -= 1
else:
score = float(linelist[6])
cls_result_dict = results_dict.setdefault(cid, dict())
cls_result_dict.setdefault(fid, list())
tlwh = tuple(map(float, linelist[2:6]))
target_id = int(linelist[1])
cls_result_dict[fid].append((tlwh, target_id, score))
return results_dict
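# Both readers above assume MOT-style comma-separated lines of the form
# (values below are hypothetical):
#   frame_id,track_id,x,y,w,h,score,class_id
#   e.g. "1,3,605.0,280.0,30.0,60.0,0.98,0"
# Column 7 (0-based) holds the class id; for ground-truth files it is shifted
# by -1 and the score is forced to 1.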
def read_results(filename,
data_type,
is_gt=False,
is_ignore=False,
multi_class=False,
union=False):
if data_type in ['mcmot', 'lab']:
if multi_class:
if union:
# The results are evaluated with all categories unioned together.
# Track IDs must not be duplicated across categories.
read_fun = read_mcmot_results_union
else:
# The results are evaluated separately by category.
read_fun = read_mcmot_results
else:
raise ValueError('multi_class: {}, MCMOT should have cls_id.'.
format(multi_class))
else:
raise ValueError('Unknown data type: {}'.format(data_type))
return read_fun(filename, is_gt, is_ignore)
def unzip_objs(objs):
if len(objs) > 0:
tlwhs, ids, scores = zip(*objs)
else:
tlwhs, ids, scores = [], [], []
tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4)
return tlwhs, ids, scores
def unzip_objs_cls(objs):
if len(objs) > 0:
tlwhs, ids, cls, scores = zip(*objs)
else:
tlwhs, ids, cls, scores = [], [], [], []
tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4)
ids = np.array(ids)
cls = np.array(cls)
scores = np.array(scores)
return tlwhs, ids, cls, scores
class MCMOTEvaluator(object):
def __init__(self, data_root, seq_name, data_type, num_classes):
self.data_root = data_root
self.seq_name = seq_name
self.data_type = data_type
self.num_classes = num_classes
self.load_annotations()
self.reset_accumulator()
self.class_accs = []
def load_annotations(self):
assert self.data_type == 'mcmot'
self.gt_filename = os.path.join(self.data_root, '../', '../',
'sequences',
'{}.txt'.format(self.seq_name))
def reset_accumulator(self):
import motmetrics as mm
mm.lap.default_solver = 'lap'
self.acc = mm.MOTAccumulator(auto_id=True)
def eval_frame_dict(self, trk_objs, gt_objs, rtn_events=False,
union=False):
import motmetrics as mm
mm.lap.default_solver = 'lap'
if union:
trk_tlwhs, trk_ids, trk_cls = unzip_objs_cls(trk_objs)[:3]
gt_tlwhs, gt_ids, gt_cls = unzip_objs_cls(gt_objs)[:3]
# get distance matrix
iou_distance = mm.distances.iou_matrix(
gt_tlwhs, trk_tlwhs, max_iou=0.5)
# Set the distance between objects of different categories to nan
gt_cls_len = len(gt_cls)
trk_cls_len = len(trk_cls)
# When the number of GT or Trk is 0, iou_distance dimension is (0,0)
if gt_cls_len != 0 and trk_cls_len != 0:
gt_cls = gt_cls.reshape(gt_cls_len, 1)
gt_cls = np.repeat(gt_cls, trk_cls_len, axis=1)
trk_cls = trk_cls.reshape(1, trk_cls_len)
trk_cls = np.repeat(trk_cls, gt_cls_len, axis=0)
iou_distance = np.where(gt_cls == trk_cls, iou_distance,
np.nan)
else:
trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2]
gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2]
# get distance matrix
iou_distance = mm.distances.iou_matrix(
gt_tlwhs, trk_tlwhs, max_iou=0.5)
self.acc.update(gt_ids, trk_ids, iou_distance)
if rtn_events and iou_distance.size > 0 and hasattr(self.acc,
'mot_events'):
events = self.acc.mot_events # only supported by https://github.com/longcw/py-motmetrics
else:
events = None
return events
def eval_file(self, result_filename):
# evaluation of each category
gt_frame_dict = read_results(
self.gt_filename,
self.data_type,
is_gt=True,
multi_class=True,
union=False)
result_frame_dict = read_results(
result_filename,
self.data_type,
is_gt=False,
multi_class=True,
union=False)
for cid in range(self.num_classes):
self.reset_accumulator()
cls_result_frame_dict = result_frame_dict.setdefault(cid, dict())
cls_gt_frame_dict = gt_frame_dict.setdefault(cid, dict())
# only labeled frames will be evaluated
frames = sorted(list(set(cls_gt_frame_dict.keys())))
for frame_id in frames:
trk_objs = cls_result_frame_dict.get(frame_id, [])
gt_objs = cls_gt_frame_dict.get(frame_id, [])
self.eval_frame_dict(trk_objs, gt_objs, rtn_events=False)
self.class_accs.append(self.acc)
return self.class_accs
@staticmethod
def get_summary(accs,
names,
metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1',
'precision', 'recall')):
import motmetrics as mm
mm.lap.default_solver = 'lap'
names = copy.deepcopy(names)
if metrics is None:
metrics = mm.metrics.motchallenge_metrics
metrics = copy.deepcopy(metrics)
mh = mm.metrics.create()
summary = mh.compute_many(
accs, metrics=metrics, names=names, generate_overall=True)
return summary
@staticmethod
def save_summary(summary, filename):
import pandas as pd
writer = pd.ExcelWriter(filename)
summary.to_excel(writer)
writer.save()
class MCMOTMetric(Metric):
def __init__(self, num_classes, save_summary=False):
self.num_classes = num_classes
self.save_summary = save_summary
self.MCMOTEvaluator = MCMOTEvaluator
self.result_root = None
self.reset()
self.seqs_overall = defaultdict(list)
def reset(self):
self.accs = []
self.seqs = []
def update(self, data_root, seq, data_type, result_root, result_filename):
evaluator = self.MCMOTEvaluator(data_root, seq, data_type,
self.num_classes)
seq_acc = evaluator.eval_file(result_filename)
self.accs.append(seq_acc)
self.seqs.append(seq)
self.result_root = result_root
cls_index_name = [
'{}_{}'.format(seq, i) for i in range(self.num_classes)
]
summary = parse_accs_metrics(seq_acc, cls_index_name)
summary.rename(
index={'OVERALL': '{}_OVERALL'.format(seq)}, inplace=True)
for row in range(len(summary)):
self.seqs_overall[row].append(summary.iloc[row:row + 1])
def accumulate(self):
self.cls_summary_list = []
for row in range(self.num_classes):
seqs_cls_df = pd.concat(self.seqs_overall[row])
seqs_cls_summary = seqs_overall_metrics(seqs_cls_df)
cls_summary_overall = seqs_cls_summary.iloc[-1:].copy()
cls_summary_overall.rename(
index={'overall_calc': 'overall_calc_{}'.format(row)},
inplace=True)
self.cls_summary_list.append(cls_summary_overall)
def log(self):
seqs_summary = seqs_overall_metrics(
pd.concat(self.seqs_overall[self.num_classes]), verbose=True)
class_summary = seqs_overall_metrics(
pd.concat(self.cls_summary_list), verbose=True)
def get_results(self):
return 1

@ -0,0 +1,434 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import json
import paddle
import numpy as np
import typing
from .map_utils import prune_zero_padding, DetectionMAP
from .coco_utils import get_infer_results, cocoapi_eval
from .widerface_utils import face_eval_run
from paddlers.models.ppdet.data.source.category import get_categories
from paddlers.models.ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
__all__ = [
'Metric', 'COCOMetric', 'VOCMetric', 'WiderFaceMetric',
'get_infer_results', 'RBoxMetric', 'SNIPERCOCOMetric'
]
COCO_SIGMAS = np.array([
.26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87,
.87, .89, .89
]) / 10.0
CROWD_SIGMAS = np.array(
[.79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89, .79,
.79]) / 10.0
class Metric(paddle.metric.Metric):
def name(self):
return self.__class__.__name__
def reset(self):
pass
def accumulate(self):
pass
# paddle.metric.Metric defines :meth:`update`, :meth:`accumulate`
# and :meth:`reset`; in ppdet, we also need the following 2 methods:
# abstract method for logging metric results
def log(self):
pass
# abstract method for getting metric results
def get_results(self):
pass
class COCOMetric(Metric):
def __init__(self, anno_file, **kwargs):
assert os.path.isfile(anno_file), \
"anno_file {} not a file".format(anno_file)
self.anno_file = anno_file
self.clsid2catid = kwargs.get('clsid2catid', None)
if self.clsid2catid is None:
self.clsid2catid, _ = get_categories('COCO', anno_file)
self.classwise = kwargs.get('classwise', False)
self.output_eval = kwargs.get('output_eval', None)
# TODO: bias should be unified
self.bias = kwargs.get('bias', 0)
self.save_prediction_only = kwargs.get('save_prediction_only', False)
self.iou_type = kwargs.get('IouType', 'bbox')
self.reset()
def reset(self):
# only bbox and mask evaluation are supported currently
self.results = {'bbox': [], 'mask': [], 'segm': [], 'keypoint': []}
self.eval_results = {}
def update(self, inputs, outputs):
outs = {}
# outputs Tensor -> numpy.ndarray
for k, v in outputs.items():
outs[k] = v.numpy() if isinstance(v, paddle.Tensor) else v
# multi-scale inputs: all inputs have same im_id
if isinstance(inputs, typing.Sequence):
im_id = inputs[0]['im_id']
else:
im_id = inputs['im_id']
outs['im_id'] = im_id.numpy() if isinstance(im_id,
paddle.Tensor) else im_id
infer_results = get_infer_results(
outs, self.clsid2catid, bias=self.bias)
self.results['bbox'] += infer_results[
'bbox'] if 'bbox' in infer_results else []
self.results['mask'] += infer_results[
'mask'] if 'mask' in infer_results else []
self.results['segm'] += infer_results[
'segm'] if 'segm' in infer_results else []
self.results['keypoint'] += infer_results[
'keypoint'] if 'keypoint' in infer_results else []
def accumulate(self):
if len(self.results['bbox']) > 0:
output = "bbox.json"
if self.output_eval:
output = os.path.join(self.output_eval, output)
with open(output, 'w') as f:
json.dump(self.results['bbox'], f)
logger.info('The bbox result is saved to bbox.json.')
if self.save_prediction_only:
logger.info('The bbox result is saved to {} and the mAP is not '
'evaluated.'.format(output))
else:
bbox_stats = cocoapi_eval(
output,
'bbox',
anno_file=self.anno_file,
classwise=self.classwise)
self.eval_results['bbox'] = bbox_stats
sys.stdout.flush()
if len(self.results['mask']) > 0:
output = "mask.json"
if self.output_eval:
output = os.path.join(self.output_eval, output)
with open(output, 'w') as f:
json.dump(self.results['mask'], f)
logger.info('The mask result is saved to mask.json.')
if self.save_prediction_only:
logger.info('The mask result is saved to {} and the mAP is not '
'evaluated.'.format(output))
else:
seg_stats = cocoapi_eval(
output,
'segm',
anno_file=self.anno_file,
classwise=self.classwise)
self.eval_results['mask'] = seg_stats
sys.stdout.flush()
if len(self.results['segm']) > 0:
output = "segm.json"
if self.output_eval:
output = os.path.join(self.output_eval, output)
with open(output, 'w') as f:
json.dump(self.results['segm'], f)
logger.info('The segm result is saved to segm.json.')
if self.save_prediction_only:
logger.info('The segm result is saved to {} and the mAP is not '
'evaluated.'.format(output))
else:
seg_stats = cocoapi_eval(
output,
'segm',
anno_file=self.anno_file,
classwise=self.classwise)
self.eval_results['mask'] = seg_stats
sys.stdout.flush()
if len(self.results['keypoint']) > 0:
output = "keypoint.json"
if self.output_eval:
output = os.path.join(self.output_eval, output)
with open(output, 'w') as f:
json.dump(self.results['keypoint'], f)
logger.info('The keypoint result is saved to keypoint.json.')
if self.save_prediction_only:
logger.info('The keypoint result is saved to {} and the mAP is not '
'evaluated.'.format(output))
else:
style = 'keypoints'
use_area = True
sigmas = COCO_SIGMAS
if self.iou_type == 'keypoints_crowd':
style = 'keypoints_crowd'
use_area = False
sigmas = CROWD_SIGMAS
keypoint_stats = cocoapi_eval(
output,
style,
anno_file=self.anno_file,
classwise=self.classwise,
sigmas=sigmas,
use_area=use_area)
self.eval_results['keypoint'] = keypoint_stats
sys.stdout.flush()
def log(self):
pass
def get_results(self):
return self.eval_results
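# A minimal usage sketch (hypothetical paths and eval loop), following the
# update -> accumulate -> log -> get_results cycle shared by the metrics below:
#
# >>> metric = COCOMetric(anno_file='annotations/instances_val.json')
# >>> for inputs, outputs in eval_batches:   # hypothetical evaluation loop
# ...     metric.update(inputs, outputs)
# >>> metric.accumulate()                    # writes bbox.json and runs COCOeval
# >>> ap_stats = metric.get_results()['bbox']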
class VOCMetric(Metric):
def __init__(self,
label_list,
class_num=20,
overlap_thresh=0.5,
map_type='11point',
is_bbox_normalized=False,
evaluate_difficult=False,
classwise=False):
assert os.path.isfile(label_list), \
"label_list {} not a file".format(label_list)
self.clsid2catid, self.catid2name = get_categories('VOC', label_list)
self.overlap_thresh = overlap_thresh
self.map_type = map_type
self.evaluate_difficult = evaluate_difficult
self.detection_map = DetectionMAP(
class_num=class_num,
overlap_thresh=overlap_thresh,
map_type=map_type,
is_bbox_normalized=is_bbox_normalized,
evaluate_difficult=evaluate_difficult,
catid2name=self.catid2name,
classwise=classwise)
self.reset()
def reset(self):
self.detection_map.reset()
def update(self, inputs, outputs):
bbox_np = outputs['bbox'].numpy()
bboxes = bbox_np[:, 2:]
scores = bbox_np[:, 1]
labels = bbox_np[:, 0]
bbox_lengths = outputs['bbox_num'].numpy()
if bboxes.shape == (1, 1) or bboxes is None:
return
gt_boxes = inputs['gt_bbox']
gt_labels = inputs['gt_class']
difficults = inputs['difficult'] if not self.evaluate_difficult \
else None
scale_factor = inputs['scale_factor'].numpy(
) if 'scale_factor' in inputs else np.ones(
(gt_boxes.shape[0], 2)).astype('float32')
bbox_idx = 0
for i in range(len(gt_boxes)):
gt_box = gt_boxes[i].numpy()
h, w = scale_factor[i]
gt_box = gt_box / np.array([w, h, w, h])
gt_label = gt_labels[i].numpy()
difficult = None if difficults is None \
else difficults[i].numpy()
bbox_num = bbox_lengths[i]
bbox = bboxes[bbox_idx:bbox_idx + bbox_num]
score = scores[bbox_idx:bbox_idx + bbox_num]
label = labels[bbox_idx:bbox_idx + bbox_num]
gt_box, gt_label, difficult = prune_zero_padding(gt_box, gt_label,
difficult)
self.detection_map.update(bbox, score, label, gt_box, gt_label,
difficult)
bbox_idx += bbox_num
def accumulate(self):
logger.info("Accumulating evaluatation results...")
self.detection_map.accumulate()
def log(self):
map_stat = 100. * self.detection_map.get_map()
logger.info("mAP({:.2f}, {}) = {:.2f}%".format(
self.overlap_thresh, self.map_type, map_stat))
def get_results(self):
return {'bbox': [self.detection_map.get_map()]}
class WiderFaceMetric(Metric):
def __init__(self, image_dir, anno_file, multi_scale=True):
self.image_dir = image_dir
self.anno_file = anno_file
self.multi_scale = multi_scale
self.clsid2catid, self.catid2name = get_categories('widerface')
def update(self, model):
face_eval_run(
model,
self.image_dir,
self.anno_file,
pred_dir='output/pred',
eval_mode='widerface',
multi_scale=self.multi_scale)
class RBoxMetric(Metric):
def __init__(self, anno_file, **kwargs):
assert os.path.isfile(anno_file), \
"anno_file {} not a file".format(anno_file)
assert os.path.exists(anno_file), "anno_file {} not exists".format(
anno_file)
self.anno_file = anno_file
self.gt_anno = json.load(open(self.anno_file))
cats = self.gt_anno['categories']
self.clsid2catid = {i: cat['id'] for i, cat in enumerate(cats)}
self.catid2clsid = {cat['id']: i for i, cat in enumerate(cats)}
self.catid2name = {cat['id']: cat['name'] for cat in cats}
self.classwise = kwargs.get('classwise', False)
self.output_eval = kwargs.get('output_eval', None)
# TODO: bias should be unified
self.bias = kwargs.get('bias', 0)
self.save_prediction_only = kwargs.get('save_prediction_only', False)
self.iou_type = kwargs.get('IouType', 'bbox')
self.overlap_thresh = kwargs.get('overlap_thresh', 0.5)
self.map_type = kwargs.get('map_type', '11point')
self.evaluate_difficult = kwargs.get('evaluate_difficult', False)
class_num = len(self.catid2name)
self.detection_map = DetectionMAP(
class_num=class_num,
overlap_thresh=self.overlap_thresh,
map_type=self.map_type,
is_bbox_normalized=False,
evaluate_difficult=self.evaluate_difficult,
catid2name=self.catid2name,
classwise=self.classwise)
self.reset()
def reset(self):
self.result_bbox = []
self.detection_map.reset()
def update(self, inputs, outputs):
outs = {}
# outputs Tensor -> numpy.ndarray
for k, v in outputs.items():
outs[k] = v.numpy() if isinstance(v, paddle.Tensor) else v
im_id = inputs['im_id']
outs['im_id'] = im_id.numpy() if isinstance(im_id,
paddle.Tensor) else im_id
infer_results = get_infer_results(
outs, self.clsid2catid, bias=self.bias)
self.result_bbox += infer_results[
'bbox'] if 'bbox' in infer_results else []
bbox = [b['bbox'] for b in self.result_bbox]
score = [b['score'] for b in self.result_bbox]
label = [b['category_id'] for b in self.result_bbox]
label = [self.catid2clsid[e] for e in label]
gt_box = [
e['bbox'] for e in self.gt_anno['annotations']
if e['image_id'] == outs['im_id']
]
gt_label = [
e['category_id'] for e in self.gt_anno['annotations']
if e['image_id'] == outs['im_id']
]
gt_label = [self.catid2clsid[e] for e in gt_label]
self.detection_map.update(bbox, score, label, gt_box, gt_label)
def accumulate(self):
if len(self.result_bbox) > 0:
output = "bbox.json"
if self.output_eval:
output = os.path.join(self.output_eval, output)
with open(output, 'w') as f:
json.dump(self.result_bbox, f)
logger.info('The bbox result is saved to bbox.json.')
if self.save_prediction_only:
logger.info('The bbox result is saved to {} and the mAP is not '
'evaluated.'.format(output))
else:
logger.info("Accumulating evaluatation results...")
self.detection_map.accumulate()
def log(self):
map_stat = 100. * self.detection_map.get_map()
logger.info("mAP({:.2f}, {}) = {:.2f}%".format(
self.overlap_thresh, self.map_type, map_stat))
def get_results(self):
return {'bbox': [self.detection_map.get_map()]}
class SNIPERCOCOMetric(COCOMetric):
def __init__(self, anno_file, **kwargs):
super(SNIPERCOCOMetric, self).__init__(anno_file, **kwargs)
self.dataset = kwargs["dataset"]
self.chip_results = []
def reset(self):
# only bbox and mask evaluation are supported currently
self.results = {'bbox': [], 'mask': [], 'segm': [], 'keypoint': []}
self.eval_results = {}
self.chip_results = []
def update(self, inputs, outputs):
outs = {}
# outputs Tensor -> numpy.ndarray
for k, v in outputs.items():
outs[k] = v.numpy() if isinstance(v, paddle.Tensor) else v
im_id = inputs['im_id']
outs['im_id'] = im_id.numpy() if isinstance(im_id,
paddle.Tensor) else im_id
self.chip_results.append(outs)
def accumulate(self):
results = self.dataset.anno_cropper.aggregate_chips_detections(
self.chip_results)
for outs in results:
infer_results = get_infer_results(
outs, self.clsid2catid, bias=self.bias)
self.results['bbox'] += infer_results[
'bbox'] if 'bbox' in infer_results else []
super(SNIPERCOCOMetric, self).accumulate()

File diff suppressed because it is too large.

@ -0,0 +1,428 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is borrowed from https://github.com/xingyizhou/CenterTrack/blob/master/src/tools/eval_kitti_track/munkres.py
"""
import copy
import sys
__all__ = ['Munkres', 'make_cost_matrix']
class Munkres:
"""
Calculate the Munkres solution to the classical assignment problem.
See the module documentation for usage.
"""
def __init__(self):
"""Create a new instance"""
self.C = None
self.row_covered = []
self.col_covered = []
self.n = 0
self.Z0_r = 0
self.Z0_c = 0
self.marked = None
self.path = None
def make_cost_matrix(profit_matrix, inversion_function):
"""
**DEPRECATED**
Please use the module function ``make_cost_matrix()``.
"""
import munkres
return munkres.make_cost_matrix(profit_matrix, inversion_function)
make_cost_matrix = staticmethod(make_cost_matrix)
def pad_matrix(self, matrix, pad_value=0):
"""
Pad a possibly non-square matrix to make it square.
:Parameters:
matrix : list of lists
matrix to pad
pad_value : int
value to use to pad the matrix
:rtype: list of lists
:return: a new, possibly padded, matrix
"""
max_columns = 0
total_rows = len(matrix)
for row in matrix:
max_columns = max(max_columns, len(row))
total_rows = max(max_columns, total_rows)
new_matrix = []
for row in matrix:
row_len = len(row)
new_row = row[:]
if total_rows > row_len:
# Row too short. Pad it.
new_row += [0] * (total_rows - row_len)
new_matrix += [new_row]
while len(new_matrix) < total_rows:
new_matrix += [[0] * total_rows]
return new_matrix
def compute(self, cost_matrix):
"""
Compute the indexes for the lowest-cost pairings between rows and
columns in the database. Returns a list of (row, column) tuples
that can be used to traverse the matrix.
:Parameters:
cost_matrix : list of lists
The cost matrix. If this cost matrix is not square, it
will be padded with zeros, via a call to ``pad_matrix()``.
(This method does *not* modify the caller's matrix. It
operates on a copy of the matrix.)
**WARNING**: This code handles square and rectangular
matrices. It does *not* handle irregular matrices.
:rtype: list
:return: A list of ``(row, column)`` tuples that describe the lowest
cost path through the matrix
"""
self.C = self.pad_matrix(cost_matrix)
self.n = len(self.C)
self.original_length = len(cost_matrix)
self.original_width = len(cost_matrix[0])
self.row_covered = [False for i in range(self.n)]
self.col_covered = [False for i in range(self.n)]
self.Z0_r = 0
self.Z0_c = 0
self.path = self.__make_matrix(self.n * 2, 0)
self.marked = self.__make_matrix(self.n, 0)
done = False
step = 1
steps = {
1: self.__step1,
2: self.__step2,
3: self.__step3,
4: self.__step4,
5: self.__step5,
6: self.__step6
}
while not done:
try:
func = steps[step]
step = func()
except KeyError:
done = True
# Look for the starred columns
results = []
for i in range(self.original_length):
for j in range(self.original_width):
if self.marked[i][j] == 1:
results += [(i, j)]
return results
def __copy_matrix(self, matrix):
"""Return an exact copy of the supplied matrix"""
return copy.deepcopy(matrix)
def __make_matrix(self, n, val):
"""Create an *n*x*n* matrix, populating it with the specific value."""
matrix = []
for i in range(n):
matrix += [[val for j in range(n)]]
return matrix
def __step1(self):
"""
For each row of the matrix, find the smallest element and
subtract it from every element in its row. Go to Step 2.
"""
C = self.C
n = self.n
for i in range(n):
minval = min(self.C[i])
# Find the minimum value for this row and subtract that minimum
# from every element in the row.
for j in range(n):
self.C[i][j] -= minval
return 2
def __step2(self):
"""
Find a zero (Z) in the resulting matrix. If there is no starred
zero in its row or column, star Z. Repeat for each element in the
matrix. Go to Step 3.
"""
n = self.n
for i in range(n):
for j in range(n):
if (self.C[i][j] == 0) and \
(not self.col_covered[j]) and \
(not self.row_covered[i]):
self.marked[i][j] = 1
self.col_covered[j] = True
self.row_covered[i] = True
self.__clear_covers()
return 3
def __step3(self):
"""
Cover each column containing a starred zero. If K columns are
covered, the starred zeros describe a complete set of unique
assignments. In this case, Go to DONE, otherwise, Go to Step 4.
"""
n = self.n
count = 0
for i in range(n):
for j in range(n):
if self.marked[i][j] == 1:
self.col_covered[j] = True
count += 1
if count >= n:
step = 7 # done
else:
step = 4
return step
def __step4(self):
"""
Find a noncovered zero and prime it. If there is no starred zero
in the row containing this primed zero, Go to Step 5. Otherwise,
cover this row and uncover the column containing the starred
zero. Continue in this manner until there are no uncovered zeros
left. Save the smallest uncovered value and Go to Step 6.
"""
step = 0
done = False
row = -1
col = -1
star_col = -1
while not done:
(row, col) = self.__find_a_zero()
if row < 0:
done = True
step = 6
else:
self.marked[row][col] = 2
star_col = self.__find_star_in_row(row)
if star_col >= 0:
col = star_col
self.row_covered[row] = True
self.col_covered[col] = False
else:
done = True
self.Z0_r = row
self.Z0_c = col
step = 5
return step
def __step5(self):
"""
Construct a series of alternating primed and starred zeros as
follows. Let Z0 represent the uncovered primed zero found in Step 4.
Let Z1 denote the starred zero in the column of Z0 (if any).
Let Z2 denote the primed zero in the row of Z1 (there will always
be one). Continue until the series terminates at a primed zero
that has no starred zero in its column. Unstar each starred zero
of the series, star each primed zero of the series, erase all
primes and uncover every line in the matrix. Return to Step 3
"""
count = 0
path = self.path
path[count][0] = self.Z0_r
path[count][1] = self.Z0_c
done = False
while not done:
row = self.__find_star_in_col(path[count][1])
if row >= 0:
count += 1
path[count][0] = row
path[count][1] = path[count - 1][1]
else:
done = True
if not done:
col = self.__find_prime_in_row(path[count][0])
count += 1
path[count][0] = path[count - 1][0]
path[count][1] = col
self.__convert_path(path, count)
self.__clear_covers()
self.__erase_primes()
return 3
def __step6(self):
"""
Add the value found in Step 4 to every element of each covered
row, and subtract it from every element of each uncovered column.
Return to Step 4 without altering any stars, primes, or covered
lines.
"""
minval = self.__find_smallest()
for i in range(self.n):
for j in range(self.n):
if self.row_covered[i]:
self.C[i][j] += minval
if not self.col_covered[j]:
self.C[i][j] -= minval
return 4
def __find_smallest(self):
"""Find the smallest uncovered value in the matrix."""
minval = 2e9  # large sentinel value (sys.maxint in the original Python 2 code)
for i in range(self.n):
for j in range(self.n):
if (not self.row_covered[i]) and (not self.col_covered[j]):
if minval > self.C[i][j]:
minval = self.C[i][j]
return minval
def __find_a_zero(self):
"""Find the first uncovered element with value 0"""
row = -1
col = -1
i = 0
n = self.n
done = False
while not done:
j = 0
while True:
if (self.C[i][j] == 0) and \
(not self.row_covered[i]) and \
(not self.col_covered[j]):
row = i
col = j
done = True
j += 1
if j >= n:
break
i += 1
if i >= n:
done = True
return (row, col)
def __find_star_in_row(self, row):
"""
Find the first starred element in the specified row. Returns
the column index, or -1 if no starred element was found.
"""
col = -1
for j in range(self.n):
if self.marked[row][j] == 1:
col = j
break
return col
def __find_star_in_col(self, col):
"""
Find the first starred element in the specified column. Returns
the row index, or -1 if no starred element was found.
"""
row = -1
for i in range(self.n):
if self.marked[i][col] == 1:
row = i
break
return row
def __find_prime_in_row(self, row):
"""
Find the first prime element in the specified row. Returns
the column index, or -1 if no prime element was found.
"""
col = -1
for j in range(self.n):
if self.marked[row][j] == 2:
col = j
break
return col
def __convert_path(self, path, count):
for i in range(count + 1):
if self.marked[path[i][0]][path[i][1]] == 1:
self.marked[path[i][0]][path[i][1]] = 0
else:
self.marked[path[i][0]][path[i][1]] = 1
def __clear_covers(self):
"""Clear all covered matrix cells"""
for i in range(self.n):
self.row_covered[i] = False
self.col_covered[i] = False
def __erase_primes(self):
"""Erase all prime markings"""
for i in range(self.n):
for j in range(self.n):
if self.marked[i][j] == 2:
self.marked[i][j] = 0
def make_cost_matrix(profit_matrix, inversion_function):
"""
Create a cost matrix from a profit matrix by calling
'inversion_function' to invert each value. The inversion
function must take one numeric argument (of any type) and return
another numeric argument which is presumed to be the cost inverse
of the original profit.
This is a static method. Call it like this:
.. python::
cost_matrix = Munkres.make_cost_matrix(matrix, inversion_func)
For example:
.. python::
cost_matrix = Munkres.make_cost_matrix(matrix, lambda x : sys.maxint - x)
:Parameters:
profit_matrix : list of lists
The matrix to convert from a profit to a cost matrix
inversion_function : function
The function to use to invert each entry in the profit matrix
:rtype: list of lists
:return: The converted matrix
"""
cost_matrix = []
for row in profit_matrix:
cost_matrix.append([inversion_function(value) for value in row])
return cost_matrix

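For reference, a minimal usage sketch of the Munkres solver defined above. The import path is an assumption based on where the vendored module appears to live, and the matrix values are made up:

from paddlers.models.ppdet.metrics.munkres import Munkres  # assumed import path

cost_matrix = [[5, 9, 1],
               [10, 3, 2],
               [8, 7, 4]]
m = Munkres()
indexes = m.compute(cost_matrix)   # list of (row, col) pairs giving a minimal-cost assignment
total = sum(cost_matrix[r][c] for r, c in indexes)   # minimal total assignment cost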
@@ -0,0 +1,393 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import cv2
import numpy as np
from collections import OrderedDict
import paddle
from paddlers.models.ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
__all__ = ['face_eval_run', 'lmk2out']
def face_eval_run(model,
image_dir,
gt_file,
pred_dir='output/pred',
eval_mode='widerface',
multi_scale=False):
# load ground truth files
with open(gt_file, 'r') as f:
gt_lines = f.readlines()
imid2path = []
pos_gt = 0
while pos_gt < len(gt_lines):
name_gt = gt_lines[pos_gt].strip('\n\t').split()[0]
imid2path.append(name_gt)
pos_gt += 1
n_gt = int(gt_lines[pos_gt].strip('\n\t').split()[0])
pos_gt += 1 + n_gt
logger.info('The ground truth file contains {} images'.format(len(imid2path)))
dets_dist = OrderedDict()
for iter_id, im_path in enumerate(imid2path):
image_path = os.path.join(image_dir, im_path)
if eval_mode == 'fddb':
image_path += '.jpg'
assert os.path.exists(image_path)
image = cv2.imread(image_path)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
if multi_scale:
shrink, max_shrink = get_shrink(image.shape[0], image.shape[1])
det0 = detect_face(model, image, shrink)
det1 = flip_test(model, image, shrink)
[det2, det3] = multi_scale_test(model, image, max_shrink)
det4 = multi_scale_test_pyramid(model, image, max_shrink)
det = np.row_stack((det0, det1, det2, det3, det4))
dets = bbox_vote(det)
else:
dets = detect_face(model, image, 1)
if eval_mode == 'widerface':
save_widerface_bboxes(image_path, dets, pred_dir)
else:
dets_dist[im_path] = dets
if iter_id % 100 == 0:
logger.info('Test iter {}'.format(iter_id))
if eval_mode == 'fddb':
save_fddb_bboxes(dets_dist, pred_dir)
logger.info("Finish evaluation.")
def detect_face(model, image, shrink):
image_shape = [image.shape[0], image.shape[1]]
if shrink != 1:
h, w = int(image_shape[0] * shrink), int(image_shape[1] * shrink)
image = cv2.resize(image, (w, h))
image_shape = [h, w]
img = face_img_process(image)
image_shape = np.asarray([image_shape])
scale_factor = np.asarray([[shrink, shrink]])
data = {
"image": paddle.to_tensor(
img, dtype='float32'),
"im_shape": paddle.to_tensor(
image_shape, dtype='float32'),
"scale_factor": paddle.to_tensor(
scale_factor, dtype='float32')
}
model.eval()
detection = model(data)
detection = detection['bbox'].numpy()
# layout: xmin, ymin, xmax, ymax, score
if np.prod(detection.shape) == 1:
logger.info("No face detected")
return np.array([[0, 0, 0, 0, 0]])
det_conf = detection[:, 1]
det_xmin = detection[:, 2]
det_ymin = detection[:, 3]
det_xmax = detection[:, 4]
det_ymax = detection[:, 5]
det = np.column_stack((det_xmin, det_ymin, det_xmax, det_ymax, det_conf))
return det
def flip_test(model, image, shrink):
img = cv2.flip(image, 1)
det_f = detect_face(model, img, shrink)
det_t = np.zeros(det_f.shape)
img_width = image.shape[1]
det_t[:, 0] = img_width - det_f[:, 2]
det_t[:, 1] = det_f[:, 1]
det_t[:, 2] = img_width - det_f[:, 0]
det_t[:, 3] = det_f[:, 3]
det_t[:, 4] = det_f[:, 4]
return det_t
def multi_scale_test(model, image, max_shrink):
# Detection on the shrunk image is only used to detect big faces
st = 0.5 if max_shrink >= 0.75 else 0.5 * max_shrink
det_s = detect_face(model, image, st)
index = np.where(
np.maximum(det_s[:, 2] - det_s[:, 0] + 1,
det_s[:, 3] - det_s[:, 1] + 1) > 30)[0]
det_s = det_s[index, :]
# Enlarge the image once (up to 2x)
bt = min(2, max_shrink) if max_shrink > 1 else (st + max_shrink) / 2
det_b = detect_face(model, image, bt)
# Enlarge the image multiple times to detect small faces
if max_shrink > 2:
bt *= 2
while bt < max_shrink:
det_b = np.row_stack((det_b, detect_face(model, image, bt)))
bt *= 2
det_b = np.row_stack((det_b, detect_face(model, image, max_shrink)))
# Enlarged images are only used to detect small faces.
if bt > 1:
index = np.where(
np.minimum(det_b[:, 2] - det_b[:, 0] + 1,
det_b[:, 3] - det_b[:, 1] + 1) < 100)[0]
det_b = det_b[index, :]
# Shrunk images are only used to detect big faces.
else:
index = np.where(
np.maximum(det_b[:, 2] - det_b[:, 0] + 1,
det_b[:, 3] - det_b[:, 1] + 1) > 30)[0]
det_b = det_b[index, :]
return det_s, det_b
def multi_scale_test_pyramid(model, image, max_shrink):
# Use image pyramids to detect faces
det_b = detect_face(model, image, 0.25)
index = np.where(
np.maximum(det_b[:, 2] - det_b[:, 0] + 1,
det_b[:, 3] - det_b[:, 1] + 1) > 30)[0]
det_b = det_b[index, :]
st = [0.75, 1.25, 1.5, 1.75]
for i in range(len(st)):
if st[i] <= max_shrink:
det_temp = detect_face(model, image, st[i])
# Enlarged images are only used to detect small faces.
if st[i] > 1:
index = np.where(
np.minimum(det_temp[:, 2] - det_temp[:, 0] + 1,
det_temp[:, 3] - det_temp[:, 1] + 1) < 100)[0]
det_temp = det_temp[index, :]
# Shrunk images are only used to detect big faces.
else:
index = np.where(
np.maximum(det_temp[:, 2] - det_temp[:, 0] + 1,
det_temp[:, 3] - det_temp[:, 1] + 1) > 30)[0]
det_temp = det_temp[index, :]
det_b = np.row_stack((det_b, det_temp))
return det_b
def to_chw(image):
"""
Transpose image from HWC to CHW.
Args:
image (np.array): an image with HWC layout.
"""
# HWC to CHW
if len(image.shape) == 3:
image = np.swapaxes(image, 1, 2)
image = np.swapaxes(image, 1, 0)
return image
def face_img_process(image,
mean=[104., 117., 123.],
std=[127.502231, 127.502231, 127.502231]):
img = np.array(image)
img = to_chw(img)
img = img.astype('float32')
img -= np.array(mean)[:, np.newaxis, np.newaxis].astype('float32')
img /= np.array(std)[:, np.newaxis, np.newaxis].astype('float32')
img = [img]
img = np.array(img)
return img
def get_shrink(height, width):
"""
Args:
height (int): image height.
width (int): image width.
"""
# avoid out of memory
max_shrink_v1 = (0x7fffffff / 577.0 / (height * width))**0.5
max_shrink_v2 = ((678 * 1024 * 2.0 * 2.0) / (height * width))**0.5
def get_round(x, loc):
str_x = str(x)
if '.' in str_x:
str_before, str_after = str_x.split('.')
len_after = len(str_after)
if len_after >= 3:
str_final = str_before + '.' + str_after[0:loc]
return float(str_final)
else:
return x
max_shrink = get_round(min(max_shrink_v1, max_shrink_v2), 2) - 0.3
if max_shrink >= 1.5 and max_shrink < 2:
max_shrink = max_shrink - 0.1
elif max_shrink >= 2 and max_shrink < 3:
max_shrink = max_shrink - 0.2
elif max_shrink >= 3 and max_shrink < 4:
max_shrink = max_shrink - 0.3
elif max_shrink >= 4 and max_shrink < 5:
max_shrink = max_shrink - 0.4
elif max_shrink >= 5:
max_shrink = max_shrink - 0.5
elif max_shrink <= 0.1:
max_shrink = 0.1
shrink = max_shrink if max_shrink < 1 else 1
return shrink, max_shrink
def bbox_vote(det):
order = det[:, 4].ravel().argsort()[::-1]
det = det[order, :]
if det.shape[0] == 0:
dets = np.array([[10, 10, 20, 20, 0.002]])
det = np.empty(shape=[0, 5])
while det.shape[0] > 0:
# IOU
area = (det[:, 2] - det[:, 0] + 1) * (det[:, 3] - det[:, 1] + 1)
xx1 = np.maximum(det[0, 0], det[:, 0])
yy1 = np.maximum(det[0, 1], det[:, 1])
xx2 = np.minimum(det[0, 2], det[:, 2])
yy2 = np.minimum(det[0, 3], det[:, 3])
w = np.maximum(0.0, xx2 - xx1 + 1)
h = np.maximum(0.0, yy2 - yy1 + 1)
inter = w * h
o = inter / (area[0] + area[:] - inter)
# nms
merge_index = np.where(o >= 0.3)[0]
det_accu = det[merge_index, :]
det = np.delete(det, merge_index, 0)
if merge_index.shape[0] <= 1:
if det.shape[0] == 0:
try:
dets = np.row_stack((dets, det_accu))
except:
dets = det_accu
continue
det_accu[:, 0:4] = det_accu[:, 0:4] * np.tile(det_accu[:, -1:], (1, 4))
max_score = np.max(det_accu[:, 4])
det_accu_sum = np.zeros((1, 5))
det_accu_sum[:, 0:4] = np.sum(det_accu[:, 0:4],
axis=0) / np.sum(det_accu[:, -1:])
det_accu_sum[:, 4] = max_score
try:
dets = np.row_stack((dets, det_accu_sum))
except:
dets = det_accu_sum
dets = dets[0:750, :]
keep_index = np.where(dets[:, 4] >= 0.01)[0]
dets = dets[keep_index, :]
return dets
def save_widerface_bboxes(image_path, bboxes_scores, output_dir):
image_name = image_path.split('/')[-1]
image_class = image_path.split('/')[-2]
odir = os.path.join(output_dir, image_class)
if not os.path.exists(odir):
os.makedirs(odir)
ofname = os.path.join(odir, '%s.txt' % (image_name[:-4]))
f = open(ofname, 'w')
f.write('{:s}\n'.format(image_class + '/' + image_name))
f.write('{:d}\n'.format(bboxes_scores.shape[0]))
for box_score in bboxes_scores:
xmin, ymin, xmax, ymax, score = box_score
f.write('{:.1f} {:.1f} {:.1f} {:.1f} {:.3f}\n'.format(xmin, ymin, (
xmax - xmin + 1), (ymax - ymin + 1), score))
f.close()
logger.info("The predicted result is saved as {}".format(ofname))
def save_fddb_bboxes(bboxes_scores,
output_dir,
output_fname='pred_fddb_res.txt'):
if not os.path.exists(output_dir):
os.makedirs(output_dir)
predict_file = os.path.join(output_dir, output_fname)
f = open(predict_file, 'w')
for image_path, dets in bboxes_scores.items():
f.write('{:s}\n'.format(image_path))
f.write('{:d}\n'.format(dets.shape[0]))
for box_score in dets:
xmin, ymin, xmax, ymax, score = box_score
width, height = xmax - xmin, ymax - ymin
f.write('{:.1f} {:.1f} {:.1f} {:.1f} {:.3f}\n'
.format(xmin, ymin, width, height, score))
logger.info("The predicted result is saved as {}".format(predict_file))
return predict_file
def lmk2out(results, is_bbox_normalized=False):
"""
Args:
results: request a dict, should include: `landmark`, `im_id`,
if is_bbox_normalized=True, also need `im_shape`.
is_bbox_normalized: whether or not landmark is normalized.
"""
xywh_res = []
for t in results:
bboxes = t['bbox'][0]
lengths = t['bbox'][1][0]
im_ids = np.array(t['im_id'][0]).flatten()
if bboxes is None or bboxes.shape == (1, 1):
continue
face_index = t['face_index'][0]
prior_box = t['prior_boxes'][0]
predict_lmk = t['landmark'][0]
prior = np.reshape(prior_box, (-1, 4))
predictlmk = np.reshape(predict_lmk, (-1, 10))
k = 0
for a in range(len(lengths)):
num = lengths[a]
im_id = int(im_ids[a])
for i in range(num):
score = bboxes[k][1]
theindex = face_index[i][0]
me_prior = prior[theindex, :]
lmk_pred = predictlmk[theindex, :]
prior_w = me_prior[2] - me_prior[0]
prior_h = me_prior[3] - me_prior[1]
prior_w_center = (me_prior[2] + me_prior[0]) / 2
prior_h_center = (me_prior[3] + me_prior[1]) / 2
lmk_decode = np.zeros((10))
for j in [0, 2, 4, 6, 8]:
lmk_decode[j] = lmk_pred[
j] * 0.1 * prior_w + prior_w_center
for j in [1, 3, 5, 7, 9]:
lmk_decode[j] = lmk_pred[
j] * 0.1 * prior_h + prior_h_center
im_shape = t['im_shape'][0][a].tolist()
image_h, image_w = int(im_shape[0]), int(im_shape[1])
if is_bbox_normalized:
lmk_decode = lmk_decode * np.array([
image_w, image_h, image_w, image_h, image_w, image_h,
image_w, image_h, image_w, image_h
])
lmk_res = {
'image_id': im_id,
'landmark': lmk_decode,
'score': score,
}
xywh_res.append(lmk_res)
k += 1
return xywh_res

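As a small illustration of the voting-based merge performed by bbox_vote above (boxes with IoU >= 0.3 against the current highest-scoring box are fused into a single score-weighted box), with made-up detections and bbox_vote assumed to be in scope from this file:

import numpy as np

# hypothetical detections in [xmin, ymin, xmax, ymax, score] layout
det = np.array([[10., 10., 60., 60., 0.9],
                [12., 11., 61., 62., 0.8],
                [200., 200., 240., 260., 0.7]])
merged = bbox_vote(det)   # the two overlapping boxes collapse into one weighted box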
@@ -0,0 +1,18 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import model_zoo
from .model_zoo import *
__all__ = model_zoo.__all__

@@ -0,0 +1,84 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os.path as osp
import pkg_resources
try:
from collections.abc import Sequence
except:
from collections import Sequence
from paddlers.models.ppdet.core.workspace import load_config, create
from paddlers.models.ppdet.utils.checkpoint import load_weight
from paddlers.models.ppdet.utils.download import get_config_path
from paddlers.models.ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
__all__ = [
'list_model', 'get_config_file', 'get_weights_url', 'get_model',
'MODEL_ZOO_FILENAME'
]
MODEL_ZOO_FILENAME = 'MODEL_ZOO'
def list_model(filters=[]):
model_zoo_file = pkg_resources.resource_filename('ppdet.model_zoo',
MODEL_ZOO_FILENAME)
with open(model_zoo_file) as f:
model_names = f.read().splitlines()
# filter model_name
def filt(name):
for f in filters:
if name.find(f) < 0:
return False
return True
if isinstance(filters, str) or not isinstance(filters, Sequence):
filters = [filters]
model_names = [name for name in model_names if filt(name)]
if len(model_names) == 0 and len(filters) > 0:
raise ValueError("no model found, please check filters seeting, "
"filters can be set as following kinds:\n"
"\tDataset: coco, voc ...\n"
"\tArchitecture: yolo, rcnn, ssd ...\n"
"\tBackbone: resnet, vgg, darknet ...\n")
model_str = "Available Models:\n"
for model_name in model_names:
model_str += "\t{}\n".format(model_name)
logger.info(model_str)
# models and configs are saved on bcebos under the dygraph directory
def get_config_file(model_name):
return get_config_path("ppdet://configs/{}.yml".format(model_name))
def get_weights_url(model_name):
return "ppdet://models/{}.pdparams".format(osp.split(model_name)[-1])
def get_model(model_name, pretrained=True):
cfg_file = get_config_file(model_name)
cfg = load_config(cfg_file)
model = create(cfg.architecture)
if pretrained:
load_weight(model, get_weights_url(model_name))
return model

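A minimal sketch of how the model-zoo helpers above are meant to be used. It assumes the MODEL_ZOO index file ships with the package and that config/weight URLs are reachable; the import path and model name are illustrative:

from paddlers.models.ppdet.model_zoo import list_model, get_model  # assumed import path

list_model(['yolo', 'coco'])                      # logs matching entries from MODEL_ZOO
model = get_model('yolov3_darknet53_270e_coco',   # illustrative model name
                  pretrained=False)               # skip the weight download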
@@ -0,0 +1,45 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import warnings
warnings.filterwarnings(
action='ignore', category=DeprecationWarning, module='ops')
from . import ops
from . import backbones
from . import necks
from . import proposal_generator
from . import heads
from . import losses
from . import architectures
from . import post_process
from . import layers
from . import reid
from . import mot
from . import transformers
from . import assigners
from .ops import *
from .backbones import *
from .necks import *
from .proposal_generator import *
from .heads import *
from .losses import *
from .architectures import *
from .post_process import *
from .layers import *
from .reid import *
from .mot import *
from .transformers import *
from .assigners import *

@@ -0,0 +1,51 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
from . import meta_arch
from . import faster_rcnn
from . import mask_rcnn
from . import yolo
from . import cascade_rcnn
from . import ssd
from . import fcos
from . import solov2
from . import ttfnet
from . import s2anet
from . import keypoint_hrhrnet
from . import keypoint_hrnet
from . import jde
from . import deepsort
from . import fairmot
from . import centernet
from . import gfl
from . import picodet
from . import detr
from . import sparse_rcnn
from . import tood
from .meta_arch import *
from .faster_rcnn import *
from .mask_rcnn import *
from .yolo import *
from .cascade_rcnn import *
from .ssd import *
from .fcos import *
from .solov2 import *
from .ttfnet import *
from .s2anet import *
from .keypoint_hrhrnet import *
from .keypoint_hrnet import *
from .jde import *
from .deepsort import *
from .fairmot import *
from .centernet import *
from .blazeface import *
from .gfl import *
from .picodet import *
from .detr import *
from .sparse_rcnn import *
from .tood import *

@@ -0,0 +1,91 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from paddlers.models.ppdet.core.workspace import register, create
from .meta_arch import BaseArch
__all__ = ['BlazeFace']
@register
class BlazeFace(BaseArch):
"""
BlazeFace: Sub-millisecond Neural Face Detection on Mobile GPUs,
see https://arxiv.org/abs/1907.05047
Args:
backbone (nn.Layer): backbone instance
neck (nn.Layer): neck instance
blaze_head (nn.Layer): `BlazeHead` instance
post_process (object): `BBoxPostProcess` instance
"""
__category__ = 'architecture'
__inject__ = ['post_process']
def __init__(self, backbone, blaze_head, neck, post_process):
super(BlazeFace, self).__init__()
self.backbone = backbone
self.neck = neck
self.blaze_head = blaze_head
self.post_process = post_process
@classmethod
def from_config(cls, cfg, *args, **kwargs):
# backbone
backbone = create(cfg['backbone'])
# fpn
kwargs = {'input_shape': backbone.out_shape}
neck = create(cfg['neck'], **kwargs)
# head
kwargs = {'input_shape': neck.out_shape}
blaze_head = create(cfg['blaze_head'], **kwargs)
return {
'backbone': backbone,
'neck': neck,
'blaze_head': blaze_head,
}
def _forward(self):
# Backbone
body_feats = self.backbone(self.inputs)
# neck
neck_feats = self.neck(body_feats)
# blaze Head
if self.training:
return self.blaze_head(neck_feats, self.inputs['image'],
self.inputs['gt_bbox'],
self.inputs['gt_class'])
else:
preds, anchors = self.blaze_head(neck_feats, self.inputs['image'])
bbox, bbox_num = self.post_process(preds, anchors,
self.inputs['im_shape'],
self.inputs['scale_factor'])
return bbox, bbox_num
def get_loss(self, ):
return {"loss": self._forward()}
def get_pred(self):
bbox_pred, bbox_num = self._forward()
output = {
"bbox": bbox_pred,
"bbox_num": bbox_num,
}
return output

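The from_config hook above shows the assembly pattern used by every architecture in this module: each component's out_shape is passed as the next component's input_shape. A framework-free sketch of the same chaining, with hypothetical classes rather than the ppdet API:

class Backbone:
    out_shape = [32, 64, 128]            # channels of the emitted feature maps

class Neck:
    def __init__(self, input_shape):
        self.out_shape = [128] * len(input_shape)

class Head:
    def __init__(self, input_shape):
        self.input_shape = input_shape

backbone = Backbone()
neck = Neck(input_shape=backbone.out_shape)   # neck consumes backbone shapes
head = Head(input_shape=neck.out_shape)       # head consumes neck shapes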
@@ -0,0 +1,144 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from paddlers.models.ppdet.core.workspace import register, create
from .meta_arch import BaseArch
__all__ = ['CascadeRCNN']
@register
class CascadeRCNN(BaseArch):
"""
Cascade R-CNN network, see https://arxiv.org/abs/1712.00726
Args:
backbone (object): backbone instance
rpn_head (object): `RPNHead` instance
bbox_head (object): `BBoxHead` instance
bbox_post_process (object): `BBoxPostProcess` instance
neck (object): 'FPN' instance
mask_head (object): `MaskHead` instance
mask_post_process (object): `MaskPostProcess` instance
"""
__category__ = 'architecture'
__inject__ = [
'bbox_post_process',
'mask_post_process',
]
def __init__(self,
backbone,
rpn_head,
bbox_head,
bbox_post_process,
neck=None,
mask_head=None,
mask_post_process=None):
super(CascadeRCNN, self).__init__()
self.backbone = backbone
self.rpn_head = rpn_head
self.bbox_head = bbox_head
self.bbox_post_process = bbox_post_process
self.neck = neck
self.mask_head = mask_head
self.mask_post_process = mask_post_process
self.with_mask = mask_head is not None
@classmethod
def from_config(cls, cfg, *args, **kwargs):
backbone = create(cfg['backbone'])
kwargs = {'input_shape': backbone.out_shape}
neck = cfg['neck'] and create(cfg['neck'], **kwargs)
out_shape = neck and neck.out_shape or backbone.out_shape
kwargs = {'input_shape': out_shape}
rpn_head = create(cfg['rpn_head'], **kwargs)
bbox_head = create(cfg['bbox_head'], **kwargs)
out_shape = neck and out_shape or bbox_head.get_head().out_shape
kwargs = {'input_shape': out_shape}
mask_head = cfg['mask_head'] and create(cfg['mask_head'], **kwargs)
return {
'backbone': backbone,
'neck': neck,
"rpn_head": rpn_head,
"bbox_head": bbox_head,
"mask_head": mask_head,
}
def _forward(self):
body_feats = self.backbone(self.inputs)
if self.neck is not None:
body_feats = self.neck(body_feats)
if self.training:
rois, rois_num, rpn_loss = self.rpn_head(body_feats, self.inputs)
bbox_loss, bbox_feat = self.bbox_head(body_feats, rois, rois_num,
self.inputs)
rois, rois_num = self.bbox_head.get_assigned_rois()
bbox_targets = self.bbox_head.get_assigned_targets()
if self.with_mask:
mask_loss = self.mask_head(body_feats, rois, rois_num,
self.inputs, bbox_targets,
bbox_feat)
return rpn_loss, bbox_loss, mask_loss
else:
return rpn_loss, bbox_loss, {}
else:
rois, rois_num, _ = self.rpn_head(body_feats, self.inputs)
preds, _ = self.bbox_head(body_feats, rois, rois_num, self.inputs)
refined_rois = self.bbox_head.get_refined_rois()
im_shape = self.inputs['im_shape']
scale_factor = self.inputs['scale_factor']
bbox, bbox_num = self.bbox_post_process(
preds, (refined_rois, rois_num), im_shape, scale_factor)
# rescale the prediction back to origin image
bbox_pred = self.bbox_post_process.get_pred(bbox, bbox_num,
im_shape, scale_factor)
if not self.with_mask:
return bbox_pred, bbox_num, None
mask_out = self.mask_head(body_feats, bbox, bbox_num, self.inputs)
origin_shape = self.bbox_post_process.get_origin_shape()
mask_pred = self.mask_post_process(mask_out[:, 0, :, :], bbox_pred,
bbox_num, origin_shape)
return bbox_pred, bbox_num, mask_pred
def get_loss(self, ):
rpn_loss, bbox_loss, mask_loss = self._forward()
loss = {}
loss.update(rpn_loss)
loss.update(bbox_loss)
if self.with_mask:
loss.update(mask_loss)
total_loss = paddle.add_n(list(loss.values()))
loss.update({'loss': total_loss})
return loss
def get_pred(self):
bbox_pred, bbox_num, mask_pred = self._forward()
output = {
'bbox': bbox_pred,
'bbox_num': bbox_num,
}
if self.with_mask:
output.update({'mask': mask_pred})
return output

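get_loss above folds the per-component loss dict into a single scalar with paddle.add_n; a standalone sketch of that step with made-up loss names and values:

import paddle

loss = {
    'loss_rpn_cls': paddle.to_tensor(0.7),   # hypothetical component losses
    'loss_rpn_reg': paddle.to_tensor(0.3),
    'loss_bbox_cls': paddle.to_tensor(1.1),
}
loss['loss'] = paddle.add_n(list(loss.values()))   # scalar total used for backward()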
@@ -0,0 +1,108 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from paddlers.models.ppdet.core.workspace import register, create
from .meta_arch import BaseArch
__all__ = ['CenterNet']
@register
class CenterNet(BaseArch):
"""
CenterNet network, see http://arxiv.org/abs/1904.07850
Args:
backbone (object): backbone instance
neck (object): FPN instance, default use 'CenterNetDLAFPN'
head (object): 'CenterNetHead' instance
post_process (object): 'CenterNetPostProcess' instance
for_mot (bool): whether return other features used in tracking model
"""
__category__ = 'architecture'
__inject__ = ['post_process']
__shared__ = ['for_mot']
def __init__(self,
backbone,
neck='CenterNetDLAFPN',
head='CenterNetHead',
post_process='CenterNetPostProcess',
for_mot=False):
super(CenterNet, self).__init__()
self.backbone = backbone
self.neck = neck
self.head = head
self.post_process = post_process
self.for_mot = for_mot
@classmethod
def from_config(cls, cfg, *args, **kwargs):
backbone = create(cfg['backbone'])
kwargs = {'input_shape': backbone.out_shape}
neck = cfg['neck'] and create(cfg['neck'], **kwargs)
out_shape = neck and neck.out_shape or backbone.out_shape
kwargs = {'input_shape': out_shape}
head = create(cfg['head'], **kwargs)
return {'backbone': backbone, 'neck': neck, "head": head}
def _forward(self):
neck_feat = self.backbone(self.inputs)
if self.neck is not None:
neck_feat = self.neck(neck_feat)
head_out = self.head(neck_feat, self.inputs)
if self.for_mot:
head_out.update({'neck_feat': neck_feat})
elif self.training:
head_out['loss'] = head_out.pop('det_loss')
return head_out
def get_pred(self):
head_out = self._forward()
if self.for_mot:
bbox, bbox_inds, topk_clses = self.post_process(
head_out['heatmap'],
head_out['size'],
head_out['offset'],
im_shape=self.inputs['im_shape'],
scale_factor=self.inputs['scale_factor'])
output = {
"bbox": bbox,
"bbox_inds": bbox_inds,
"topk_clses": topk_clses,
"neck_feat": head_out['neck_feat']
}
else:
bbox, bbox_num, _ = self.post_process(
head_out['heatmap'],
head_out['size'],
head_out['offset'],
im_shape=self.inputs['im_shape'],
scale_factor=self.inputs['scale_factor'])
output = {
"bbox": bbox,
"bbox_num": bbox_num,
}
return output
def get_loss(self):
return self._forward()

@@ -0,0 +1,69 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from paddlers.models.ppdet.core.workspace import register, create
from .meta_arch import BaseArch
from paddlers.models.ppdet.modeling.mot.utils import Detection, get_crops, scale_coords, clip_box
__all__ = ['DeepSORT']
@register
class DeepSORT(BaseArch):
"""
DeepSORT network, see https://arxiv.org/abs/1703.07402
Args:
detector (object): detector model instance
reid (object): reid model instance
tracker (object): tracker instance
"""
__category__ = 'architecture'
def __init__(self,
detector='YOLOv3',
reid='PCBPyramid',
tracker='DeepSORTTracker'):
super(DeepSORT, self).__init__()
self.detector = detector
self.reid = reid
self.tracker = tracker
@classmethod
def from_config(cls, cfg, *args, **kwargs):
if cfg['detector'] != 'None':
detector = create(cfg['detector'])
else:
detector = None
reid = create(cfg['reid'])
tracker = create(cfg['tracker'])
return {
"detector": detector,
"reid": reid,
"tracker": tracker,
}
def _forward(self):
crops = self.inputs['crops']
features = self.reid(crops)
return features
def get_pred(self):
return self._forward()

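DeepSORT only runs the ReID branch here; the detector output is expected to be turned into per-box image crops upstream and fed in via inputs['crops']. A rough numpy-only sketch of that cropping step (not the actual get_crops helper imported above):

import numpy as np

frame = np.zeros((608, 1088, 3), dtype=np.uint8)               # hypothetical video frame
boxes = np.array([[100, 50, 180, 250], [400, 60, 470, 240]])   # xmin, ymin, xmax, ymax
crops = [frame[y1:y2, x1:x2] for x1, y1, x2, y2 in boxes]      # one crop per detection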
@@ -0,0 +1,93 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from .meta_arch import BaseArch
from paddlers.models.ppdet.core.workspace import register, create
__all__ = ['DETR']
@register
class DETR(BaseArch):
__category__ = 'architecture'
__inject__ = ['post_process']
def __init__(self,
backbone,
transformer,
detr_head,
post_process='DETRBBoxPostProcess'):
super(DETR, self).__init__()
self.backbone = backbone
self.transformer = transformer
self.detr_head = detr_head
self.post_process = post_process
@classmethod
def from_config(cls, cfg, *args, **kwargs):
# backbone
backbone = create(cfg['backbone'])
# transformer
kwargs = {'input_shape': backbone.out_shape}
transformer = create(cfg['transformer'], **kwargs)
# head
kwargs = {
'hidden_dim': transformer.hidden_dim,
'nhead': transformer.nhead,
'input_shape': backbone.out_shape
}
detr_head = create(cfg['detr_head'], **kwargs)
return {
'backbone': backbone,
'transformer': transformer,
"detr_head": detr_head,
}
def _forward(self):
# Backbone
body_feats = self.backbone(self.inputs)
# Transformer
out_transformer = self.transformer(body_feats, self.inputs['pad_mask'])
# DETR Head
if self.training:
return self.detr_head(out_transformer, body_feats, self.inputs)
else:
preds = self.detr_head(out_transformer, body_feats)
bbox, bbox_num = self.post_process(preds, self.inputs['im_shape'],
self.inputs['scale_factor'])
return bbox, bbox_num
def get_loss(self, ):
losses = self._forward()
losses.update({
'loss':
paddle.add_n([v for k, v in losses.items() if 'log' not in k])
})
return losses
def get_pred(self):
bbox_pred, bbox_num = self._forward()
output = {
"bbox": bbox_pred,
"bbox_num": bbox_num,
}
return output

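get_loss above sums every entry of the head's loss dict except the monitoring-only keys containing 'log'; a standalone sketch with made-up entries:

import paddle

losses = {
    'loss_class': paddle.to_tensor(0.9),
    'loss_bbox': paddle.to_tensor(0.4),
    'loss_class_log': paddle.to_tensor(0.9),   # logged only, excluded from the total
}
losses['loss'] = paddle.add_n(
    [v for k, v in losses.items() if 'log' not in k])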
@@ -0,0 +1,100 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from paddlers.models.ppdet.core.workspace import register, create
from .meta_arch import BaseArch
__all__ = ['FairMOT']
@register
class FairMOT(BaseArch):
"""
FairMOT network, see http://arxiv.org/abs/2004.01888
Args:
detector (object): 'CenterNet' instance
reid (object): 'FairMOTEmbeddingHead' instance
tracker (object): 'JDETracker' instance
loss (object): 'FairMOTLoss' instance
"""
__category__ = 'architecture'
__inject__ = ['loss']
def __init__(self,
detector='CenterNet',
reid='FairMOTEmbeddingHead',
tracker='JDETracker',
loss='FairMOTLoss'):
super(FairMOT, self).__init__()
self.detector = detector
self.reid = reid
self.tracker = tracker
self.loss = loss
@classmethod
def from_config(cls, cfg, *args, **kwargs):
detector = create(cfg['detector'])
detector_out_shape = detector.neck and detector.neck.out_shape or detector.backbone.out_shape
kwargs = {'input_shape': detector_out_shape}
reid = create(cfg['reid'], **kwargs)
loss = create(cfg['loss'])
tracker = create(cfg['tracker'])
return {
'detector': detector,
'reid': reid,
'loss': loss,
'tracker': tracker
}
def _forward(self):
loss = dict()
# det_outs keys:
# train: neck_feat, det_loss, heatmap_loss, size_loss, offset_loss (optional: iou_loss)
# eval/infer: neck_feat, bbox, bbox_inds
det_outs = self.detector(self.inputs)
neck_feat = det_outs['neck_feat']
if self.training:
reid_loss = self.reid(neck_feat, self.inputs)
det_loss = det_outs['det_loss']
loss = self.loss(det_loss, reid_loss)
for k, v in det_outs.items():
if 'loss' not in k:
continue
loss.update({k: v})
loss.update({'reid_loss': reid_loss})
return loss
else:
pred_dets, pred_embs = self.reid(
neck_feat, self.inputs, det_outs['bbox'],
det_outs['bbox_inds'], det_outs['topk_clses'])
return pred_dets, pred_embs
def get_pred(self):
output = self._forward()
return output
def get_loss(self):
loss = self._forward()
return loss

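The detector_out_shape line above relies on the `a and a.x or b.x` short-circuit idiom: the neck's out_shape is used when a neck exists, otherwise the backbone's. An explicit equivalent, with hypothetical values:

neck = None                          # hypothetical: detector built without an FPN neck
backbone_out_shape = [64, 128, 256]
out_shape = neck.out_shape if neck is not None else backbone_out_shape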
@@ -0,0 +1,106 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from paddlers.models.ppdet.core.workspace import register, create
from .meta_arch import BaseArch
__all__ = ['FasterRCNN']
@register
class FasterRCNN(BaseArch):
"""
Faster R-CNN network, see https://arxiv.org/abs/1506.01497
Args:
backbone (object): backbone instance
rpn_head (object): `RPNHead` instance
bbox_head (object): `BBoxHead` instance
bbox_post_process (object): `BBoxPostProcess` instance
neck (object): 'FPN' instance
"""
__category__ = 'architecture'
__inject__ = ['bbox_post_process']
def __init__(self,
backbone,
rpn_head,
bbox_head,
bbox_post_process,
neck=None):
super(FasterRCNN, self).__init__()
self.backbone = backbone
self.neck = neck
self.rpn_head = rpn_head
self.bbox_head = bbox_head
self.bbox_post_process = bbox_post_process
@classmethod
def from_config(cls, cfg, *args, **kwargs):
backbone = create(cfg['backbone'])
kwargs = {'input_shape': backbone.out_shape}
neck = cfg['neck'] and create(cfg['neck'], **kwargs)
out_shape = neck and neck.out_shape or backbone.out_shape
kwargs = {'input_shape': out_shape}
rpn_head = create(cfg['rpn_head'], **kwargs)
bbox_head = create(cfg['bbox_head'], **kwargs)
return {
'backbone': backbone,
'neck': neck,
"rpn_head": rpn_head,
"bbox_head": bbox_head,
}
def _forward(self):
body_feats = self.backbone(self.inputs)
if self.neck is not None:
body_feats = self.neck(body_feats)
if self.training:
rois, rois_num, rpn_loss = self.rpn_head(body_feats, self.inputs)
bbox_loss, _ = self.bbox_head(body_feats, rois, rois_num,
self.inputs)
return rpn_loss, bbox_loss
else:
rois, rois_num, _ = self.rpn_head(body_feats, self.inputs)
preds, _ = self.bbox_head(body_feats, rois, rois_num, None)
im_shape = self.inputs['im_shape']
scale_factor = self.inputs['scale_factor']
bbox, bbox_num = self.bbox_post_process(preds, (rois, rois_num),
im_shape, scale_factor)
# rescale the prediction back to origin image
bbox_pred = self.bbox_post_process.get_pred(bbox, bbox_num,
im_shape, scale_factor)
return bbox_pred, bbox_num
def get_loss(self, ):
rpn_loss, bbox_loss = self._forward()
loss = {}
loss.update(rpn_loss)
loss.update(bbox_loss)
total_loss = paddle.add_n(list(loss.values()))
loss.update({'loss': total_loss})
return loss
def get_pred(self):
bbox_pred, bbox_num = self._forward()
output = {'bbox': bbox_pred, 'bbox_num': bbox_num}
return output

@@ -0,0 +1,105 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from paddlers.models.ppdet.core.workspace import register, create
from .meta_arch import BaseArch
__all__ = ['FCOS']
@register
class FCOS(BaseArch):
"""
FCOS network, see https://arxiv.org/abs/1904.01355
Args:
backbone (object): backbone instance
neck (object): 'FPN' instance
fcos_head (object): 'FCOSHead' instance
fcos_post_process (object): 'FCOSPostProcess' instance
"""
__category__ = 'architecture'
__inject__ = ['fcos_post_process']
def __init__(self,
backbone,
neck,
fcos_head='FCOSHead',
fcos_post_process='FCOSPostProcess'):
super(FCOS, self).__init__()
self.backbone = backbone
self.neck = neck
self.fcos_head = fcos_head
self.fcos_post_process = fcos_post_process
@classmethod
def from_config(cls, cfg, *args, **kwargs):
backbone = create(cfg['backbone'])
kwargs = {'input_shape': backbone.out_shape}
neck = create(cfg['neck'], **kwargs)
kwargs = {'input_shape': neck.out_shape}
fcos_head = create(cfg['fcos_head'], **kwargs)
return {
'backbone': backbone,
'neck': neck,
"fcos_head": fcos_head,
}
def _forward(self):
body_feats = self.backbone(self.inputs)
fpn_feats = self.neck(body_feats)
fcos_head_outs = self.fcos_head(fpn_feats, self.training)
if not self.training:
scale_factor = self.inputs['scale_factor']
bboxes = self.fcos_post_process(fcos_head_outs, scale_factor)
return bboxes
else:
return fcos_head_outs
def get_loss(self, ):
loss = {}
tag_labels, tag_bboxes, tag_centerness = [], [], []
for i in range(len(self.fcos_head.fpn_stride)):
# labels, reg_target, centerness
k_lbl = 'labels{}'.format(i)
if k_lbl in self.inputs:
tag_labels.append(self.inputs[k_lbl])
k_box = 'reg_target{}'.format(i)
if k_box in self.inputs:
tag_bboxes.append(self.inputs[k_box])
k_ctn = 'centerness{}'.format(i)
if k_ctn in self.inputs:
tag_centerness.append(self.inputs[k_ctn])
fcos_head_outs = self._forward()
loss_fcos = self.fcos_head.get_loss(fcos_head_outs, tag_labels,
tag_bboxes, tag_centerness)
loss.update(loss_fcos)
total_loss = paddle.add_n(list(loss.values()))
loss.update({'loss': total_loss})
return loss
def get_pred(self):
bbox_pred, bbox_num = self._forward()
output = {'bbox': bbox_pred, 'bbox_num': bbox_num}
return output

@@ -0,0 +1,87 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from paddlers.models.ppdet.core.workspace import register, create
from .meta_arch import BaseArch
__all__ = ['GFL']
@register
class GFL(BaseArch):
"""
Generalized Focal Loss network, see https://arxiv.org/abs/2006.04388
Args:
backbone (object): backbone instance
neck (object): 'FPN' instance
head (object): 'GFLHead' instance
"""
__category__ = 'architecture'
def __init__(self, backbone, neck, head='GFLHead'):
super(GFL, self).__init__()
self.backbone = backbone
self.neck = neck
self.head = head
@classmethod
def from_config(cls, cfg, *args, **kwargs):
backbone = create(cfg['backbone'])
kwargs = {'input_shape': backbone.out_shape}
neck = create(cfg['neck'], **kwargs)
kwargs = {'input_shape': neck.out_shape}
head = create(cfg['head'], **kwargs)
return {
'backbone': backbone,
'neck': neck,
"head": head,
}
def _forward(self):
body_feats = self.backbone(self.inputs)
fpn_feats = self.neck(body_feats)
head_outs = self.head(fpn_feats)
if not self.training:
im_shape = self.inputs['im_shape']
scale_factor = self.inputs['scale_factor']
bboxes, bbox_num = self.head.post_process(head_outs, im_shape,
scale_factor)
return bboxes, bbox_num
else:
return head_outs
def get_loss(self, ):
loss = {}
head_outs = self._forward()
loss_gfl = self.head.get_loss(head_outs, self.inputs)
loss.update(loss_gfl)
total_loss = paddle.add_n(list(loss.values()))
loss.update({'loss': total_loss})
return loss
def get_pred(self):
bbox_pred, bbox_num = self._forward()
output = {'bbox': bbox_pred, 'bbox_num': bbox_num}
return output

@@ -0,0 +1,111 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from paddlers.models.ppdet.core.workspace import register, create
from .meta_arch import BaseArch
__all__ = ['JDE']
@register
class JDE(BaseArch):
__category__ = 'architecture'
__shared__ = ['metric']
"""
JDE network, see https://arxiv.org/abs/1909.12605v1
Args:
detector (object): detector model instance
reid (object): reid model instance
tracker (object): tracker instance
metric (str): 'MOTDet' for training and detection evaluation, 'ReID'
for ReID embedding evaluation, or 'MOT' for multi object tracking
evaluation.
"""
def __init__(self,
detector='YOLOv3',
reid='JDEEmbeddingHead',
tracker='JDETracker',
metric='MOT'):
super(JDE, self).__init__()
self.detector = detector
self.reid = reid
self.tracker = tracker
self.metric = metric
@classmethod
def from_config(cls, cfg, *args, **kwargs):
detector = create(cfg['detector'])
kwargs = {'input_shape': detector.neck.out_shape}
reid = create(cfg['reid'], **kwargs)
tracker = create(cfg['tracker'])
return {
"detector": detector,
"reid": reid,
"tracker": tracker,
}
def _forward(self):
det_outs = self.detector(self.inputs)
if self.training:
emb_feats = det_outs['emb_feats']
loss_confs = det_outs['det_losses']['loss_confs']
loss_boxes = det_outs['det_losses']['loss_boxes']
jde_losses = self.reid(
emb_feats,
self.inputs,
loss_confs=loss_confs,
loss_boxes=loss_boxes)
return jde_losses
else:
if self.metric == 'MOTDet':
det_results = {
'bbox': det_outs['bbox'],
'bbox_num': det_outs['bbox_num'],
}
return det_results
elif self.metric == 'MOT':
emb_feats = det_outs['emb_feats']
bboxes = det_outs['bbox']
boxes_idx = det_outs['boxes_idx']
nms_keep_idx = det_outs['nms_keep_idx']
pred_dets, pred_embs = self.reid(
emb_feats,
self.inputs,
bboxes=bboxes,
boxes_idx=boxes_idx,
nms_keep_idx=nms_keep_idx)
return pred_dets, pred_embs
else:
raise ValueError(
"Unknown metric {} for multi object tracking.".format(
self.metric))
def get_loss(self):
return self._forward()
def get_pred(self):
return self._forward()

@@ -0,0 +1,287 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from scipy.optimize import linear_sum_assignment
from collections import abc, defaultdict
import numpy as np
import paddle
from paddlers.models.ppdet.core.workspace import register, create, serializable
from .meta_arch import BaseArch
from .. import layers as L
from ..keypoint_utils import transpred
__all__ = ['HigherHRNet']
@register
class HigherHRNet(BaseArch):
__category__ = 'architecture'
def __init__(self,
backbone='HRNet',
hrhrnet_head='HrHRNetHead',
post_process='HrHRNetPostProcess',
eval_flip=True,
flip_perm=None,
max_num_people=30):
"""
HigherHRNet network, see https://arxiv.org/abs/1908.10357
HigherHRNet+swahr, see https://arxiv.org/abs/2012.15175
Args:
backbone (nn.Layer): backbone instance
hrhrnet_head (nn.Layer): keypoint_head instance
post_process (object): `HrHRNetPostProcess` instance
"""
super(HigherHRNet, self).__init__()
self.backbone = backbone
self.hrhrnet_head = hrhrnet_head
self.post_process = post_process
self.flip = eval_flip
self.flip_perm = paddle.to_tensor(flip_perm)
self.deploy = False
self.interpolate = L.Upsample(2, mode='bilinear')
self.pool = L.MaxPool(5, 1, 2)
self.max_num_people = max_num_people
@classmethod
def from_config(cls, cfg, *args, **kwargs):
# backbone
backbone = create(cfg['backbone'])
# head
kwargs = {'input_shape': backbone.out_shape}
hrhrnet_head = create(cfg['hrhrnet_head'], **kwargs)
post_process = create(cfg['post_process'])
return {
'backbone': backbone,
"hrhrnet_head": hrhrnet_head,
"post_process": post_process,
}
def _forward(self):
if self.flip and not self.training and not self.deploy:
self.inputs['image'] = paddle.concat(
(self.inputs['image'], paddle.flip(self.inputs['image'], [3])))
body_feats = self.backbone(self.inputs)
if self.training:
return self.hrhrnet_head(body_feats, self.inputs)
else:
outputs = self.hrhrnet_head(body_feats)
if self.flip and not self.deploy:
outputs = [paddle.split(o, 2) for o in outputs]
output_rflip = [
paddle.flip(paddle.gather(o[1], self.flip_perm, 1), [3])
for o in outputs
]
output1 = [o[0] for o in outputs]
heatmap = (output1[0] + output_rflip[0]) / 2.
tagmaps = [output1[1], output_rflip[1]]
outputs = [heatmap] + tagmaps
outputs = self.get_topk(outputs)
if self.deploy:
return outputs
res_lst = []
h = self.inputs['im_shape'][0, 0].numpy().item()
w = self.inputs['im_shape'][0, 1].numpy().item()
kpts, scores = self.post_process(*outputs, h, w)
res_lst.append([kpts, scores])
return res_lst
def get_loss(self):
return self._forward()
def get_pred(self):
outputs = {}
res_lst = self._forward()
outputs['keypoint'] = res_lst
return outputs
def get_topk(self, outputs):
# resize to image size
outputs = [self.interpolate(x) for x in outputs]
if len(outputs) == 3:
tagmap = paddle.concat(
(outputs[1].unsqueeze(4), outputs[2].unsqueeze(4)), axis=4)
else:
tagmap = outputs[1].unsqueeze(4)
heatmap = outputs[0]
N, J = 1, self.hrhrnet_head.num_joints
heatmap_maxpool = self.pool(heatmap)
# topk
maxmap = heatmap * (heatmap == heatmap_maxpool)
maxmap = maxmap.reshape([N, J, -1])
heat_k, inds_k = maxmap.topk(self.max_num_people, axis=2)
outputs = [heatmap, tagmap, heat_k, inds_k]
return outputs
@register
@serializable
class HrHRNetPostProcess(object):
'''
HrHRNet postprocess contain:
1) get topk keypoints in the output heatmap
2) sample the tagmap's value corresponding to each of the topk coordinate
3) match different joints and combine them into people with the Hungarian algorithm
4) adjust the coordinate by +-0.25 to decrease error std
5) salvage missing joints by check positivity of heatmap - tagdiff_norm
Args:
max_num_people (int): max number of people support in postprocess
heat_thresh (float): topk values below this threshold are ignored
tag_thresh (float): tag values sampled from the tagmap within this threshold are grouped into the same person during initialization
inputs (list[heatmap]): the output list of the model, [heatmap, heatmap_maxpool, tagmap]; heatmap_maxpool is used to get the topk
original_height, original_width (float): the original image size
'''
def __init__(self, max_num_people=30, heat_thresh=0.1, tag_thresh=1.):
self.max_num_people = max_num_people
self.heat_thresh = heat_thresh
self.tag_thresh = tag_thresh
def lerp(self, j, y, x, heatmap):
H, W = heatmap.shape[-2:]
left = np.clip(x - 1, 0, W - 1)
right = np.clip(x + 1, 0, W - 1)
up = np.clip(y - 1, 0, H - 1)
down = np.clip(y + 1, 0, H - 1)
offset_y = np.where(heatmap[j, down, x] > heatmap[j, up, x], 0.25,
-0.25)
offset_x = np.where(heatmap[j, y, right] > heatmap[j, y, left], 0.25,
-0.25)
return offset_y + 0.5, offset_x + 0.5
def __call__(self, heatmap, tagmap, heat_k, inds_k, original_height,
original_width):
N, J, H, W = heatmap.shape
assert N == 1, "only support batch size 1"
heatmap = heatmap[0].cpu().detach().numpy()
tagmap = tagmap[0].cpu().detach().numpy()
heats = heat_k[0].cpu().detach().numpy()
inds_np = inds_k[0].cpu().detach().numpy()
y = inds_np // W
x = inds_np % W
tags = tagmap[np.arange(J)[None, :].repeat(self.max_num_people),
y.flatten(), x.flatten()].reshape(J, -1, tagmap.shape[-1])
coords = np.stack((y, x), axis=2)
# threshold
mask = heats > self.heat_thresh
# cluster
cluster = defaultdict(lambda: {
'coords': np.zeros((J, 2), dtype=np.float32),
'scores': np.zeros(J, dtype=np.float32),
'tags': []
})
for jid, m in enumerate(mask):
num_valid = m.sum()
if num_valid == 0:
continue
valid_inds = np.where(m)[0]
valid_tags = tags[jid, m, :]
if len(cluster) == 0: # initialize
for i in valid_inds:
tag = tags[jid, i]
key = tag[0]
cluster[key]['tags'].append(tag)
cluster[key]['scores'][jid] = heats[jid, i]
cluster[key]['coords'][jid] = coords[jid, i]
continue
candidates = list(cluster.keys())[:self.max_num_people]
centroids = [
np.mean(
cluster[k]['tags'], axis=0) for k in candidates
]
num_clusters = len(centroids)
# shape is (num_valid, num_clusters, tag_dim)
dist = valid_tags[:, None, :] - np.array(centroids)[None, ...]
l2_dist = np.linalg.norm(dist, ord=2, axis=2)
# modulate dist with heat value, see `use_detection_val`
cost = np.round(l2_dist) * 100 - heats[jid, m, None]
# pad the cost matrix, otherwise new pose are ignored
if num_valid > num_clusters:
cost = np.pad(cost, ((0, 0), (0, num_valid - num_clusters)),
'constant',
constant_values=((0, 0), (0, 1e-10)))
rows, cols = linear_sum_assignment(cost)
for y, x in zip(rows, cols):
tag = tags[jid, y]
if y < num_valid and x < num_clusters and \
l2_dist[y, x] < self.tag_thresh:
key = candidates[x] # merge to cluster
else:
key = tag[0] # initialize new cluster
cluster[key]['tags'].append(tag)
cluster[key]['scores'][jid] = heats[jid, y]
cluster[key]['coords'][jid] = coords[jid, y]
# shape is [k, J, 2] and [k, J]
pose_tags = np.array([cluster[k]['tags'] for k in cluster])
pose_coords = np.array([cluster[k]['coords'] for k in cluster])
pose_scores = np.array([cluster[k]['scores'] for k in cluster])
valid = pose_scores > 0
pose_kpts = np.zeros((pose_scores.shape[0], J, 3), dtype=np.float32)
if valid.sum() == 0:
return pose_kpts, pose_kpts
# refine coords
valid_coords = pose_coords[valid].astype(np.int32)
y = valid_coords[..., 0].flatten()
x = valid_coords[..., 1].flatten()
_, j = np.nonzero(valid)
offsets = self.lerp(j, y, x, heatmap)
pose_coords[valid, 0] += offsets[0]
pose_coords[valid, 1] += offsets[1]
# mean score before salvage
mean_score = pose_scores.mean(axis=1)
pose_kpts[valid, 2] = pose_scores[valid]
# salvage missing joints
if True:
for pid, coords in enumerate(pose_coords):
tag_mean = np.array(pose_tags[pid]).mean(axis=0)
norm = np.sum((tagmap - tag_mean)**2, axis=3)**0.5
score = heatmap - np.round(norm) # (J, H, W)
flat_score = score.reshape(J, -1)
max_inds = np.argmax(flat_score, axis=1)
max_scores = np.max(flat_score, axis=1)
salvage_joints = (pose_scores[pid] == 0) & (max_scores > 0)
if salvage_joints.sum() == 0:
continue
y = max_inds[salvage_joints] // W
x = max_inds[salvage_joints] % W
offsets = self.lerp(salvage_joints.nonzero()[0], y, x, heatmap)
y = y.astype(np.float32) + offsets[0]
x = x.astype(np.float32) + offsets[1]
pose_coords[pid][salvage_joints, 0] = y
pose_coords[pid][salvage_joints, 1] = x
pose_kpts[pid][salvage_joints, 2] = max_scores[salvage_joints]
pose_kpts[..., :2] = transpred(pose_coords[..., :2][..., ::-1],
original_height, original_width,
min(H, W))
return pose_kpts, mean_score
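# --- Illustrative sketch (editor's addition, not part of the original file) ---
# A minimal, self-contained numpy demo of the grouping step implemented above:
# candidate joints are matched to existing clusters by tag distance (modulated
# by the heat value), and the cost matrix is padded so that leftover candidates
# can open new clusters instead of being dropped. The helper name
# `_demo_tag_matching` and all numbers are hypothetical.
def _demo_tag_matching():
    import numpy as np
    from scipy.optimize import linear_sum_assignment
    l2_dist = np.array([[0.1, 2.0], [1.8, 0.2], [3.0, 3.1]])  # 3 candidates x 2 clusters
    heats = np.array([0.9, 0.8, 0.7])  # heat value of each candidate
    cost = np.round(l2_dist) * 100 - heats[:, None]
    num_valid, num_clusters = cost.shape
    if num_valid > num_clusters:
        # pad with tiny-cost columns so the extra candidate may start a new cluster
        cost = np.pad(cost, ((0, 0), (0, num_valid - num_clusters)),
                      'constant', constant_values=1e-10)
    rows, cols = linear_sum_assignment(cost)
    # -> [(0, 0), (1, 1), (2, 2)]: two merges plus one new cluster
    return list(zip(rows.tolist(), cols.tolist()))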

@@ -0,0 +1,267 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import numpy as np
import math
import cv2
from paddlers.models.ppdet.core.workspace import register, create
from .meta_arch import BaseArch
from ..keypoint_utils import transform_preds
from .. import layers as L
__all__ = ['TopDownHRNet']
@register
class TopDownHRNet(BaseArch):
__category__ = 'architecture'
__inject__ = ['loss']
def __init__(self,
width,
num_joints,
backbone='HRNet',
loss='KeyPointMSELoss',
post_process='HRNetPostProcess',
flip_perm=None,
flip=True,
shift_heatmap=True,
use_dark=True):
"""
HRNet network, see https://arxiv.org/abs/1902.09212
Args:
backbone (nn.Layer): backbone instance
post_process (object): `HRNetPostProcess` instance
flip_perm (list): The left-right joints exchange order list
use_dark(bool): Whether to use DARK in post processing
"""
super(TopDownHRNet, self).__init__()
self.backbone = backbone
self.post_process = HRNetPostProcess(use_dark)
self.loss = loss
self.flip_perm = flip_perm
self.flip = flip
self.final_conv = L.Conv2d(width, num_joints, 1, 1, 0, bias=True)
self.shift_heatmap = shift_heatmap
self.deploy = False
@classmethod
def from_config(cls, cfg, *args, **kwargs):
# backbone
backbone = create(cfg['backbone'])
return {'backbone': backbone, }
def _forward(self):
feats = self.backbone(self.inputs)
hrnet_outputs = self.final_conv(feats[0])
if self.training:
return self.loss(hrnet_outputs, self.inputs)
elif self.deploy:
outshape = hrnet_outputs.shape
max_idx = paddle.argmax(
hrnet_outputs.reshape(
(outshape[0], outshape[1], outshape[2] * outshape[3])),
axis=-1)
return hrnet_outputs, max_idx
else:
if self.flip:
self.inputs['image'] = self.inputs['image'].flip([3])
feats = self.backbone(self.inputs)
output_flipped = self.final_conv(feats[0])
output_flipped = self.flip_back(output_flipped.numpy(),
self.flip_perm)
output_flipped = paddle.to_tensor(output_flipped.copy())
if self.shift_heatmap:
output_flipped[:, :, :, 1:] = output_flipped.clone(
)[:, :, :, 0:-1]
hrnet_outputs = (hrnet_outputs + output_flipped) * 0.5
imshape = (self.inputs['im_shape'].numpy()
)[:, ::-1] if 'im_shape' in self.inputs else None
center = self.inputs['center'].numpy(
) if 'center' in self.inputs else np.round(imshape / 2.)
scale = self.inputs['scale'].numpy(
) if 'scale' in self.inputs else imshape / 200.
outputs = self.post_process(hrnet_outputs, center, scale)
return outputs
def get_loss(self):
return self._forward()
def get_pred(self):
res_lst = self._forward()
outputs = {'keypoint': res_lst}
return outputs
def flip_back(self, output_flipped, matched_parts):
assert output_flipped.ndim == 4,\
'output_flipped should be [batch_size, num_joints, height, width]'
output_flipped = output_flipped[:, :, :, ::-1]
for pair in matched_parts:
tmp = output_flipped[:, pair[0], :, :].copy()
output_flipped[:, pair[0], :, :] = output_flipped[:, pair[1], :, :]
output_flipped[:, pair[1], :, :] = tmp
return output_flipped
class HRNetPostProcess(object):
def __init__(self, use_dark=True):
self.use_dark = use_dark
def get_max_preds(self, heatmaps):
'''get predictions from score maps
Args:
heatmaps: numpy.ndarray([batch_size, num_joints, height, width])
Returns:
preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords
maxvals: numpy.ndarray([batch_size, num_joints, 1]), the maximum confidence of the keypoints
'''
assert isinstance(heatmaps,
np.ndarray), 'heatmaps should be numpy.ndarray'
assert heatmaps.ndim == 4, 'batch_images should be 4-ndim'
batch_size = heatmaps.shape[0]
num_joints = heatmaps.shape[1]
width = heatmaps.shape[3]
heatmaps_reshaped = heatmaps.reshape((batch_size, num_joints, -1))
idx = np.argmax(heatmaps_reshaped, 2)
maxvals = np.amax(heatmaps_reshaped, 2)
maxvals = maxvals.reshape((batch_size, num_joints, 1))
idx = idx.reshape((batch_size, num_joints, 1))
preds = np.tile(idx, (1, 1, 2)).astype(np.float32)
preds[:, :, 0] = (preds[:, :, 0]) % width
preds[:, :, 1] = np.floor((preds[:, :, 1]) / width)
pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2))
pred_mask = pred_mask.astype(np.float32)
preds *= pred_mask
return preds, maxvals
def gaussian_blur(self, heatmap, kernel):
border = (kernel - 1) // 2
batch_size = heatmap.shape[0]
num_joints = heatmap.shape[1]
height = heatmap.shape[2]
width = heatmap.shape[3]
for i in range(batch_size):
for j in range(num_joints):
origin_max = np.max(heatmap[i, j])
dr = np.zeros((height + 2 * border, width + 2 * border))
dr[border:-border, border:-border] = heatmap[i, j].copy()
dr = cv2.GaussianBlur(dr, (kernel, kernel), 0)
heatmap[i, j] = dr[border:-border, border:-border].copy()
heatmap[i, j] *= origin_max / np.max(heatmap[i, j])
return heatmap
def dark_parse(self, hm, coord):
heatmap_height = hm.shape[0]
heatmap_width = hm.shape[1]
px = int(coord[0])
py = int(coord[1])
if 1 < px < heatmap_width - 2 and 1 < py < heatmap_height - 2:
dx = 0.5 * (hm[py][px + 1] - hm[py][px - 1])
dy = 0.5 * (hm[py + 1][px] - hm[py - 1][px])
dxx = 0.25 * (hm[py][px + 2] - 2 * hm[py][px] + hm[py][px - 2])
dxy = 0.25 * (hm[py+1][px+1] - hm[py-1][px+1] - hm[py+1][px-1] \
+ hm[py-1][px-1])
dyy = 0.25 * (hm[py + 2][px] - 2 * hm[py][px] + hm[py - 2][px])
derivative = np.matrix([[dx], [dy]])
hessian = np.matrix([[dxx, dxy], [dxy, dyy]])
if dxx * dyy - dxy**2 != 0:
hessianinv = hessian.I
offset = -hessianinv * derivative
offset = np.squeeze(np.array(offset.T), axis=0)
coord += offset
return coord
def dark_postprocess(self, hm, coords, kernelsize):
'''DARK postprocessing, Zhang et al. Distribution-Aware Coordinate
Representation for Human Pose Estimation (CVPR 2020).
'''
hm = self.gaussian_blur(hm, kernelsize)
hm = np.maximum(hm, 1e-10)
hm = np.log(hm)
for n in range(coords.shape[0]):
for p in range(coords.shape[1]):
coords[n, p] = self.dark_parse(hm[n][p], coords[n][p])
return coords
def get_final_preds(self, heatmaps, center, scale, kernelsize=3):
"""the highest heatvalue location with a quarter offset in the
direction from the highest response to the second highest response.
Args:
heatmaps (numpy.ndarray): The predicted heatmaps
center (numpy.ndarray): The boxes center
scale (numpy.ndarray): The scale factor
Returns:
preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords
maxvals: numpy.ndarray([batch_size, num_joints, 1]), the maximum confidence of the keypoints
"""
coords, maxvals = self.get_max_preds(heatmaps)
heatmap_height = heatmaps.shape[2]
heatmap_width = heatmaps.shape[3]
if self.use_dark:
coords = self.dark_postprocess(heatmaps, coords, kernelsize)
else:
for n in range(coords.shape[0]):
for p in range(coords.shape[1]):
hm = heatmaps[n][p]
px = int(math.floor(coords[n][p][0] + 0.5))
py = int(math.floor(coords[n][p][1] + 0.5))
if 1 < px < heatmap_width - 1 and 1 < py < heatmap_height - 1:
diff = np.array([
hm[py][px + 1] - hm[py][px - 1],
hm[py + 1][px] - hm[py - 1][px]
])
coords[n][p] += np.sign(diff) * .25
preds = coords.copy()
# Transform back
for i in range(coords.shape[0]):
preds[i] = transform_preds(coords[i], center[i], scale[i],
[heatmap_width, heatmap_height])
return preds, maxvals
def __call__(self, output, center, scale):
preds, maxvals = self.get_final_preds(output.numpy(), center, scale)
outputs = [[
np.concatenate(
(preds, maxvals), axis=-1), np.mean(
maxvals, axis=1)
]]
return outputs
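# --- Illustrative sketch (editor's addition, not part of the original file) ---
# `dark_parse` refines the integer argmax with a second-order Taylor expansion
# of the log-heatmap: offset = -Hessian^{-1} @ gradient. The tiny numpy check
# below uses a synthetic Gaussian whose peak sits at (x=10.3, y=7.6); the
# cross-term dxy is zero for this axis-aligned Gaussian, so a diagonal Hessian
# is exact here. The helper name `_demo_dark_offset` is hypothetical.
def _demo_dark_offset():
    import numpy as np
    ys, xs = np.mgrid[0:16, 0:24]
    hm = np.exp(-((xs - 10.3)**2 + (ys - 7.6)**2) / (2 * 2.0**2))
    hm = np.log(np.maximum(hm, 1e-10))
    py, px = np.unravel_index(np.argmax(hm), hm.shape)  # integer argmax (8, 10)
    dx = 0.5 * (hm[py, px + 1] - hm[py, px - 1])
    dy = 0.5 * (hm[py + 1, px] - hm[py - 1, px])
    dxx = hm[py, px + 1] - 2 * hm[py, px] + hm[py, px - 1]
    dyy = hm[py + 1, px] - 2 * hm[py, px] + hm[py - 1, px]
    offset_x, offset_y = -dx / dxx, -dy / dyy
    return px + offset_x, py + offset_y  # ~(10.3, 7.6), the sub-pixel peak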

@@ -0,0 +1,135 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from paddlers.models.ppdet.core.workspace import register, create
from .meta_arch import BaseArch
__all__ = ['MaskRCNN']
@register
class MaskRCNN(BaseArch):
"""
Mask R-CNN network, see https://arxiv.org/abs/1703.06870
Args:
backbone (object): backbone instance
rpn_head (object): `RPNHead` instance
bbox_head (object): `BBoxHead` instance
mask_head (object): `MaskHead` instance
bbox_post_process (object): `BBoxPostProcess` instance
mask_post_process (object): `MaskPostProcess` instance
neck (object): 'FPN' instance
"""
__category__ = 'architecture'
__inject__ = [
'bbox_post_process',
'mask_post_process',
]
def __init__(self,
backbone,
rpn_head,
bbox_head,
mask_head,
bbox_post_process,
mask_post_process,
neck=None):
super(MaskRCNN, self).__init__()
self.backbone = backbone
self.neck = neck
self.rpn_head = rpn_head
self.bbox_head = bbox_head
self.mask_head = mask_head
self.bbox_post_process = bbox_post_process
self.mask_post_process = mask_post_process
@classmethod
def from_config(cls, cfg, *args, **kwargs):
backbone = create(cfg['backbone'])
kwargs = {'input_shape': backbone.out_shape}
neck = cfg['neck'] and create(cfg['neck'], **kwargs)
out_shape = neck and neck.out_shape or backbone.out_shape
kwargs = {'input_shape': out_shape}
rpn_head = create(cfg['rpn_head'], **kwargs)
bbox_head = create(cfg['bbox_head'], **kwargs)
out_shape = neck and out_shape or bbox_head.get_head().out_shape
kwargs = {'input_shape': out_shape}
mask_head = create(cfg['mask_head'], **kwargs)
return {
'backbone': backbone,
'neck': neck,
"rpn_head": rpn_head,
"bbox_head": bbox_head,
"mask_head": mask_head,
}
def _forward(self):
body_feats = self.backbone(self.inputs)
if self.neck is not None:
body_feats = self.neck(body_feats)
if self.training:
rois, rois_num, rpn_loss = self.rpn_head(body_feats, self.inputs)
bbox_loss, bbox_feat = self.bbox_head(body_feats, rois, rois_num,
self.inputs)
rois, rois_num = self.bbox_head.get_assigned_rois()
bbox_targets = self.bbox_head.get_assigned_targets()
# Mask Head needs bbox_feat in Mask RCNN
mask_loss = self.mask_head(body_feats, rois, rois_num, self.inputs,
bbox_targets, bbox_feat)
return rpn_loss, bbox_loss, mask_loss
else:
rois, rois_num, _ = self.rpn_head(body_feats, self.inputs)
preds, feat_func = self.bbox_head(body_feats, rois, rois_num, None)
im_shape = self.inputs['im_shape']
scale_factor = self.inputs['scale_factor']
bbox, bbox_num = self.bbox_post_process(preds, (rois, rois_num),
im_shape, scale_factor)
mask_out = self.mask_head(
body_feats, bbox, bbox_num, self.inputs, feat_func=feat_func)
# rescale the prediction back to origin image
bbox_pred = self.bbox_post_process.get_pred(bbox, bbox_num,
im_shape, scale_factor)
origin_shape = self.bbox_post_process.get_origin_shape()
mask_pred = self.mask_post_process(mask_out[:, 0, :, :], bbox_pred,
bbox_num, origin_shape)
return bbox_pred, bbox_num, mask_pred
def get_loss(self, ):
rpn_loss, bbox_loss, mask_loss = self._forward()
loss = {}
loss.update(rpn_loss)
loss.update(bbox_loss)
loss.update(mask_loss)
total_loss = paddle.add_n(list(loss.values()))
loss.update({'loss': total_loss})
return loss
def get_pred(self):
bbox_pred, bbox_num, mask_pred = self._forward()
output = {'bbox': bbox_pred, 'bbox_num': bbox_num, 'mask': mask_pred}
return output
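# --- Illustrative sketch (editor's addition, not part of the original file) ---
# `from_config` wires the components by feeding each stage's `out_shape` into
# the next `create()` call (backbone -> neck -> rpn/bbox heads -> mask head).
# A config consumed by this class would look roughly like the hypothetical,
# abbreviated YAML below; component names are placeholders for whatever is
# registered in the workspace:
#
#   architecture: MaskRCNN
#   MaskRCNN:
#     backbone: ResNet
#     neck: FPN
#     rpn_head: RPNHead
#     bbox_head: BBoxHead
#     mask_head: MaskHead
#     bbox_post_process: BBoxPostProcess
#     mask_post_process: MaskPostProcess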

@@ -0,0 +1,141 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import paddle
import paddle.nn as nn
import typing
from paddlers.models.ppdet.core.workspace import register
from paddlers.models.ppdet.modeling.post_process import nms
__all__ = ['BaseArch']
@register
class BaseArch(nn.Layer):
def __init__(self, data_format='NCHW'):
super(BaseArch, self).__init__()
self.data_format = data_format
self.inputs = {}
self.fuse_norm = False
def load_meanstd(self, cfg_transform):
self.scale = 1.
self.mean = paddle.to_tensor([0.485, 0.456, 0.406]).reshape(
(1, 3, 1, 1))
self.std = paddle.to_tensor([0.229, 0.224, 0.225]).reshape(
(1, 3, 1, 1))
for item in cfg_transform:
if 'NormalizeImage' in item:
self.mean = paddle.to_tensor(item['NormalizeImage'][
'mean']).reshape((1, 3, 1, 1))
self.std = paddle.to_tensor(item['NormalizeImage'][
'std']).reshape((1, 3, 1, 1))
if item['NormalizeImage'].get('is_scale', True):
self.scale = 1. / 255.
break
if self.data_format == 'NHWC':
self.mean = self.mean.reshape(1, 1, 1, 3)
self.std = self.std.reshape(1, 1, 1, 3)
def forward(self, inputs):
if self.data_format == 'NHWC':
image = inputs['image']
inputs['image'] = paddle.transpose(image, [0, 2, 3, 1])
if self.fuse_norm:
image = inputs['image']
self.inputs['image'] = (image * self.scale - self.mean) / self.std
self.inputs['im_shape'] = inputs['im_shape']
self.inputs['scale_factor'] = inputs['scale_factor']
else:
self.inputs = inputs
self.model_arch()
if self.training:
out = self.get_loss()
else:
inputs_list = []
# multi-scale input
if not isinstance(inputs, typing.Sequence):
inputs_list.append(inputs)
else:
inputs_list.extend(inputs)
outs = []
for inp in inputs_list:
self.inputs = inp
outs.append(self.get_pred())
# multi-scale test
if len(outs) > 1:
out = self.merge_multi_scale_predictions(outs)
else:
out = outs[0]
return out
def merge_multi_scale_predictions(self, outs):
# default values for architectures not included in following list
num_classes = 80
nms_threshold = 0.5
keep_top_k = 100
if self.__class__.__name__ in ('CascadeRCNN', 'FasterRCNN', 'MaskRCNN'):
num_classes = self.bbox_head.num_classes
keep_top_k = self.bbox_post_process.nms.keep_top_k
nms_threshold = self.bbox_post_process.nms.nms_threshold
else:
raise Exception(
"Multi scale test only supports CascadeRCNN, FasterRCNN and MaskRCNN for now"
)
final_boxes = []
all_scale_outs = paddle.concat([o['bbox'] for o in outs]).numpy()
for c in range(num_classes):
idxs = all_scale_outs[:, 0] == c
if np.count_nonzero(idxs) == 0:
continue
r = nms(all_scale_outs[idxs, 1:], nms_threshold)
final_boxes.append(
np.concatenate([np.full((r.shape[0], 1), c), r], 1))
out = np.concatenate(final_boxes)
out = np.concatenate(sorted(
out, key=lambda e: e[1])[-keep_top_k:]).reshape((-1, 6))
out = {
'bbox': paddle.to_tensor(out),
'bbox_num': paddle.to_tensor(np.array([out.shape[0], ]))
}
return out
def build_inputs(self, data, input_def):
inputs = {}
for i, k in enumerate(input_def):
inputs[k] = data[i]
return inputs
def model_arch(self, ):
pass
def get_loss(self, ):
raise NotImplementedError("Should implement get_loss method!")
def get_pred(self, ):
raise NotImplementedError("Should implement get_pred method!")
@classmethod
def convert_sync_batchnorm(cls, layer):
layer_output = layer
if getattr(layer, 'norm_type', None) == 'sync_bn':
layer_output = nn.SyncBatchNorm.convert_sync_batchnorm(layer)
else:
for name, sublayer in layer.named_children():
layer_output.add_sublayer(name,
cls.convert_sync_batchnorm(sublayer))
del layer
return layer_output
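# --- Illustrative sketch (editor's addition, not part of the original file) ---
# `convert_sync_batchnorm` walks the layer tree and converts every sublayer
# whose `norm_type` attribute is 'sync_bn' via nn.SyncBatchNorm; it is intended
# to be applied to a fully built architecture before distributed training.
# A hedged usage sketch, where `model` stands for any BaseArch subclass:
#
#   model = create(cfg.architecture)                 # hypothetical config
#   model = BaseArch.convert_sync_batchnorm(model)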

@@ -0,0 +1,91 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from paddlers.models.ppdet.core.workspace import register, create
from .meta_arch import BaseArch
__all__ = ['PicoDet']
@register
class PicoDet(BaseArch):
"""
Generalized Focal Loss network, see https://arxiv.org/abs/2006.04388
Args:
backbone (object): backbone instance
neck (object): 'FPN' instance
head (object): 'PicoHead' instance
"""
__category__ = 'architecture'
def __init__(self, backbone, neck, head='PicoHead'):
super(PicoDet, self).__init__()
self.backbone = backbone
self.neck = neck
self.head = head
self.deploy = False
@classmethod
def from_config(cls, cfg, *args, **kwargs):
backbone = create(cfg['backbone'])
kwargs = {'input_shape': backbone.out_shape}
neck = create(cfg['neck'], **kwargs)
kwargs = {'input_shape': neck.out_shape}
head = create(cfg['head'], **kwargs)
return {
'backbone': backbone,
'neck': neck,
"head": head,
}
def _forward(self):
body_feats = self.backbone(self.inputs)
fpn_feats = self.neck(body_feats)
head_outs = self.head(fpn_feats, self.deploy)
if self.training or self.deploy:
return head_outs, None
else:
im_shape = self.inputs['im_shape']
scale_factor = self.inputs['scale_factor']
bboxes, bbox_num = self.head.post_process(head_outs, im_shape,
scale_factor)
return bboxes, bbox_num
def get_loss(self, ):
loss = {}
head_outs, _ = self._forward()
loss_gfl = self.head.get_loss(head_outs, self.inputs)
loss.update(loss_gfl)
total_loss = paddle.add_n(list(loss.values()))
loss.update({'loss': total_loss})
return loss
def get_pred(self):
if self.deploy:
return {'picodet': self._forward()[0]}
else:
bbox_pred, bbox_num = self._forward()
output = {'bbox': bbox_pred, 'bbox_num': bbox_num}
return output
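# Editor's note: when `self.deploy` is True, `get_pred` returns the raw head
# outputs (no NMS or coordinate rescaling), which suits model export; otherwise
# `head.post_process` maps the predictions back to the original image scale
# using `im_shape` and `scale_factor`.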

@@ -0,0 +1,102 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from paddlers.models.ppdet.core.workspace import register, create
from .meta_arch import BaseArch
__all__ = ['S2ANet']
@register
class S2ANet(BaseArch):
__category__ = 'architecture'
__inject__ = [
's2anet_head',
's2anet_bbox_post_process',
]
def __init__(self, backbone, neck, s2anet_head, s2anet_bbox_post_process):
"""
S2ANet, see https://arxiv.org/pdf/2008.09397.pdf
Args:
backbone (object): backbone instance
neck (object): `FPN` instance
s2anet_head (object): `S2ANetHead` instance
s2anet_bbox_post_process (object): `S2ANetBBoxPostProcess` instance
"""
super(S2ANet, self).__init__()
self.backbone = backbone
self.neck = neck
self.s2anet_head = s2anet_head
self.s2anet_bbox_post_process = s2anet_bbox_post_process
@classmethod
def from_config(cls, cfg, *args, **kwargs):
backbone = create(cfg['backbone'])
kwargs = {'input_shape': backbone.out_shape}
neck = cfg['neck'] and create(cfg['neck'], **kwargs)
out_shape = neck and neck.out_shape or backbone.out_shape
kwargs = {'input_shape': out_shape}
s2anet_head = create(cfg['s2anet_head'], **kwargs)
s2anet_bbox_post_process = create(cfg['s2anet_bbox_post_process'],
**kwargs)
return {
'backbone': backbone,
'neck': neck,
"s2anet_head": s2anet_head,
"s2anet_bbox_post_process": s2anet_bbox_post_process,
}
def _forward(self):
body_feats = self.backbone(self.inputs)
if self.neck is not None:
body_feats = self.neck(body_feats)
self.s2anet_head(body_feats)
if self.training:
loss = self.s2anet_head.get_loss(self.inputs)
total_loss = paddle.add_n(list(loss.values()))
loss.update({'loss': total_loss})
return loss
else:
im_shape = self.inputs['im_shape']
scale_factor = self.inputs['scale_factor']
nms_pre = self.s2anet_bbox_post_process.nms_pre
pred_scores, pred_bboxes = self.s2anet_head.get_prediction(nms_pre)
# post_process
pred_bboxes, bbox_num = self.s2anet_bbox_post_process(pred_scores,
pred_bboxes)
# rescale the prediction back to origin image
pred_bboxes = self.s2anet_bbox_post_process.get_pred(
pred_bboxes, bbox_num, im_shape, scale_factor)
# output
output = {'bbox': pred_bboxes, 'bbox_num': bbox_num}
return output
def get_loss(self, ):
loss = self._forward()
return loss
def get_pred(self):
output = self._forward()
return output

@@ -0,0 +1,110 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from paddlers.models.ppdet.core.workspace import register, create
from .meta_arch import BaseArch
__all__ = ['SOLOv2']
@register
class SOLOv2(BaseArch):
"""
SOLOv2 network, see https://arxiv.org/abs/2003.10152
Args:
backbone (object): a backbone instance
solov2_head (object): a `SOLOv2Head` instance
mask_head (object): a `SOLOv2MaskHead` instance
neck (object): neck of network, such as feature pyramid network instance
"""
__category__ = 'architecture'
def __init__(self, backbone, solov2_head, mask_head, neck=None):
super(SOLOv2, self).__init__()
self.backbone = backbone
self.neck = neck
self.solov2_head = solov2_head
self.mask_head = mask_head
@classmethod
def from_config(cls, cfg, *args, **kwargs):
backbone = create(cfg['backbone'])
kwargs = {'input_shape': backbone.out_shape}
neck = create(cfg['neck'], **kwargs)
kwargs = {'input_shape': neck.out_shape}
solov2_head = create(cfg['solov2_head'], **kwargs)
mask_head = create(cfg['mask_head'], **kwargs)
return {
'backbone': backbone,
'neck': neck,
'solov2_head': solov2_head,
'mask_head': mask_head,
}
def model_arch(self):
body_feats = self.backbone(self.inputs)
body_feats = self.neck(body_feats)
self.seg_pred = self.mask_head(body_feats)
self.cate_pred_list, self.kernel_pred_list = self.solov2_head(
body_feats)
def get_loss(self, ):
loss = {}
# get gt_ins_labels, gt_cate_labels, etc.
gt_ins_labels, gt_cate_labels, gt_grid_orders = [], [], []
fg_num = self.inputs['fg_num']
for i in range(len(self.solov2_head.seg_num_grids)):
ins_label = 'ins_label{}'.format(i)
if ins_label in self.inputs:
gt_ins_labels.append(self.inputs[ins_label])
cate_label = 'cate_label{}'.format(i)
if cate_label in self.inputs:
gt_cate_labels.append(self.inputs[cate_label])
grid_order = 'grid_order{}'.format(i)
if grid_order in self.inputs:
gt_grid_orders.append(self.inputs[grid_order])
loss_solov2 = self.solov2_head.get_loss(
self.cate_pred_list, self.kernel_pred_list, self.seg_pred,
gt_ins_labels, gt_cate_labels, gt_grid_orders, fg_num)
loss.update(loss_solov2)
total_loss = paddle.add_n(list(loss.values()))
loss.update({'loss': total_loss})
return loss
def get_pred(self):
seg_masks, cate_labels, cate_scores, bbox_num = self.solov2_head.get_prediction(
self.cate_pred_list, self.kernel_pred_list, self.seg_pred,
self.inputs['im_shape'], self.inputs['scale_factor'])
outs = {
"segm": seg_masks,
"bbox_num": bbox_num,
'cate_label': cate_labels,
'cate_score': cate_scores
}
return outs

@@ -0,0 +1,99 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from paddlers.models.ppdet.core.workspace import register, create
from .meta_arch import BaseArch
__all__ = ["SparseRCNN"]
@register
class SparseRCNN(BaseArch):
__category__ = 'architecture'
__inject__ = ["postprocess"]
def __init__(self,
backbone,
neck,
head="SparsercnnHead",
postprocess="SparsePostProcess"):
super(SparseRCNN, self).__init__()
self.backbone = backbone
self.neck = neck
self.head = head
self.postprocess = postprocess
@classmethod
def from_config(cls, cfg, *args, **kwargs):
backbone = create(cfg['backbone'])
kwargs = {'input_shape': backbone.out_shape}
neck = create(cfg['neck'], **kwargs)
kwargs = {'roi_input_shape': neck.out_shape}
head = create(cfg['head'], **kwargs)
return {
'backbone': backbone,
'neck': neck,
"head": head,
}
def _forward(self):
body_feats = self.backbone(self.inputs)
fpn_feats = self.neck(body_feats)
head_outs = self.head(fpn_feats, self.inputs["img_whwh"])
if not self.training:
bboxes = self.postprocess(
head_outs["pred_logits"], head_outs["pred_boxes"],
self.inputs["scale_factor_wh"], self.inputs["img_whwh"])
return bboxes
else:
return head_outs
def get_loss(self):
batch_gt_class = self.inputs["gt_class"]
batch_gt_box = self.inputs["gt_bbox"]
batch_whwh = self.inputs["img_whwh"]
targets = []
for i in range(len(batch_gt_class)):
boxes = batch_gt_box[i]
labels = batch_gt_class[i].squeeze(-1)
img_whwh = batch_whwh[i]
img_whwh_tgt = img_whwh.unsqueeze(0).tile([int(boxes.shape[0]), 1])
targets.append({
"boxes": boxes,
"labels": labels,
"img_whwh": img_whwh,
"img_whwh_tgt": img_whwh_tgt
})
outputs = self._forward()
loss_dict = self.head.get_loss(outputs, targets)
acc = loss_dict["acc"]
loss_dict.pop("acc")
total_loss = sum(loss_dict.values())
loss_dict.update({"loss": total_loss, "acc": acc})
return loss_dict
def get_pred(self):
bbox_pred, bbox_num = self._forward()
output = {'bbox': bbox_pred, 'bbox_num': bbox_num}
return output
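# --- Illustrative sketch (editor's addition, not part of the original file) ---
# `get_loss` repacks the batch into one target dict per image before calling
# the head's matcher and criterion. For an image with two gt boxes the dict
# looks roughly like this (shapes only; the field names come from the code
# above):
#
#   {
#       "boxes": Tensor[2, 4],         # gt boxes
#       "labels": Tensor[2],           # gt class ids
#       "img_whwh": Tensor[4],         # (w, h, w, h) of the input image
#       "img_whwh_tgt": Tensor[2, 4],  # img_whwh tiled once per gt box
#   }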

@@ -0,0 +1,93 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from paddlers.models.ppdet.core.workspace import register, create
from .meta_arch import BaseArch
__all__ = ['SSD']
@register
class SSD(BaseArch):
"""
Single Shot MultiBox Detector, see https://arxiv.org/abs/1512.02325
Args:
backbone (nn.Layer): backbone instance
ssd_head (nn.Layer): `SSDHead` instance
post_process (object): `BBoxPostProcess` instance
"""
__category__ = 'architecture'
__inject__ = ['post_process']
def __init__(self, backbone, ssd_head, post_process, r34_backbone=False):
super(SSD, self).__init__()
self.backbone = backbone
self.ssd_head = ssd_head
self.post_process = post_process
self.r34_backbone = r34_backbone
if self.r34_backbone:
from paddlers.models.ppdet.modeling.backbones.resnet import ResNet
assert isinstance(self.backbone, ResNet) and \
self.backbone.depth == 34, \
"If you set r34_backbone=True, please use ResNet-34 as backbone."
self.backbone.res_layers[2].blocks[
0].branch2a.conv._stride = [1, 1]
self.backbone.res_layers[2].blocks[0].short.conv._stride = [1, 1]
@classmethod
def from_config(cls, cfg, *args, **kwargs):
# backbone
backbone = create(cfg['backbone'])
# head
kwargs = {'input_shape': backbone.out_shape}
ssd_head = create(cfg['ssd_head'], **kwargs)
return {
'backbone': backbone,
"ssd_head": ssd_head,
}
def _forward(self):
# Backbone
body_feats = self.backbone(self.inputs)
# SSD Head
if self.training:
return self.ssd_head(body_feats, self.inputs['image'],
self.inputs['gt_bbox'],
self.inputs['gt_class'])
else:
preds, anchors = self.ssd_head(body_feats, self.inputs['image'])
bbox, bbox_num = self.post_process(preds, anchors,
self.inputs['im_shape'],
self.inputs['scale_factor'])
return bbox, bbox_num
def get_loss(self, ):
return {"loss": self._forward()}
def get_pred(self):
bbox_pred, bbox_num = self._forward()
output = {
"bbox": bbox_pred,
"bbox_num": bbox_num,
}
return output

@@ -0,0 +1,78 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from paddlers.models.ppdet.core.workspace import register, create
from .meta_arch import BaseArch
__all__ = ['TOOD']
@register
class TOOD(BaseArch):
"""
TOOD: Task-aligned One-stage Object Detection, see https://arxiv.org/abs/2108.07755
Args:
backbone (nn.Layer): backbone instance
neck (nn.Layer): 'FPN' instance
head (nn.Layer): 'TOODHead' instance
"""
__category__ = 'architecture'
def __init__(self, backbone, neck, head):
super(TOOD, self).__init__()
self.backbone = backbone
self.neck = neck
self.head = head
@classmethod
def from_config(cls, cfg, *args, **kwargs):
backbone = create(cfg['backbone'])
kwargs = {'input_shape': backbone.out_shape}
neck = create(cfg['neck'], **kwargs)
kwargs = {'input_shape': neck.out_shape}
head = create(cfg['head'], **kwargs)
return {
'backbone': backbone,
'neck': neck,
"head": head,
}
def _forward(self):
body_feats = self.backbone(self.inputs)
fpn_feats = self.neck(body_feats)
head_outs = self.head(fpn_feats)
if not self.training:
bboxes, bbox_num = self.head.post_process(
head_outs, self.inputs['im_shape'],
self.inputs['scale_factor'])
return bboxes, bbox_num
else:
loss = self.head.get_loss(head_outs, self.inputs)
return loss
def get_loss(self):
return self._forward()
def get_pred(self):
bbox_pred, bbox_num = self._forward()
output = {'bbox': bbox_pred, 'bbox_num': bbox_num}
return output

@@ -0,0 +1,98 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from paddlers.models.ppdet.core.workspace import register, create
from .meta_arch import BaseArch
__all__ = ['TTFNet']
@register
class TTFNet(BaseArch):
"""
TTFNet network, see https://arxiv.org/abs/1909.00700
Args:
backbone (object): backbone instance
neck (object): 'TTFFPN' instance
ttf_head (object): 'TTFHead' instance
post_process (object): 'BBoxPostProcess' instance
"""
__category__ = 'architecture'
__inject__ = ['post_process']
def __init__(self,
backbone='DarkNet',
neck='TTFFPN',
ttf_head='TTFHead',
post_process='BBoxPostProcess'):
super(TTFNet, self).__init__()
self.backbone = backbone
self.neck = neck
self.ttf_head = ttf_head
self.post_process = post_process
@classmethod
def from_config(cls, cfg, *args, **kwargs):
backbone = create(cfg['backbone'])
kwargs = {'input_shape': backbone.out_shape}
neck = create(cfg['neck'], **kwargs)
kwargs = {'input_shape': neck.out_shape}
ttf_head = create(cfg['ttf_head'], **kwargs)
return {
'backbone': backbone,
'neck': neck,
"ttf_head": ttf_head,
}
def _forward(self):
body_feats = self.backbone(self.inputs)
body_feats = self.neck(body_feats)
hm, wh = self.ttf_head(body_feats)
if self.training:
return hm, wh
else:
bbox, bbox_num = self.post_process(hm, wh, self.inputs['im_shape'],
self.inputs['scale_factor'])
return bbox, bbox_num
def get_loss(self, ):
loss = {}
heatmap = self.inputs['ttf_heatmap']
box_target = self.inputs['ttf_box_target']
reg_weight = self.inputs['ttf_reg_weight']
hm, wh = self._forward()
head_loss = self.ttf_head.get_loss(hm, wh, heatmap, box_target,
reg_weight)
loss.update(head_loss)
total_loss = paddle.add_n(list(loss.values()))
loss.update({'loss': total_loss})
return loss
def get_pred(self):
bbox_pred, bbox_num = self._forward()
output = {
"bbox": bbox_pred,
"bbox_num": bbox_num,
}
return output

@@ -0,0 +1,124 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from paddlers.models.ppdet.core.workspace import register, create
from .meta_arch import BaseArch
from ..post_process import JDEBBoxPostProcess
__all__ = ['YOLOv3']
@register
class YOLOv3(BaseArch):
__category__ = 'architecture'
__shared__ = ['data_format']
__inject__ = ['post_process']
def __init__(self,
backbone='DarkNet',
neck='YOLOv3FPN',
yolo_head='YOLOv3Head',
post_process='BBoxPostProcess',
data_format='NCHW',
for_mot=False):
"""
YOLOv3 network, see https://arxiv.org/abs/1804.02767
Args:
backbone (nn.Layer): backbone instance
neck (nn.Layer): neck instance
yolo_head (nn.Layer): yolo head instance, e.g. `YOLOv3Head`
post_process (object): `BBoxPostProcess` instance
data_format (str): data format, NCHW or NHWC
for_mot (bool): whether to return extra features for multi-object tracking
models; default False for pure object detection models.
"""
super(YOLOv3, self).__init__(data_format=data_format)
self.backbone = backbone
self.neck = neck
self.yolo_head = yolo_head
self.post_process = post_process
self.for_mot = for_mot
self.return_idx = isinstance(post_process, JDEBBoxPostProcess)
@classmethod
def from_config(cls, cfg, *args, **kwargs):
# backbone
backbone = create(cfg['backbone'])
# fpn
kwargs = {'input_shape': backbone.out_shape}
neck = create(cfg['neck'], **kwargs)
# head
kwargs = {'input_shape': neck.out_shape}
yolo_head = create(cfg['yolo_head'], **kwargs)
return {
'backbone': backbone,
'neck': neck,
"yolo_head": yolo_head,
}
def _forward(self):
body_feats = self.backbone(self.inputs)
neck_feats = self.neck(body_feats, self.for_mot)
if isinstance(neck_feats, dict):
assert self.for_mot == True
emb_feats = neck_feats['emb_feats']
neck_feats = neck_feats['yolo_feats']
if self.training:
yolo_losses = self.yolo_head(neck_feats, self.inputs)
if self.for_mot:
return {'det_losses': yolo_losses, 'emb_feats': emb_feats}
else:
return yolo_losses
else:
yolo_head_outs = self.yolo_head(neck_feats)
if self.for_mot:
boxes_idx, bbox, bbox_num, nms_keep_idx = self.post_process(
yolo_head_outs, self.yolo_head.mask_anchors)
output = {
'bbox': bbox,
'bbox_num': bbox_num,
'boxes_idx': boxes_idx,
'nms_keep_idx': nms_keep_idx,
'emb_feats': emb_feats,
}
else:
if self.return_idx:
_, bbox, bbox_num, _ = self.post_process(
yolo_head_outs, self.yolo_head.mask_anchors)
else:
bbox, bbox_num = self.post_process(
yolo_head_outs, self.yolo_head.mask_anchors,
self.inputs['im_shape'], self.inputs['scale_factor'])
output = {'bbox': bbox, 'bbox_num': bbox_num}
return output
def get_loss(self):
return self._forward()
def get_pred(self):
return self._forward()

@@ -0,0 +1,23 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import utils
from . import task_aligned_assigner
from . import atss_assigner
from . import simota_assigner
from .utils import *
from .task_aligned_assigner import *
from .atss_assigner import *
from .simota_assigner import *

@@ -0,0 +1,211 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppdet.core.workspace import register
from ..ops import iou_similarity
from ..bbox_utils import bbox_center
from .utils import (pad_gt, check_points_inside_bboxes, compute_max_iou_anchor,
compute_max_iou_gt)
@register
class ATSSAssigner(nn.Layer):
"""Bridging the Gap Between Anchor-based and Anchor-free Detection
via Adaptive Training Sample Selection
"""
__shared__ = ['num_classes']
def __init__(self,
topk=9,
num_classes=80,
force_gt_matching=False,
eps=1e-9):
super(ATSSAssigner, self).__init__()
self.topk = topk
self.num_classes = num_classes
self.force_gt_matching = force_gt_matching
self.eps = eps
def _gather_topk_pyramid(self, gt2anchor_distances, num_anchors_list,
pad_gt_mask):
pad_gt_mask = pad_gt_mask.tile([1, 1, self.topk]).astype(paddle.bool)
gt2anchor_distances_list = paddle.split(
gt2anchor_distances, num_anchors_list, axis=-1)
num_anchors_index = np.cumsum(num_anchors_list).tolist()
num_anchors_index = [0, ] + num_anchors_index[:-1]
is_in_topk_list = []
topk_idxs_list = []
for distances, anchors_index in zip(gt2anchor_distances_list,
num_anchors_index):
num_anchors = distances.shape[-1]
topk_metrics, topk_idxs = paddle.topk(
distances, self.topk, axis=-1, largest=False)
topk_idxs_list.append(topk_idxs + anchors_index)
topk_idxs = paddle.where(pad_gt_mask, topk_idxs,
paddle.zeros_like(topk_idxs))
is_in_topk = F.one_hot(topk_idxs, num_anchors).sum(axis=-2)
is_in_topk = paddle.where(is_in_topk > 1,
paddle.zeros_like(is_in_topk),
is_in_topk)
is_in_topk_list.append(
is_in_topk.astype(gt2anchor_distances.dtype))
is_in_topk_list = paddle.concat(is_in_topk_list, axis=-1)
topk_idxs_list = paddle.concat(topk_idxs_list, axis=-1)
return is_in_topk_list, topk_idxs_list
@paddle.no_grad()
def forward(self,
anchor_bboxes,
num_anchors_list,
gt_labels,
gt_bboxes,
bg_index,
gt_scores=None):
r"""This code is based on
https://github.com/fcjian/TOOD/blob/master/mmdet/core/bbox/assigners/atss_assigner.py
The assignment is done in following steps
1. compute iou between all bbox (bbox of all pyramid levels) and gt
2. compute center distance between all bbox and gt
3. on each pyramid level, for each gt, select the k bboxes whose centers
are closest to the gt center, so in total k*l bboxes are selected as
candidates for each gt
4. get the corresponding iou for these candidates, and compute the
mean and std; set mean + std as the iou threshold
5. select these candidates whose iou are greater than or equal to
the threshold as positive
6. limit the positive sample's center in gt
7. if an anchor box is assigned to multiple gts, the one with the
highest iou will be selected.
Args:
anchor_bboxes (Tensor, float32): pre-defined anchors, shape(L, 4),
"xmin, ymin, xmax, ymax" format
num_anchors_list (List): num of anchors in each level
gt_labels (Tensor|List[Tensor], int64): Label of gt_bboxes, shape(B, n, 1)
gt_bboxes (Tensor|List[Tensor], float32): Ground truth bboxes, shape(B, n, 4)
bg_index (int): background index
gt_scores (Tensor|List[Tensor]|None, float32): Score of gt_bboxes,
shape(B, n, 1); if None, it will be initialized with the one-hot label
Returns:
assigned_labels (Tensor): (B, L)
assigned_bboxes (Tensor): (B, L, 4)
assigned_scores (Tensor): (B, L, C)
"""
gt_labels, gt_bboxes, pad_gt_scores, pad_gt_mask = pad_gt(
gt_labels, gt_bboxes, gt_scores)
assert gt_labels.ndim == gt_bboxes.ndim and \
gt_bboxes.ndim == 3
num_anchors, _ = anchor_bboxes.shape
batch_size, num_max_boxes, _ = gt_bboxes.shape
# negative batch
if num_max_boxes == 0:
assigned_labels = paddle.full([batch_size, num_anchors], bg_index)
assigned_bboxes = paddle.zeros([batch_size, num_anchors, 4])
assigned_scores = paddle.zeros(
[batch_size, num_anchors, self.num_classes])
return assigned_labels, assigned_bboxes, assigned_scores
# 1. compute iou between gt and anchor bbox, [B, n, L]
ious = iou_similarity(gt_bboxes.reshape([-1, 4]), anchor_bboxes)
ious = ious.reshape([batch_size, -1, num_anchors])
# 2. compute center distance between all anchors and gt, [B, n, L]
gt_centers = bbox_center(gt_bboxes.reshape([-1, 4])).unsqueeze(1)
anchor_centers = bbox_center(anchor_bboxes)
gt2anchor_distances = (gt_centers - anchor_centers.unsqueeze(0)) \
.norm(2, axis=-1).reshape([batch_size, -1, num_anchors])
# 3. on each pyramid level, selecting topk closest candidates
# based on the center distance, [B, n, L]
is_in_topk, topk_idxs = self._gather_topk_pyramid(
gt2anchor_distances, num_anchors_list, pad_gt_mask)
# 4. get the corresponding iou for these candidates, and compute the
# mean and std, 5. set mean + std as the iou threshold
iou_candidates = ious * is_in_topk
iou_threshold = paddle.index_sample(
iou_candidates.flatten(stop_axis=-2),
topk_idxs.flatten(stop_axis=-2))
iou_threshold = iou_threshold.reshape([batch_size, num_max_boxes, -1])
iou_threshold = iou_threshold.mean(axis=-1, keepdim=True) + \
iou_threshold.std(axis=-1, keepdim=True)
is_in_topk = paddle.where(
iou_candidates > iou_threshold.tile([1, 1, num_anchors]),
is_in_topk, paddle.zeros_like(is_in_topk))
# 6. check the positive sample's center in gt, [B, n, L]
is_in_gts = check_points_inside_bboxes(anchor_centers, gt_bboxes)
# select positive sample, [B, n, L]
mask_positive = is_in_topk * is_in_gts * pad_gt_mask
# 7. if an anchor box is assigned to multiple gts,
# the one with the highest iou will be selected.
mask_positive_sum = mask_positive.sum(axis=-2)
if mask_positive_sum.max() > 1:
mask_multiple_gts = (mask_positive_sum.unsqueeze(1) > 1).tile(
[1, num_max_boxes, 1])
is_max_iou = compute_max_iou_anchor(ious)
mask_positive = paddle.where(mask_multiple_gts, is_max_iou,
mask_positive)
mask_positive_sum = mask_positive.sum(axis=-2)
# 8. make sure every gt_bbox matches the anchor
if self.force_gt_matching:
is_max_iou = compute_max_iou_gt(ious) * pad_gt_mask
mask_max_iou = (is_max_iou.sum(-2, keepdim=True) == 1).tile(
[1, num_max_boxes, 1])
mask_positive = paddle.where(mask_max_iou, is_max_iou,
mask_positive)
mask_positive_sum = mask_positive.sum(axis=-2)
assigned_gt_index = mask_positive.argmax(axis=-2)
assert mask_positive_sum.max() == 1, \
("one anchor just assign one gt, but received not equals 1. "
"Received: %f" % mask_positive_sum.max().item())
# assigned target
batch_ind = paddle.arange(
end=batch_size, dtype=gt_labels.dtype).unsqueeze(-1)
assigned_gt_index = assigned_gt_index + batch_ind * num_max_boxes
assigned_labels = paddle.gather(
gt_labels.flatten(), assigned_gt_index.flatten(), axis=0)
assigned_labels = assigned_labels.reshape([batch_size, num_anchors])
assigned_labels = paddle.where(
mask_positive_sum > 0, assigned_labels,
paddle.full_like(assigned_labels, bg_index))
assigned_bboxes = paddle.gather(
gt_bboxes.reshape([-1, 4]), assigned_gt_index.flatten(), axis=0)
assigned_bboxes = assigned_bboxes.reshape([batch_size, num_anchors, 4])
assigned_scores = F.one_hot(assigned_labels, self.num_classes)
if gt_scores is not None:
gather_scores = paddle.gather(
pad_gt_scores.flatten(), assigned_gt_index.flatten(), axis=0)
gather_scores = gather_scores.reshape([batch_size, num_anchors])
gather_scores = paddle.where(mask_positive_sum > 0, gather_scores,
paddle.zeros_like(gather_scores))
assigned_scores *= gather_scores.unsqueeze(-1)
return assigned_labels, assigned_bboxes, assigned_scores
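# --- Illustrative sketch (editor's addition, not part of the original file) ---
# The core of ATSS is the adaptive IoU threshold from steps 4-5 above: for each
# gt the threshold is mean + std of its top-k candidates' IoUs, so a gt with
# many strong candidates automatically gets a stricter cutoff. A minimal numpy
# version for a single gt; the helper name and IoU values are hypothetical.
def _demo_atss_threshold():
    import numpy as np
    candidate_ious = np.array([0.71, 0.65, 0.62, 0.40, 0.12, 0.08])
    thr = candidate_ious.mean() + candidate_ious.std()  # ~0.68
    positives = candidate_ious > thr                    # only the 0.71 candidate survives
    return thr, positives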

@@ -0,0 +1,262 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# The code is based on:
# https://github.com/open-mmlab/mmdetection/blob/master/mmdet/core/bbox/assigners/sim_ota_assigner.py
import paddle
import numpy as np
import paddle.nn.functional as F
from paddlers.models.ppdet.modeling.losses.varifocal_loss import varifocal_loss
from paddlers.models.ppdet.modeling.bbox_utils import batch_bbox_overlaps
from paddlers.models.ppdet.core.workspace import register
@register
class SimOTAAssigner(object):
"""Computes matching between predictions and ground truth.
Args:
center_radius (int | float, optional): Ground truth center size
to judge whether a prior is in center. Default 2.5.
candidate_topk (int, optional): The candidate top-k which is used to
get the top-k ious for calculating dynamic-k. Default 10.
iou_weight (int | float, optional): The scale factor for regression
iou cost. Default 3.0.
cls_weight (int | float, optional): The scale factor for classification
cost. Default 1.0.
num_classes (int): The num_classes of dataset.
use_vfl (bool): Whether to use varifocal_loss when calculating the cost matrix.
"""
__shared__ = ['num_classes']
def __init__(self,
center_radius=2.5,
candidate_topk=10,
iou_weight=3.0,
cls_weight=1.0,
num_classes=80,
use_vfl=True):
self.center_radius = center_radius
self.candidate_topk = candidate_topk
self.iou_weight = iou_weight
self.cls_weight = cls_weight
self.num_classes = num_classes
self.use_vfl = use_vfl
def get_in_gt_and_in_center_info(self, flatten_center_and_stride,
gt_bboxes):
num_gt = gt_bboxes.shape[0]
flatten_x = flatten_center_and_stride[:, 0].unsqueeze(1).tile(
[1, num_gt])
flatten_y = flatten_center_and_stride[:, 1].unsqueeze(1).tile(
[1, num_gt])
flatten_stride_x = flatten_center_and_stride[:, 2].unsqueeze(1).tile(
[1, num_gt])
flatten_stride_y = flatten_center_and_stride[:, 3].unsqueeze(1).tile(
[1, num_gt])
# is prior centers in gt bboxes, shape: [n_center, n_gt]
l_ = flatten_x - gt_bboxes[:, 0]
t_ = flatten_y - gt_bboxes[:, 1]
r_ = gt_bboxes[:, 2] - flatten_x
b_ = gt_bboxes[:, 3] - flatten_y
deltas = paddle.stack([l_, t_, r_, b_], axis=1)
is_in_gts = deltas.min(axis=1) > 0
is_in_gts_all = is_in_gts.sum(axis=1) > 0
# is prior centers in gt centers
gt_center_xs = (gt_bboxes[:, 0] + gt_bboxes[:, 2]) / 2.0
gt_center_ys = (gt_bboxes[:, 1] + gt_bboxes[:, 3]) / 2.0
ct_bound_l = gt_center_xs - self.center_radius * flatten_stride_x
ct_bound_t = gt_center_ys - self.center_radius * flatten_stride_y
ct_bound_r = gt_center_xs + self.center_radius * flatten_stride_x
ct_bound_b = gt_center_ys + self.center_radius * flatten_stride_y
cl_ = flatten_x - ct_bound_l
ct_ = flatten_y - ct_bound_t
cr_ = ct_bound_r - flatten_x
cb_ = ct_bound_b - flatten_y
ct_deltas = paddle.stack([cl_, ct_, cr_, cb_], axis=1)
is_in_cts = ct_deltas.min(axis=1) > 0
is_in_cts_all = is_in_cts.sum(axis=1) > 0
# in any of gts or gt centers, shape: [n_center]
is_in_gts_or_centers_all = paddle.logical_or(is_in_gts_all,
is_in_cts_all)
is_in_gts_or_centers_all_inds = paddle.nonzero(
is_in_gts_or_centers_all).squeeze(1)
# both in gts and gt centers, shape: [num_fg, num_gt]
is_in_gts_and_centers = paddle.logical_and(
paddle.gather(
is_in_gts.cast('int'), is_in_gts_or_centers_all_inds,
axis=0).cast('bool'),
paddle.gather(
is_in_cts.cast('int'), is_in_gts_or_centers_all_inds,
axis=0).cast('bool'))
return is_in_gts_or_centers_all, is_in_gts_or_centers_all_inds, is_in_gts_and_centers
def dynamic_k_matching(self, cost_matrix, pairwise_ious, num_gt):
match_matrix = np.zeros_like(cost_matrix.numpy())
# select candidate topk ious for dynamic-k calculation
topk_ious, _ = paddle.topk(pairwise_ious, self.candidate_topk, axis=0)
# calculate dynamic k for each gt
dynamic_ks = paddle.clip(topk_ious.sum(0).cast('int'), min=1)
for gt_idx in range(num_gt):
_, pos_idx = paddle.topk(
cost_matrix[:, gt_idx], k=dynamic_ks[gt_idx], largest=False)
match_matrix[:, gt_idx][pos_idx.numpy()] = 1.0
del topk_ious, dynamic_ks, pos_idx
# match points more than two gts
extra_match_gts_mask = match_matrix.sum(1) > 1
if extra_match_gts_mask.sum() > 0:
cost_matrix = cost_matrix.numpy()
cost_argmin = np.argmin(
cost_matrix[extra_match_gts_mask, :], axis=1)
match_matrix[extra_match_gts_mask, :] *= 0.0
match_matrix[extra_match_gts_mask, cost_argmin] = 1.0
# get foreground mask
match_fg_mask_inmatrix = match_matrix.sum(1) > 0
match_gt_inds_to_fg = match_matrix[match_fg_mask_inmatrix, :].argmax(1)
return match_gt_inds_to_fg, match_fg_mask_inmatrix
def get_sample(self, assign_gt_inds, gt_bboxes):
pos_inds = np.unique(np.nonzero(assign_gt_inds > 0)[0])
neg_inds = np.unique(np.nonzero(assign_gt_inds == 0)[0])
pos_assigned_gt_inds = assign_gt_inds[pos_inds] - 1
if gt_bboxes.size == 0:
# hack for index error case
assert pos_assigned_gt_inds.size == 0
pos_gt_bboxes = np.empty_like(gt_bboxes).reshape(-1, 4)
else:
if len(gt_bboxes.shape) < 2:
gt_bboxes = gt_bboxes.reshape(-1, 4)
pos_gt_bboxes = gt_bboxes[pos_assigned_gt_inds, :]
return pos_inds, neg_inds, pos_gt_bboxes, pos_assigned_gt_inds
def __call__(self,
flatten_cls_pred_scores,
flatten_center_and_stride,
flatten_bboxes,
gt_bboxes,
gt_labels,
eps=1e-7):
"""Assign gt to priors using SimOTA.
TODO: add comment.
Returns:
assign_result: The assigned result.
"""
num_gt = gt_bboxes.shape[0]
num_bboxes = flatten_bboxes.shape[0]
if num_gt == 0 or num_bboxes == 0:
# No ground truth or boxes
label = np.ones([num_bboxes], dtype=np.int64) * self.num_classes
label_weight = np.ones([num_bboxes], dtype=np.float32)
bbox_target = np.zeros_like(flatten_center_and_stride)
return 0, label, label_weight, bbox_target
is_in_gts_or_centers_all, is_in_gts_or_centers_all_inds, is_in_boxes_and_center = self.get_in_gt_and_in_center_info(
flatten_center_and_stride, gt_bboxes)
# bboxes and scores to calculate matrix
valid_flatten_bboxes = flatten_bboxes[is_in_gts_or_centers_all_inds]
valid_cls_pred_scores = flatten_cls_pred_scores[
is_in_gts_or_centers_all_inds]
num_valid_bboxes = valid_flatten_bboxes.shape[0]
pairwise_ious = batch_bbox_overlaps(valid_flatten_bboxes,
gt_bboxes) # [num_points,num_gts]
if self.use_vfl:
gt_vfl_labels = gt_labels.squeeze(-1).unsqueeze(0).tile(
[num_valid_bboxes, 1]).reshape([-1])
valid_pred_scores = valid_cls_pred_scores.unsqueeze(1).tile(
[1, num_gt, 1]).reshape([-1, self.num_classes])
vfl_score = np.zeros(valid_pred_scores.shape)
vfl_score[np.arange(0, vfl_score.shape[0]), gt_vfl_labels.numpy(
)] = pairwise_ious.reshape([-1])
vfl_score = paddle.to_tensor(vfl_score)
losses_vfl = varifocal_loss(
valid_pred_scores, vfl_score,
use_sigmoid=False).reshape([num_valid_bboxes, num_gt])
losses_giou = batch_bbox_overlaps(
valid_flatten_bboxes, gt_bboxes, mode='giou')
cost_matrix = (
losses_vfl * self.cls_weight + losses_giou * self.iou_weight +
paddle.logical_not(is_in_boxes_and_center).cast('float32') *
100000000)
else:
iou_cost = -paddle.log(pairwise_ious + eps)
gt_onehot_label = (F.one_hot(
gt_labels.squeeze(-1).cast(paddle.int64),
flatten_cls_pred_scores.shape[-1]).cast('float32').unsqueeze(0)
.tile([num_valid_bboxes, 1, 1]))
valid_pred_scores = valid_cls_pred_scores.unsqueeze(1).tile(
[1, num_gt, 1])
cls_cost = F.binary_cross_entropy(
valid_pred_scores, gt_onehot_label, reduction='none').sum(-1)
cost_matrix = (
cls_cost * self.cls_weight + iou_cost * self.iou_weight +
paddle.logical_not(is_in_boxes_and_center).cast('float32') *
100000000)
match_gt_inds_to_fg, match_fg_mask_inmatrix = \
self.dynamic_k_matching(
cost_matrix, pairwise_ious, num_gt)
# sample and assign results
assigned_gt_inds = np.zeros([num_bboxes], dtype=np.int64)
match_fg_mask_inall = np.zeros_like(assigned_gt_inds)
match_fg_mask_inall[is_in_gts_or_centers_all.numpy(
)] = match_fg_mask_inmatrix
assigned_gt_inds[match_fg_mask_inall.astype(
bool)] = match_gt_inds_to_fg + 1  # builtin bool: np.bool is removed in recent NumPy
pos_inds, neg_inds, pos_gt_bboxes, pos_assigned_gt_inds \
= self.get_sample(assigned_gt_inds, gt_bboxes.numpy())
bbox_target = np.zeros_like(flatten_bboxes)
bbox_weight = np.zeros_like(flatten_bboxes)
label = np.ones([num_bboxes], dtype=np.int64) * self.num_classes
label_weight = np.zeros([num_bboxes], dtype=np.float32)
if len(pos_inds) > 0:
gt_labels = gt_labels.numpy()
pos_bbox_targets = pos_gt_bboxes
bbox_target[pos_inds, :] = pos_bbox_targets
bbox_weight[pos_inds, :] = 1.0
if not np.any(gt_labels):
label[pos_inds] = 0
else:
label[pos_inds] = gt_labels.squeeze(-1)[pos_assigned_gt_inds]
label_weight[pos_inds] = 1.0
if len(neg_inds) > 0:
label_weight[neg_inds] = 1.0
pos_num = max(pos_inds.size, 1)
return pos_num, label, label_weight, bbox_target
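# Illustrative usage sketch (not part of the original file; shapes are
# assumptions for a single image): given N flattened priors and
# `num_classes` classes, the assigner above is typically called as
#
#   pos_num, label, label_weight, bbox_target = assigner(
#       flatten_cls_pred_scores,    # [N, num_classes]
#       flatten_center_and_stride,  # [N, 4]
#       flatten_bboxes,             # [N, 4]
#       gt_bboxes,                  # [num_gt, 4]
#       gt_labels)                  # [num_gt, 1]
#
# label[i] == num_classes marks prior i as background.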

@ -0,0 +1,158 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppdet.core.workspace import register
from ..bbox_utils import iou_similarity
from .utils import (pad_gt, gather_topk_anchors, check_points_inside_bboxes,
compute_max_iou_anchor)
@register
class TaskAlignedAssigner(nn.Layer):
"""TOOD: Task-aligned One-stage Object Detection
"""
def __init__(self, topk=13, alpha=1.0, beta=6.0, eps=1e-9):
super(TaskAlignedAssigner, self).__init__()
self.topk = topk
self.alpha = alpha
self.beta = beta
self.eps = eps
@paddle.no_grad()
def forward(self,
pred_scores,
pred_bboxes,
anchor_points,
gt_labels,
gt_bboxes,
bg_index,
gt_scores=None):
r"""This code is based on
https://github.com/fcjian/TOOD/blob/master/mmdet/core/bbox/assigners/task_aligned_assigner.py
The assignment is done in the following steps:
1. compute the alignment metric between all bboxes (of all pyramid levels) and each gt
2. select the top-k bboxes as candidates for each gt
3. restrict the positive samples' centers to lie inside the gt (an anchor-free
detector can only predict positive distances)
4. if an anchor box is assigned to multiple gts, the one with the
highest iou will be selected.
Args:
pred_scores (Tensor, float32): predicted class probability, shape(B, L, C)
pred_bboxes (Tensor, float32): predicted bounding boxes, shape(B, L, 4)
anchor_points (Tensor, float32): pre-defined anchors, shape(L, 2), "cxcy" format
gt_labels (Tensor|List[Tensor], int64): Label of gt_bboxes, shape(B, n, 1)
gt_bboxes (Tensor|List[Tensor], float32): Ground truth bboxes, shape(B, n, 4)
bg_index (int): background index
gt_scores (Tensor|List[Tensor]|None, float32): Score of gt_bboxes,
shape(B, n, 1); if None, it will be initialized with the one-hot label
Returns:
assigned_labels (Tensor): (B, L)
assigned_bboxes (Tensor): (B, L, 4)
assigned_scores (Tensor): (B, L, C)
"""
assert pred_scores.ndim == pred_bboxes.ndim
gt_labels, gt_bboxes, pad_gt_scores, pad_gt_mask = pad_gt(
gt_labels, gt_bboxes, gt_scores)
assert gt_labels.ndim == gt_bboxes.ndim and \
gt_bboxes.ndim == 3
batch_size, num_anchors, num_classes = pred_scores.shape
_, num_max_boxes, _ = gt_bboxes.shape
# negative batch
if num_max_boxes == 0:
assigned_labels = paddle.full([batch_size, num_anchors], bg_index)
assigned_bboxes = paddle.zeros([batch_size, num_anchors, 4])
assigned_scores = paddle.zeros(
[batch_size, num_anchors, num_classes])
return assigned_labels, assigned_bboxes, assigned_scores
# compute iou between gt and pred bbox, [B, n, L]
ious = iou_similarity(gt_bboxes, pred_bboxes)
# gather pred bboxes class score
pred_scores = pred_scores.transpose([0, 2, 1])
batch_ind = paddle.arange(
end=batch_size, dtype=gt_labels.dtype).unsqueeze(-1)
gt_labels_ind = paddle.stack(
[batch_ind.tile([1, num_max_boxes]), gt_labels.squeeze(-1)],
axis=-1)
bbox_cls_scores = paddle.gather_nd(pred_scores, gt_labels_ind)
# compute alignment metrics, [B, n, L]
alignment_metrics = bbox_cls_scores.pow(self.alpha) * ious.pow(
self.beta)
# check the positive sample's center in gt, [B, n, L]
is_in_gts = check_points_inside_bboxes(anchor_points, gt_bboxes)
# select topk largest alignment metrics pred bbox as candidates
# for each gt, [B, n, L]
is_in_topk = gather_topk_anchors(
alignment_metrics * is_in_gts,
self.topk,
topk_mask=pad_gt_mask.tile([1, 1, self.topk]).astype(paddle.bool))
# select positive sample, [B, n, L]
mask_positive = is_in_topk * is_in_gts * pad_gt_mask
# if an anchor box is assigned to multiple gts,
# the one with the highest iou will be selected, [B, n, L]
mask_positive_sum = mask_positive.sum(axis=-2)
if mask_positive_sum.max() > 1:
mask_multiple_gts = (mask_positive_sum.unsqueeze(1) > 1).tile(
[1, num_max_boxes, 1])
is_max_iou = compute_max_iou_anchor(ious)
mask_positive = paddle.where(mask_multiple_gts, is_max_iou,
mask_positive)
mask_positive_sum = mask_positive.sum(axis=-2)
assigned_gt_index = mask_positive.argmax(axis=-2)
assert mask_positive_sum.max() == 1, \
("one anchor just assign one gt, but received not equals 1. "
"Received: %f" % mask_positive_sum.max().item())
# assigned target
assigned_gt_index = assigned_gt_index + batch_ind * num_max_boxes
assigned_labels = paddle.gather(
gt_labels.flatten(), assigned_gt_index.flatten(), axis=0)
assigned_labels = assigned_labels.reshape([batch_size, num_anchors])
assigned_labels = paddle.where(
mask_positive_sum > 0, assigned_labels,
paddle.full_like(assigned_labels, bg_index))
assigned_bboxes = paddle.gather(
gt_bboxes.reshape([-1, 4]), assigned_gt_index.flatten(), axis=0)
assigned_bboxes = assigned_bboxes.reshape([batch_size, num_anchors, 4])
assigned_scores = F.one_hot(assigned_labels, num_classes)
# rescale alignment metrics
alignment_metrics *= mask_positive
max_metrics_per_instance = alignment_metrics.max(axis=-1, keepdim=True)
max_ious_per_instance = (ious * mask_positive).max(axis=-1,
keepdim=True)
alignment_metrics = alignment_metrics / (
max_metrics_per_instance + self.eps) * max_ious_per_instance
alignment_metrics = alignment_metrics.max(-2).unsqueeze(-1)
assigned_scores = assigned_scores * alignment_metrics
return assigned_labels, assigned_bboxes, assigned_scores
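# Illustrative usage sketch (not part of the original file; shapes follow the
# docstring above): with batch size B, L anchors and C classes,
#
#   assigner = TaskAlignedAssigner(topk=13)
#   labels, bboxes, scores = assigner(
#       pred_scores,    # [B, L, C]
#       pred_bboxes,    # [B, L, 4]
#       anchor_points,  # [L, 2]
#       gt_labels,      # [B, n, 1]
#       gt_bboxes,      # [B, n, 4]
#       bg_index=C)
#
# returns labels [B, L], bboxes [B, L, 4] and soft scores [B, L, C].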

@ -0,0 +1,195 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.nn.functional as F
__all__ = [
'pad_gt', 'gather_topk_anchors', 'check_points_inside_bboxes',
'compute_max_iou_anchor', 'compute_max_iou_gt',
'generate_anchors_for_grid_cell'
]
def pad_gt(gt_labels, gt_bboxes, gt_scores=None):
r""" Pad 0 in gt_labels and gt_bboxes.
Args:
gt_labels (Tensor|List[Tensor], int64): Label of gt_bboxes,
shape is [B, n, 1] or [[n_1, 1], [n_2, 1], ...], here n = sum(n_i)
gt_bboxes (Tensor|List[Tensor], float32): Ground truth bboxes,
shape is [B, n, 4] or [[n_1, 4], [n_2, 4], ...], here n = sum(n_i)
gt_scores (Tensor|List[Tensor]|None, float32): Score of gt_bboxes,
shape is [B, n, 1] or [[n_1, 1], [n_2, 1], ...], here n = sum(n_i)
Returns:
pad_gt_labels (Tensor, int64): shape[B, n, 1]
pad_gt_bboxes (Tensor, float32): shape[B, n, 4]
pad_gt_scores (Tensor, float32): shape[B, n, 1]
pad_gt_mask (Tensor, float32): shape[B, n, 1], 1 means bbox, 0 means no bbox
"""
if isinstance(gt_labels, paddle.Tensor) and isinstance(gt_bboxes,
paddle.Tensor):
assert gt_labels.ndim == gt_bboxes.ndim and \
gt_bboxes.ndim == 3
pad_gt_mask = (
gt_bboxes.sum(axis=-1, keepdim=True) > 0).astype(gt_bboxes.dtype)
if gt_scores is None:
gt_scores = pad_gt_mask.clone()
assert gt_labels.ndim == gt_scores.ndim
return gt_labels, gt_bboxes, gt_scores, pad_gt_mask
elif isinstance(gt_labels, list) and isinstance(gt_bboxes, list):
assert len(gt_labels) == len(gt_bboxes), \
'`gt_labels` and `gt_bboxes` must have the same length. '
num_max_boxes = max([len(a) for a in gt_bboxes])
batch_size = len(gt_bboxes)
# pad label and bbox
pad_gt_labels = paddle.zeros(
[batch_size, num_max_boxes, 1], dtype=gt_labels[0].dtype)
pad_gt_bboxes = paddle.zeros(
[batch_size, num_max_boxes, 4], dtype=gt_bboxes[0].dtype)
pad_gt_scores = paddle.zeros(
[batch_size, num_max_boxes, 1], dtype=gt_bboxes[0].dtype)
pad_gt_mask = paddle.zeros(
[batch_size, num_max_boxes, 1], dtype=gt_bboxes[0].dtype)
for i, (label, bbox) in enumerate(zip(gt_labels, gt_bboxes)):
if len(label) > 0 and len(bbox) > 0:
pad_gt_labels[i, :len(label)] = label
pad_gt_bboxes[i, :len(bbox)] = bbox
pad_gt_mask[i, :len(bbox)] = 1.
if gt_scores is not None:
pad_gt_scores[i, :len(gt_scores[i])] = gt_scores[i]
if gt_scores is None:
pad_gt_scores = pad_gt_mask.clone()
return pad_gt_labels, pad_gt_bboxes, pad_gt_scores, pad_gt_mask
else:
raise ValueError('The input `gt_labels` or `gt_bboxes` is invalid! ')
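# Illustrative example (not part of the original file): padding a batch of two
# images with 2 and 3 gt boxes respectively to the common size 3,
#
#   gt_labels = [paddle.zeros([2, 1], 'int64'), paddle.zeros([3, 1], 'int64')]
#   gt_bboxes = [paddle.rand([2, 4]), paddle.rand([3, 4])]
#   labels, bboxes, scores, mask = pad_gt(gt_labels, gt_bboxes)
#
# labels: [2, 3, 1], bboxes: [2, 3, 4], scores/mask: [2, 3, 1];
# mask[0, 2] == 0 marks the padded (empty) slot of the first image.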
def gather_topk_anchors(metrics, topk, largest=True, topk_mask=None, eps=1e-9):
r"""
Args:
metrics (Tensor, float32): shape[B, n, L], n: num_gts, L: num_anchors
topk (int): The number of top elements to look for along the axis.
largest (bool): if True, select the top-k largest values (descending
sort); otherwise select the top-k smallest values (ascending sort).
Default: True
topk_mask (Tensor, bool|None): shape[B, n, topk], ignore bbox mask,
Default: None
eps (float): Default: 1e-9
Returns:
is_in_topk (Tensor, float32): shape[B, n, L], value=1. means selected
"""
num_anchors = metrics.shape[-1]
topk_metrics, topk_idxs = paddle.topk(
metrics, topk, axis=-1, largest=largest)
if topk_mask is None:
topk_mask = (topk_metrics.max(axis=-1, keepdim=True) > eps).tile(
[1, 1, topk])
topk_idxs = paddle.where(topk_mask, topk_idxs,
paddle.zeros_like(topk_idxs))
is_in_topk = F.one_hot(topk_idxs, num_anchors).sum(axis=-2)
is_in_topk = paddle.where(is_in_topk > 1,
paddle.zeros_like(is_in_topk), is_in_topk)
return is_in_topk.astype(metrics.dtype)
def check_points_inside_bboxes(points, bboxes, eps=1e-9):
r"""
Args:
points (Tensor, float32): shape[L, 2], "xy" format, L: num_anchors
bboxes (Tensor, float32): shape[B, n, 4], "xmin, ymin, xmax, ymax" format
eps (float): Default: 1e-9
Returns:
is_in_bboxes (Tensor, float32): shape[B, n, L], value=1. means selected
"""
points = points.unsqueeze([0, 1])
x, y = points.chunk(2, axis=-1)
xmin, ymin, xmax, ymax = bboxes.unsqueeze(2).chunk(4, axis=-1)
l = x - xmin
t = y - ymin
r = xmax - x
b = ymax - y
bbox_ltrb = paddle.concat([l, t, r, b], axis=-1)
return (bbox_ltrb.min(axis=-1) > eps).astype(bboxes.dtype)
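# Illustrative example (not part of the original file): one anchor point checked
# against one gt box per image,
#
#   points = paddle.to_tensor([[5., 5.]])              # [L=1, 2]
#   bboxes = paddle.to_tensor([[[0., 0., 10., 10.]]])  # [B=1, n=1, 4]
#   check_points_inside_bboxes(points, bboxes)         # [[[1.]]], shape [B, n, L]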
def compute_max_iou_anchor(ious):
r"""
For each anchor, find the GT with the largest IOU.
Args:
ious (Tensor, float32): shape[B, n, L], n: num_gts, L: num_anchors
Returns:
is_max_iou (Tensor, float32): shape[B, n, L], value=1. means selected
"""
num_max_boxes = ious.shape[-2]
max_iou_index = ious.argmax(axis=-2)
is_max_iou = F.one_hot(max_iou_index, num_max_boxes).transpose([0, 2, 1])
return is_max_iou.astype(ious.dtype)
def compute_max_iou_gt(ious):
r"""
For each GT, find the anchor with the largest IOU.
Args:
ious (Tensor, float32): shape[B, n, L], n: num_gts, L: num_anchors
Returns:
is_max_iou (Tensor, float32): shape[B, n, L], value=1. means selected
"""
num_anchors = ious.shape[-1]
max_iou_index = ious.argmax(axis=-1)
is_max_iou = F.one_hot(max_iou_index, num_anchors)
return is_max_iou.astype(ious.dtype)
def generate_anchors_for_grid_cell(feats,
fpn_strides,
grid_cell_size=5.0,
grid_cell_offset=0.5):
r"""
Like ATSS, generate anchors based on grid size.
Args:
feats (List[Tensor]): shape[s, (b, c, h, w)]
fpn_strides (tuple|list): shape[s], stride for each scale feature
grid_cell_size (float): anchor size, expressed as a multiple of the feature map stride
grid_cell_offset (float): center offset within each cell, in the range [0, 1]
Returns:
anchors (List[Tensor]): shape[s, (l, 4)]
num_anchors_list (List[int]): shape[s]
stride_tensor_list (List[Tensor]): shape[s, (l, 1)]
"""
assert len(feats) == len(fpn_strides)
anchors = []
num_anchors_list = []
stride_tensor_list = []
for feat, stride in zip(feats, fpn_strides):
_, _, h, w = feat.shape
cell_half_size = grid_cell_size * stride * 0.5
shift_x = (paddle.arange(end=w) + grid_cell_offset) * stride
shift_y = (paddle.arange(end=h) + grid_cell_offset) * stride
shift_y, shift_x = paddle.meshgrid(shift_y, shift_x)
anchor = paddle.stack(
[
shift_x - cell_half_size, shift_y - cell_half_size,
shift_x + cell_half_size, shift_y + cell_half_size
],
axis=-1).astype(feat.dtype)
anchors.append(anchor.reshape([-1, 4]))
num_anchors_list.append(len(anchors[-1]))
stride_tensor_list.append(
paddle.full([num_anchors_list[-1], 1], stride))
return anchors, num_anchors_list, stride_tensor_list
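# Illustrative usage sketch (not part of the original file): three NCHW feature
# maps with strides [8, 16, 32] yield one set of grid-cell anchors per level,
#
#   feats = [paddle.rand([1, 32, 80, 80]), paddle.rand([1, 64, 40, 40]),
#            paddle.rand([1, 128, 20, 20])]
#   anchors, num_anchors_list, strides = generate_anchors_for_grid_cell(
#       feats, fpn_strides=[8, 16, 32])
#
# num_anchors_list == [6400, 1600, 400]; anchors[0] has shape [6400, 4].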

@ -0,0 +1,49 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import vgg
from . import resnet
from . import darknet
from . import mobilenet_v1
from . import mobilenet_v3
from . import hrnet
from . import lite_hrnet
from . import blazenet
from . import ghostnet
from . import senet
from . import res2net
from . import dla
from . import shufflenet_v2
from . import swin_transformer
from . import lcnet
from . import hardnet
from . import esnet
from .vgg import *
from .resnet import *
from .darknet import *
from .mobilenet_v1 import *
from .mobilenet_v3 import *
from .hrnet import *
from .lite_hrnet import *
from .blazenet import *
from .ghostnet import *
from .senet import *
from .res2net import *
from .dla import *
from .shufflenet_v2 import *
from .swin_transformer import *
from .lcnet import *
from .hardnet import *
from .esnet import *

@ -0,0 +1,320 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
from paddle.nn.initializer import KaimingNormal
from paddlers.models.ppdet.core.workspace import register, serializable
from ..shape_spec import ShapeSpec
__all__ = ['BlazeNet']
def hard_swish(x):
return x * F.relu6(x + 3) / 6.
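# Note added for clarity: this is the standard hard-swish activation
# x * relu6(x + 3) / 6, e.g. hard_swish(1.0) == 1.0 * 4.0 / 6.0 ~= 0.667.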
class ConvBNLayer(nn.Layer):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride,
padding,
num_groups=1,
act='relu',
conv_lr=0.1,
conv_decay=0.,
norm_decay=0.,
norm_type='bn',
name=None):
super(ConvBNLayer, self).__init__()
self.act = act
self._conv = nn.Conv2D(
in_channels,
out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
groups=num_groups,
weight_attr=ParamAttr(
learning_rate=conv_lr, initializer=KaimingNormal()),
bias_attr=False)
if norm_type in ['bn', 'sync_bn']:
self._batch_norm = nn.BatchNorm2D(out_channels)
def forward(self, x):
x = self._conv(x)
x = self._batch_norm(x)
if self.act == "relu":
x = F.relu(x)
elif self.act == "relu6":
x = F.relu6(x)
elif self.act == 'leaky':
x = F.leaky_relu(x)
elif self.act == 'hard_swish':
x = hard_swish(x)
return x
class BlazeBlock(nn.Layer):
def __init__(self,
in_channels,
out_channels1,
out_channels2,
double_channels=None,
stride=1,
use_5x5kernel=True,
act='relu',
name=None):
super(BlazeBlock, self).__init__()
assert stride in [1, 2]
self.use_pool = stride != 1
self.use_double_block = double_channels is not None
self.conv_dw = []
if use_5x5kernel:
self.conv_dw.append(
self.add_sublayer(
name + "1_dw",
ConvBNLayer(
in_channels=in_channels,
out_channels=out_channels1,
kernel_size=5,
stride=stride,
padding=2,
num_groups=out_channels1,
name=name + "1_dw")))
else:
self.conv_dw.append(
self.add_sublayer(
name + "1_dw_1",
ConvBNLayer(
in_channels=in_channels,
out_channels=out_channels1,
kernel_size=3,
stride=1,
padding=1,
num_groups=out_channels1,
name=name + "1_dw_1")))
self.conv_dw.append(
self.add_sublayer(
name + "1_dw_2",
ConvBNLayer(
in_channels=out_channels1,
out_channels=out_channels1,
kernel_size=3,
stride=stride,
padding=1,
num_groups=out_channels1,
name=name + "1_dw_2")))
self.act = act if self.use_double_block else None
self.conv_pw = ConvBNLayer(
in_channels=out_channels1,
out_channels=out_channels2,
kernel_size=1,
stride=1,
padding=0,
act=self.act,
name=name + "1_sep")
if self.use_double_block:
self.conv_dw2 = []
if use_5x5kernel:
self.conv_dw2.append(
self.add_sublayer(
name + "2_dw",
ConvBNLayer(
in_channels=out_channels2,
out_channels=out_channels2,
kernel_size=5,
stride=1,
padding=2,
num_groups=out_channels2,
name=name + "2_dw")))
else:
self.conv_dw2.append(
self.add_sublayer(
name + "2_dw_1",
ConvBNLayer(
in_channels=out_channels2,
out_channels=out_channels2,
kernel_size=3,
stride=1,
padding=1,
num_groups=out_channels2,
name=name + "1_dw_1")))
self.conv_dw2.append(
self.add_sublayer(
name + "2_dw_2",
ConvBNLayer(
in_channels=out_channels2,
out_channels=out_channels2,
kernel_size=3,
stride=1,
padding=1,
num_groups=out_channels2,
name=name + "2_dw_2")))
self.conv_pw2 = ConvBNLayer(
in_channels=out_channels2,
out_channels=double_channels,
kernel_size=1,
stride=1,
padding=0,
name=name + "2_sep")
# shortcut
if self.use_pool:
shortcut_channel = double_channels or out_channels2
self._shortcut = []
self._shortcut.append(
self.add_sublayer(
name + '_shortcut_pool',
nn.MaxPool2D(
kernel_size=stride, stride=stride, ceil_mode=True)))
self._shortcut.append(
self.add_sublayer(
name + '_shortcut_conv',
ConvBNLayer(
in_channels=in_channels,
out_channels=shortcut_channel,
kernel_size=1,
stride=1,
padding=0,
name="shortcut" + name)))
def forward(self, x):
y = x
for conv_dw_block in self.conv_dw:
y = conv_dw_block(y)
y = self.conv_pw(y)
if self.use_double_block:
for conv_dw2_block in self.conv_dw2:
y = conv_dw2_block(y)
y = self.conv_pw2(y)
if self.use_pool:
for shortcut in self._shortcut:
x = shortcut(x)
return F.relu(paddle.add(x, y))
@register
@serializable
class BlazeNet(nn.Layer):
"""
BlazeFace, see https://arxiv.org/abs/1907.05047
Args:
blaze_filters (list): number of filters for each blaze block.
double_blaze_filters (list): number of filters for each double_blaze block.
use_5x5kernel (bool): whether to use a 5x5 kernel in the depth-wise conv.
"""
def __init__(self,
blaze_filters=[[24, 24], [24, 24], [24, 48, 2], [48, 48],
[48, 48]],
double_blaze_filters=[[48, 24, 96, 2], [96, 24, 96],
[96, 24, 96], [96, 24, 96, 2],
[96, 24, 96], [96, 24, 96]],
use_5x5kernel=True,
act=None):
super(BlazeNet, self).__init__()
conv1_num_filters = blaze_filters[0][0]
self.conv1 = ConvBNLayer(
in_channels=3,
out_channels=conv1_num_filters,
kernel_size=3,
stride=2,
padding=1,
name="conv1")
in_channels = conv1_num_filters
self.blaze_block = []
self._out_channels = []
for k, v in enumerate(blaze_filters):
assert len(v) in [2, 3], \
"blaze_filters {} not in [2, 3]"
if len(v) == 2:
self.blaze_block.append(
self.add_sublayer(
'blaze_{}'.format(k),
BlazeBlock(
in_channels,
v[0],
v[1],
use_5x5kernel=use_5x5kernel,
act=act,
name='blaze_{}'.format(k))))
elif len(v) == 3:
self.blaze_block.append(
self.add_sublayer(
'blaze_{}'.format(k),
BlazeBlock(
in_channels,
v[0],
v[1],
stride=v[2],
use_5x5kernel=use_5x5kernel,
act=act,
name='blaze_{}'.format(k))))
in_channels = v[1]
for k, v in enumerate(double_blaze_filters):
assert len(v) in [3, 4], \
"blaze_filters {} not in [3, 4]"
if len(v) == 3:
self.blaze_block.append(
self.add_sublayer(
'double_blaze_{}'.format(k),
BlazeBlock(
in_channels,
v[0],
v[1],
double_channels=v[2],
use_5x5kernel=use_5x5kernel,
act=act,
name='double_blaze_{}'.format(k))))
elif len(v) == 4:
self.blaze_block.append(
self.add_sublayer(
'double_blaze_{}'.format(k),
BlazeBlock(
in_channels,
v[0],
v[1],
double_channels=v[2],
stride=v[3],
use_5x5kernel=use_5x5kernel,
act=act,
name='double_blaze_{}'.format(k))))
in_channels = v[2]
self._out_channels.append(in_channels)
def forward(self, inputs):
outs = []
y = self.conv1(inputs['image'])
for block in self.blaze_block:
y = block(y)
outs.append(y)
return [outs[-4], outs[-1]]
@property
def out_shape(self):
return [
ShapeSpec(channels=c)
for c in [self._out_channels[-4], self._out_channels[-1]]
]
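# Illustrative note (not part of the original file): with the default
# blaze_filters/double_blaze_filters above, forward() returns two feature maps
# (strides 8 and 16 with respect to the input) and out_shape reports 96
# channels for both:
#
#   backbone = BlazeNet()
#   [spec.channels for spec in backbone.out_shape]  # [96, 96]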

@ -0,0 +1,340 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppdet.core.workspace import register, serializable
from paddlers.models.ppdet.modeling.ops import batch_norm, mish
from ..shape_spec import ShapeSpec
__all__ = ['DarkNet', 'ConvBNLayer']
class ConvBNLayer(nn.Layer):
def __init__(self,
ch_in,
ch_out,
filter_size=3,
stride=1,
groups=1,
padding=0,
norm_type='bn',
norm_decay=0.,
act="leaky",
freeze_norm=False,
data_format='NCHW',
name=''):
"""
conv + bn + activation layer
Args:
ch_in (int): input channel
ch_out (int): output channel
filter_size (int): filter size, default 3
stride (int): stride, default 1
groups (int): number of groups of conv layer, default 1
padding (int): padding size, default 0
norm_type (str): batch norm type, default bn
norm_decay (float): weight decay for the weight and bias of the batch norm layer, default 0.
act (str): activation function type, default 'leaky', which means leaky_relu
freeze_norm (bool): whether to freeze norm, default False
data_format (str): data format, NCHW or NHWC
"""
super(ConvBNLayer, self).__init__()
self.conv = nn.Conv2D(
in_channels=ch_in,
out_channels=ch_out,
kernel_size=filter_size,
stride=stride,
padding=padding,
groups=groups,
data_format=data_format,
bias_attr=False)
self.batch_norm = batch_norm(
ch_out,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
data_format=data_format)
self.act = act
def forward(self, inputs):
out = self.conv(inputs)
out = self.batch_norm(out)
if self.act == 'leaky':
out = F.leaky_relu(out, 0.1)
elif self.act == 'mish':
out = mish(out)
return out
class DownSample(nn.Layer):
def __init__(self,
ch_in,
ch_out,
filter_size=3,
stride=2,
padding=1,
norm_type='bn',
norm_decay=0.,
freeze_norm=False,
data_format='NCHW'):
"""
downsample layer
Args:
ch_in (int): input channel
ch_out (int): output channel
filter_size (int): filter size, default 3
stride (int): stride, default 2
padding (int): padding size, default 1
norm_type (str): batch norm type, default bn
norm_decay (float): weight decay for the weight and bias of the batch norm layer, default 0.
freeze_norm (bool): whether to freeze norm, default False
data_format (str): data format, NCHW or NHWC
"""
super(DownSample, self).__init__()
self.conv_bn_layer = ConvBNLayer(
ch_in=ch_in,
ch_out=ch_out,
filter_size=filter_size,
stride=stride,
padding=padding,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
data_format=data_format)
self.ch_out = ch_out
def forward(self, inputs):
out = self.conv_bn_layer(inputs)
return out
class BasicBlock(nn.Layer):
def __init__(self,
ch_in,
ch_out,
norm_type='bn',
norm_decay=0.,
freeze_norm=False,
data_format='NCHW'):
"""
BasicBlock layer of DarkNet
Args:
ch_in (int): input channel
ch_out (int): output channel
norm_type (str): batch norm type, default bn
norm_decay (float): weight decay for the weight and bias of the batch norm layer, default 0.
freeze_norm (bool): whether to freeze norm, default False
data_format (str): data format, NCHW or NHWC
"""
super(BasicBlock, self).__init__()
self.conv1 = ConvBNLayer(
ch_in=ch_in,
ch_out=ch_out,
filter_size=1,
stride=1,
padding=0,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
data_format=data_format)
self.conv2 = ConvBNLayer(
ch_in=ch_out,
ch_out=ch_out * 2,
filter_size=3,
stride=1,
padding=1,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
data_format=data_format)
def forward(self, inputs):
conv1 = self.conv1(inputs)
conv2 = self.conv2(conv1)
out = paddle.add(x=inputs, y=conv2)
return out
class Blocks(nn.Layer):
def __init__(self,
ch_in,
ch_out,
count,
norm_type='bn',
norm_decay=0.,
freeze_norm=False,
name=None,
data_format='NCHW'):
"""
Blocks layer, which consists of several BasicBlock layers
Args:
ch_in (int): input channel
ch_out (int): output channel
count (int): number of BasicBlock layer
norm_type (str): batch norm type, default bn
norm_decay (float): weight decay for the weight and bias of the batch norm layer, default 0.
freeze_norm (bool): whether to freeze norm, default False
name (str): layer name
data_format (str): data format, NCHW or NHWC
"""
super(Blocks, self).__init__()
self.basicblock0 = BasicBlock(
ch_in,
ch_out,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
data_format=data_format)
self.res_out_list = []
for i in range(1, count):
block_name = '{}.{}'.format(name, i)
res_out = self.add_sublayer(
block_name,
BasicBlock(
ch_out * 2,
ch_out,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
data_format=data_format))
self.res_out_list.append(res_out)
self.ch_out = ch_out
def forward(self, inputs):
y = self.basicblock0(inputs)
for basic_block_i in self.res_out_list:
y = basic_block_i(y)
return y
DarkNet_cfg = {53: ([1, 2, 8, 8, 4])}
@register
@serializable
class DarkNet(nn.Layer):
__shared__ = ['norm_type', 'data_format']
def __init__(self,
depth=53,
freeze_at=-1,
return_idx=[2, 3, 4],
num_stages=5,
norm_type='bn',
norm_decay=0.,
freeze_norm=False,
data_format='NCHW'):
"""
Darknet, see https://pjreddie.com/darknet/yolo/
Args:
depth (int): depth of network
freeze_at (int): freeze the backbone at which stage
num_stages (int): number of stages in the backbone, default 5
return_idx (list): index of stages whose feature maps are returned
norm_type (str): batch norm type, default bn
norm_decay (float): weight decay for the weight and bias of the batch norm layer, default 0.
data_format (str): data format, NCHW or NHWC
"""
super(DarkNet, self).__init__()
self.depth = depth
self.freeze_at = freeze_at
self.return_idx = return_idx
self.num_stages = num_stages
self.stages = DarkNet_cfg[self.depth][0:num_stages]
self.conv0 = ConvBNLayer(
ch_in=3,
ch_out=32,
filter_size=3,
stride=1,
padding=1,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
data_format=data_format)
self.downsample0 = DownSample(
ch_in=32,
ch_out=32 * 2,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
data_format=data_format)
self._out_channels = []
self.darknet_conv_block_list = []
self.downsample_list = []
ch_in = [64, 128, 256, 512, 1024]
for i, stage in enumerate(self.stages):
name = 'stage.{}'.format(i)
conv_block = self.add_sublayer(
name,
Blocks(
int(ch_in[i]),
32 * (2**i),
stage,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
data_format=data_format,
name=name))
self.darknet_conv_block_list.append(conv_block)
if i in return_idx:
self._out_channels.append(64 * (2**i))
for i in range(num_stages - 1):
down_name = 'stage.{}.downsample'.format(i)
downsample = self.add_sublayer(
down_name,
DownSample(
ch_in=32 * (2**(i + 1)),
ch_out=32 * (2**(i + 2)),
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
data_format=data_format))
self.downsample_list.append(downsample)
def forward(self, inputs):
x = inputs['image']
out = self.conv0(x)
out = self.downsample0(out)
blocks = []
for i, conv_block_i in enumerate(self.darknet_conv_block_list):
out = conv_block_i(out)
if i == self.freeze_at:
out.stop_gradient = True
if i in self.return_idx:
blocks.append(out)
if i < self.num_stages - 1:
out = self.downsample_list[i](out)
return blocks
@property
def out_shape(self):
return [ShapeSpec(channels=c) for c in self._out_channels]
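# Illustrative note (not part of the original file): with the defaults depth=53
# and return_idx=[2, 3, 4], the backbone exposes the last three stage outputs:
#
#   backbone = DarkNet()
#   [spec.channels for spec in backbone.out_shape]  # [256, 512, 1024]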

@ -0,0 +1,244 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppdet.core.workspace import register, serializable
from paddlers.models.ppdet.modeling.layers import ConvNormLayer
from ..shape_spec import ShapeSpec
DLA_cfg = {34: ([1, 1, 1, 2, 2, 1], [16, 32, 64, 128, 256, 512])}
class BasicBlock(nn.Layer):
def __init__(self, ch_in, ch_out, stride=1):
super(BasicBlock, self).__init__()
self.conv1 = ConvNormLayer(
ch_in,
ch_out,
filter_size=3,
stride=stride,
bias_on=False,
norm_decay=None)
self.conv2 = ConvNormLayer(
ch_out,
ch_out,
filter_size=3,
stride=1,
bias_on=False,
norm_decay=None)
def forward(self, inputs, residual=None):
if residual is None:
residual = inputs
out = self.conv1(inputs)
out = F.relu(out)
out = self.conv2(out)
out = paddle.add(x=out, y=residual)
out = F.relu(out)
return out
class Root(nn.Layer):
def __init__(self, ch_in, ch_out, kernel_size, residual):
super(Root, self).__init__()
self.conv = ConvNormLayer(
ch_in,
ch_out,
filter_size=1,
stride=1,
bias_on=False,
norm_decay=None)
self.residual = residual
def forward(self, inputs):
children = inputs
out = self.conv(paddle.concat(inputs, axis=1))
if self.residual:
out = paddle.add(x=out, y=children[0])
out = F.relu(out)
return out
class Tree(nn.Layer):
def __init__(self,
level,
block,
ch_in,
ch_out,
stride=1,
level_root=False,
root_dim=0,
root_kernel_size=1,
root_residual=False):
super(Tree, self).__init__()
if root_dim == 0:
root_dim = 2 * ch_out
if level_root:
root_dim += ch_in
if level == 1:
self.tree1 = block(ch_in, ch_out, stride)
self.tree2 = block(ch_out, ch_out, 1)
else:
self.tree1 = Tree(
level - 1,
block,
ch_in,
ch_out,
stride,
root_dim=0,
root_kernel_size=root_kernel_size,
root_residual=root_residual)
self.tree2 = Tree(
level - 1,
block,
ch_out,
ch_out,
1,
root_dim=root_dim + ch_out,
root_kernel_size=root_kernel_size,
root_residual=root_residual)
if level == 1:
self.root = Root(root_dim, ch_out, root_kernel_size, root_residual)
self.level_root = level_root
self.root_dim = root_dim
self.downsample = None
self.project = None
self.level = level
if stride > 1:
self.downsample = nn.MaxPool2D(stride, stride=stride)
if ch_in != ch_out:
self.project = ConvNormLayer(
ch_in,
ch_out,
filter_size=1,
stride=1,
bias_on=False,
norm_decay=None)
def forward(self, x, residual=None, children=None):
children = [] if children is None else children
bottom = self.downsample(x) if self.downsample else x
residual = self.project(bottom) if self.project else bottom
if self.level_root:
children.append(bottom)
x1 = self.tree1(x, residual)
if self.level == 1:
x2 = self.tree2(x1)
x = self.root([x2, x1] + children)
else:
children.append(x1)
x = self.tree2(x1, children=children)
return x
@register
@serializable
class DLA(nn.Layer):
"""
DLA, see https://arxiv.org/pdf/1707.06484.pdf
Args:
depth (int): DLA depth, should be 34.
residual_root (bool): whether to use a residual layer in the root block
"""
def __init__(self, depth=34, residual_root=False):
super(DLA, self).__init__()
levels, channels = DLA_cfg[depth]
if depth == 34:
block = BasicBlock
self.channels = channels
self.base_layer = nn.Sequential(
ConvNormLayer(
3,
channels[0],
filter_size=7,
stride=1,
bias_on=False,
norm_decay=None),
nn.ReLU())
self.level0 = self._make_conv_level(channels[0], channels[0],
levels[0])
self.level1 = self._make_conv_level(
channels[0], channels[1], levels[1], stride=2)
self.level2 = Tree(
levels[2],
block,
channels[1],
channels[2],
2,
level_root=False,
root_residual=residual_root)
self.level3 = Tree(
levels[3],
block,
channels[2],
channels[3],
2,
level_root=True,
root_residual=residual_root)
self.level4 = Tree(
levels[4],
block,
channels[3],
channels[4],
2,
level_root=True,
root_residual=residual_root)
self.level5 = Tree(
levels[5],
block,
channels[4],
channels[5],
2,
level_root=True,
root_residual=residual_root)
def _make_conv_level(self, ch_in, ch_out, conv_num, stride=1):
modules = []
for i in range(conv_num):
modules.extend([
ConvNormLayer(
ch_in,
ch_out,
filter_size=3,
stride=stride if i == 0 else 1,
bias_on=False,
norm_decay=None), nn.ReLU()
])
ch_in = ch_out
return nn.Sequential(*modules)
@property
def out_shape(self):
return [ShapeSpec(channels=self.channels[i]) for i in range(6)]
def forward(self, inputs):
outs = []
im = inputs['image']
feats = self.base_layer(im)
for i in range(6):
feats = getattr(self, 'level{}'.format(i))(feats)
outs.append(feats)
return outs

@ -0,0 +1,290 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
from paddle.nn import Conv2D, MaxPool2D, AdaptiveAvgPool2D
from paddle.nn.initializer import KaimingNormal
from paddle.regularizer import L2Decay
from paddlers.models.ppdet.core.workspace import register, serializable
from numbers import Integral
from ..shape_spec import ShapeSpec
from paddlers.models.ppdet.modeling.ops import channel_shuffle
from paddlers.models.ppdet.modeling.backbones.shufflenet_v2 import ConvBNLayer
__all__ = ['ESNet']
def make_divisible(v, divisor=16, min_value=None):
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
if new_v < 0.9 * v:
new_v += divisor
return new_v
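# Worked example added for clarity: make_divisible rounds a channel count to the
# nearest multiple of `divisor` (16 by default) without dropping below 90% of
# the original value, e.g. make_divisible(88) == 96 and make_divisible(70) == 64.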
class SEModule(nn.Layer):
def __init__(self, channel, reduction=4):
super(SEModule, self).__init__()
self.avg_pool = AdaptiveAvgPool2D(1)
self.conv1 = Conv2D(
in_channels=channel,
out_channels=channel // reduction,
kernel_size=1,
stride=1,
padding=0,
weight_attr=ParamAttr(),
bias_attr=ParamAttr())
self.conv2 = Conv2D(
in_channels=channel // reduction,
out_channels=channel,
kernel_size=1,
stride=1,
padding=0,
weight_attr=ParamAttr(),
bias_attr=ParamAttr())
def forward(self, inputs):
outputs = self.avg_pool(inputs)
outputs = self.conv1(outputs)
outputs = F.relu(outputs)
outputs = self.conv2(outputs)
outputs = F.hardsigmoid(outputs)
return paddle.multiply(x=inputs, y=outputs)
class InvertedResidual(nn.Layer):
def __init__(self,
in_channels,
mid_channels,
out_channels,
stride,
act="relu"):
super(InvertedResidual, self).__init__()
self._conv_pw = ConvBNLayer(
in_channels=in_channels // 2,
out_channels=mid_channels // 2,
kernel_size=1,
stride=1,
padding=0,
groups=1,
act=act)
self._conv_dw = ConvBNLayer(
in_channels=mid_channels // 2,
out_channels=mid_channels // 2,
kernel_size=3,
stride=stride,
padding=1,
groups=mid_channels // 2,
act=None)
self._se = SEModule(mid_channels)
self._conv_linear = ConvBNLayer(
in_channels=mid_channels,
out_channels=out_channels // 2,
kernel_size=1,
stride=1,
padding=0,
groups=1,
act=act)
def forward(self, inputs):
x1, x2 = paddle.split(
inputs,
num_or_sections=[inputs.shape[1] // 2, inputs.shape[1] // 2],
axis=1)
x2 = self._conv_pw(x2)
x3 = self._conv_dw(x2)
x3 = paddle.concat([x2, x3], axis=1)
x3 = self._se(x3)
x3 = self._conv_linear(x3)
out = paddle.concat([x1, x3], axis=1)
return channel_shuffle(out, 2)
class InvertedResidualDS(nn.Layer):
def __init__(self,
in_channels,
mid_channels,
out_channels,
stride,
act="relu"):
super(InvertedResidualDS, self).__init__()
# branch1
self._conv_dw_1 = ConvBNLayer(
in_channels=in_channels,
out_channels=in_channels,
kernel_size=3,
stride=stride,
padding=1,
groups=in_channels,
act=None)
self._conv_linear_1 = ConvBNLayer(
in_channels=in_channels,
out_channels=out_channels // 2,
kernel_size=1,
stride=1,
padding=0,
groups=1,
act=act)
# branch2
self._conv_pw_2 = ConvBNLayer(
in_channels=in_channels,
out_channels=mid_channels // 2,
kernel_size=1,
stride=1,
padding=0,
groups=1,
act=act)
self._conv_dw_2 = ConvBNLayer(
in_channels=mid_channels // 2,
out_channels=mid_channels // 2,
kernel_size=3,
stride=stride,
padding=1,
groups=mid_channels // 2,
act=None)
self._se = SEModule(mid_channels // 2)
self._conv_linear_2 = ConvBNLayer(
in_channels=mid_channels // 2,
out_channels=out_channels // 2,
kernel_size=1,
stride=1,
padding=0,
groups=1,
act=act)
self._conv_dw_mv1 = ConvBNLayer(
in_channels=out_channels,
out_channels=out_channels,
kernel_size=3,
stride=1,
padding=1,
groups=out_channels,
act="hard_swish")
self._conv_pw_mv1 = ConvBNLayer(
in_channels=out_channels,
out_channels=out_channels,
kernel_size=1,
stride=1,
padding=0,
groups=1,
act="hard_swish")
def forward(self, inputs):
x1 = self._conv_dw_1(inputs)
x1 = self._conv_linear_1(x1)
x2 = self._conv_pw_2(inputs)
x2 = self._conv_dw_2(x2)
x2 = self._se(x2)
x2 = self._conv_linear_2(x2)
out = paddle.concat([x1, x2], axis=1)
out = self._conv_dw_mv1(out)
out = self._conv_pw_mv1(out)
return out
@register
@serializable
class ESNet(nn.Layer):
def __init__(self,
scale=1.0,
act="hard_swish",
feature_maps=[4, 11, 14],
channel_ratio=[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]):
super(ESNet, self).__init__()
self.scale = scale
if isinstance(feature_maps, Integral):
feature_maps = [feature_maps]
self.feature_maps = feature_maps
stage_repeats = [3, 7, 3]
stage_out_channels = [
-1, 24, make_divisible(128 * scale), make_divisible(256 * scale),
make_divisible(512 * scale), 1024
]
self._out_channels = []
self._feature_idx = 0
# 1. conv1
self._conv1 = ConvBNLayer(
in_channels=3,
out_channels=stage_out_channels[1],
kernel_size=3,
stride=2,
padding=1,
act=act)
self._max_pool = MaxPool2D(kernel_size=3, stride=2, padding=1)
self._feature_idx += 1
# 2. bottleneck sequences
self._block_list = []
arch_idx = 0
for stage_id, num_repeat in enumerate(stage_repeats):
for i in range(num_repeat):
channels_scales = channel_ratio[arch_idx]
mid_c = make_divisible(
int(stage_out_channels[stage_id + 2] * channels_scales),
divisor=8)
if i == 0:
block = self.add_sublayer(
name=str(stage_id + 2) + '_' + str(i + 1),
sublayer=InvertedResidualDS(
in_channels=stage_out_channels[stage_id + 1],
mid_channels=mid_c,
out_channels=stage_out_channels[stage_id + 2],
stride=2,
act=act))
else:
block = self.add_sublayer(
name=str(stage_id + 2) + '_' + str(i + 1),
sublayer=InvertedResidual(
in_channels=stage_out_channels[stage_id + 2],
mid_channels=mid_c,
out_channels=stage_out_channels[stage_id + 2],
stride=1,
act=act))
self._block_list.append(block)
arch_idx += 1
self._feature_idx += 1
self._update_out_channels(stage_out_channels[stage_id + 2],
self._feature_idx, self.feature_maps)
def _update_out_channels(self, channel, feature_idx, feature_maps):
if feature_idx in feature_maps:
self._out_channels.append(channel)
def forward(self, inputs):
y = self._conv1(inputs['image'])
y = self._max_pool(y)
outs = []
for i, inv in enumerate(self._block_list):
y = inv(y)
if i + 2 in self.feature_maps:
outs.append(y)
return outs
@property
def out_shape(self):
return [ShapeSpec(channels=c) for c in self._out_channels]
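# Illustrative note (not part of the original file): with the defaults scale=1.0
# and feature_maps=[4, 11, 14], the exported features are the last block of each
# stage:
#
#   backbone = ESNet()
#   [spec.channels for spec in backbone.out_shape]  # [128, 256, 512]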

@ -0,0 +1,470 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import paddle
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn import AdaptiveAvgPool2D, Linear
from paddle.nn.initializer import Uniform
from paddlers.models.ppdet.core.workspace import register, serializable
from numbers import Integral
from ..shape_spec import ShapeSpec
from .mobilenet_v3 import make_divisible, ConvBNLayer
__all__ = ['GhostNet']
class ExtraBlockDW(nn.Layer):
def __init__(self,
in_c,
ch_1,
ch_2,
stride,
lr_mult,
conv_decay=0.,
norm_type='bn',
norm_decay=0.,
freeze_norm=False,
name=None):
super(ExtraBlockDW, self).__init__()
self.pointwise_conv = ConvBNLayer(
in_c=in_c,
out_c=ch_1,
filter_size=1,
stride=1,
padding=0,
act='relu6',
lr_mult=lr_mult,
conv_decay=conv_decay,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
name=name + "_extra1")
self.depthwise_conv = ConvBNLayer(
in_c=ch_1,
out_c=ch_2,
filter_size=3,
stride=stride,
padding=1,
num_groups=int(ch_1),
act='relu6',
lr_mult=lr_mult,
conv_decay=conv_decay,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
name=name + "_extra2_dw")
self.normal_conv = ConvBNLayer(
in_c=ch_2,
out_c=ch_2,
filter_size=1,
stride=1,
padding=0,
act='relu6',
lr_mult=lr_mult,
conv_decay=conv_decay,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
name=name + "_extra2_sep")
def forward(self, inputs):
x = self.pointwise_conv(inputs)
x = self.depthwise_conv(x)
x = self.normal_conv(x)
return x
class SEBlock(nn.Layer):
def __init__(self, num_channels, lr_mult, reduction_ratio=4, name=None):
super(SEBlock, self).__init__()
self.pool2d_gap = AdaptiveAvgPool2D(1)
self._num_channels = num_channels
stdv = 1.0 / math.sqrt(num_channels * 1.0)
med_ch = num_channels // reduction_ratio
self.squeeze = Linear(
num_channels,
med_ch,
weight_attr=ParamAttr(
learning_rate=lr_mult, initializer=Uniform(-stdv, stdv)),
bias_attr=ParamAttr(learning_rate=lr_mult))
stdv = 1.0 / math.sqrt(med_ch * 1.0)
self.excitation = Linear(
med_ch,
num_channels,
weight_attr=ParamAttr(
learning_rate=lr_mult, initializer=Uniform(-stdv, stdv)),
bias_attr=ParamAttr(learning_rate=lr_mult))
def forward(self, inputs):
pool = self.pool2d_gap(inputs)
pool = paddle.squeeze(pool, axis=[2, 3])
squeeze = self.squeeze(pool)
squeeze = F.relu(squeeze)
excitation = self.excitation(squeeze)
excitation = paddle.clip(x=excitation, min=0, max=1)
excitation = paddle.unsqueeze(excitation, axis=[2, 3])
out = paddle.multiply(inputs, excitation)
return out
class GhostModule(nn.Layer):
def __init__(self,
in_channels,
output_channels,
kernel_size=1,
ratio=2,
dw_size=3,
stride=1,
relu=True,
lr_mult=1.,
conv_decay=0.,
norm_type='bn',
norm_decay=0.,
freeze_norm=False,
name=None):
super(GhostModule, self).__init__()
init_channels = int(math.ceil(output_channels / ratio))
new_channels = int(init_channels * (ratio - 1))
self.primary_conv = ConvBNLayer(
in_c=in_channels,
out_c=init_channels,
filter_size=kernel_size,
stride=stride,
padding=int((kernel_size - 1) // 2),
num_groups=1,
act="relu" if relu else None,
lr_mult=lr_mult,
conv_decay=conv_decay,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
name=name + "_primary_conv")
self.cheap_operation = ConvBNLayer(
in_c=init_channels,
out_c=new_channels,
filter_size=dw_size,
stride=1,
padding=int((dw_size - 1) // 2),
num_groups=init_channels,
act="relu" if relu else None,
lr_mult=lr_mult,
conv_decay=conv_decay,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
name=name + "_cheap_operation")
def forward(self, inputs):
x = self.primary_conv(inputs)
y = self.cheap_operation(x)
out = paddle.concat([x, y], axis=1)
return out
class GhostBottleneck(nn.Layer):
def __init__(self,
in_channels,
hidden_dim,
output_channels,
kernel_size,
stride,
use_se,
lr_mult,
conv_decay=0.,
norm_type='bn',
norm_decay=0.,
freeze_norm=False,
return_list=False,
name=None):
super(GhostBottleneck, self).__init__()
self._stride = stride
self._use_se = use_se
self._num_channels = in_channels
self._output_channels = output_channels
self.return_list = return_list
self.ghost_module_1 = GhostModule(
in_channels=in_channels,
output_channels=hidden_dim,
kernel_size=1,
stride=1,
relu=True,
lr_mult=lr_mult,
conv_decay=conv_decay,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
name=name + "_ghost_module_1")
if stride == 2:
self.depthwise_conv = ConvBNLayer(
in_c=hidden_dim,
out_c=hidden_dim,
filter_size=kernel_size,
stride=stride,
padding=int((kernel_size - 1) // 2),
num_groups=hidden_dim,
act=None,
lr_mult=lr_mult,
conv_decay=conv_decay,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
name=name +
"_depthwise_depthwise" # looks strange due to an old typo, will be fixed later.
)
if use_se:
self.se_block = SEBlock(hidden_dim, lr_mult, name=name + "_se")
self.ghost_module_2 = GhostModule(
in_channels=hidden_dim,
output_channels=output_channels,
kernel_size=1,
relu=False,
lr_mult=lr_mult,
conv_decay=conv_decay,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
name=name + "_ghost_module_2")
if stride != 1 or in_channels != output_channels:
self.shortcut_depthwise = ConvBNLayer(
in_c=in_channels,
out_c=in_channels,
filter_size=kernel_size,
stride=stride,
padding=int((kernel_size - 1) // 2),
num_groups=in_channels,
act=None,
lr_mult=lr_mult,
conv_decay=conv_decay,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
name=name +
"_shortcut_depthwise_depthwise" # looks strange due to an old typo, will be fixed later.
)
self.shortcut_conv = ConvBNLayer(
in_c=in_channels,
out_c=output_channels,
filter_size=1,
stride=1,
padding=0,
num_groups=1,
act=None,
lr_mult=lr_mult,
conv_decay=conv_decay,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
name=name + "_shortcut_conv")
def forward(self, inputs):
y = self.ghost_module_1(inputs)
x = y
if self._stride == 2:
x = self.depthwise_conv(x)
if self._use_se:
x = self.se_block(x)
x = self.ghost_module_2(x)
if self._stride == 1 and self._num_channels == self._output_channels:
shortcut = inputs
else:
shortcut = self.shortcut_depthwise(inputs)
shortcut = self.shortcut_conv(shortcut)
x = paddle.add(x=x, y=shortcut)
if self.return_list:
return [y, x]
else:
return x
@register
@serializable
class GhostNet(nn.Layer):
__shared__ = ['norm_type']
def __init__(self,
scale=1.3,
feature_maps=[6, 12, 15],
with_extra_blocks=False,
extra_block_filters=[[256, 512], [128, 256], [128, 256],
[64, 128]],
lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0],
conv_decay=0.,
norm_type='bn',
norm_decay=0.0,
freeze_norm=False):
super(GhostNet, self).__init__()
if isinstance(feature_maps, Integral):
feature_maps = [feature_maps]
if norm_type == 'sync_bn' and freeze_norm:
raise ValueError(
"The norm_type should not be sync_bn when freeze_norm is True")
self.feature_maps = feature_maps
self.with_extra_blocks = with_extra_blocks
self.extra_block_filters = extra_block_filters
inplanes = 16
self.cfgs = [
# k, t, c, SE, s
[3, 16, 16, 0, 1],
[3, 48, 24, 0, 2],
[3, 72, 24, 0, 1],
[5, 72, 40, 1, 2],
[5, 120, 40, 1, 1],
[3, 240, 80, 0, 2],
[3, 200, 80, 0, 1],
[3, 184, 80, 0, 1],
[3, 184, 80, 0, 1],
[3, 480, 112, 1, 1],
[3, 672, 112, 1, 1],
[5, 672, 160, 1, 2], # SSDLite output
[5, 960, 160, 0, 1],
[5, 960, 160, 1, 1],
[5, 960, 160, 0, 1],
[5, 960, 160, 1, 1]
]
self.scale = scale
conv1_out_ch = int(make_divisible(inplanes * self.scale, 4))
self.conv1 = ConvBNLayer(
in_c=3,
out_c=conv1_out_ch,
filter_size=3,
stride=2,
padding=1,
num_groups=1,
act="relu",
lr_mult=1.,
conv_decay=conv_decay,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
name="conv1")
# build inverted residual blocks
self._out_channels = []
self.ghost_bottleneck_list = []
idx = 0
inplanes = conv1_out_ch
for k, exp_size, c, use_se, s in self.cfgs:
lr_idx = min(idx // 3, len(lr_mult_list) - 1)
lr_mult = lr_mult_list[lr_idx]
# for SSD/SSDLite, first head input is after ResidualUnit expand_conv
return_list = self.with_extra_blocks and idx + 2 in self.feature_maps
ghost_bottleneck = self.add_sublayer(
"_ghostbottleneck_" + str(idx),
sublayer=GhostBottleneck(
in_channels=inplanes,
hidden_dim=int(make_divisible(exp_size * self.scale, 4)),
output_channels=int(make_divisible(c * self.scale, 4)),
kernel_size=k,
stride=s,
use_se=use_se,
lr_mult=lr_mult,
conv_decay=conv_decay,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
return_list=return_list,
name="_ghostbottleneck_" + str(idx)))
self.ghost_bottleneck_list.append(ghost_bottleneck)
inplanes = int(make_divisible(c * self.scale, 4))
idx += 1
self._update_out_channels(
int(make_divisible(exp_size * self.scale, 4))
if return_list else inplanes, idx + 1, feature_maps)
if self.with_extra_blocks:
self.extra_block_list = []
extra_out_c = int(make_divisible(self.scale * self.cfgs[-1][1], 4))
lr_idx = min(idx // 3, len(lr_mult_list) - 1)
lr_mult = lr_mult_list[lr_idx]
conv_extra = self.add_sublayer(
"conv" + str(idx + 2),
sublayer=ConvBNLayer(
in_c=inplanes,
out_c=extra_out_c,
filter_size=1,
stride=1,
padding=0,
num_groups=1,
act="relu6",
lr_mult=lr_mult,
conv_decay=conv_decay,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
name="conv" + str(idx + 2)))
self.extra_block_list.append(conv_extra)
idx += 1
self._update_out_channels(extra_out_c, idx + 1, feature_maps)
for j, block_filter in enumerate(self.extra_block_filters):
in_c = extra_out_c if j == 0 else self.extra_block_filters[
j - 1][1]
conv_extra = self.add_sublayer(
"conv" + str(idx + 2),
sublayer=ExtraBlockDW(
in_c,
block_filter[0],
block_filter[1],
stride=2,
lr_mult=lr_mult,
conv_decay=conv_decay,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
name='conv' + str(idx + 2)))
self.extra_block_list.append(conv_extra)
idx += 1
self._update_out_channels(block_filter[1], idx + 1,
feature_maps)
def _update_out_channels(self, channel, feature_idx, feature_maps):
if feature_idx in feature_maps:
self._out_channels.append(channel)
def forward(self, inputs):
x = self.conv1(inputs['image'])
outs = []
for idx, ghost_bottleneck in enumerate(self.ghost_bottleneck_list):
x = ghost_bottleneck(x)
if idx + 2 in self.feature_maps:
if isinstance(x, list):
outs.append(x[0])
x = x[1]
else:
outs.append(x)
if not self.with_extra_blocks:
return outs
for i, block in enumerate(self.extra_block_list):
idx = i + len(self.ghost_bottleneck_list)
x = block(x)
if idx + 2 in self.feature_maps:
outs.append(x)
return outs
@property
def out_shape(self):
return [ShapeSpec(channels=c) for c in self._out_channels]

@ -0,0 +1,224 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
from paddlers.models.ppdet.core.workspace import register
from ..shape_spec import ShapeSpec
__all__ = ['HarDNet']
def ConvLayer(in_channels,
out_channels,
kernel_size=3,
stride=1,
bias_attr=False):
layer = nn.Sequential(
('conv', nn.Conv2D(
in_channels,
out_channels,
kernel_size=kernel_size,
stride=stride,
padding=kernel_size // 2,
groups=1,
bias_attr=bias_attr)), ('norm', nn.BatchNorm2D(out_channels)),
('relu', nn.ReLU6()))
return layer
def DWConvLayer(in_channels,
out_channels,
kernel_size=3,
stride=1,
bias_attr=False):
layer = nn.Sequential(
('dwconv', nn.Conv2D(
in_channels,
out_channels,
kernel_size=kernel_size,
stride=stride,
padding=1,
groups=out_channels,
bias_attr=bias_attr)), ('norm', nn.BatchNorm2D(out_channels)))
return layer
def CombConvLayer(in_channels, out_channels, kernel_size=1, stride=1):
layer = nn.Sequential(
('layer1', ConvLayer(
in_channels, out_channels, kernel_size=kernel_size)),
('layer2', DWConvLayer(
out_channels, out_channels, stride=stride)))
return layer
class HarDBlock(nn.Layer):
def __init__(self,
in_channels,
growth_rate,
grmul,
n_layers,
keepBase=False,
residual_out=False,
dwconv=False):
super().__init__()
self.keepBase = keepBase
self.links = []
layers_ = []
self.out_channels = 0
for i in range(n_layers):
outch, inch, link = self.get_link(i + 1, in_channels, growth_rate,
grmul)
self.links.append(link)
if dwconv:
layers_.append(CombConvLayer(inch, outch))
else:
layers_.append(ConvLayer(inch, outch))
if (i % 2 == 0) or (i == n_layers - 1):
self.out_channels += outch
self.layers = nn.LayerList(layers_)
def get_out_ch(self):
return self.out_channels
def get_link(self, layer, base_ch, growth_rate, grmul):
if layer == 0:
return base_ch, 0, []
out_channels = growth_rate
link = []
for i in range(10):
dv = 2**i
if layer % dv == 0:
k = layer - dv
link.append(k)
if i > 0:
out_channels *= grmul
out_channels = int(int(out_channels + 1) / 2) * 2
in_channels = 0
for i in link:
ch, _, _ = self.get_link(i, base_ch, growth_rate, grmul)
in_channels += ch
return out_channels, in_channels, link
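# Worked example added for clarity: for the first HarDNet-85 block
# (base_ch=96, growth_rate=24, grmul=1.7), layer 4 links back to layers 3, 2
# and 0, so self.get_link(4, 96, 24, 1.7) returns (70, 160, [3, 2, 0]):
# out_channels = 24 * 1.7**2 ~= 69.4 rounded to an even integer (70), and
# in_channels is the sum of the linked layers' widths (24 + 40 + 96 = 160).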
def forward(self, x):
layers_ = [x]
for layer in range(len(self.layers)):
link = self.links[layer]
tin = []
for i in link:
tin.append(layers_[i])
if len(tin) > 1:
x = paddle.concat(tin, 1)
else:
x = tin[0]
out = self.layers[layer](x)
layers_.append(out)
t = len(layers_)
out_ = []
for i in range(t):
if (i == 0 and self.keepBase) or (i == t - 1) or (i % 2 == 1):
out_.append(layers_[i])
out = paddle.concat(out_, 1)
return out
@register
class HarDNet(nn.Layer):
def __init__(self, depth_wise=False, return_idx=[1, 3, 8, 13], arch=85):
super(HarDNet, self).__init__()
assert arch in [39, 68, 85], "HarDNet-{} is not supported.".format(arch)
if arch == 85:
first_ch = [48, 96]
second_kernel = 3
ch_list = [192, 256, 320, 480, 720]
grmul = 1.7
gr = [24, 24, 28, 36, 48]
n_layers = [8, 16, 16, 16, 16]
elif arch == 68:
first_ch = [32, 64]
second_kernel = 3
ch_list = [128, 256, 320, 640]
grmul = 1.7
gr = [14, 16, 20, 40]
n_layers = [8, 16, 16, 16]
self.return_idx = return_idx
self._out_channels = [96, 214, 458, 784]
avg_pool = True
if depth_wise:
second_kernel = 1
avg_pool = False
blks = len(n_layers)
self.base = nn.LayerList([])
# First Layer: Standard Conv3x3, Stride=2
self.base.append(
ConvLayer(
in_channels=3,
out_channels=first_ch[0],
kernel_size=3,
stride=2,
bias_attr=False))
# Second Layer
self.base.append(
ConvLayer(
first_ch[0], first_ch[1], kernel_size=second_kernel))
# Avgpooling or DWConv3x3 downsampling
if avg_pool:
self.base.append(nn.AvgPool2D(kernel_size=3, stride=2, padding=1))
else:
self.base.append(DWConvLayer(first_ch[1], first_ch[1], stride=2))
# Build all HarDNet blocks
ch = first_ch[1]
for i in range(blks):
blk = HarDBlock(ch, gr[i], grmul, n_layers[i], dwconv=depth_wise)
ch = blk.out_channels
self.base.append(blk)
if i != blks - 1:
self.base.append(ConvLayer(ch, ch_list[i], kernel_size=1))
ch = ch_list[i]
if i == 0:
self.base.append(
nn.AvgPool2D(
kernel_size=2, stride=2, ceil_mode=True))
elif i != blks - 1 and i != 1 and i != 3:
self.base.append(nn.AvgPool2D(kernel_size=2, stride=2))
def forward(self, inputs):
x = inputs['image']
outs = []
for i, layer in enumerate(self.base):
x = layer(x)
if i in self.return_idx:
outs.append(x)
return outs
@property
def out_shape(self):
return [ShapeSpec(channels=self._out_channels[i]) for i in range(4)]
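A small self-contained sketch of the harmonic link rule computed by HarDBlock.get_link above: layer k draws its inputs from layers k - 2**i for every power of two that divides k, and its output width grows by a factor of grmul per extra hop, rounded to an even integer. The helper name and the growth_rate/grmul values (the arch-85 defaults from HarDNet.__init__) are illustrative only.

def harmonic_links(layer, growth_rate=24, grmul=1.7):
    # Pure-Python reproduction of the output width and back-links that
    # HarDBlock.get_link derives for a single layer index (no paddle needed).
    out_channels = growth_rate
    link = []
    for i in range(10):
        dv = 2 ** i
        if layer % dv == 0:
            link.append(layer - dv)
            if i > 0:
                out_channels *= grmul
    # Round the width to an even integer, as get_link does.
    return int(int(out_channels + 1) / 2) * 2, link

print(harmonic_links(4))  # (70, [3, 2, 0]): layer 4 reads layers 3, 2 and 0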

@@ -0,0 +1,727 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn import AdaptiveAvgPool2D, Linear
from paddle.regularizer import L2Decay
from paddle import ParamAttr
from paddle.nn.initializer import Normal, Uniform
from numbers import Integral
import math
from paddlers.models.ppdet.core.workspace import register
from ..shape_spec import ShapeSpec
__all__ = ['HRNet']
class ConvNormLayer(nn.Layer):
def __init__(self,
ch_in,
ch_out,
filter_size,
stride=1,
norm_type='bn',
norm_groups=32,
use_dcn=False,
norm_decay=0.,
freeze_norm=False,
act=None,
name=None):
super(ConvNormLayer, self).__init__()
assert norm_type in ['bn', 'sync_bn', 'gn']
self.act = act
self.conv = nn.Conv2D(
in_channels=ch_in,
out_channels=ch_out,
kernel_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=1,
weight_attr=ParamAttr(initializer=Normal(
mean=0., std=0.01)),
bias_attr=False)
norm_lr = 0. if freeze_norm else 1.
param_attr = ParamAttr(
learning_rate=norm_lr, regularizer=L2Decay(norm_decay))
bias_attr = ParamAttr(
learning_rate=norm_lr, regularizer=L2Decay(norm_decay))
global_stats = True if freeze_norm else None
if norm_type in ['bn', 'sync_bn']:
self.norm = nn.BatchNorm2D(
ch_out,
weight_attr=param_attr,
bias_attr=bias_attr,
use_global_stats=global_stats)
elif norm_type == 'gn':
self.norm = nn.GroupNorm(
num_groups=norm_groups,
num_channels=ch_out,
weight_attr=param_attr,
bias_attr=bias_attr)
norm_params = self.norm.parameters()
if freeze_norm:
for param in norm_params:
param.stop_gradient = True
def forward(self, inputs):
out = self.conv(inputs)
out = self.norm(out)
if self.act == 'relu':
out = F.relu(out)
return out
class Layer1(nn.Layer):
def __init__(self,
num_channels,
has_se=False,
norm_decay=0.,
freeze_norm=True,
name=None):
super(Layer1, self).__init__()
self.bottleneck_block_list = []
for i in range(4):
bottleneck_block = self.add_sublayer(
"block_{}_{}".format(name, i + 1),
BottleneckBlock(
num_channels=num_channels if i == 0 else 256,
num_filters=64,
has_se=has_se,
stride=1,
downsample=True if i == 0 else False,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
name=name + '_' + str(i + 1)))
self.bottleneck_block_list.append(bottleneck_block)
def forward(self, input):
conv = input
for block_func in self.bottleneck_block_list:
conv = block_func(conv)
return conv
class TransitionLayer(nn.Layer):
def __init__(self,
in_channels,
out_channels,
norm_decay=0.,
freeze_norm=True,
name=None):
super(TransitionLayer, self).__init__()
num_in = len(in_channels)
num_out = len(out_channels)
out = []
self.conv_bn_func_list = []
for i in range(num_out):
residual = None
if i < num_in:
if in_channels[i] != out_channels[i]:
residual = self.add_sublayer(
"transition_{}_layer_{}".format(name, i + 1),
ConvNormLayer(
ch_in=in_channels[i],
ch_out=out_channels[i],
filter_size=3,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
act='relu',
name=name + '_layer_' + str(i + 1)))
else:
residual = self.add_sublayer(
"transition_{}_layer_{}".format(name, i + 1),
ConvNormLayer(
ch_in=in_channels[-1],
ch_out=out_channels[i],
filter_size=3,
stride=2,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
act='relu',
name=name + '_layer_' + str(i + 1)))
self.conv_bn_func_list.append(residual)
def forward(self, input):
outs = []
for idx, conv_bn_func in enumerate(self.conv_bn_func_list):
if conv_bn_func is None:
outs.append(input[idx])
else:
if idx < len(input):
outs.append(conv_bn_func(input[idx]))
else:
outs.append(conv_bn_func(input[-1]))
return outs
class Branches(nn.Layer):
def __init__(self,
block_num,
in_channels,
out_channels,
has_se=False,
norm_decay=0.,
freeze_norm=True,
name=None):
super(Branches, self).__init__()
self.basic_block_list = []
for i in range(len(out_channels)):
self.basic_block_list.append([])
for j in range(block_num):
in_ch = in_channels[i] if j == 0 else out_channels[i]
basic_block_func = self.add_sublayer(
"bb_{}_branch_layer_{}_{}".format(name, i + 1, j + 1),
BasicBlock(
num_channels=in_ch,
num_filters=out_channels[i],
has_se=has_se,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
name=name + '_branch_layer_' + str(i + 1) + '_' +
str(j + 1)))
self.basic_block_list[i].append(basic_block_func)
def forward(self, inputs):
outs = []
for idx, input in enumerate(inputs):
conv = input
basic_block_list = self.basic_block_list[idx]
for basic_block_func in basic_block_list:
conv = basic_block_func(conv)
outs.append(conv)
return outs
class BottleneckBlock(nn.Layer):
def __init__(self,
num_channels,
num_filters,
has_se,
stride=1,
downsample=False,
norm_decay=0.,
freeze_norm=True,
name=None):
super(BottleneckBlock, self).__init__()
self.has_se = has_se
self.downsample = downsample
self.conv1 = ConvNormLayer(
ch_in=num_channels,
ch_out=num_filters,
filter_size=1,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
act="relu",
name=name + "_conv1")
self.conv2 = ConvNormLayer(
ch_in=num_filters,
ch_out=num_filters,
filter_size=3,
stride=stride,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
act="relu",
name=name + "_conv2")
self.conv3 = ConvNormLayer(
ch_in=num_filters,
ch_out=num_filters * 4,
filter_size=1,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
act=None,
name=name + "_conv3")
if self.downsample:
self.conv_down = ConvNormLayer(
ch_in=num_channels,
ch_out=num_filters * 4,
filter_size=1,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
act=None,
name=name + "_downsample")
if self.has_se:
self.se = SELayer(
num_channels=num_filters * 4,
num_filters=num_filters * 4,
reduction_ratio=16,
name='fc' + name)
def forward(self, input):
residual = input
conv1 = self.conv1(input)
conv2 = self.conv2(conv1)
conv3 = self.conv3(conv2)
if self.downsample:
residual = self.conv_down(input)
if self.has_se:
conv3 = self.se(conv3)
y = paddle.add(x=residual, y=conv3)
y = F.relu(y)
return y
class BasicBlock(nn.Layer):
def __init__(self,
num_channels,
num_filters,
stride=1,
has_se=False,
downsample=False,
norm_decay=0.,
freeze_norm=True,
name=None):
super(BasicBlock, self).__init__()
self.has_se = has_se
self.downsample = downsample
self.conv1 = ConvNormLayer(
ch_in=num_channels,
ch_out=num_filters,
filter_size=3,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
stride=stride,
act="relu",
name=name + "_conv1")
self.conv2 = ConvNormLayer(
ch_in=num_filters,
ch_out=num_filters,
filter_size=3,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
stride=1,
act=None,
name=name + "_conv2")
if self.downsample:
self.conv_down = ConvNormLayer(
ch_in=num_channels,
ch_out=num_filters * 4,
filter_size=1,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
act=None,
name=name + "_downsample")
if self.has_se:
self.se = SELayer(
num_channels=num_filters,
num_filters=num_filters,
reduction_ratio=16,
name='fc' + name)
def forward(self, input):
residual = input
conv1 = self.conv1(input)
conv2 = self.conv2(conv1)
if self.downsample:
residual = self.conv_down(input)
if self.has_se:
conv2 = self.se(conv2)
y = paddle.add(x=residual, y=conv2)
y = F.relu(y)
return y
class SELayer(nn.Layer):
def __init__(self, num_channels, num_filters, reduction_ratio, name=None):
super(SELayer, self).__init__()
self.pool2d_gap = AdaptiveAvgPool2D(1)
self._num_channels = num_channels
med_ch = int(num_channels / reduction_ratio)
stdv = 1.0 / math.sqrt(num_channels * 1.0)
self.squeeze = Linear(
num_channels,
med_ch,
weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv)))
stdv = 1.0 / math.sqrt(med_ch * 1.0)
self.excitation = Linear(
med_ch,
num_filters,
weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv)))
def forward(self, input):
pool = self.pool2d_gap(input)
pool = paddle.squeeze(pool, axis=[2, 3])
squeeze = self.squeeze(pool)
squeeze = F.relu(squeeze)
excitation = self.excitation(squeeze)
excitation = F.sigmoid(excitation)
excitation = paddle.unsqueeze(excitation, axis=[2, 3])
out = input * excitation
return out
class Stage(nn.Layer):
def __init__(self,
num_channels,
num_modules,
num_filters,
has_se=False,
norm_decay=0.,
freeze_norm=True,
multi_scale_output=True,
name=None):
super(Stage, self).__init__()
self._num_modules = num_modules
self.stage_func_list = []
for i in range(num_modules):
if i == num_modules - 1 and not multi_scale_output:
stage_func = self.add_sublayer(
"stage_{}_{}".format(name, i + 1),
HighResolutionModule(
num_channels=num_channels,
num_filters=num_filters,
has_se=has_se,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
multi_scale_output=False,
name=name + '_' + str(i + 1)))
else:
stage_func = self.add_sublayer(
"stage_{}_{}".format(name, i + 1),
HighResolutionModule(
num_channels=num_channels,
num_filters=num_filters,
has_se=has_se,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
name=name + '_' + str(i + 1)))
self.stage_func_list.append(stage_func)
def forward(self, input):
out = input
for idx in range(self._num_modules):
out = self.stage_func_list[idx](out)
return out
class HighResolutionModule(nn.Layer):
def __init__(self,
num_channels,
num_filters,
has_se=False,
multi_scale_output=True,
norm_decay=0.,
freeze_norm=True,
name=None):
super(HighResolutionModule, self).__init__()
self.branches_func = Branches(
block_num=4,
in_channels=num_channels,
out_channels=num_filters,
has_se=has_se,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
name=name)
self.fuse_func = FuseLayers(
in_channels=num_filters,
out_channels=num_filters,
multi_scale_output=multi_scale_output,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
name=name)
def forward(self, input):
out = self.branches_func(input)
out = self.fuse_func(out)
return out
class FuseLayers(nn.Layer):
def __init__(self,
in_channels,
out_channels,
multi_scale_output=True,
norm_decay=0.,
freeze_norm=True,
name=None):
super(FuseLayers, self).__init__()
self._actual_ch = len(in_channels) if multi_scale_output else 1
self._in_channels = in_channels
self.residual_func_list = []
for i in range(self._actual_ch):
for j in range(len(in_channels)):
residual_func = None
if j > i:
residual_func = self.add_sublayer(
"residual_{}_layer_{}_{}".format(name, i + 1, j + 1),
ConvNormLayer(
ch_in=in_channels[j],
ch_out=out_channels[i],
filter_size=1,
stride=1,
act=None,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
name=name + '_layer_' + str(i + 1) + '_' +
str(j + 1)))
self.residual_func_list.append(residual_func)
elif j < i:
pre_num_filters = in_channels[j]
for k in range(i - j):
if k == i - j - 1:
residual_func = self.add_sublayer(
"residual_{}_layer_{}_{}_{}".format(
name, i + 1, j + 1, k + 1),
ConvNormLayer(
ch_in=pre_num_filters,
ch_out=out_channels[i],
filter_size=3,
stride=2,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
act=None,
name=name + '_layer_' + str(i + 1) + '_' +
str(j + 1) + '_' + str(k + 1)))
pre_num_filters = out_channels[i]
else:
residual_func = self.add_sublayer(
"residual_{}_layer_{}_{}_{}".format(
name, i + 1, j + 1, k + 1),
ConvNormLayer(
ch_in=pre_num_filters,
ch_out=out_channels[j],
filter_size=3,
stride=2,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
act="relu",
name=name + '_layer_' + str(i + 1) + '_' +
str(j + 1) + '_' + str(k + 1)))
pre_num_filters = out_channels[j]
self.residual_func_list.append(residual_func)
def forward(self, input):
outs = []
residual_func_idx = 0
for i in range(self._actual_ch):
residual = input[i]
for j in range(len(self._in_channels)):
if j > i:
y = self.residual_func_list[residual_func_idx](input[j])
residual_func_idx += 1
y = F.interpolate(y, scale_factor=2**(j - i))
residual = paddle.add(x=residual, y=y)
elif j < i:
y = input[j]
for k in range(i - j):
y = self.residual_func_list[residual_func_idx](y)
residual_func_idx += 1
residual = paddle.add(x=residual, y=y)
residual = F.relu(residual)
outs.append(residual)
return outs
@register
class HRNet(nn.Layer):
"""
HRNet, see https://arxiv.org/abs/1908.07919
Args:
width (int): the width of HRNet
has_se (bool): whether to add SE block for each stage
freeze_at (int): the stage to freeze
freeze_norm (bool): whether to freeze norm in HRNet
norm_decay (float): weight decay for normalization layer weights
return_idx (List): the stage to return
upsample (bool): whether to upsample and concat the backbone feats
"""
def __init__(self,
width=18,
has_se=False,
freeze_at=0,
freeze_norm=True,
norm_decay=0.,
return_idx=[0, 1, 2, 3],
upsample=False):
super(HRNet, self).__init__()
self.width = width
self.has_se = has_se
if isinstance(return_idx, Integral):
return_idx = [return_idx]
assert len(return_idx) > 0, "need one or more return index"
self.freeze_at = freeze_at
self.return_idx = return_idx
self.upsample = upsample
self.channels = {
18: [[18, 36], [18, 36, 72], [18, 36, 72, 144]],
30: [[30, 60], [30, 60, 120], [30, 60, 120, 240]],
32: [[32, 64], [32, 64, 128], [32, 64, 128, 256]],
40: [[40, 80], [40, 80, 160], [40, 80, 160, 320]],
44: [[44, 88], [44, 88, 176], [44, 88, 176, 352]],
48: [[48, 96], [48, 96, 192], [48, 96, 192, 384]],
60: [[60, 120], [60, 120, 240], [60, 120, 240, 480]],
64: [[64, 128], [64, 128, 256], [64, 128, 256, 512]]
}
channels_2, channels_3, channels_4 = self.channels[width]
num_modules_2, num_modules_3, num_modules_4 = 1, 4, 3
self._out_channels = [sum(channels_4)] if self.upsample else channels_4
self._out_strides = [4] if self.upsample else [4, 8, 16, 32]
self.conv_layer1_1 = ConvNormLayer(
ch_in=3,
ch_out=64,
filter_size=3,
stride=2,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
act='relu',
name="layer1_1")
self.conv_layer1_2 = ConvNormLayer(
ch_in=64,
ch_out=64,
filter_size=3,
stride=2,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
act='relu',
name="layer1_2")
self.la1 = Layer1(
num_channels=64,
has_se=has_se,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
name="layer2")
self.tr1 = TransitionLayer(
in_channels=[256],
out_channels=channels_2,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
name="tr1")
self.st2 = Stage(
num_channels=channels_2,
num_modules=num_modules_2,
num_filters=channels_2,
has_se=self.has_se,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
name="st2")
self.tr2 = TransitionLayer(
in_channels=channels_2,
out_channels=channels_3,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
name="tr2")
self.st3 = Stage(
num_channels=channels_3,
num_modules=num_modules_3,
num_filters=channels_3,
has_se=self.has_se,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
name="st3")
self.tr3 = TransitionLayer(
in_channels=channels_3,
out_channels=channels_4,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
name="tr3")
self.st4 = Stage(
num_channels=channels_4,
num_modules=num_modules_4,
num_filters=channels_4,
has_se=self.has_se,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
multi_scale_output=len(return_idx) > 1,
name="st4")
def forward(self, inputs):
x = inputs['image']
conv1 = self.conv_layer1_1(x)
conv2 = self.conv_layer1_2(conv1)
la1 = self.la1(conv2)
tr1 = self.tr1([la1])
st2 = self.st2(tr1)
tr2 = self.tr2(st2)
st3 = self.st3(tr2)
tr3 = self.tr3(st3)
st4 = self.st4(tr3)
if self.upsample:
# Upsampling
x0_h, x0_w = st4[0].shape[2:4]
x1 = F.upsample(st4[1], size=(x0_h, x0_w), mode='bilinear')
x2 = F.upsample(st4[2], size=(x0_h, x0_w), mode='bilinear')
x3 = F.upsample(st4[3], size=(x0_h, x0_w), mode='bilinear')
x = paddle.concat([st4[0], x1, x2, x3], 1)
return x
res = []
for i, layer in enumerate(st4):
if i == self.freeze_at:
layer.stop_gradient = True
if i in self.return_idx:
res.append(layer)
return res
@property
def out_shape(self):
if self.upsample:
self.return_idx = [0]
return [
ShapeSpec(
channels=self._out_channels[i], stride=self._out_strides[i])
for i in self.return_idx
]
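If the surrounding package imports cleanly, a minimal forward-pass sketch for the HRNet backbone above (the width and input size are illustrative): it consumes a dict holding an 'image' tensor and returns one feature map per entry in return_idx.

import paddle
model = HRNet(width=18, return_idx=[0, 1, 2, 3])
feats = model({'image': paddle.rand([1, 3, 224, 224])})
# Four branches at strides 4, 8, 16 and 32; channel counts match model.out_shape.
print([tuple(f.shape) for f in feats])
print([(s.channels, s.stride) for s in model.out_shape])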

@@ -0,0 +1,259 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.nn as nn
from paddle import ParamAttr
from paddle.nn import AdaptiveAvgPool2D, Conv2D
from paddle.regularizer import L2Decay
from paddle.nn.initializer import KaimingNormal
from paddlers.models.ppdet.core.workspace import register, serializable
from numbers import Integral
from ..shape_spec import ShapeSpec
__all__ = ['LCNet']
NET_CONFIG = {
"blocks2":
#k, in_c, out_c, s, use_se
[[3, 16, 32, 1, False], ],
"blocks3": [
[3, 32, 64, 2, False],
[3, 64, 64, 1, False],
],
"blocks4": [
[3, 64, 128, 2, False],
[3, 128, 128, 1, False],
],
"blocks5": [
[3, 128, 256, 2, False],
[5, 256, 256, 1, False],
[5, 256, 256, 1, False],
[5, 256, 256, 1, False],
[5, 256, 256, 1, False],
[5, 256, 256, 1, False],
],
"blocks6": [[5, 256, 512, 2, True], [5, 512, 512, 1, True]]
}
def make_divisible(v, divisor=8, min_value=None):
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
if new_v < 0.9 * v:
new_v += divisor
return new_v
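A few worked values for make_divisible above (plain arithmetic, shown only for illustration): scaled channel counts are rounded to the nearest multiple of divisor and bumped back up if the result would drop below 90% of the request.

assert make_divisible(16 * 0.35) == 8     # 5.6 rounds to 8
assert make_divisible(12) == 16           # 12 is equidistant and rounds up to 16
assert make_divisible(256 * 1.25) == 320  # 320 is already a multiple of 8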
class ConvBNLayer(nn.Layer):
def __init__(self,
num_channels,
filter_size,
num_filters,
stride,
num_groups=1):
super().__init__()
self.conv = Conv2D(
in_channels=num_channels,
out_channels=num_filters,
kernel_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=num_groups,
weight_attr=ParamAttr(initializer=KaimingNormal()),
bias_attr=False)
self.bn = nn.BatchNorm2D(
num_filters,
weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
self.hardswish = nn.Hardswish()
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.hardswish(x)
return x
class DepthwiseSeparable(nn.Layer):
def __init__(self,
num_channels,
num_filters,
stride,
dw_size=3,
use_se=False):
super().__init__()
self.use_se = use_se
self.dw_conv = ConvBNLayer(
num_channels=num_channels,
num_filters=num_channels,
filter_size=dw_size,
stride=stride,
num_groups=num_channels)
if use_se:
self.se = SEModule(num_channels)
self.pw_conv = ConvBNLayer(
num_channels=num_channels,
filter_size=1,
num_filters=num_filters,
stride=1)
def forward(self, x):
x = self.dw_conv(x)
if self.use_se:
x = self.se(x)
x = self.pw_conv(x)
return x
class SEModule(nn.Layer):
def __init__(self, channel, reduction=4):
super().__init__()
self.avg_pool = AdaptiveAvgPool2D(1)
self.conv1 = Conv2D(
in_channels=channel,
out_channels=channel // reduction,
kernel_size=1,
stride=1,
padding=0)
self.relu = nn.ReLU()
self.conv2 = Conv2D(
in_channels=channel // reduction,
out_channels=channel,
kernel_size=1,
stride=1,
padding=0)
self.hardsigmoid = nn.Hardsigmoid()
def forward(self, x):
identity = x
x = self.avg_pool(x)
x = self.conv1(x)
x = self.relu(x)
x = self.conv2(x)
x = self.hardsigmoid(x)
x = paddle.multiply(x=identity, y=x)
return x
@register
@serializable
class LCNet(nn.Layer):
def __init__(self, scale=1.0, feature_maps=[3, 4, 5]):
super().__init__()
self.scale = scale
self.feature_maps = feature_maps
out_channels = []
self.conv1 = ConvBNLayer(
num_channels=3,
filter_size=3,
num_filters=make_divisible(16 * scale),
stride=2)
self.blocks2 = nn.Sequential(*[
DepthwiseSeparable(
num_channels=make_divisible(in_c * scale),
num_filters=make_divisible(out_c * scale),
dw_size=k,
stride=s,
use_se=se)
for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks2"])
])
self.blocks3 = nn.Sequential(*[
DepthwiseSeparable(
num_channels=make_divisible(in_c * scale),
num_filters=make_divisible(out_c * scale),
dw_size=k,
stride=s,
use_se=se)
for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks3"])
])
out_channels.append(
make_divisible(NET_CONFIG["blocks3"][-1][2] * scale))
self.blocks4 = nn.Sequential(*[
DepthwiseSeparable(
num_channels=make_divisible(in_c * scale),
num_filters=make_divisible(out_c * scale),
dw_size=k,
stride=s,
use_se=se)
for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks4"])
])
out_channels.append(
make_divisible(NET_CONFIG["blocks4"][-1][2] * scale))
self.blocks5 = nn.Sequential(*[
DepthwiseSeparable(
num_channels=make_divisible(in_c * scale),
num_filters=make_divisible(out_c * scale),
dw_size=k,
stride=s,
use_se=se)
for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks5"])
])
out_channels.append(
make_divisible(NET_CONFIG["blocks5"][-1][2] * scale))
self.blocks6 = nn.Sequential(*[
DepthwiseSeparable(
num_channels=make_divisible(in_c * scale),
num_filters=make_divisible(out_c * scale),
dw_size=k,
stride=s,
use_se=se)
for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks6"])
])
out_channels.append(
make_divisible(NET_CONFIG["blocks6"][-1][2] * scale))
self._out_channels = [
ch for idx, ch in enumerate(out_channels)
if idx + 2 in feature_maps
]
def forward(self, inputs):
x = inputs['image']
outs = []
x = self.conv1(x)
x = self.blocks2(x)
x = self.blocks3(x)
outs.append(x)
x = self.blocks4(x)
outs.append(x)
x = self.blocks5(x)
outs.append(x)
x = self.blocks6(x)
outs.append(x)
outs = [o for i, o in enumerate(outs) if i + 2 in self.feature_maps]
return outs
@property
def out_shape(self):
return [ShapeSpec(channels=c) for c in self._out_channels]
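A rough usage sketch for LCNet above (assuming PaddlePaddle and the surrounding imports are available; scale and input size are illustrative). With the default feature_maps=[3, 4, 5], the outputs of blocks4, blocks5 and blocks6 are returned, i.e. strides 8, 16 and 32.

import paddle
net = LCNet(scale=1.0, feature_maps=[3, 4, 5])
feats = net({'image': paddle.rand([1, 3, 320, 320])})
# At scale 1.0: 128, 256 and 512 channels at strides 8, 16 and 32.
print([tuple(f.shape) for f in feats])
print([s.channels for s in net.out_shape])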

@@ -0,0 +1,886 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on
https://github.com/HRNet/Lite-HRNet/blob/hrnet/models/backbones/litehrnet.py
"""
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from numbers import Integral
from paddle import ParamAttr
from paddle.regularizer import L2Decay
from paddle.nn.initializer import Normal, Constant
from paddlers.models.ppdet.core.workspace import register
from paddlers.models.ppdet.modeling.shape_spec import ShapeSpec
from paddlers.models.ppdet.modeling.ops import channel_shuffle
from .. import layers as L
__all__ = ['LiteHRNet']
class ConvNormLayer(nn.Layer):
def __init__(self,
ch_in,
ch_out,
filter_size,
stride=1,
groups=1,
norm_type=None,
norm_groups=32,
norm_decay=0.,
freeze_norm=False,
act=None):
super(ConvNormLayer, self).__init__()
self.act = act
norm_lr = 0. if freeze_norm else 1.
if norm_type is not None:
assert norm_type in ['bn', 'sync_bn', 'gn'], \
"norm_type should be one of ['bn', 'sync_bn', 'gn'], but got {}".format(norm_type)
param_attr = ParamAttr(
initializer=Constant(1.0),
learning_rate=norm_lr,
regularizer=L2Decay(norm_decay), )
bias_attr = ParamAttr(
learning_rate=norm_lr, regularizer=L2Decay(norm_decay))
global_stats = True if freeze_norm else None
if norm_type in ['bn', 'sync_bn']:
self.norm = nn.BatchNorm2D(
ch_out,
weight_attr=param_attr,
bias_attr=bias_attr,
use_global_stats=global_stats, )
elif norm_type == 'gn':
self.norm = nn.GroupNorm(
num_groups=norm_groups,
num_channels=ch_out,
weight_attr=param_attr,
bias_attr=bias_attr)
norm_params = self.norm.parameters()
if freeze_norm:
for param in norm_params:
param.stop_gradient = True
conv_bias_attr = False
else:
conv_bias_attr = True
self.norm = None
self.conv = nn.Conv2D(
in_channels=ch_in,
out_channels=ch_out,
kernel_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=groups,
weight_attr=ParamAttr(initializer=Normal(
mean=0., std=0.001)),
bias_attr=conv_bias_attr)
def forward(self, inputs):
out = self.conv(inputs)
if self.norm is not None:
out = self.norm(out)
if self.act == 'relu':
out = F.relu(out)
elif self.act == 'sigmoid':
out = F.sigmoid(out)
return out
class DepthWiseSeparableConvNormLayer(nn.Layer):
def __init__(self,
ch_in,
ch_out,
filter_size,
stride=1,
dw_norm_type=None,
pw_norm_type=None,
norm_decay=0.,
freeze_norm=False,
dw_act=None,
pw_act=None):
super(DepthWiseSeparableConvNormLayer, self).__init__()
self.depthwise_conv = ConvNormLayer(
ch_in=ch_in,
ch_out=ch_in,
filter_size=filter_size,
stride=stride,
groups=ch_in,
norm_type=dw_norm_type,
act=dw_act,
norm_decay=norm_decay,
freeze_norm=freeze_norm, )
self.pointwise_conv = ConvNormLayer(
ch_in=ch_in,
ch_out=ch_out,
filter_size=1,
stride=1,
norm_type=pw_norm_type,
act=pw_act,
norm_decay=norm_decay,
freeze_norm=freeze_norm, )
def forward(self, x):
x = self.depthwise_conv(x)
x = self.pointwise_conv(x)
return x
class CrossResolutionWeightingModule(nn.Layer):
def __init__(self,
channels,
ratio=16,
norm_type='bn',
freeze_norm=False,
norm_decay=0.):
super(CrossResolutionWeightingModule, self).__init__()
self.channels = channels
total_channel = sum(channels)
self.conv1 = ConvNormLayer(
ch_in=total_channel,
ch_out=total_channel // ratio,
filter_size=1,
stride=1,
norm_type=norm_type,
act='relu',
freeze_norm=freeze_norm,
norm_decay=norm_decay)
self.conv2 = ConvNormLayer(
ch_in=total_channel // ratio,
ch_out=total_channel,
filter_size=1,
stride=1,
norm_type=norm_type,
act='sigmoid',
freeze_norm=freeze_norm,
norm_decay=norm_decay)
def forward(self, x):
mini_size = x[-1].shape[-2:]
out = [F.adaptive_avg_pool2d(s, mini_size) for s in x[:-1]] + [x[-1]]
out = paddle.concat(out, 1)
out = self.conv1(out)
out = self.conv2(out)
out = paddle.split(out, self.channels, 1)
out = [
s * F.interpolate(
a, s.shape[-2:], mode='nearest') for s, a in zip(x, out)
]
return out
class SpatialWeightingModule(nn.Layer):
def __init__(self, in_channel, ratio=16, freeze_norm=False, norm_decay=0.):
super(SpatialWeightingModule, self).__init__()
self.global_avgpooling = nn.AdaptiveAvgPool2D(1)
self.conv1 = ConvNormLayer(
ch_in=in_channel,
ch_out=in_channel // ratio,
filter_size=1,
stride=1,
act='relu',
freeze_norm=freeze_norm,
norm_decay=norm_decay)
self.conv2 = ConvNormLayer(
ch_in=in_channel // ratio,
ch_out=in_channel,
filter_size=1,
stride=1,
act='sigmoid',
freeze_norm=freeze_norm,
norm_decay=norm_decay)
def forward(self, x):
out = self.global_avgpooling(x)
out = self.conv1(out)
out = self.conv2(out)
return x * out
class ConditionalChannelWeightingBlock(nn.Layer):
def __init__(self,
in_channels,
stride,
reduce_ratio,
norm_type='bn',
freeze_norm=False,
norm_decay=0.):
super(ConditionalChannelWeightingBlock, self).__init__()
assert stride in [1, 2]
branch_channels = [channel // 2 for channel in in_channels]
self.cross_resolution_weighting = CrossResolutionWeightingModule(
branch_channels,
ratio=reduce_ratio,
norm_type=norm_type,
freeze_norm=freeze_norm,
norm_decay=norm_decay)
self.depthwise_convs = nn.LayerList([
ConvNormLayer(
channel,
channel,
filter_size=3,
stride=stride,
groups=channel,
norm_type=norm_type,
freeze_norm=freeze_norm,
norm_decay=norm_decay) for channel in branch_channels
])
self.spatial_weighting = nn.LayerList([
SpatialWeightingModule(
channel,
ratio=4,
freeze_norm=freeze_norm,
norm_decay=norm_decay) for channel in branch_channels
])
def forward(self, x):
x = [s.chunk(2, axis=1) for s in x]
x1 = [s[0] for s in x]
x2 = [s[1] for s in x]
x2 = self.cross_resolution_weighting(x2)
x2 = [dw(s) for s, dw in zip(x2, self.depthwise_convs)]
x2 = [sw(s) for s, sw in zip(x2, self.spatial_weighting)]
out = [paddle.concat([s1, s2], axis=1) for s1, s2 in zip(x1, x2)]
out = [channel_shuffle(s, groups=2) for s in out]
return out
class ShuffleUnit(nn.Layer):
def __init__(self,
in_channel,
out_channel,
stride,
norm_type='bn',
freeze_norm=False,
norm_decay=0.):
super(ShuffleUnit, self).__init__()
branch_channel = out_channel // 2
self.stride = stride
if self.stride == 1:
assert in_channel == branch_channel * 2, \
"when stride=1, in_channel {} should equal to branch_channel*2 {}".format(in_channel, branch_channel * 2)
if stride > 1:
self.branch1 = nn.Sequential(
ConvNormLayer(
ch_in=in_channel,
ch_out=in_channel,
filter_size=3,
stride=self.stride,
groups=in_channel,
norm_type=norm_type,
freeze_norm=freeze_norm,
norm_decay=norm_decay),
ConvNormLayer(
ch_in=in_channel,
ch_out=branch_channel,
filter_size=1,
stride=1,
norm_type=norm_type,
act='relu',
freeze_norm=freeze_norm,
norm_decay=norm_decay), )
self.branch2 = nn.Sequential(
ConvNormLayer(
ch_in=branch_channel if stride == 1 else in_channel,
ch_out=branch_channel,
filter_size=1,
stride=1,
norm_type=norm_type,
act='relu',
freeze_norm=freeze_norm,
norm_decay=norm_decay),
ConvNormLayer(
ch_in=branch_channel,
ch_out=branch_channel,
filter_size=3,
stride=self.stride,
groups=branch_channel,
norm_type=norm_type,
freeze_norm=freeze_norm,
norm_decay=norm_decay),
ConvNormLayer(
ch_in=branch_channel,
ch_out=branch_channel,
filter_size=1,
stride=1,
norm_type=norm_type,
act='relu',
freeze_norm=freeze_norm,
norm_decay=norm_decay), )
def forward(self, x):
if self.stride > 1:
x1 = self.branch1(x)
x2 = self.branch2(x)
else:
x1, x2 = x.chunk(2, axis=1)
x2 = self.branch2(x2)
out = paddle.concat([x1, x2], axis=1)
out = channel_shuffle(out, groups=2)
return out
class IterativeHead(nn.Layer):
def __init__(self,
in_channels,
norm_type='bn',
freeze_norm=False,
norm_decay=0.):
super(IterativeHead, self).__init__()
num_branches = len(in_channels)
self.in_channels = in_channels[::-1]
projects = []
for i in range(num_branches):
if i != num_branches - 1:
projects.append(
DepthWiseSeparableConvNormLayer(
ch_in=self.in_channels[i],
ch_out=self.in_channels[i + 1],
filter_size=3,
stride=1,
dw_act=None,
pw_act='relu',
dw_norm_type=norm_type,
pw_norm_type=norm_type,
freeze_norm=freeze_norm,
norm_decay=norm_decay))
else:
projects.append(
DepthWiseSeparableConvNormLayer(
ch_in=self.in_channels[i],
ch_out=self.in_channels[i],
filter_size=3,
stride=1,
dw_act=None,
pw_act='relu',
dw_norm_type=norm_type,
pw_norm_type=norm_type,
freeze_norm=freeze_norm,
norm_decay=norm_decay))
self.projects = nn.LayerList(projects)
def forward(self, x):
x = x[::-1]
y = []
last_x = None
for i, s in enumerate(x):
if last_x is not None:
last_x = F.interpolate(
last_x,
size=s.shape[-2:],
mode='bilinear',
align_corners=True)
s = s + last_x
s = self.projects[i](s)
y.append(s)
last_x = s
return y[::-1]
class Stem(nn.Layer):
def __init__(self,
in_channel,
stem_channel,
out_channel,
expand_ratio,
norm_type='bn',
freeze_norm=False,
norm_decay=0.):
super(Stem, self).__init__()
self.conv1 = ConvNormLayer(
in_channel,
stem_channel,
filter_size=3,
stride=2,
norm_type=norm_type,
act='relu',
freeze_norm=freeze_norm,
norm_decay=norm_decay)
mid_channel = int(round(stem_channel * expand_ratio))
branch_channel = stem_channel // 2
if stem_channel == out_channel:
inc_channel = out_channel - branch_channel
else:
inc_channel = out_channel - stem_channel
self.branch1 = nn.Sequential(
ConvNormLayer(
ch_in=branch_channel,
ch_out=branch_channel,
filter_size=3,
stride=2,
groups=branch_channel,
norm_type=norm_type,
freeze_norm=freeze_norm,
norm_decay=norm_decay),
ConvNormLayer(
ch_in=branch_channel,
ch_out=inc_channel,
filter_size=1,
stride=1,
norm_type=norm_type,
act='relu',
freeze_norm=freeze_norm,
norm_decay=norm_decay), )
self.expand_conv = ConvNormLayer(
ch_in=branch_channel,
ch_out=mid_channel,
filter_size=1,
stride=1,
norm_type=norm_type,
act='relu',
freeze_norm=freeze_norm,
norm_decay=norm_decay)
self.depthwise_conv = ConvNormLayer(
ch_in=mid_channel,
ch_out=mid_channel,
filter_size=3,
stride=2,
groups=mid_channel,
norm_type=norm_type,
freeze_norm=freeze_norm,
norm_decay=norm_decay)
self.linear_conv = ConvNormLayer(
ch_in=mid_channel,
ch_out=branch_channel
if stem_channel == out_channel else stem_channel,
filter_size=1,
stride=1,
norm_type=norm_type,
act='relu',
freeze_norm=freeze_norm,
norm_decay=norm_decay)
def forward(self, x):
x = self.conv1(x)
x1, x2 = x.chunk(2, axis=1)
x1 = self.branch1(x1)
x2 = self.expand_conv(x2)
x2 = self.depthwise_conv(x2)
x2 = self.linear_conv(x2)
out = paddle.concat([x1, x2], axis=1)
out = channel_shuffle(out, groups=2)
return out
class LiteHRNetModule(nn.Layer):
def __init__(self,
num_branches,
num_blocks,
in_channels,
reduce_ratio,
module_type,
multiscale_output=False,
with_fuse=True,
norm_type='bn',
freeze_norm=False,
norm_decay=0.):
super(LiteHRNetModule, self).__init__()
assert num_branches == len(in_channels),\
"num_branches {} should equal to num_in_channels {}".format(num_branches, len(in_channels))
assert module_type in [
'LITE', 'NAIVE'
], "module_type should be one of ['LITE', 'NAIVE']"
self.num_branches = num_branches
self.in_channels = in_channels
self.multiscale_output = multiscale_output
self.with_fuse = with_fuse
self.norm_type = 'bn'
self.module_type = module_type
if self.module_type == 'LITE':
self.layers = self._make_weighting_blocks(
num_blocks,
reduce_ratio,
freeze_norm=freeze_norm,
norm_decay=norm_decay)
elif self.module_type == 'NAIVE':
self.layers = self._make_naive_branches(
num_branches,
num_blocks,
freeze_norm=freeze_norm,
norm_decay=norm_decay)
if self.with_fuse:
self.fuse_layers = self._make_fuse_layers(
freeze_norm=freeze_norm, norm_decay=norm_decay)
self.relu = nn.ReLU()
def _make_weighting_blocks(self,
num_blocks,
reduce_ratio,
stride=1,
freeze_norm=False,
norm_decay=0.):
layers = []
for i in range(num_blocks):
layers.append(
ConditionalChannelWeightingBlock(
self.in_channels,
stride=stride,
reduce_ratio=reduce_ratio,
norm_type=self.norm_type,
freeze_norm=freeze_norm,
norm_decay=norm_decay))
return nn.Sequential(*layers)
def _make_naive_branches(self,
num_branches,
num_blocks,
freeze_norm=False,
norm_decay=0.):
branches = []
for branch_idx in range(num_branches):
layers = []
for i in range(num_blocks):
layers.append(
ShuffleUnit(
self.in_channels[branch_idx],
self.in_channels[branch_idx],
stride=1,
norm_type=self.norm_type,
freeze_norm=freeze_norm,
norm_decay=norm_decay))
branches.append(nn.Sequential(*layers))
return nn.LayerList(branches)
def _make_fuse_layers(self, freeze_norm=False, norm_decay=0.):
if self.num_branches == 1:
return None
fuse_layers = []
num_out_branches = self.num_branches if self.multiscale_output else 1
for i in range(num_out_branches):
fuse_layer = []
for j in range(self.num_branches):
if j > i:
fuse_layer.append(
nn.Sequential(
L.Conv2d(
self.in_channels[j],
self.in_channels[i],
kernel_size=1,
stride=1,
padding=0,
bias=False, ),
nn.BatchNorm2D(self.in_channels[i]),
nn.Upsample(
scale_factor=2**(j - i), mode='nearest')))
elif j == i:
fuse_layer.append(None)
else:
conv_downsamples = []
for k in range(i - j):
if k == i - j - 1:
conv_downsamples.append(
nn.Sequential(
L.Conv2d(
self.in_channels[j],
self.in_channels[j],
kernel_size=3,
stride=2,
padding=1,
groups=self.in_channels[j],
bias=False, ),
nn.BatchNorm2D(self.in_channels[j]),
L.Conv2d(
self.in_channels[j],
self.in_channels[i],
kernel_size=1,
stride=1,
padding=0,
bias=False, ),
nn.BatchNorm2D(self.in_channels[i])))
else:
conv_downsamples.append(
nn.Sequential(
L.Conv2d(
self.in_channels[j],
self.in_channels[j],
kernel_size=3,
stride=2,
padding=1,
groups=self.in_channels[j],
bias=False, ),
nn.BatchNorm2D(self.in_channels[j]),
L.Conv2d(
self.in_channels[j],
self.in_channels[j],
kernel_size=1,
stride=1,
padding=0,
bias=False, ),
nn.BatchNorm2D(self.in_channels[j]),
nn.ReLU()))
fuse_layer.append(nn.Sequential(*conv_downsamples))
fuse_layers.append(nn.LayerList(fuse_layer))
return nn.LayerList(fuse_layers)
def forward(self, x):
if self.num_branches == 1:
return [self.layers[0](x[0])]
if self.module_type == 'LITE':
out = self.layers(x)
elif self.module_type == 'NAIVE':
for i in range(self.num_branches):
x[i] = self.layers[i](x[i])
out = x
if self.with_fuse:
out_fuse = []
for i in range(len(self.fuse_layers)):
y = out[0] if i == 0 else self.fuse_layers[i][0](out[0])
for j in range(self.num_branches):
if j == 0:
y += y
elif i == j:
y += out[j]
else:
y += self.fuse_layers[i][j](out[j])
if i == 0:
out[i] = y
out_fuse.append(self.relu(y))
out = out_fuse
elif not self.multiscale_output:
out = [out[0]]
return out
@register
class LiteHRNet(nn.Layer):
"""
@inproceedings{Yulitehrnet21,
title={Lite-HRNet: A Lightweight High-Resolution Network},
author={Yu, Changqian and Xiao, Bin and Gao, Changxin and Yuan, Lu and Zhang, Lei and Sang, Nong and Wang, Jingdong},
booktitle={CVPR},year={2021}
}
Args:
network_type (str): the network_type should be one of ["lite_18", "lite_30", "naive", "wider_naive"],
"naive": Simply combining the shuffle block in ShuffleNet and the highresolution design pattern in HRNet.
"wider_naive": Naive network with wider channels in each block.
"lite_18": Lite-HRNet-18, which replaces the pointwise convolution in a shuffle block by conditional channel weighting.
"lite_30": Lite-HRNet-30, with more blocks compared with Lite-HRNet-18.
freeze_at (int): the stage to freeze
freeze_norm (bool): whether to freeze norm in HRNet
norm_decay (float): weight decay for normalization layer weights
return_idx (List): the stage to return
"""
def __init__(self,
network_type,
freeze_at=0,
freeze_norm=True,
norm_decay=0.,
return_idx=[0, 1, 2, 3]):
super(LiteHRNet, self).__init__()
if isinstance(return_idx, Integral):
return_idx = [return_idx]
assert network_type in ["lite_18", "lite_30", "naive", "wider_naive"], \
"the network_type should be one of [lite_18, lite_30, naive, wider_naive]"
assert len(return_idx) > 0, "need one or more return index"
self.freeze_at = freeze_at
self.freeze_norm = freeze_norm
self.norm_decay = norm_decay
self.return_idx = return_idx
self.norm_type = 'bn'
self.module_configs = {
"lite_18": {
"num_modules": [2, 4, 2],
"num_branches": [2, 3, 4],
"num_blocks": [2, 2, 2],
"module_type": ["LITE", "LITE", "LITE"],
"reduce_ratios": [8, 8, 8],
"num_channels": [[40, 80], [40, 80, 160], [40, 80, 160, 320]],
},
"lite_30": {
"num_modules": [3, 8, 3],
"num_branches": [2, 3, 4],
"num_blocks": [2, 2, 2],
"module_type": ["LITE", "LITE", "LITE"],
"reduce_ratios": [8, 8, 8],
"num_channels": [[40, 80], [40, 80, 160], [40, 80, 160, 320]],
},
"naive": {
"num_modules": [2, 4, 2],
"num_branches": [2, 3, 4],
"num_blocks": [2, 2, 2],
"module_type": ["NAIVE", "NAIVE", "NAIVE"],
"reduce_ratios": [1, 1, 1],
"num_channels": [[30, 60], [30, 60, 120], [30, 60, 120, 240]],
},
"wider_naive": {
"num_modules": [2, 4, 2],
"num_branches": [2, 3, 4],
"num_blocks": [2, 2, 2],
"module_type": ["NAIVE", "NAIVE", "NAIVE"],
"reduce_ratios": [1, 1, 1],
"num_channels": [[40, 80], [40, 80, 160], [40, 80, 160, 320]],
},
}
self.stages_config = self.module_configs[network_type]
self.stem = Stem(3, 32, 32, 1)
num_channels_pre_layer = [32]
for stage_idx in range(3):
num_channels = self.stages_config["num_channels"][stage_idx]
setattr(self, 'transition{}'.format(stage_idx),
self._make_transition_layer(num_channels_pre_layer,
num_channels, self.freeze_norm,
self.norm_decay))
stage, num_channels_pre_layer = self._make_stage(
self.stages_config, stage_idx, num_channels, True,
self.freeze_norm, self.norm_decay)
setattr(self, 'stage{}'.format(stage_idx), stage)
self.head_layer = IterativeHead(num_channels_pre_layer, 'bn',
self.freeze_norm, self.norm_decay)
def _make_transition_layer(self,
num_channels_pre_layer,
num_channels_cur_layer,
freeze_norm=False,
norm_decay=0.):
num_branches_pre = len(num_channels_pre_layer)
num_branches_cur = len(num_channels_cur_layer)
transition_layers = []
for i in range(num_branches_cur):
if i < num_branches_pre:
if num_channels_cur_layer[i] != num_channels_pre_layer[i]:
transition_layers.append(
nn.Sequential(
L.Conv2d(
num_channels_pre_layer[i],
num_channels_pre_layer[i],
kernel_size=3,
stride=1,
padding=1,
groups=num_channels_pre_layer[i],
bias=False),
nn.BatchNorm2D(num_channels_pre_layer[i]),
L.Conv2d(
num_channels_pre_layer[i],
num_channels_cur_layer[i],
kernel_size=1,
stride=1,
padding=0,
bias=False, ),
nn.BatchNorm2D(num_channels_cur_layer[i]),
nn.ReLU()))
else:
transition_layers.append(None)
else:
conv_downsamples = []
for j in range(i + 1 - num_branches_pre):
conv_downsamples.append(
nn.Sequential(
L.Conv2d(
num_channels_pre_layer[-1],
num_channels_pre_layer[-1],
groups=num_channels_pre_layer[-1],
kernel_size=3,
stride=2,
padding=1,
bias=False, ),
nn.BatchNorm2D(num_channels_pre_layer[-1]),
L.Conv2d(
num_channels_pre_layer[-1],
num_channels_cur_layer[i]
if j == i - num_branches_pre else
num_channels_pre_layer[-1],
kernel_size=1,
stride=1,
padding=0,
bias=False, ),
nn.BatchNorm2D(num_channels_cur_layer[i]
if j == i - num_branches_pre else
num_channels_pre_layer[-1]),
nn.ReLU()))
transition_layers.append(nn.Sequential(*conv_downsamples))
return nn.LayerList(transition_layers)
def _make_stage(self,
stages_config,
stage_idx,
in_channels,
multiscale_output,
freeze_norm=False,
norm_decay=0.):
num_modules = stages_config["num_modules"][stage_idx]
num_branches = stages_config["num_branches"][stage_idx]
num_blocks = stages_config["num_blocks"][stage_idx]
reduce_ratio = stages_config['reduce_ratios'][stage_idx]
module_type = stages_config['module_type'][stage_idx]
modules = []
for i in range(num_modules):
if not multiscale_output and i == num_modules - 1:
reset_multiscale_output = False
else:
reset_multiscale_output = True
modules.append(
LiteHRNetModule(
num_branches,
num_blocks,
in_channels,
reduce_ratio,
module_type,
multiscale_output=reset_multiscale_output,
with_fuse=True,
freeze_norm=freeze_norm,
norm_decay=norm_decay))
in_channels = modules[-1].in_channels
return nn.Sequential(*modules), in_channels
def forward(self, inputs):
x = inputs['image']
x = self.stem(x)
y_list = [x]
for stage_idx in range(3):
x_list = []
transition = getattr(self, 'transition{}'.format(stage_idx))
for j in range(self.stages_config["num_branches"][stage_idx]):
if transition[j] is not None:
if j >= len(y_list):
x_list.append(transition[j](y_list[-1]))
else:
x_list.append(transition[j](y_list[j]))
else:
x_list.append(y_list[j])
y_list = getattr(self, 'stage{}'.format(stage_idx))(x_list)
x = self.head_layer(y_list)
res = []
for i, layer in enumerate(x):
if i == self.freeze_at:
layer.stop_gradient = True
if i in self.return_idx:
res.append(layer)
return res
@property
def out_shape(self):
return [
ShapeSpec(
channels=self._out_channels[i], stride=self._out_strides[i])
for i in self.return_idx
]
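A forward-only sketch for LiteHRNet above (assuming the surrounding package imports cleanly; the network_type and input size are illustrative). Like the other backbones in this module, it takes a dict with an 'image' tensor and returns the branches listed in return_idx, from stride 4 down to stride 32.

import paddle
net = LiteHRNet('lite_18', return_idx=[0, 1, 2, 3])
feats = net({'image': paddle.rand([1, 3, 256, 256])})
print([tuple(f.shape) for f in feats])  # one tensor per kept branch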

@@ -0,0 +1,411 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
from paddle.regularizer import L2Decay
from paddle.nn.initializer import KaimingNormal
from paddlers.models.ppdet.core.workspace import register, serializable
from numbers import Integral
from ..shape_spec import ShapeSpec
__all__ = ['MobileNet']
class ConvBNLayer(nn.Layer):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride,
padding,
num_groups=1,
act='relu',
conv_lr=1.,
conv_decay=0.,
norm_decay=0.,
norm_type='bn',
name=None):
super(ConvBNLayer, self).__init__()
self.act = act
self._conv = nn.Conv2D(
in_channels,
out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
groups=num_groups,
weight_attr=ParamAttr(
learning_rate=conv_lr,
initializer=KaimingNormal(),
regularizer=L2Decay(conv_decay)),
bias_attr=False)
param_attr = ParamAttr(regularizer=L2Decay(norm_decay))
bias_attr = ParamAttr(regularizer=L2Decay(norm_decay))
if norm_type in ['sync_bn', 'bn']:
self._batch_norm = nn.BatchNorm2D(
out_channels, weight_attr=param_attr, bias_attr=bias_attr)
def forward(self, x):
x = self._conv(x)
x = self._batch_norm(x)
if self.act == "relu":
x = F.relu(x)
elif self.act == "relu6":
x = F.relu6(x)
return x
class DepthwiseSeparable(nn.Layer):
def __init__(self,
in_channels,
out_channels1,
out_channels2,
num_groups,
stride,
scale,
conv_lr=1.,
conv_decay=0.,
norm_decay=0.,
norm_type='bn',
name=None):
super(DepthwiseSeparable, self).__init__()
self._depthwise_conv = ConvBNLayer(
in_channels,
int(out_channels1 * scale),
kernel_size=3,
stride=stride,
padding=1,
num_groups=int(num_groups * scale),
conv_lr=conv_lr,
conv_decay=conv_decay,
norm_decay=norm_decay,
norm_type=norm_type,
name=name + "_dw")
self._pointwise_conv = ConvBNLayer(
int(out_channels1 * scale),
int(out_channels2 * scale),
kernel_size=1,
stride=1,
padding=0,
conv_lr=conv_lr,
conv_decay=conv_decay,
norm_decay=norm_decay,
norm_type=norm_type,
name=name + "_sep")
def forward(self, x):
x = self._depthwise_conv(x)
x = self._pointwise_conv(x)
return x
class ExtraBlock(nn.Layer):
def __init__(self,
in_channels,
out_channels1,
out_channels2,
num_groups=1,
stride=2,
conv_lr=1.,
conv_decay=0.,
norm_decay=0.,
norm_type='bn',
name=None):
super(ExtraBlock, self).__init__()
self.pointwise_conv = ConvBNLayer(
in_channels,
int(out_channels1),
kernel_size=1,
stride=1,
padding=0,
num_groups=int(num_groups),
act='relu6',
conv_lr=conv_lr,
conv_decay=conv_decay,
norm_decay=norm_decay,
norm_type=norm_type,
name=name + "_extra1")
self.normal_conv = ConvBNLayer(
int(out_channels1),
int(out_channels2),
kernel_size=3,
stride=stride,
padding=1,
num_groups=int(num_groups),
act='relu6',
conv_lr=conv_lr,
conv_decay=conv_decay,
norm_decay=norm_decay,
norm_type=norm_type,
name=name + "_extra2")
def forward(self, x):
x = self.pointwise_conv(x)
x = self.normal_conv(x)
return x
@register
@serializable
class MobileNet(nn.Layer):
__shared__ = ['norm_type']
def __init__(self,
norm_type='bn',
norm_decay=0.,
conv_decay=0.,
scale=1,
conv_learning_rate=1.0,
feature_maps=[4, 6, 13],
with_extra_blocks=False,
extra_block_filters=[[256, 512], [128, 256], [128, 256],
[64, 128]]):
super(MobileNet, self).__init__()
if isinstance(feature_maps, Integral):
feature_maps = [feature_maps]
self.feature_maps = feature_maps
self.with_extra_blocks = with_extra_blocks
self.extra_block_filters = extra_block_filters
self._out_channels = []
self.conv1 = ConvBNLayer(
in_channels=3,
out_channels=int(32 * scale),
kernel_size=3,
stride=2,
padding=1,
conv_lr=conv_learning_rate,
conv_decay=conv_decay,
norm_decay=norm_decay,
norm_type=norm_type,
name="conv1")
self.dwsl = []
dws21 = self.add_sublayer(
"conv2_1",
sublayer=DepthwiseSeparable(
in_channels=int(32 * scale),
out_channels1=32,
out_channels2=64,
num_groups=32,
stride=1,
scale=scale,
conv_lr=conv_learning_rate,
conv_decay=conv_decay,
norm_decay=norm_decay,
norm_type=norm_type,
name="conv2_1"))
self.dwsl.append(dws21)
self._update_out_channels(
int(64 * scale), len(self.dwsl), feature_maps)
dws22 = self.add_sublayer(
"conv2_2",
sublayer=DepthwiseSeparable(
in_channels=int(64 * scale),
out_channels1=64,
out_channels2=128,
num_groups=64,
stride=2,
scale=scale,
conv_lr=conv_learning_rate,
conv_decay=conv_decay,
norm_decay=norm_decay,
norm_type=norm_type,
name="conv2_2"))
self.dwsl.append(dws22)
self._update_out_channels(
int(128 * scale), len(self.dwsl), feature_maps)
# 1/4
dws31 = self.add_sublayer(
"conv3_1",
sublayer=DepthwiseSeparable(
in_channels=int(128 * scale),
out_channels1=128,
out_channels2=128,
num_groups=128,
stride=1,
scale=scale,
conv_lr=conv_learning_rate,
conv_decay=conv_decay,
norm_decay=norm_decay,
norm_type=norm_type,
name="conv3_1"))
self.dwsl.append(dws31)
self._update_out_channels(
int(128 * scale), len(self.dwsl), feature_maps)
dws32 = self.add_sublayer(
"conv3_2",
sublayer=DepthwiseSeparable(
in_channels=int(128 * scale),
out_channels1=128,
out_channels2=256,
num_groups=128,
stride=2,
scale=scale,
conv_lr=conv_learning_rate,
conv_decay=conv_decay,
norm_decay=norm_decay,
norm_type=norm_type,
name="conv3_2"))
self.dwsl.append(dws32)
self._update_out_channels(
int(256 * scale), len(self.dwsl), feature_maps)
# 1/8
dws41 = self.add_sublayer(
"conv4_1",
sublayer=DepthwiseSeparable(
in_channels=int(256 * scale),
out_channels1=256,
out_channels2=256,
num_groups=256,
stride=1,
scale=scale,
conv_lr=conv_learning_rate,
conv_decay=conv_decay,
norm_decay=norm_decay,
norm_type=norm_type,
name="conv4_1"))
self.dwsl.append(dws41)
self._update_out_channels(
int(256 * scale), len(self.dwsl), feature_maps)
dws42 = self.add_sublayer(
"conv4_2",
sublayer=DepthwiseSeparable(
in_channels=int(256 * scale),
out_channels1=256,
out_channels2=512,
num_groups=256,
stride=2,
scale=scale,
conv_lr=conv_learning_rate,
conv_decay=conv_decay,
norm_decay=norm_decay,
norm_type=norm_type,
name="conv4_2"))
self.dwsl.append(dws42)
self._update_out_channels(
int(512 * scale), len(self.dwsl), feature_maps)
# 1/16
for i in range(5):
tmp = self.add_sublayer(
"conv5_" + str(i + 1),
sublayer=DepthwiseSeparable(
in_channels=int(512 * scale),
out_channels1=512,
out_channels2=512,
num_groups=512,
stride=1,
scale=scale,
conv_lr=conv_learning_rate,
conv_decay=conv_decay,
norm_decay=norm_decay,
norm_type=norm_type,
name="conv5_" + str(i + 1)))
self.dwsl.append(tmp)
self._update_out_channels(
int(512 * scale), len(self.dwsl), feature_maps)
dws56 = self.add_sublayer(
"conv5_6",
sublayer=DepthwiseSeparable(
in_channels=int(512 * scale),
out_channels1=512,
out_channels2=1024,
num_groups=512,
stride=2,
scale=scale,
conv_lr=conv_learning_rate,
conv_decay=conv_decay,
norm_decay=norm_decay,
norm_type=norm_type,
name="conv5_6"))
self.dwsl.append(dws56)
self._update_out_channels(
int(1024 * scale), len(self.dwsl), feature_maps)
# 1/32
dws6 = self.add_sublayer(
"conv6",
sublayer=DepthwiseSeparable(
in_channels=int(1024 * scale),
out_channels1=1024,
out_channels2=1024,
num_groups=1024,
stride=1,
scale=scale,
conv_lr=conv_learning_rate,
conv_decay=conv_decay,
norm_decay=norm_decay,
norm_type=norm_type,
name="conv6"))
self.dwsl.append(dws6)
self._update_out_channels(
int(1024 * scale), len(self.dwsl), feature_maps)
if self.with_extra_blocks:
self.extra_blocks = []
for i, block_filter in enumerate(self.extra_block_filters):
in_c = 1024 if i == 0 else self.extra_block_filters[i - 1][1]
conv_extra = self.add_sublayer(
"conv7_" + str(i + 1),
sublayer=ExtraBlock(
in_c,
block_filter[0],
block_filter[1],
conv_lr=conv_learning_rate,
conv_decay=conv_decay,
norm_decay=norm_decay,
norm_type=norm_type,
name="conv7_" + str(i + 1)))
self.extra_blocks.append(conv_extra)
self._update_out_channels(
block_filter[1],
len(self.dwsl) + len(self.extra_blocks), feature_maps)
def _update_out_channels(self, channel, feature_idx, feature_maps):
if feature_idx in feature_maps:
self._out_channels.append(channel)
def forward(self, inputs):
outs = []
y = self.conv1(inputs['image'])
for i, block in enumerate(self.dwsl):
y = block(y)
if i + 1 in self.feature_maps:
outs.append(y)
if not self.with_extra_blocks:
return outs
y = outs[-1]
for i, block in enumerate(self.extra_blocks):
idx = i + len(self.dwsl)
y = block(y)
if idx + 1 in self.feature_maps:
outs.append(y)
return outs
@property
def out_shape(self):
return [ShapeSpec(channels=c) for c in self._out_channels]
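A minimal usage sketch for the MobileNet (v1) backbone above (assuming the surrounding imports resolve; scale and input size are illustrative). feature_maps counts the depthwise-separable blocks starting at 1, so the default [4, 6, 13] keeps the conv3_2, conv4_2 and conv6 outputs.

import paddle
net = MobileNet(scale=1.0, feature_maps=[4, 6, 13])
feats = net({'image': paddle.rand([1, 3, 320, 320])})
# 256, 512 and 1024 channels at strides 8, 16 and 32 for scale 1.0.
print([tuple(f.shape) for f in feats])
print([s.channels for s in net.out_shape])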

@@ -0,0 +1,479 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
from paddle.regularizer import L2Decay
from paddlers.models.ppdet.core.workspace import register, serializable
from numbers import Integral
from ..shape_spec import ShapeSpec
__all__ = ['MobileNetV3']
def make_divisible(v, divisor=8, min_value=None):
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
if new_v < 0.9 * v:
new_v += divisor
return new_v
class ConvBNLayer(nn.Layer):
def __init__(self,
in_c,
out_c,
filter_size,
stride,
padding,
num_groups=1,
act=None,
lr_mult=1.,
conv_decay=0.,
norm_type='bn',
norm_decay=0.,
freeze_norm=False,
name=""):
super(ConvBNLayer, self).__init__()
self.act = act
self.conv = nn.Conv2D(
in_channels=in_c,
out_channels=out_c,
kernel_size=filter_size,
stride=stride,
padding=padding,
groups=num_groups,
weight_attr=ParamAttr(
learning_rate=lr_mult, regularizer=L2Decay(conv_decay)),
bias_attr=False)
norm_lr = 0. if freeze_norm else lr_mult
param_attr = ParamAttr(
learning_rate=norm_lr,
regularizer=L2Decay(norm_decay),
trainable=False if freeze_norm else True)
bias_attr = ParamAttr(
learning_rate=norm_lr,
regularizer=L2Decay(norm_decay),
trainable=False if freeze_norm else True)
global_stats = True if freeze_norm else None
if norm_type in ['sync_bn', 'bn']:
self.bn = nn.BatchNorm2D(
out_c,
weight_attr=param_attr,
bias_attr=bias_attr,
use_global_stats=global_stats)
norm_params = self.bn.parameters()
if freeze_norm:
for param in norm_params:
param.stop_gradient = True
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
if self.act is not None:
if self.act == "relu":
x = F.relu(x)
elif self.act == "relu6":
x = F.relu6(x)
elif self.act == "hard_swish":
x = F.hardswish(x)
else:
                raise NotImplementedError(
                    "Unsupported activation function: {}.".format(self.act))
return x
class ResidualUnit(nn.Layer):
def __init__(self,
in_c,
mid_c,
out_c,
filter_size,
stride,
use_se,
lr_mult,
conv_decay=0.,
norm_type='bn',
norm_decay=0.,
freeze_norm=False,
act=None,
return_list=False,
name=''):
super(ResidualUnit, self).__init__()
self.if_shortcut = stride == 1 and in_c == out_c
self.use_se = use_se
self.return_list = return_list
self.expand_conv = ConvBNLayer(
in_c=in_c,
out_c=mid_c,
filter_size=1,
stride=1,
padding=0,
act=act,
lr_mult=lr_mult,
conv_decay=conv_decay,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
name=name + "_expand")
self.bottleneck_conv = ConvBNLayer(
in_c=mid_c,
out_c=mid_c,
filter_size=filter_size,
stride=stride,
padding=int((filter_size - 1) // 2),
num_groups=mid_c,
act=act,
lr_mult=lr_mult,
conv_decay=conv_decay,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
name=name + "_depthwise")
if self.use_se:
self.mid_se = SEModule(
mid_c, lr_mult, conv_decay, name=name + "_se")
self.linear_conv = ConvBNLayer(
in_c=mid_c,
out_c=out_c,
filter_size=1,
stride=1,
padding=0,
act=None,
lr_mult=lr_mult,
conv_decay=conv_decay,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
name=name + "_linear")
def forward(self, inputs):
y = self.expand_conv(inputs)
x = self.bottleneck_conv(y)
if self.use_se:
x = self.mid_se(x)
x = self.linear_conv(x)
if self.if_shortcut:
x = paddle.add(inputs, x)
if self.return_list:
return [y, x]
else:
return x
class SEModule(nn.Layer):
def __init__(self, channel, lr_mult, conv_decay, reduction=4, name=""):
super(SEModule, self).__init__()
self.avg_pool = nn.AdaptiveAvgPool2D(1)
mid_channels = int(channel // reduction)
self.conv1 = nn.Conv2D(
in_channels=channel,
out_channels=mid_channels,
kernel_size=1,
stride=1,
padding=0,
weight_attr=ParamAttr(
learning_rate=lr_mult, regularizer=L2Decay(conv_decay)),
bias_attr=ParamAttr(
learning_rate=lr_mult, regularizer=L2Decay(conv_decay)))
self.conv2 = nn.Conv2D(
in_channels=mid_channels,
out_channels=channel,
kernel_size=1,
stride=1,
padding=0,
weight_attr=ParamAttr(
learning_rate=lr_mult, regularizer=L2Decay(conv_decay)),
bias_attr=ParamAttr(
learning_rate=lr_mult, regularizer=L2Decay(conv_decay)))
def forward(self, inputs):
outputs = self.avg_pool(inputs)
outputs = self.conv1(outputs)
outputs = F.relu(outputs)
outputs = self.conv2(outputs)
outputs = F.hardsigmoid(outputs, slope=0.2, offset=0.5)
return paddle.multiply(x=inputs, y=outputs)
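# Illustrative note (editorial addition): SEModule implements squeeze-and-excitation
# channel gating. For an input of shape [1, 64, 56, 56] with reduction=4 the data
# flow is, roughly:
#   avg_pool    -> [1, 64, 1, 1]   (squeeze the spatial dims)
#   conv1+relu  -> [1, 16, 1, 1]   (64 // 4 bottleneck)
#   conv2       -> [1, 64, 1, 1]
#   hardsigmoid -> per-channel gates in [0, 1]
#   multiply    -> gates broadcast over H and W, rescaling the input channels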
class ExtraBlockDW(nn.Layer):
def __init__(self,
in_c,
ch_1,
ch_2,
stride,
lr_mult,
conv_decay=0.,
norm_type='bn',
norm_decay=0.,
freeze_norm=False,
name=None):
super(ExtraBlockDW, self).__init__()
self.pointwise_conv = ConvBNLayer(
in_c=in_c,
out_c=ch_1,
filter_size=1,
stride=1,
padding='SAME',
act='relu6',
lr_mult=lr_mult,
conv_decay=conv_decay,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
name=name + "_extra1")
self.depthwise_conv = ConvBNLayer(
in_c=ch_1,
out_c=ch_2,
filter_size=3,
stride=stride,
padding='SAME',
num_groups=int(ch_1),
act='relu6',
lr_mult=lr_mult,
conv_decay=conv_decay,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
name=name + "_extra2_dw")
self.normal_conv = ConvBNLayer(
in_c=ch_2,
out_c=ch_2,
filter_size=1,
stride=1,
padding='SAME',
act='relu6',
lr_mult=lr_mult,
conv_decay=conv_decay,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
name=name + "_extra2_sep")
def forward(self, inputs):
x = self.pointwise_conv(inputs)
x = self.depthwise_conv(x)
x = self.normal_conv(x)
return x
@register
@serializable
class MobileNetV3(nn.Layer):
__shared__ = ['norm_type']
def __init__(self,
scale=1.0,
model_name="large",
feature_maps=[6, 12, 15],
with_extra_blocks=False,
extra_block_filters=[[256, 512], [128, 256], [128, 256],
[64, 128]],
lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0],
conv_decay=0.0,
multiplier=1.0,
norm_type='bn',
norm_decay=0.0,
freeze_norm=False):
super(MobileNetV3, self).__init__()
if isinstance(feature_maps, Integral):
feature_maps = [feature_maps]
if norm_type == 'sync_bn' and freeze_norm:
raise ValueError(
"The norm_type should not be sync_bn when freeze_norm is True")
self.feature_maps = feature_maps
self.with_extra_blocks = with_extra_blocks
self.extra_block_filters = extra_block_filters
inplanes = 16
if model_name == "large":
self.cfg = [
# k, exp, c, se, nl, s,
[3, 16, 16, False, "relu", 1],
[3, 64, 24, False, "relu", 2],
[3, 72, 24, False, "relu", 1],
[5, 72, 40, True, "relu", 2], # RCNN output
[5, 120, 40, True, "relu", 1],
[5, 120, 40, True, "relu", 1], # YOLOv3 output
[3, 240, 80, False, "hard_swish", 2], # RCNN output
[3, 200, 80, False, "hard_swish", 1],
[3, 184, 80, False, "hard_swish", 1],
[3, 184, 80, False, "hard_swish", 1],
[3, 480, 112, True, "hard_swish", 1],
[3, 672, 112, True, "hard_swish", 1], # YOLOv3 output
[5, 672, 160, True, "hard_swish",
2], # SSD/SSDLite/RCNN output
[5, 960, 160, True, "hard_swish", 1],
[5, 960, 160, True, "hard_swish", 1], # YOLOv3 output
]
elif model_name == "small":
self.cfg = [
# k, exp, c, se, nl, s,
[3, 16, 16, True, "relu", 2],
[3, 72, 24, False, "relu", 2], # RCNN output
[3, 88, 24, False, "relu", 1], # YOLOv3 output
[5, 96, 40, True, "hard_swish", 2], # RCNN output
[5, 240, 40, True, "hard_swish", 1],
[5, 240, 40, True, "hard_swish", 1],
[5, 120, 48, True, "hard_swish", 1],
[5, 144, 48, True, "hard_swish", 1], # YOLOv3 output
[5, 288, 96, True, "hard_swish", 2], # SSD/SSDLite/RCNN output
[5, 576, 96, True, "hard_swish", 1],
[5, 576, 96, True, "hard_swish", 1], # YOLOv3 output
]
else:
raise NotImplementedError(
"mode[{}_model] is not implemented!".format(model_name))
if multiplier != 1.0:
self.cfg[-3][2] = int(self.cfg[-3][2] * multiplier)
self.cfg[-2][1] = int(self.cfg[-2][1] * multiplier)
self.cfg[-2][2] = int(self.cfg[-2][2] * multiplier)
self.cfg[-1][1] = int(self.cfg[-1][1] * multiplier)
self.cfg[-1][2] = int(self.cfg[-1][2] * multiplier)
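        # Illustrative note (editorial addition): each cfg row [k, exp, c, se, nl, s]
        # becomes one ResidualUnit named "conv{i+2}". For example, with scale=1.0 the
        # "large" row [5, 72, 40, True, "relu", 2] (i=3) builds
        #   ResidualUnit(in_c=24, mid_c=72, out_c=40, filter_size=5,
        #                use_se=True, act="relu", stride=2)
        # where in_c=24 is the out_c of the previous row.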
self.conv1 = ConvBNLayer(
in_c=3,
out_c=make_divisible(inplanes * scale),
filter_size=3,
stride=2,
padding=1,
num_groups=1,
act="hard_swish",
lr_mult=lr_mult_list[0],
conv_decay=conv_decay,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
name="conv1")
self._out_channels = []
self.block_list = []
i = 0
inplanes = make_divisible(inplanes * scale)
for (k, exp, c, se, nl, s) in self.cfg:
lr_idx = min(i // 3, len(lr_mult_list) - 1)
lr_mult = lr_mult_list[lr_idx]
# for SSD/SSDLite, first head input is after ResidualUnit expand_conv
return_list = self.with_extra_blocks and i + 2 in self.feature_maps
block = self.add_sublayer(
"conv" + str(i + 2),
sublayer=ResidualUnit(
in_c=inplanes,
mid_c=make_divisible(scale * exp),
out_c=make_divisible(scale * c),
filter_size=k,
stride=s,
use_se=se,
act=nl,
lr_mult=lr_mult,
conv_decay=conv_decay,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
return_list=return_list,
name="conv" + str(i + 2)))
self.block_list.append(block)
inplanes = make_divisible(scale * c)
i += 1
self._update_out_channels(
make_divisible(scale * exp)
if return_list else inplanes, i + 1, feature_maps)
if self.with_extra_blocks:
self.extra_block_list = []
extra_out_c = make_divisible(scale * self.cfg[-1][1])
lr_idx = min(i // 3, len(lr_mult_list) - 1)
lr_mult = lr_mult_list[lr_idx]
conv_extra = self.add_sublayer(
"conv" + str(i + 2),
sublayer=ConvBNLayer(
in_c=inplanes,
out_c=extra_out_c,
filter_size=1,
stride=1,
padding=0,
num_groups=1,
act="hard_swish",
lr_mult=lr_mult,
conv_decay=conv_decay,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
name="conv" + str(i + 2)))
self.extra_block_list.append(conv_extra)
i += 1
self._update_out_channels(extra_out_c, i + 1, feature_maps)
for j, block_filter in enumerate(self.extra_block_filters):
in_c = extra_out_c if j == 0 else self.extra_block_filters[
j - 1][1]
conv_extra = self.add_sublayer(
"conv" + str(i + 2),
sublayer=ExtraBlockDW(
in_c,
block_filter[0],
block_filter[1],
stride=2,
lr_mult=lr_mult,
conv_decay=conv_decay,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
name='conv' + str(i + 2)))
self.extra_block_list.append(conv_extra)
i += 1
self._update_out_channels(block_filter[1], i + 1, feature_maps)
def _update_out_channels(self, channel, feature_idx, feature_maps):
if feature_idx in feature_maps:
self._out_channels.append(channel)
def forward(self, inputs):
x = self.conv1(inputs['image'])
outs = []
for idx, block in enumerate(self.block_list):
x = block(x)
if idx + 2 in self.feature_maps:
if isinstance(x, list):
outs.append(x[0])
x = x[1]
else:
outs.append(x)
if not self.with_extra_blocks:
return outs
for i, block in enumerate(self.extra_block_list):
idx = i + len(self.block_list)
x = block(x)
if idx + 2 in self.feature_maps:
outs.append(x)
return outs
@property
def out_shape(self):
return [ShapeSpec(channels=c) for c in self._out_channels]
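# Illustrative usage sketch (editorial addition; the import path is assumed and
# not shown in this diff):
#
#   import paddle
#   from paddlers.models.ppdet.modeling.backbones.mobilenet_v3 import MobileNetV3
#   net = MobileNetV3(scale=1.0, model_name="large", feature_maps=[6, 12, 15])
#   feats = net({'image': paddle.rand([1, 3, 320, 320])})
#   # three feature maps at strides 8/16/32 with 40/112/160 channels,
#   # matching [s.channels for s in net.out_shape]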

@@ -0,0 +1,69 @@
class NameAdapter(object):
"""Fix the backbones variable names for pretrained weight"""
def __init__(self, model):
super(NameAdapter, self).__init__()
self.model = model
@property
def model_type(self):
return getattr(self.model, '_model_type', '')
@property
def variant(self):
return getattr(self.model, 'variant', '')
def fix_conv_norm_name(self, name):
if name == "conv1":
bn_name = "bn_" + name
else:
bn_name = "bn" + name[3:]
        # the naming rule is the same as in the pretrained weights
if self.model_type == 'SEResNeXt':
bn_name = name + "_bn"
return bn_name
def fix_shortcut_name(self, name):
if self.model_type == 'SEResNeXt':
name = 'conv' + name + '_prj'
return name
def fix_bottleneck_name(self, name):
if self.model_type == 'SEResNeXt':
conv_name1 = 'conv' + name + '_x1'
conv_name2 = 'conv' + name + '_x2'
conv_name3 = 'conv' + name + '_x3'
shortcut_name = name
else:
conv_name1 = name + "_branch2a"
conv_name2 = name + "_branch2b"
conv_name3 = name + "_branch2c"
shortcut_name = name + "_branch1"
return conv_name1, conv_name2, conv_name3, shortcut_name
def fix_basicblock_name(self, name):
if self.model_type == 'SEResNeXt':
conv_name1 = 'conv' + name + '_x1'
conv_name2 = 'conv' + name + '_x2'
shortcut_name = name
else:
conv_name1 = name + "_branch2a"
conv_name2 = name + "_branch2b"
shortcut_name = name + "_branch1"
return conv_name1, conv_name2, shortcut_name
def fix_layer_warp_name(self, stage_num, count, i):
name = 'res' + str(stage_num)
if count > 10 and stage_num == 4:
if i == 0:
conv_name = name + "a"
else:
conv_name = name + "b" + str(i)
else:
conv_name = name + chr(ord("a") + i)
if self.model_type == 'SEResNeXt':
conv_name = str(stage_num + 2) + '_' + str(i + 1)
return conv_name
def fix_c1_stage_name(self):
return "res_conv1" if self.model_type == 'ResNeXt' else "conv1"

@@ -0,0 +1,358 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from numbers import Integral
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppdet.core.workspace import register, serializable
from ..shape_spec import ShapeSpec
from .resnet import ConvNormLayer
__all__ = ['Res2Net', 'Res2NetC5']
Res2Net_cfg = {
50: [3, 4, 6, 3],
101: [3, 4, 23, 3],
152: [3, 8, 36, 3],
200: [3, 12, 48, 3]
}
class BottleNeck(nn.Layer):
def __init__(self,
ch_in,
ch_out,
stride,
shortcut,
width,
scales=4,
variant='b',
groups=1,
lr=1.0,
norm_type='bn',
norm_decay=0.,
freeze_norm=True,
dcn_v2=False):
super(BottleNeck, self).__init__()
self.shortcut = shortcut
self.scales = scales
self.stride = stride
if not shortcut:
if variant == 'd' and stride == 2:
self.branch1 = nn.Sequential()
self.branch1.add_sublayer(
'pool',
nn.AvgPool2D(
kernel_size=2, stride=2, padding=0, ceil_mode=True))
self.branch1.add_sublayer(
'conv',
ConvNormLayer(
ch_in=ch_in,
ch_out=ch_out,
filter_size=1,
stride=1,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
lr=lr))
else:
self.branch1 = ConvNormLayer(
ch_in=ch_in,
ch_out=ch_out,
filter_size=1,
stride=stride,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
lr=lr)
self.branch2a = ConvNormLayer(
ch_in=ch_in,
ch_out=width * scales,
filter_size=1,
stride=stride if variant == 'a' else 1,
groups=1,
act='relu',
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
lr=lr)
self.branch2b = nn.LayerList([
ConvNormLayer(
ch_in=width,
ch_out=width,
filter_size=3,
stride=1 if variant == 'a' else stride,
groups=groups,
act='relu',
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
lr=lr,
dcn_v2=dcn_v2) for _ in range(self.scales - 1)
])
self.branch2c = ConvNormLayer(
ch_in=width * scales,
ch_out=ch_out,
filter_size=1,
stride=1,
groups=1,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
lr=lr)
def forward(self, inputs):
out = self.branch2a(inputs)
feature_split = paddle.split(out, self.scales, 1)
out_split = []
for i in range(self.scales - 1):
if i == 0 or self.stride == 2:
out_split.append(self.branch2b[i](feature_split[i]))
else:
out_split.append(self.branch2b[i](paddle.add(feature_split[i],
out_split[-1])))
if self.stride == 1:
out_split.append(feature_split[-1])
else:
out_split.append(
F.avg_pool2d(feature_split[-1], 3, self.stride, 1))
out = self.branch2c(paddle.concat(out_split, 1))
if self.shortcut:
short = inputs
else:
short = self.branch1(inputs)
out = paddle.add(out, short)
out = F.relu(out)
return out
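# Illustrative note (editorial addition): the forward pass above implements the
# Res2Net hierarchical split. With width=26 and scales=4, branch2a produces
# 26 * 4 = 104 channels, split into four 26-channel groups. Groups 0-2 each pass
# through their own 3x3 conv; at stride 1, group i > 0 is first added to the
# previous group's output, while the last group is either passed through
# unchanged (stride 1) or 3x3 average-pooled (stride 2) before the groups are
# re-concatenated and projected by branch2c.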
class Blocks(nn.Layer):
def __init__(self,
ch_in,
ch_out,
count,
stage_num,
width,
scales=4,
variant='b',
groups=1,
lr=1.0,
norm_type='bn',
norm_decay=0.,
freeze_norm=True,
dcn_v2=False):
super(Blocks, self).__init__()
self.blocks = nn.Sequential()
for i in range(count):
self.blocks.add_sublayer(
str(i),
BottleNeck(
ch_in=ch_in if i == 0 else ch_out,
ch_out=ch_out,
stride=2 if i == 0 and stage_num != 2 else 1,
shortcut=False if i == 0 else True,
width=width * (2**(stage_num - 2)),
scales=scales,
variant=variant,
groups=groups,
lr=lr,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
dcn_v2=dcn_v2))
def forward(self, inputs):
return self.blocks(inputs)
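# Editorial note: each Blocks stage widens the per-group width with depth via
# width * (2 ** (stage_num - 2)). With the default width=26 this gives, per stage
# (res2-res5): 26, 52, 104, 208 channels per group, i.e. 104/208/416/832 channels
# inside branch2a when scales=4.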
@register
@serializable
class Res2Net(nn.Layer):
"""
Res2Net, see https://arxiv.org/abs/1904.01169
Args:
depth (int): Res2Net depth, should be 50, 101, 152, 200.
width (int): Res2Net width
scales (int): Res2Net scale
variant (str): Res2Net variant, supports 'a', 'b', 'c', 'd' currently
        lr_mult_list (list): learning rate ratios for the res2-res5 stages;
                             a lower ratio is needed for pretrained models
                             obtained via distillation (default [1.0, 1.0, 1.0, 1.0]).
        groups (int): group convolution cardinality
        norm_type (str): normalization type, 'bn' or 'sync_bn'
        norm_decay (float): weight decay for normalization layer weights
        freeze_norm (bool): whether to freeze the normalization layers
        freeze_at (int): index of the stage at which to freeze the backbone
        return_idx (list): indices of stages whose feature maps are returned,
                           index 0 stands for res2
        dcn_v2_stages (list): indices of stages that use deformable conv v2
        num_stages (int): number of stages created
"""
__shared__ = ['norm_type']
def __init__(self,
depth=50,
width=26,
scales=4,
variant='b',
lr_mult_list=[1.0, 1.0, 1.0, 1.0],
groups=1,
norm_type='bn',
norm_decay=0.,
freeze_norm=True,
freeze_at=0,
return_idx=[0, 1, 2, 3],
dcn_v2_stages=[-1],
num_stages=4):
super(Res2Net, self).__init__()
self._model_type = 'Res2Net' if groups == 1 else 'Res2NeXt'
assert depth in [50, 101, 152, 200], \
"depth {} not in [50, 101, 152, 200]"
assert variant in ['a', 'b', 'c', 'd'], "invalid Res2Net variant"
assert num_stages >= 1 and num_stages <= 4
self.depth = depth
self.variant = variant
self.norm_type = norm_type
self.norm_decay = norm_decay
self.freeze_norm = freeze_norm
self.freeze_at = freeze_at
if isinstance(return_idx, Integral):
return_idx = [return_idx]
assert max(return_idx) < num_stages, \
            'the maximum return index must be smaller than num_stages, ' \
'but received maximum return index is {} and num_stages ' \
'is {}'.format(max(return_idx), num_stages)
self.return_idx = return_idx
self.num_stages = num_stages
assert len(lr_mult_list) == 4, \
"lr_mult_list length must be 4 but got {}".format(len(lr_mult_list))
if isinstance(dcn_v2_stages, Integral):
dcn_v2_stages = [dcn_v2_stages]
assert max(dcn_v2_stages) < num_stages
self.dcn_v2_stages = dcn_v2_stages
block_nums = Res2Net_cfg[depth]
# C1 stage
if self.variant in ['c', 'd']:
conv_def = [
[3, 32, 3, 2, "conv1_1"],
[32, 32, 3, 1, "conv1_2"],
[32, 64, 3, 1, "conv1_3"],
]
else:
conv_def = [[3, 64, 7, 2, "conv1"]]
self.res1 = nn.Sequential()
for (c_in, c_out, k, s, _name) in conv_def:
self.res1.add_sublayer(
_name,
ConvNormLayer(
ch_in=c_in,
ch_out=c_out,
filter_size=k,
stride=s,
groups=1,
act='relu',
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
lr=1.0))
self._in_channels = [64, 256, 512, 1024]
self._out_channels = [256, 512, 1024, 2048]
self._out_strides = [4, 8, 16, 32]
# C2-C5 stages
self.res_layers = []
for i in range(num_stages):
lr_mult = lr_mult_list[i]
stage_num = i + 2
self.res_layers.append(
self.add_sublayer(
"res{}".format(stage_num),
Blocks(
self._in_channels[i],
self._out_channels[i],
count=block_nums[i],
stage_num=stage_num,
width=width,
scales=scales,
groups=groups,
lr=lr_mult,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
dcn_v2=(i in self.dcn_v2_stages))))
@property
def out_shape(self):
return [
ShapeSpec(
channels=self._out_channels[i], stride=self._out_strides[i])
for i in self.return_idx
]
def forward(self, inputs):
x = inputs['image']
res1 = self.res1(x)
x = F.max_pool2d(res1, kernel_size=3, stride=2, padding=1)
outs = []
for idx, stage in enumerate(self.res_layers):
x = stage(x)
if idx == self.freeze_at:
x.stop_gradient = True
if idx in self.return_idx:
outs.append(x)
return outs
@register
class Res2NetC5(nn.Layer):
def __init__(self, depth=50, width=26, scales=4, variant='b'):
super(Res2NetC5, self).__init__()
feat_in, feat_out = [1024, 2048]
self.res5 = Blocks(
feat_in,
feat_out,
count=3,
stage_num=5,
width=width,
scales=scales,
variant=variant)
self.feat_out = feat_out
@property
def out_shape(self):
return [ShapeSpec(
channels=self.feat_out,
stride=32, )]
def forward(self, roi_feat, stage=0):
y = self.res5(roi_feat)
return y

@@ -0,0 +1,609 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
from numbers import Integral
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppdet.core.workspace import register, serializable
from paddle.regularizer import L2Decay
from paddle.nn.initializer import Uniform
from paddle import ParamAttr
from paddle.nn.initializer import Constant
from paddle.vision.ops import DeformConv2D
from .name_adapter import NameAdapter
from ..shape_spec import ShapeSpec
__all__ = ['ResNet', 'Res5Head', 'Blocks', 'BasicBlock', 'BottleNeck']
ResNet_cfg = {
18: [2, 2, 2, 2],
34: [3, 4, 6, 3],
50: [3, 4, 6, 3],
101: [3, 4, 23, 3],
152: [3, 8, 36, 3],
}
class ConvNormLayer(nn.Layer):
def __init__(self,
ch_in,
ch_out,
filter_size,
stride,
groups=1,
act=None,
norm_type='bn',
norm_decay=0.,
freeze_norm=True,
lr=1.0,
dcn_v2=False):
super(ConvNormLayer, self).__init__()
assert norm_type in ['bn', 'sync_bn']
self.norm_type = norm_type
self.act = act
self.dcn_v2 = dcn_v2
if not self.dcn_v2:
self.conv = nn.Conv2D(
in_channels=ch_in,
out_channels=ch_out,
kernel_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=groups,
weight_attr=ParamAttr(learning_rate=lr),
bias_attr=False)
else:
self.offset_channel = 2 * filter_size**2
self.mask_channel = filter_size**2
self.conv_offset = nn.Conv2D(
in_channels=ch_in,
out_channels=3 * filter_size**2,
kernel_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
weight_attr=ParamAttr(initializer=Constant(0.)),
bias_attr=ParamAttr(initializer=Constant(0.)))
self.conv = DeformConv2D(
in_channels=ch_in,
out_channels=ch_out,
kernel_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
dilation=1,
groups=groups,
weight_attr=ParamAttr(learning_rate=lr),
bias_attr=False)
norm_lr = 0. if freeze_norm else lr
param_attr = ParamAttr(
learning_rate=norm_lr,
regularizer=L2Decay(norm_decay),
trainable=False if freeze_norm else True)
bias_attr = ParamAttr(
learning_rate=norm_lr,
regularizer=L2Decay(norm_decay),
trainable=False if freeze_norm else True)
global_stats = True if freeze_norm else None
if norm_type in ['sync_bn', 'bn']:
self.norm = nn.BatchNorm2D(
ch_out,
weight_attr=param_attr,
bias_attr=bias_attr,
use_global_stats=global_stats)
norm_params = self.norm.parameters()
if freeze_norm:
for param in norm_params:
param.stop_gradient = True
def forward(self, inputs):
if not self.dcn_v2:
out = self.conv(inputs)
else:
offset_mask = self.conv_offset(inputs)
offset, mask = paddle.split(
offset_mask,
num_or_sections=[self.offset_channel, self.mask_channel],
axis=1)
mask = F.sigmoid(mask)
out = self.conv(inputs, offset, mask=mask)
if self.norm_type in ['bn', 'sync_bn']:
out = self.norm(out)
if self.act:
out = getattr(F, self.act)(out)
return out
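# Illustrative note (editorial addition): when dcn_v2=True, conv_offset predicts
# 3 * k^2 channels per position. For filter_size=3 that is 27 channels, split into
# 18 offset channels (a (dy, dx) pair for each of the 9 kernel taps) and 9
# modulation-mask channels; the masks are squashed with sigmoid before being fed
# to DeformConv2D together with the input.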
class SELayer(nn.Layer):
def __init__(self, ch, reduction_ratio=16):
super(SELayer, self).__init__()
self.pool = nn.AdaptiveAvgPool2D(1)
stdv = 1.0 / math.sqrt(ch)
c_ = ch // reduction_ratio
self.squeeze = nn.Linear(
ch,
c_,
weight_attr=paddle.ParamAttr(initializer=Uniform(-stdv, stdv)),
bias_attr=True)
stdv = 1.0 / math.sqrt(c_)
self.extract = nn.Linear(
c_,
ch,
weight_attr=paddle.ParamAttr(initializer=Uniform(-stdv, stdv)),
bias_attr=True)
def forward(self, inputs):
out = self.pool(inputs)
out = paddle.squeeze(out, axis=[2, 3])
out = self.squeeze(out)
out = F.relu(out)
out = self.extract(out)
out = F.sigmoid(out)
out = paddle.unsqueeze(out, axis=[2, 3])
scale = out * inputs
return scale
class BasicBlock(nn.Layer):
expansion = 1
def __init__(self,
ch_in,
ch_out,
stride,
shortcut,
variant='b',
groups=1,
base_width=64,
lr=1.0,
norm_type='bn',
norm_decay=0.,
freeze_norm=True,
dcn_v2=False,
std_senet=False):
super(BasicBlock, self).__init__()
assert groups == 1 and base_width == 64, 'BasicBlock only supports groups=1 and base_width=64'
self.shortcut = shortcut
if not shortcut:
if variant == 'd' and stride == 2:
self.short = nn.Sequential()
self.short.add_sublayer(
'pool',
nn.AvgPool2D(
kernel_size=2, stride=2, padding=0, ceil_mode=True))
self.short.add_sublayer(
'conv',
ConvNormLayer(
ch_in=ch_in,
ch_out=ch_out,
filter_size=1,
stride=1,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
lr=lr))
else:
self.short = ConvNormLayer(
ch_in=ch_in,
ch_out=ch_out,
filter_size=1,
stride=stride,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
lr=lr)
self.branch2a = ConvNormLayer(
ch_in=ch_in,
ch_out=ch_out,
filter_size=3,
stride=stride,
act='relu',
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
lr=lr)
self.branch2b = ConvNormLayer(
ch_in=ch_out,
ch_out=ch_out,
filter_size=3,
stride=1,
act=None,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
lr=lr,
dcn_v2=dcn_v2)
self.std_senet = std_senet
if self.std_senet:
self.se = SELayer(ch_out)
def forward(self, inputs):
out = self.branch2a(inputs)
out = self.branch2b(out)
if self.std_senet:
out = self.se(out)
if self.shortcut:
short = inputs
else:
short = self.short(inputs)
out = paddle.add(x=out, y=short)
out = F.relu(out)
return out
class BottleNeck(nn.Layer):
expansion = 4
def __init__(self,
ch_in,
ch_out,
stride,
shortcut,
variant='b',
groups=1,
base_width=4,
lr=1.0,
norm_type='bn',
norm_decay=0.,
freeze_norm=True,
dcn_v2=False,
std_senet=False):
super(BottleNeck, self).__init__()
if variant == 'a':
stride1, stride2 = stride, 1
else:
stride1, stride2 = 1, stride
# ResNeXt
width = int(ch_out * (base_width / 64.)) * groups
self.shortcut = shortcut
if not shortcut:
if variant == 'd' and stride == 2:
self.short = nn.Sequential()
self.short.add_sublayer(
'pool',
nn.AvgPool2D(
kernel_size=2, stride=2, padding=0, ceil_mode=True))
self.short.add_sublayer(
'conv',
ConvNormLayer(
ch_in=ch_in,
ch_out=ch_out * self.expansion,
filter_size=1,
stride=1,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
lr=lr))
else:
self.short = ConvNormLayer(
ch_in=ch_in,
ch_out=ch_out * self.expansion,
filter_size=1,
stride=stride,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
lr=lr)
self.branch2a = ConvNormLayer(
ch_in=ch_in,
ch_out=width,
filter_size=1,
stride=stride1,
groups=1,
act='relu',
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
lr=lr)
self.branch2b = ConvNormLayer(
ch_in=width,
ch_out=width,
filter_size=3,
stride=stride2,
groups=groups,
act='relu',
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
lr=lr,
dcn_v2=dcn_v2)
self.branch2c = ConvNormLayer(
ch_in=width,
ch_out=ch_out * self.expansion,
filter_size=1,
stride=1,
groups=1,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
lr=lr)
self.std_senet = std_senet
if self.std_senet:
self.se = SELayer(ch_out * self.expansion)
def forward(self, inputs):
out = self.branch2a(inputs)
out = self.branch2b(out)
out = self.branch2c(out)
if self.std_senet:
out = self.se(out)
if self.shortcut:
short = inputs
else:
short = self.short(inputs)
out = paddle.add(x=out, y=short)
out = F.relu(out)
return out
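# Editorial note on the ResNet variants handled above: variant 'a' puts the
# downsampling stride on the 1x1 branch2a conv, while 'b' (the default), 'c' and
# 'd' put it on the 3x3 branch2b conv; variant 'd' additionally replaces the
# strided 1x1 shortcut with AvgPool2D(2, 2) followed by a stride-1 1x1 conv.
# Variants 'c' and 'd' also use the three-conv deep stem built in ResNet.__init__
# instead of a single 7x7 conv.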
class Blocks(nn.Layer):
def __init__(self,
block,
ch_in,
ch_out,
count,
name_adapter,
stage_num,
variant='b',
groups=1,
base_width=64,
lr=1.0,
norm_type='bn',
norm_decay=0.,
freeze_norm=True,
dcn_v2=False,
std_senet=False):
super(Blocks, self).__init__()
self.blocks = []
for i in range(count):
conv_name = name_adapter.fix_layer_warp_name(stage_num, count, i)
layer = self.add_sublayer(
conv_name,
block(
ch_in=ch_in,
ch_out=ch_out,
stride=2 if i == 0 and stage_num != 2 else 1,
shortcut=False if i == 0 else True,
variant=variant,
groups=groups,
base_width=base_width,
lr=lr,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
dcn_v2=dcn_v2,
std_senet=std_senet))
self.blocks.append(layer)
if i == 0:
ch_in = ch_out * block.expansion
def forward(self, inputs):
block_out = inputs
for block in self.blocks:
block_out = block(block_out)
return block_out
@register
@serializable
class ResNet(nn.Layer):
__shared__ = ['norm_type']
def __init__(self,
depth=50,
ch_in=64,
variant='b',
lr_mult_list=[1.0, 1.0, 1.0, 1.0],
groups=1,
base_width=64,
norm_type='bn',
norm_decay=0,
freeze_norm=True,
freeze_at=0,
return_idx=[0, 1, 2, 3],
dcn_v2_stages=[-1],
num_stages=4,
std_senet=False):
"""
Residual Network, see https://arxiv.org/abs/1512.03385
Args:
depth (int): ResNet depth, should be 18, 34, 50, 101, 152.
ch_in (int): output channel of first stage, default 64
variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently
            lr_mult_list (list): learning rate ratios for the res2-res5 stages;
                a lower ratio is needed for pretrained models obtained via
                distillation (default [1.0, 1.0, 1.0, 1.0]).
            groups (int): group convolution cardinality
            base_width (int): base width of each group convolution
            norm_type (str): normalization type, 'bn' or 'sync_bn'
            norm_decay (float): weight decay for normalization layer weights
            freeze_norm (bool): whether to freeze the normalization layers
            freeze_at (int): index of the stage at which to freeze the backbone
            return_idx (list): indices of the stages whose feature maps are returned
            dcn_v2_stages (list): indices of stages that use deformable conv v2
            num_stages (int): total number of stages
            std_senet (bool): whether to use the SE block, default False
"""
super(ResNet, self).__init__()
self._model_type = 'ResNet' if groups == 1 else 'ResNeXt'
assert num_stages >= 1 and num_stages <= 4
self.depth = depth
self.variant = variant
self.groups = groups
self.base_width = base_width
self.norm_type = norm_type
self.norm_decay = norm_decay
self.freeze_norm = freeze_norm
self.freeze_at = freeze_at
if isinstance(return_idx, Integral):
return_idx = [return_idx]
assert max(return_idx) < num_stages, \
            'the maximum return index must be smaller than num_stages, ' \
'but received maximum return index is {} and num_stages ' \
'is {}'.format(max(return_idx), num_stages)
self.return_idx = return_idx
self.num_stages = num_stages
assert len(lr_mult_list) == 4, \
"lr_mult_list length must be 4 but got {}".format(len(lr_mult_list))
if isinstance(dcn_v2_stages, Integral):
dcn_v2_stages = [dcn_v2_stages]
assert max(dcn_v2_stages) < num_stages
self.dcn_v2_stages = dcn_v2_stages
block_nums = ResNet_cfg[depth]
na = NameAdapter(self)
conv1_name = na.fix_c1_stage_name()
if variant in ['c', 'd']:
conv_def = [
[3, ch_in // 2, 3, 2, "conv1_1"],
[ch_in // 2, ch_in // 2, 3, 1, "conv1_2"],
[ch_in // 2, ch_in, 3, 1, "conv1_3"],
]
else:
conv_def = [[3, ch_in, 7, 2, conv1_name]]
self.conv1 = nn.Sequential()
for (c_in, c_out, k, s, _name) in conv_def:
self.conv1.add_sublayer(
_name,
ConvNormLayer(
ch_in=c_in,
ch_out=c_out,
filter_size=k,
stride=s,
groups=1,
act='relu',
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
lr=1.0))
self.ch_in = ch_in
ch_out_list = [64, 128, 256, 512]
block = BottleNeck if depth >= 50 else BasicBlock
self._out_channels = [block.expansion * v for v in ch_out_list]
self._out_strides = [4, 8, 16, 32]
self.res_layers = []
for i in range(num_stages):
lr_mult = lr_mult_list[i]
stage_num = i + 2
res_name = "res{}".format(stage_num)
res_layer = self.add_sublayer(
res_name,
Blocks(
block,
self.ch_in,
ch_out_list[i],
count=block_nums[i],
name_adapter=na,
stage_num=stage_num,
variant=variant,
groups=groups,
base_width=base_width,
lr=lr_mult,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
dcn_v2=(i in self.dcn_v2_stages),
std_senet=std_senet))
self.res_layers.append(res_layer)
self.ch_in = self._out_channels[i]
if freeze_at >= 0:
self._freeze_parameters(self.conv1)
for i in range(min(freeze_at + 1, num_stages)):
self._freeze_parameters(self.res_layers[i])
def _freeze_parameters(self, m):
for p in m.parameters():
p.stop_gradient = True
@property
def out_shape(self):
return [
ShapeSpec(
channels=self._out_channels[i], stride=self._out_strides[i])
for i in self.return_idx
]
def forward(self, inputs):
x = inputs['image']
conv1 = self.conv1(x)
x = F.max_pool2d(conv1, kernel_size=3, stride=2, padding=1)
outs = []
for idx, stage in enumerate(self.res_layers):
x = stage(x)
if idx in self.return_idx:
outs.append(x)
return outs
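# Illustrative usage sketch (editorial addition; the import path is assumed):
#
#   import paddle
#   from paddlers.models.ppdet.modeling.backbones.resnet import ResNet
#   net = ResNet(depth=50, return_idx=[0, 1, 2, 3], freeze_at=0)
#   c2, c3, c4, c5 = net({'image': paddle.rand([1, 3, 224, 224])})
#   # shapes: [1, 256, 56, 56], [1, 512, 28, 28], [1, 1024, 14, 14], [1, 2048, 7, 7]
#   # freeze_at=0 stops gradients for the stem and the res2 stage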
@register
class Res5Head(nn.Layer):
def __init__(self, depth=50):
super(Res5Head, self).__init__()
feat_in, feat_out = [1024, 512]
if depth < 50:
feat_in = 256
na = NameAdapter(self)
block = BottleNeck if depth >= 50 else BasicBlock
self.res5 = Blocks(
block, feat_in, feat_out, count=3, name_adapter=na, stage_num=5)
self.feat_out = feat_out if depth < 50 else feat_out * 4
@property
def out_shape(self):
return [ShapeSpec(
channels=self.feat_out,
stride=16, )]
def forward(self, roi_feat, stage=0):
y = self.res5(roi_feat)
return y

@@ -0,0 +1,139 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.nn as nn
from paddlers.models.ppdet.core.workspace import register, serializable
from .resnet import ResNet, Blocks, BasicBlock, BottleNeck
from .name_adapter import NameAdapter
from ..shape_spec import ShapeSpec
__all__ = ['SENet', 'SERes5Head']
@register
@serializable
class SENet(ResNet):
__shared__ = ['norm_type']
def __init__(self,
depth=50,
variant='b',
lr_mult_list=[1.0, 1.0, 1.0, 1.0],
groups=1,
base_width=64,
norm_type='bn',
norm_decay=0,
freeze_norm=True,
freeze_at=0,
return_idx=[0, 1, 2, 3],
dcn_v2_stages=[-1],
std_senet=True,
num_stages=4):
"""
Squeeze-and-Excitation Networks, see https://arxiv.org/abs/1709.01507
Args:
depth (int): SENet depth, should be 50, 101, 152
variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently
            lr_mult_list (list): learning rate ratios for the res2-res5 stages;
                                 a lower ratio is needed for pretrained models
                                 obtained via distillation (default [1.0, 1.0, 1.0, 1.0]).
            groups (int): group convolution cardinality
            base_width (int): base width of each group convolution
            norm_type (str): normalization type, 'bn' or 'sync_bn'
            norm_decay (float): weight decay for normalization layer weights
            freeze_norm (bool): whether to freeze the normalization layers
            freeze_at (int): index of the stage at which to freeze the backbone
            return_idx (list): indices of the stages whose feature maps are returned
            dcn_v2_stages (list): indices of stages that use deformable conv v2
            std_senet (bool): whether to use the SE block, default True
            num_stages (int): total number of stages
"""
super(SENet, self).__init__(
depth=depth,
variant=variant,
lr_mult_list=lr_mult_list,
ch_in=128,
groups=groups,
base_width=base_width,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
freeze_at=freeze_at,
return_idx=return_idx,
dcn_v2_stages=dcn_v2_stages,
std_senet=std_senet,
num_stages=num_stages)
@register
class SERes5Head(nn.Layer):
def __init__(self,
depth=50,
variant='b',
lr_mult=1.0,
groups=1,
base_width=64,
norm_type='bn',
norm_decay=0,
dcn_v2=False,
freeze_norm=False,
std_senet=True):
"""
SERes5Head layer
Args:
depth (int): SENet depth, should be 50, 101, 152
variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently
            lr_mult (float): learning rate ratio of SERes5Head, default 1.0.
groups (int): group convolution cardinality
base_width (int): base width of each group convolution
norm_type (str): normalization type, 'bn', 'sync_bn' or 'affine_channel'
norm_decay (float): weight decay for normalization layer weights
            dcn_v2 (bool): whether to use deformable conv v2, default False
std_senet (bool): whether use senet, default True
"""
super(SERes5Head, self).__init__()
ch_out = 512
ch_in = 256 if depth < 50 else 1024
na = NameAdapter(self)
block = BottleNeck if depth >= 50 else BasicBlock
self.res5 = Blocks(
block,
ch_in,
ch_out,
count=3,
name_adapter=na,
stage_num=5,
variant=variant,
groups=groups,
base_width=base_width,
lr=lr_mult,
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
dcn_v2=dcn_v2,
std_senet=std_senet)
self.ch_out = ch_out * block.expansion
@property
def out_shape(self):
return [ShapeSpec(
channels=self.ch_out,
stride=16, )]
def forward(self, roi_feat):
y = self.res5(roi_feat)
return y

@@ -0,0 +1,251 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.nn as nn
from paddle import ParamAttr
import paddle.nn.functional as F
from paddle.nn import Conv2D, MaxPool2D, AdaptiveAvgPool2D, BatchNorm2D
from paddle.nn.initializer import KaimingNormal
from paddle.regularizer import L2Decay
from paddlers.models.ppdet.core.workspace import register, serializable
from numbers import Integral
from ..shape_spec import ShapeSpec
from paddlers.models.ppdet.modeling.ops import channel_shuffle
__all__ = ['ShuffleNetV2']
class ConvBNLayer(nn.Layer):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride,
padding,
groups=1,
act=None):
super(ConvBNLayer, self).__init__()
self._conv = Conv2D(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
groups=groups,
weight_attr=ParamAttr(initializer=KaimingNormal()),
bias_attr=False)
self._batch_norm = BatchNorm2D(
out_channels,
weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
if act == "hard_swish":
act = 'hardswish'
self.act = act
def forward(self, inputs):
y = self._conv(inputs)
y = self._batch_norm(y)
if self.act:
y = getattr(F, self.act)(y)
return y
class InvertedResidual(nn.Layer):
def __init__(self, in_channels, out_channels, stride, act="relu"):
super(InvertedResidual, self).__init__()
self._conv_pw = ConvBNLayer(
in_channels=in_channels // 2,
out_channels=out_channels // 2,
kernel_size=1,
stride=1,
padding=0,
groups=1,
act=act)
self._conv_dw = ConvBNLayer(
in_channels=out_channels // 2,
out_channels=out_channels // 2,
kernel_size=3,
stride=stride,
padding=1,
groups=out_channels // 2,
act=None)
self._conv_linear = ConvBNLayer(
in_channels=out_channels // 2,
out_channels=out_channels // 2,
kernel_size=1,
stride=1,
padding=0,
groups=1,
act=act)
def forward(self, inputs):
x1, x2 = paddle.split(
inputs,
num_or_sections=[inputs.shape[1] // 2, inputs.shape[1] // 2],
axis=1)
x2 = self._conv_pw(x2)
x2 = self._conv_dw(x2)
x2 = self._conv_linear(x2)
out = paddle.concat([x1, x2], axis=1)
return channel_shuffle(out, 2)
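# Illustrative note (editorial addition): channel_shuffle(out, 2) interleaves the
# untouched half x1 with the transformed half x2 so that the next block mixes
# information across branches. For 8 channels indexed 0-7 (x1 = 0-3, x2 = 4-7),
# a 2-group shuffle reorders them to [0, 4, 1, 5, 2, 6, 3, 7].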
class InvertedResidualDS(nn.Layer):
def __init__(self, in_channels, out_channels, stride, act="relu"):
super(InvertedResidualDS, self).__init__()
# branch1
self._conv_dw_1 = ConvBNLayer(
in_channels=in_channels,
out_channels=in_channels,
kernel_size=3,
stride=stride,
padding=1,
groups=in_channels,
act=None)
self._conv_linear_1 = ConvBNLayer(
in_channels=in_channels,
out_channels=out_channels // 2,
kernel_size=1,
stride=1,
padding=0,
groups=1,
act=act)
# branch2
self._conv_pw_2 = ConvBNLayer(
in_channels=in_channels,
out_channels=out_channels // 2,
kernel_size=1,
stride=1,
padding=0,
groups=1,
act=act)
self._conv_dw_2 = ConvBNLayer(
in_channels=out_channels // 2,
out_channels=out_channels // 2,
kernel_size=3,
stride=stride,
padding=1,
groups=out_channels // 2,
act=None)
self._conv_linear_2 = ConvBNLayer(
in_channels=out_channels // 2,
out_channels=out_channels // 2,
kernel_size=1,
stride=1,
padding=0,
groups=1,
act=act)
def forward(self, inputs):
x1 = self._conv_dw_1(inputs)
x1 = self._conv_linear_1(x1)
x2 = self._conv_pw_2(inputs)
x2 = self._conv_dw_2(x2)
x2 = self._conv_linear_2(x2)
out = paddle.concat([x1, x2], axis=1)
return channel_shuffle(out, 2)
@register
@serializable
class ShuffleNetV2(nn.Layer):
def __init__(self, scale=1.0, act="relu", feature_maps=[5, 13, 17]):
super(ShuffleNetV2, self).__init__()
self.scale = scale
if isinstance(feature_maps, Integral):
feature_maps = [feature_maps]
self.feature_maps = feature_maps
stage_repeats = [4, 8, 4]
if scale == 0.25:
stage_out_channels = [-1, 24, 24, 48, 96, 512]
elif scale == 0.33:
stage_out_channels = [-1, 24, 32, 64, 128, 512]
elif scale == 0.5:
stage_out_channels = [-1, 24, 48, 96, 192, 1024]
elif scale == 1.0:
stage_out_channels = [-1, 24, 116, 232, 464, 1024]
elif scale == 1.5:
stage_out_channels = [-1, 24, 176, 352, 704, 1024]
elif scale == 2.0:
stage_out_channels = [-1, 24, 224, 488, 976, 2048]
else:
raise NotImplementedError("This scale size:[" + str(scale) +
"] is not implemented!")
self._out_channels = []
self._feature_idx = 0
# 1. conv1
self._conv1 = ConvBNLayer(
in_channels=3,
out_channels=stage_out_channels[1],
kernel_size=3,
stride=2,
padding=1,
act=act)
self._max_pool = MaxPool2D(kernel_size=3, stride=2, padding=1)
self._feature_idx += 1
# 2. bottleneck sequences
self._block_list = []
for stage_id, num_repeat in enumerate(stage_repeats):
for i in range(num_repeat):
if i == 0:
block = self.add_sublayer(
name=str(stage_id + 2) + '_' + str(i + 1),
sublayer=InvertedResidualDS(
in_channels=stage_out_channels[stage_id + 1],
out_channels=stage_out_channels[stage_id + 2],
stride=2,
act=act))
else:
block = self.add_sublayer(
name=str(stage_id + 2) + '_' + str(i + 1),
sublayer=InvertedResidual(
in_channels=stage_out_channels[stage_id + 2],
out_channels=stage_out_channels[stage_id + 2],
stride=1,
act=act))
self._block_list.append(block)
self._feature_idx += 1
self._update_out_channels(stage_out_channels[stage_id + 2],
self._feature_idx, self.feature_maps)
def _update_out_channels(self, channel, feature_idx, feature_maps):
if feature_idx in feature_maps:
self._out_channels.append(channel)
def forward(self, inputs):
y = self._conv1(inputs['image'])
y = self._max_pool(y)
outs = []
for i, inv in enumerate(self._block_list):
y = inv(y)
if i + 2 in self.feature_maps:
outs.append(y)
return outs
@property
def out_shape(self):
return [ShapeSpec(channels=c) for c in self._out_channels]
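# Editorial note: _feature_idx counts conv1 + max_pool as index 1 and then one
# index per inverted-residual block, so with stage_repeats=[4, 8, 4] the blocks
# occupy indices 2-5, 6-13 and 14-17. The default feature_maps=[5, 13, 17]
# therefore returns the last block of each stage, with 116/232/464 channels at
# scale=1.0.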
