commit
16c85bf3c2
234 changed files with 64367 additions and 1 deletions
@ -1 +1,5 @@ |
||||
from . import datasets, transforms, utils, tools |
||||
from . import tasks, datasets, transforms, utils, tools, models |
||||
|
||||
# TODO: add this info during installation |
||||
env_info = {'place': 'gpu', 'num': 1} |
||||
__version__ = 0.1 |
||||
|
@ -0,0 +1 @@ |
||||
from .voc import VOCDetection |
@ -0,0 +1,445 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
import copy |
||||
import os |
||||
import os.path as osp |
||||
import random |
||||
import re |
||||
import numpy as np |
||||
from collections import OrderedDict |
||||
import xml.etree.ElementTree as ET |
||||
from paddle.io import Dataset |
||||
from paddlers.utils import logging, get_num_workers, get_encoding, path_normalization, is_pic |
||||
from paddlers.transforms import Decode, MixupImage |
||||
from paddlers.tools import YOLOAnchorCluster |
||||
|
||||
|
||||
class VOCDetection(Dataset):
    """Read a detection dataset in PascalVOC format and process its samples.

    Args:
        data_dir (str): Directory that contains the dataset.
        file_list (str): Path of the file that lists the image files and their
            annotation files (every path inside it is relative to `data_dir`).
        label_list (str): Path of the file that describes the categories
            contained in the dataset.
        transforms (paddlers.det.transforms): Preprocessing/augmentation
            operators applied to every sample of the dataset.
        num_workers (int|str): Number of threads or processes used while
            preprocessing samples. Defaults to 'auto'. When set to 'auto',
            `num_workers` is derived from the number of CPU cores: 8 if half
            of the cores is greater than 8, otherwise half of the cores.
        shuffle (bool): Whether to shuffle the samples of the dataset.
            Defaults to False.
        allow_empty (bool): Whether to load negative samples.
            Defaults to False.
        empty_ratio (float): Proportion of negative samples in the total
            number of samples. If it is smaller than 0 or greater than or
            equal to 1, all negative samples are kept. Defaults to 1.
    """
||||
|
||||
def __init__(self,
             data_dir,
             file_list,
             label_list,
             transforms=None,
             num_workers='auto',
             shuffle=False,
             allow_empty=False,
             empty_ratio=1.):
    """Parse the label list, the file list and every VOC xml annotation.

    Positive samples (at least one valid box) are stored in
    `self.file_list`; images without boxes are collected separately and only
    appended when `allow_empty` is set. A COCO-style ground truth object is
    built in `self.coco_gt` from the parsed annotations.

    Args:
        data_dir (str): Root directory the paths in `file_list` are joined to.
        file_list (str): Text file with one "<image> <xml>" pair per line.
        label_list (str): Text file with one category name per line.
        transforms: Transform pipeline; deep-copied before being stored.
        num_workers (int|str): Forwarded to `get_num_workers`.
        shuffle (bool): Stored as `self.shuffle`.
        allow_empty (bool): Whether images without boxes are kept.
        empty_ratio (float): Target share of negative samples, see
            `_sample_empty`.

    Raises:
        Exception: If a line of `file_list` holds more than two
            whitespace-separated fields.
    """
    # matplotlib.use() must be called *before* pylab, matplotlib.pyplot,
    # or matplotlib.backends is imported for the first time
    # pycocotools import matplotlib
    import matplotlib
    matplotlib.use('Agg')
    from pycocotools.coco import COCO
    super(VOCDetection, self).__init__()
    self.data_dir = data_dir
    self.data_fields = None
    self.transforms = copy.deepcopy(transforms)
    self.num_max_boxes = 50

    self.use_mix = False
    if self.transforms is not None:
        for op in self.transforms.transforms:
            if isinstance(op, MixupImage):
                self.mixup_op = copy.deepcopy(op)
                self.use_mix = True
                self.num_max_boxes *= 2
                break

    self.batch_transforms = None
    self.num_workers = get_num_workers(num_workers)
    self.shuffle = shuffle
    self.allow_empty = allow_empty
    self.empty_ratio = empty_ratio
    self.file_list = list()
    neg_file_list = list()
    self.labels = list()

    annotations = dict()
    annotations['images'] = list()
    annotations['categories'] = list()
    annotations['annotations'] = list()

    def tag_names(element, tag):
        """Return every spelling of `<tag>` found in the serialised element.

        The match is case-insensitive so that e.g. `<Width>` and `<width>`
        are both accepted; the returned names keep the spelling used in the
        xml so they can be passed to `find`/`findall`.
        """
        serialised = str(ET.tostringlist(element))
        matches = re.findall('<{}>'.format(tag), serialised, re.IGNORECASE)
        return [m[1:-1] for m in matches]

    cname2cid = OrderedDict()
    with open(label_list, 'r', encoding=get_encoding(label_list)) as f:
        for label_id, line in enumerate(f):
            cname = line.strip()
            cname2cid[cname] = label_id
            self.labels.append(cname)
    logging.info("Starting to read file list from dataset...")
    for k, v in cname2cid.items():
        # COCO category ids are 1-based.
        annotations['categories'].append({
            'supercategory': 'component',
            'id': v + 1,
            'name': k
        })
    ct = 0
    ann_ct = 0
    with open(file_list, 'r', encoding=get_encoding(file_list)) as f:
        for line in f:
            fields = line.strip().split()
            if not fields:
                # Blank lines (e.g. a trailing newline) carry no record.
                continue
            if len(fields) > 2:
                raise Exception("A space is defined as the separator, "
                                "but it exists in image or label name {}."
                                .format(line))
            img_file, xml_file = [osp.join(data_dir, x) for x in fields[:2]]
            img_file = path_normalization(img_file)
            xml_file = path_normalization(xml_file)
            if not is_pic(img_file):
                continue
            if not osp.isfile(xml_file):
                continue
            if not osp.exists(img_file):
                logging.warning('The image file {} does not exist!'.format(
                    img_file))
                continue
            if not osp.exists(xml_file):
                logging.warning('The annotation file {} does not exist!'.
                                format(xml_file))
                continue
            tree = ET.parse(xml_file)
            root = tree.getroot()
            id_element = tree.find('id')
            if id_element is None:
                im_id = np.asarray([ct])
            else:
                # An explicit <id> also resets the running image counter.
                ct = int(id_element.text)
                im_id = np.asarray([ct])

            size_tags = tag_names(root, 'size')
            if size_tags:
                size_element = tree.find(size_tags[0])
                width_tag = tag_names(size_element, 'width')[0]
                im_w = float(size_element.find(width_tag).text)
                height_tag = tag_names(size_element, 'height')[0]
                im_h = float(size_element.find(height_tag).text)
            else:
                im_w = 0
                im_h = 0

            obj_tags = tag_names(root, 'object')
            objs = tree.findall(obj_tags[0]) if obj_tags else list()

            num_bbox, i = len(objs), 0
            gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32)
            gt_class = np.zeros((num_bbox, 1), dtype=np.int32)
            gt_score = np.zeros((num_bbox, 1), dtype=np.float32)
            is_crowd = np.zeros((num_bbox, 1), dtype=np.int32)
            difficult = np.zeros((num_bbox, 1), dtype=np.int32)
            for obj in objs:
                name_tag = tag_names(obj, 'name')[0]
                cname = obj.find(name_tag).text.strip()

                diff_tags = tag_names(obj, 'difficult')
                if not diff_tags:
                    _difficult = 0
                else:
                    try:
                        _difficult = int(obj.find(diff_tags[0]).text)
                    except (AttributeError, TypeError, ValueError):
                        # Missing, empty or non-integer <difficult> value.
                        _difficult = 0

                box_tags = tag_names(obj, 'bndbox')
                if not box_tags:
                    logging.warning(
                        "There's no field '<bndbox>' in one of object, "
                        "so this object will be ignored. xml file: {}".
                        format(xml_file))
                    continue
                box_element = obj.find(box_tags[0])
                xmin_tag = tag_names(box_element, 'xmin')[0]
                x1 = float(box_element.find(xmin_tag).text)
                ymin_tag = tag_names(box_element, 'ymin')[0]
                y1 = float(box_element.find(ymin_tag).text)
                xmax_tag = tag_names(box_element, 'xmax')[0]
                x2 = float(box_element.find(xmax_tag).text)
                ymax_tag = tag_names(box_element, 'ymax')[0]
                y2 = float(box_element.find(ymax_tag).text)
                # Clip the box to the image; the upper bound is only known
                # when the xml provides a usable <size>.
                x1 = max(0, x1)
                y1 = max(0, y1)
                if im_w > 0.5 and im_h > 0.5:
                    x2 = min(im_w - 1, x2)
                    y2 = min(im_h - 1, y2)

                if not (x2 >= x1 and y2 >= y1):
                    logging.warning(
                        "Bounding box for object {} does not satisfy xmin {} <= xmax {} and ymin {} <= ymax {}, "
                        "so this object is skipped. xml file: {}".format(i, x1, x2, y1, y2, xml_file))
                    continue

                gt_bbox[i, :] = [x1, y1, x2, y2]
                gt_class[i, 0] = cname2cid[cname]
                gt_score[i, 0] = 1.
                is_crowd[i, 0] = 0
                difficult[i, 0] = _difficult
                i += 1
                annotations['annotations'].append({
                    'iscrowd': 0,
                    'image_id': int(im_id[0]),
                    'bbox': [x1, y1, x2 - x1, y2 - y1],
                    'area': float((x2 - x1) * (y2 - y1)),
                    'category_id': cname2cid[cname] + 1,
                    'id': ann_ct,
                    'difficult': _difficult
                })
                ann_ct += 1

            # Drop the rows reserved for objects that were skipped above.
            gt_bbox = gt_bbox[:i, :]
            gt_class = gt_class[:i, :]
            gt_score = gt_score[:i, :]
            is_crowd = is_crowd[:i, :]
            difficult = difficult[:i, :]

            im_info = {
                'im_id': im_id,
                'image_shape': np.array(
                    [im_h, im_w], dtype=np.int32)
            }
            label_info = {
                'is_crowd': is_crowd,
                'gt_class': gt_class,
                'gt_bbox': gt_bbox,
                'gt_score': gt_score,
                'difficult': difficult
            }

            if gt_bbox.size > 0:
                self.file_list.append({
                    'image': img_file,
                    **im_info,
                    **label_info
                })
                annotations['images'].append({
                    'height': im_h,
                    'width': im_w,
                    'id': int(im_id[0]),
                    'file_name': osp.split(img_file)[1]
                })
            else:
                neg_file_list.append({
                    'image': img_file,
                    **im_info,
                    **label_info
                })
            ct += 1

            if self.use_mix:
                self.num_max_boxes = max(self.num_max_boxes, 2 * len(objs))
            else:
                self.num_max_boxes = max(self.num_max_boxes, len(objs))

    if not ct:
        # The original message kept the `%` operator inside the string
        # literal, so the file name was never interpolated.
        logging.error(
            "No voc record found in %s" % (file_list), exit=True)
    self.pos_num = len(self.file_list)
    if self.allow_empty and neg_file_list:
        self.file_list += self._sample_empty(neg_file_list)
    logging.info(
        "{} samples in file {}, including {} positive samples and {} negative samples.".
        format(
            len(self.file_list), file_list, self.pos_num,
            len(self.file_list) - self.pos_num))
    self.num_samples = len(self.file_list)
    self.coco_gt = COCO()
    self.coco_gt.dataset = annotations
    self.coco_gt.createIndex()

    self._epoch = 0
||||
|
||||
def __getitem__(self, idx): |
||||
sample = copy.deepcopy(self.file_list[idx]) |
||||
if self.data_fields is not None: |
||||
sample = {k: sample[k] for k in self.data_fields} |
||||
if self.use_mix and (self.mixup_op.mixup_epoch == -1 or |
||||
self._epoch < self.mixup_op.mixup_epoch): |
||||
if self.num_samples > 1: |
||||
mix_idx = random.randint(1, self.num_samples - 1) |
||||
mix_pos = (mix_idx + idx) % self.num_samples |
||||
else: |
||||
mix_pos = 0 |
||||
sample_mix = copy.deepcopy(self.file_list[mix_pos]) |
||||
if self.data_fields is not None: |
||||
sample_mix = {k: sample_mix[k] for k in self.data_fields} |
||||
sample = self.mixup_op(sample=[ |
||||
Decode(to_rgb=False)(sample), Decode(to_rgb=False)(sample_mix) |
||||
]) |
||||
sample = self.transforms(sample) |
||||
return sample |
||||
|
||||
def __len__(self): |
||||
return self.num_samples |
||||
|
||||
def set_epoch(self, epoch_id): |
||||
self._epoch = epoch_id |
||||
|
||||
def cluster_yolo_anchor(self,
                        num_anchors,
                        image_size,
                        cache=True,
                        cache_path=None,
                        iters=300,
                        gen_iters=1000,
                        thresh=.25):
    """
    Cluster YOLO anchors.

    Reference:
        https://github.com/ultralytics/yolov5/blob/master/utils/autoanchor.py

    Args:
        num_anchors (int): number of clusters
        image_size (list or int): [h, w], being an int means image height and image width are the same.
        cache (bool): whether using cache
        cache_path (str or None, optional): cache directory path. If None, use `data_dir` of dataset.
        iters (int, optional): iters of kmeans algorithm
        gen_iters (int, optional): iters of genetic algorithm
        thresh (float, optional): anchor scale threshold

    Returns:
        The value produced by calling the configured `YOLOAnchorCluster`.
    """
    # Fall back to the dataset directory when no cache location is given.
    target_cache = self.data_dir if cache_path is None else cache_path
    options = dict(
        num_anchors=num_anchors,
        dataset=self,
        image_size=image_size,
        cache=cache,
        cache_path=target_cache,
        iters=iters,
        gen_iters=gen_iters,
        thresh=thresh)
    return YOLOAnchorCluster(**options)()
||||
|
||||
def add_negative_samples(self, image_dir, empty_ratio=1):
    """Add background images (images without objects) to the training data.

    Every picture found directly in `image_dir` becomes a sample with empty
    ground-truth arrays; the new samples are then sub-sampled by
    `_sample_empty` and appended to `self.file_list`.

    Args:
        image_dir (str): Directory that contains the background images.
        empty_ratio (float or None): Proportion of negative samples in the
            total number of samples. If None, the `empty_ratio` set when the
            dataset was initialised is kept, otherwise it is overwritten.
            If smaller than 0 or greater than or equal to 1, all negative
            samples are kept. Defaults to 1.

    Raises:
        Exception: If `image_dir` is not a directory.
    """
    import cv2
    if not osp.isdir(image_dir):
        raise Exception("{} is not a valid image directory.".format(
            image_dir))
    if empty_ratio is not None:
        self.empty_ratio = empty_ratio
    image_list = os.listdir(image_dir)
    # `default` keeps this working when no image has been indexed yet.
    max_img_id = max(
        len(self.file_list) - 1, max(self.coco_gt.getImgIds(), default=-1))
    # Mirror the optional polygon field only if existing samples carry it.
    needs_poly = bool(self.file_list) and 'gt_poly' in self.file_list[0]
    neg_file_list = list()
    for image in image_list:
        if not is_pic(image):
            continue
        im_fname = osp.join(image_dir, image)
        img_data = cv2.imread(im_fname, cv2.IMREAD_UNCHANGED)
        if img_data is None:
            # cv2.imread signals an unreadable file by returning None.
            logging.warning('The image file {} cannot be read!'.format(
                im_fname))
            continue
        # Grayscale images have a 2-D shape, so only take height and width.
        im_h, im_w = img_data.shape[:2]
        max_img_id += 1

        im_info = {
            'im_id': np.asarray([max_img_id]),
            'image_shape': np.array(
                [im_h, im_w], dtype=np.int32)
        }
        label_info = {
            'is_crowd': np.zeros((0, 1), dtype=np.int32),
            'gt_class': np.zeros((0, 1), dtype=np.int32),
            'gt_bbox': np.zeros((0, 4), dtype=np.float32),
            'gt_score': np.zeros((0, 1), dtype=np.float32),
            'difficult': np.zeros((0, 1), dtype=np.int32)
        }
        if needs_poly:
            label_info['gt_poly'] = []

        neg_file_list.append({
            'image': im_fname,
            **im_info,
            **label_info
        })
    if neg_file_list:
        self.allow_empty = True
        self.file_list += self._sample_empty(neg_file_list)
    logging.info(
        "{} negative samples added. Dataset contains {} positive samples and {} negative samples.".
        format(
            len(self.file_list) - self.num_samples, self.pos_num,
            len(self.file_list) - self.pos_num))
    self.num_samples = len(self.file_list)
||||
|
||||
def _sample_empty(self, neg_file_list): |
||||
if 0. <= self.empty_ratio < 1.: |
||||
import random |
||||
total_num = len(self.file_list) |
||||
neg_num = total_num - self.pos_num |
||||
sample_num = min((total_num * self.empty_ratio - neg_num) // |
||||
(1 - self.empty_ratio), len(neg_file_list)) |
||||
return random.sample(neg_file_list, sample_num) |
||||
else: |
||||
return neg_file_list |
@ -0,0 +1,15 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from . import ppcd, ppcls, ppdet, ppseg |
@ -0,0 +1,16 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from . import (core, data, engine, modeling, model_zoo, optimizer, metrics, |
||||
utils, slim) |
@ -0,0 +1,15 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from . import config |
@ -0,0 +1,13 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
@ -0,0 +1,248 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import print_function |
||||
from __future__ import division |
||||
|
||||
import inspect |
||||
import importlib |
||||
import re |
||||
|
||||
# `docstring_parser` is optional: without it no parameter docs are extracted.
try:
    from docstring_parser import parse as doc_parse
except Exception:

    def doc_parse(*args):
        """Stand-in used when `docstring_parser` is unavailable; parses nothing."""
        return None


# `typeguard` is optional: without it type checks silently pass.
try:
    from typeguard import check_type
except Exception:

    def check_type(*args):
        """Stand-in used when `typeguard` is unavailable; checks nothing."""
        return None
||||
|
||||
|
||||
__all__ = ['SchemaValue', 'SchemaDict', 'SharedConfig', 'extract_schema'] |
||||
|
||||
|
||||
class SchemaValue(object):
    """Description of one constructor argument: name, doc, type and default.

    The `default` attribute only exists after `set_default` has been called,
    which is how "no default" is told apart from "default is None".
    """

    def __init__(self, name, doc='', type=None):
        super(SchemaValue, self).__init__()
        self.name, self.doc, self.type = name, doc, type

    def set_default(self, value):
        """Attach `value` as the default of this argument."""
        setattr(self, 'default', value)

    def has_default(self):
        """Return True once a default has been attached."""
        return hasattr(self, 'default')
||||
|
||||
|
||||
class SchemaDict(dict):
    """Dict of config values backed by a schema of `SchemaValue` entries.

    Items hold the values that were explicitly set; `self.schema` maps
    argument names to their `SchemaValue`. Looking up a key that is not set
    falls back to the schema (see `__missing__`).
    """

    def __init__(self, **kwargs):
        super(SchemaDict, self).__init__()
        self.schema = {}
        self.strict = False
        self.doc = ""
        # Initialised here so that `validate` can always format its error
        # messages, even on a SchemaDict not produced by `extract_schema`.
        self.name = ""
        self.update(kwargs)

    def __setitem__(self, key, value):
        # XXX also update regular dict to SchemaDict??
        # Assigning a plain dict over a nested SchemaDict merges into it
        # instead of replacing it.
        if isinstance(value, dict) and key in self and isinstance(self[key],
                                                                  SchemaDict):
            self[key].update(value)
        else:
            super(SchemaDict, self).__setitem__(key, value)

    def __missing__(self, key):
        """Resolve an unset key to its default, or to its schema entry."""
        if self.has_default(key):
            return self.schema[key].default
        elif key in self.schema:
            return self.schema[key]
        else:
            raise KeyError(key)

    def copy(self):
        """Return a shallow copy that keeps the schema and other attributes."""
        newone = SchemaDict()
        newone.__dict__.update(self.__dict__)
        newone.update(self)
        return newone

    def set_schema(self, key, value):
        """Register the `SchemaValue` describing `key`."""
        assert isinstance(value, SchemaValue)
        self.schema[key] = value

    def set_strict(self, strict):
        """Set whether keys outside the schema make `validate` fail."""
        self.strict = strict

    def has_default(self, key):
        """Return True if the schema defines a default for `key`."""
        return key in self.schema and self.schema[key].has_default()

    def is_default(self, key):
        """Return True if `key` resolves to its schema default."""
        if not self.has_default(key):
            return False
        if hasattr(self[key], '__dict__'):
            # Values that are objects are always reported as default.
            return True
        else:
            return key not in self or self[key] == self.schema[key].default

    def find_default_keys(self):
        """List keys (set keys first, then schema keys) that are default."""
        return [
            k for k in list(self.keys()) + list(self.schema.keys())
            if self.is_default(k)
        ]

    def mandatory(self):
        """Return True if at least one schema entry has no default."""
        return any(not self.has_default(k) for k in self.schema)

    def find_missing_keys(self):
        """List required keys that are unset or still hold a placeholder."""
        missing = [
            k for k in self.schema.keys()
            if k not in self and not self.has_default(k)
        ]
        placeholders = [k for k in self if self[k] in ('<missing>', '<value>')]
        return missing + placeholders

    def find_extra_keys(self):
        """List set keys that the schema does not know."""
        return list(set(self.keys()) - set(self.schema.keys()))

    def find_mismatch_keys(self):
        """List schema entries whose value fails `check_type`."""
        mismatch_keys = []
        for arg in self.schema.values():
            if arg.type is not None:
                try:
                    check_type("{}.{}".format(self.name, arg.name),
                               self[arg.name], arg.type)
                except Exception:
                    mismatch_keys.append(arg.name)
        return mismatch_keys

    def validate(self):
        """Check the values against the schema.

        Raises:
            ValueError: If required keys are missing, or if unknown keys are
                present while `strict` is set.
            TypeError: If a value does not match its annotated type.
        """
        missing_keys = self.find_missing_keys()
        if missing_keys:
            raise ValueError("Missing param for class<{}>: {}".format(
                self.name, ", ".join(missing_keys)))
        extra_keys = self.find_extra_keys()
        if extra_keys and self.strict:
            raise ValueError("Extraneous param for class<{}>: {}".format(
                self.name, ", ".join(extra_keys)))
        mismatch_keys = self.find_mismatch_keys()
        if mismatch_keys:
            raise TypeError("Wrong param type for class<{}>: {}".format(
                self.name, ", ".join(mismatch_keys)))
||||
|
||||
|
||||
class SharedConfig(object):
    """
    Marker object for arguments listed in a module's `__shared__` attribute.

    Resolution order of the final value:

    - a value set for `key` on the module itself in the config file wins
    - else a value for `key` present elsewhere in the config file is used
    - else `default_value` is the fallback

    Args:
        key: config[key] will be injected
        default_value: fallback value
    """

    def __init__(self, key, default_value=None):
        super(SharedConfig, self).__init__()
        self.key, self.default_value = key, default_value
||||
|
||||
|
||||
def extract_schema(cls):
    """
    Extract schema from a given class

    The schema is built from the signature of `cls.__init__` (argument
    names, defaults, annotations) and from the class docstring (summary line
    and per-argument descriptions, when a docstring parser is available).

    Args:
        cls (type): Class from which to extract.

    Returns:
        schema (SchemaDict): Extracted schema.
    """
    ctor = cls.__init__
    # The former "python 2" fallback called `inspect.getfullargspec` as
    # well, so it could never work without it; only this path is kept.
    argspec = inspect.getfullargspec(ctor)
    annotations = argspec.annotations
    has_kwargs = argspec.varkw is not None

    names = [arg for arg in argspec.args if arg != 'self']
    defaults = argspec.defaults
    num_defaults = len(defaults) if defaults is not None else 0
    # Defaults always belong to the trailing positional arguments.
    num_required = len(names) - num_defaults

    docs = cls.__doc__
    if docs is None and getattr(cls, '__category__', None) == 'op':
        docs = cls.__call__.__doc__
    try:
        docstring = doc_parse(docs)
    except Exception:
        # Docstring parsing is best effort; the schema works without it.
        docstring = None

    comments = {}
    if docstring is not None:
        for p in docstring.params:
            # Keep only the leading identifier of the documented name.
            match_obj = re.match('^([a-zA-Z_]+[a-zA-Z_0-9]*).*', p.arg_name)
            if match_obj is not None:
                comments[match_obj.group(1)] = p.description

    schema = SchemaDict()
    schema.name = cls.__name__
    schema.doc = ""
    # `if docs` (rather than `is not None`) also covers an empty docstring,
    # on which indexing `docs[0]` used to raise IndexError.
    if docs:
        start_pos = 1 if docs.startswith('\n') else 0
        schema.doc = docs[start_pos:].split("\n")[0].strip()
        # XXX handle paddle's weird doc convention
        if '**' == schema.doc[:2] and '**' == schema.doc[-2:]:
            schema.doc = schema.doc[2:-2].strip()
    schema.category = getattr(cls, '__category__', None) or 'module'
    schema.strict = not has_kwargs
    schema.pymodule = importlib.import_module(cls.__module__)
    schema.inject = getattr(cls, '__inject__', [])
    schema.shared = getattr(cls, '__shared__', [])
    for idx, name in enumerate(names):
        # Fall back to the argument name when no description is available.
        comment = comments.get(name) or name
        if name in schema.inject:
            # Injected arguments are not type checked.
            type_ = None
        else:
            type_ = annotations.get(name) or None
        value_schema = SchemaValue(name, comment, type_)
        if name in schema.shared:
            assert idx >= num_required, "shared config must have default value"
            default = defaults[idx - num_required]
            value_schema.set_default(SharedConfig(name, default))
        elif idx >= num_required:
            default = defaults[idx - num_required]
            value_schema.set_default(default)
        schema.set_schema(name, value_schema)

    return schema
@ -0,0 +1,118 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
import importlib |
||||
import inspect |
||||
|
||||
import yaml |
||||
from .schema import SharedConfig |
||||
|
||||
__all__ = ['serializable', 'Callable'] |
||||
|
||||
|
||||
def represent_dictionary_order(self, dict_data):
    """Represent `dict_data` as a plain YAML map built from its items.

    `self` is the object offering `represent_mapping` (the function is
    registered as a yaml representer by `setup_orderdict`).
    """
    map_tag = 'tag:yaml.org,2002:map'
    pairs = dict_data.items()
    return self.represent_mapping(map_tag, pairs)
||||
|
||||
|
||||
def setup_orderdict():
    """Register `represent_dictionary_order` as yaml representer for OrderedDict."""
    import collections
    yaml.add_representer(collections.OrderedDict, represent_dictionary_order)
||||
|
||||
|
||||
def _make_python_constructor(cls):
    """Build a yaml constructor that instantiates `cls` from a node.

    A sequence node is expanded into positional arguments; any other node is
    read as a mapping and expanded into keyword arguments.
    """

    def python_constructor(loader, node):
        if isinstance(node, yaml.SequenceNode):
            positional = loader.construct_sequence(node, deep=True)
            return cls(*positional)

        keywords = loader.construct_mapping(node, deep=True)
        try:
            instance = cls(**keywords)
        except Exception as ex:
            # Name the offending class before propagating the error.
            print("Error when construct {} instance from yaml config".
                  format(cls.__name__))
            raise ex
        return instance

    return python_constructor
||||
|
||||
|
||||
def _make_python_representer(cls): |
||||
# python 2 compatibility |
||||
if hasattr(inspect, 'getfullargspec'): |
||||
argspec = inspect.getfullargspec(cls) |
||||
else: |
||||
argspec = inspect.getfullargspec(cls.__init__) |
||||
argnames = [arg for arg in argspec.args if arg != 'self'] |
||||
|
||||
def python_representer(dumper, obj): |
||||
if argnames: |
||||
data = {name: getattr(obj, name) for name in argnames} |
||||
else: |
||||
data = obj.__dict__ |
||||
if '_id' in data: |
||||
del data['_id'] |
||||
return dumper.represent_mapping(u'!{}'.format(cls.__name__), data) |
||||
|
||||
return python_representer |
||||
|
||||
|
||||
def serializable(cls):
    """
    Class decorator registering a yaml loader and dumper for `cls`, which
    must be "trivially serializable"

    Args:
        cls: class to be serialized

    Returns: cls
    """
    yaml_tag = u'!{}'.format(cls.__name__)
    constructor = _make_python_constructor(cls)
    yaml.add_constructor(yaml_tag, constructor)
    representer = _make_python_representer(cls)
    yaml.add_representer(cls, representer)
    return cls
||||
|
||||
|
||||
# Dump a SharedConfig as its fallback `default_value` instead of as an object.
yaml.add_representer(SharedConfig,
                     lambda d, o: d.represent_data(o.default_value))
||||
|
||||
|
||||
@serializable
class Callable(object):
    """
    Helper to be used in Yaml for creating arbitrary class objects

    Args:
        full_type (str): the full module path to target function; a name
            without a dot is looked up among the builtins
        args (list, optional): positional arguments passed to the target.
            Defaults to no arguments.
        kwargs (dict, optional): keyword arguments passed to the target.
            Defaults to no arguments.
    """

    def __init__(self, full_type, args=None, kwargs=None):
        super(Callable, self).__init__()
        self.full_type = full_type
        # Fresh containers per instance: the former `[]` / `{}` defaults
        # were shared by every Callable created without arguments.
        self.args = [] if args is None else args
        self.kwargs = {} if kwargs is None else kwargs

    def __call__(self):
        """Import the target named by `full_type` and call it.

        Returns:
            Whatever the target returns for the stored arguments.
        """
        if '.' in self.full_type:
            idx = self.full_type.rfind('.')
            module = importlib.import_module(self.full_type[:idx])
            func_name = self.full_type[idx + 1:]
        else:
            try:
                module = importlib.import_module('builtins')
            except Exception:
                # Python 2 name of the builtins module.
                module = importlib.import_module('__builtin__')
            func_name = self.full_type

        func = getattr(module, func_name)
        return func(*self.args, **self.kwargs)
@ -0,0 +1,278 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import print_function |
||||
from __future__ import division |
||||
|
||||
import importlib |
||||
import os |
||||
import sys |
||||
|
||||
import yaml |
||||
import collections |
||||
|
||||
# Use `collections.abc` when the attribute is available and fall back to
# `collections` itself otherwise (presumably for Python 2 -- TODO confirm).
try:
    collectionsAbc = collections.abc
except AttributeError:
    collectionsAbc = collections
||||
|
||||
from .config.schema import SchemaDict, SharedConfig, extract_schema |
||||
from .config.yaml_helpers import serializable |
||||
|
||||
__all__ = [ |
||||
'global_config', |
||||
'load_config', |
||||
'merge_config', |
||||
'get_registered_modules', |
||||
'create', |
||||
'register', |
||||
'serializable', |
||||
'dump_value', |
||||
] |
||||
|
||||
|
||||
def dump_value(value):
    """Render `value` as a string.

    Objects carrying a `__dict__` and dict/tuple/list containers are dumped
    as flow-style YAML on a single line (newlines and the `...` marker are
    removed) and wrapped in single quotes; anything else goes through
    `str()`.

    Args:
        value: The value to render.

    Returns:
        str: The rendered text.
    """
    # XXX this is hackish, but collections.abc is not available in python 2
    if not hasattr(value, '__dict__') and not isinstance(value,
                                                         (dict, tuple, list)):
        # primitive types
        return str(value)

    dumped = yaml.dump(value, default_flow_style=True)
    dumped = dumped.replace('\n', '').replace('...', '')
    return "'{}'".format(dumped)
||||
|
||||
|
||||
class AttrDict(dict):
    """Single level attribute dict, NOT recursive

    Keys can be read as attributes (`d.key`) in addition to the normal
    `d['key']` access. Nested dicts are returned as they are stored.
    """

    def __init__(self, **kwargs):
        # Populate straight from the keyword arguments.
        super(AttrDict, self).__init__(**kwargs)

    def __getattr__(self, key):
        # Only reached when regular attribute lookup fails; fall back to the
        # stored items.
        if key not in self:
            raise AttributeError("object has no attribute '{}'".format(key))
        return self[key]
||||
|
||||
|
||||
# Module-level configuration store. `merge_config` merges into it when no
# other target is given, and `register` adds one entry per registered class
# name.
global_config = AttrDict()

# Key in a config file that lists the base config files to load and merge
# first (see `_load_config_with_base`).
BASE_KEY = '_BASE_'
||||
|
||||
|
||||
# parse and load _BASE_ recursively |
||||
def _load_config_with_base(file_path):
    """Load a YAML config file and resolve its `_BASE_` entries recursively.

    Base files are merged in the order they are listed, then the settings of
    `file_path` itself are merged on top, so the outer file always wins.

    Args:
        file_path (str): Path of the YAML file to load.

    Returns:
        dict: The merged configuration. When the file has no `_BASE_` key
        this is the parsed YAML content unchanged.
    """
    with open(file_path) as f:
        # NOTE(review): yaml.Loader can construct arbitrary Python objects.
        # It is presumably required for the project's custom YAML tags, so it
        # is kept -- only load config files from trusted sources.
        file_cfg = yaml.load(f, Loader=yaml.Loader)

    # NOTE: cfgs outside have higher priority than cfgs in _BASE_
    if BASE_KEY in file_cfg:
        all_base_cfg = AttrDict()
        base_ymls = file_cfg[BASE_KEY]
        if isinstance(base_ymls, str):
            # A single path given as a scalar; list() would split it into
            # characters.
            base_ymls = [base_ymls]
        for base_yml in list(base_ymls):
            if base_yml.startswith("~"):
                base_yml = os.path.expanduser(base_yml)
            if not os.path.isabs(base_yml):
                # Relative base paths are resolved against the including file.
                base_yml = os.path.join(os.path.dirname(file_path), base_yml)

            base_cfg = _load_config_with_base(base_yml)
            # Merge with dict_merge directly: merge_config() falls back to
            # the global config when handed an empty (falsy) accumulator.
            all_base_cfg = dict_merge(all_base_cfg, base_cfg)

        del file_cfg[BASE_KEY]
        return dict_merge(all_base_cfg, file_cfg)

    return file_cfg
||||
|
||||
|
||||
def load_config(file_path):
    """
    Load a YAML config file (including its base files) and merge it into the
    global config.

    The file name without directory and extension is stored under the
    `filename` key.

    Args:
        file_path (str): Path of the config file to be loaded.

    Returns: global config
    """
    extension = os.path.splitext(file_path)[1]
    assert extension in ['.yml', '.yaml'], "only support yaml files for now"

    # load config from file and merge into global config
    loaded = _load_config_with_base(file_path)
    base_name = os.path.basename(file_path)
    loaded['filename'] = os.path.splitext(base_name)[0]
    merge_config(loaded)

    return global_config
||||
|
||||
|
||||
def dict_merge(dct, merge_dct):
    """Merge `merge_dct` into `dct` recursively and in place.

    Unlike :meth:`dict.update`, which only replaces top-level keys, nested
    dicts are merged key by key down to an arbitrary depth. A key is merged
    recursively only when the existing value is a dict and the incoming value
    is a mapping; in every other case the incoming value replaces the
    existing one.

    Args:
        dct: dict onto which the merge is executed
        merge_dct: dct merged into dct

    Returns: dct
    """
    for key in merge_dct:
        incoming = merge_dct[key]
        both_nested = (key in dct and isinstance(dct[key], dict) and
                       isinstance(incoming, collectionsAbc.Mapping))
        if not both_nested:
            dct[key] = incoming
            continue
        dict_merge(dct[key], incoming)
    return dct
||||
|
||||
|
||||
def merge_config(config, another_cfg=None):
    """
    Merge config into global config or another_cfg.

    Args:
        config (dict): Config to be merged.
        another_cfg (dict, optional): Target of the merge. When omitted
            (None), `config` is merged into the global config.

    Returns: the dict that was merged into (global config or another_cfg)
    """
    # Test against None explicitly: an empty dict is falsy, so
    # `another_cfg or global_config` would silently redirect the merge to the
    # global config whenever the caller passes an empty target.
    dct = global_config if another_cfg is None else another_cfg
    return dict_merge(dct, config)
||||
|
||||
|
||||
def get_registered_modules():
    """Return the entries of the global config whose value is a `SchemaDict`.

    Returns:
        dict: name -> SchemaDict for every such entry.
    """
    registered = {}
    for name, entry in global_config.items():
        if isinstance(entry, SchemaDict):
            registered[name] = entry
    return registered
||||
|
||||
|
||||
def make_partial(cls):
    """Turn `cls` into a callable wrapper around the function in `cls.__op__`.

    Calling an instance forwards the positional arguments to the wrapped
    function, with the instance attributes (overridden by the call-time
    keyword arguments) as keyword arguments. `__category__` is set to 'op'
    when the class has none.

    Note that `__call__` is only installed when `cls.__append_doc__` is
    truthy or absent.

    Args:
        cls (type): Class carrying an `__op__` attribute.

    Returns: cls
    """
    wrapped = cls.__op__
    # Re-resolve the function through its module and name.
    op = getattr(importlib.import_module(wrapped.__module__), wrapped.__name__)

    category = getattr(cls, '__category__', None)
    cls.__category__ = category if category else 'op'

    def partial_apply(self, *args, **kwargs):
        merged_kwargs = dict(self.__dict__)
        merged_kwargs.update(kwargs)
        return op(*args, **merged_kwargs)

    if not getattr(cls, '__append_doc__', True):  # XXX should default to True?
        return cls

    if sys.version_info[0] > 2:
        cls.__doc__ = "Wrapper for `{}` OP".format(op.__name__)
        cls.__init__.__doc__ = op.__doc__
    # In python 2 the docstring has to be set on the plain function before it
    # is attached to the class; doing it in this order works for both.
    partial_apply.__doc__ = op.__doc__
    cls.__call__ = partial_apply
    return cls
||||
|
||||
|
||||
def register(cls):
    """
    Register a given module class.

    The schema extracted from the class is stored in the global config under
    the class name. Classes carrying an `__op__` attribute are first wrapped
    with `make_partial`.

    Args:
        cls (type): Module class to be registered.

    Returns: cls

    Raises:
        ValueError: If a class with the same name is already registered.
    """
    already_known = cls.__name__ in global_config
    if already_known:
        message = "Module class already registered: {}".format(cls.__name__)
        raise ValueError(message)

    if hasattr(cls, '__op__'):
        cls = make_partial(cls)

    schema = extract_schema(cls)
    global_config[cls.__name__] = schema
    return cls
||||
|
||||
|
||||
def create(cls_or_name, **kwargs):
    """
    Create an instance of given module class.

    The constructor arguments start from the registered config of the class
    and are then completed in three steps:

    1. `shared` keys without an explicit value take the value of the
       referenced global config key, or the shared default.
    2. If the class defines `from_config`, its result (called with the
       config and `kwargs`) is merged in.
    3. `inject` keys that name (or describe, through a dict with a `name`
       entry) another registered module are replaced by a created instance
       of that module.

    Args:
        cls_or_name (type or str): Class of which to create instance.
        **kwargs: Forwarded to `cls.from_config` only.

    Returns: instance of type `cls_or_name`

    Raises:
        ValueError: If an injected name is missing from the global config or
            an injection value has an unsupported type.
    """
    # isinstance (rather than an exact type match) also accepts classes
    # built by a custom metaclass.
    assert isinstance(cls_or_name,
                      (type, str)), "should be a class or name of a class"
    if isinstance(cls_or_name, str):
        name = cls_or_name
    else:
        name = cls_or_name.__name__
    assert name in global_config and \
        isinstance(global_config[name], SchemaDict), \
        "the module {} is not registered".format(name)
    config = global_config[name]
    cls = getattr(config.pymodule, name)
    cls_kwargs = {}
    cls_kwargs.update(global_config[name])

    # parse `shared` annotation of registered modules
    if getattr(config, 'shared', None):
        for k in config.shared:
            target_key = config[k]
            shared_conf = config.schema[k].default
            assert isinstance(shared_conf, SharedConfig)
            if target_key is not None and not isinstance(target_key,
                                                         SharedConfig):
                continue  # value is given for the module
            elif shared_conf.key in global_config:
                # `key` is present in config
                cls_kwargs[k] = global_config[shared_conf.key]
            else:
                cls_kwargs[k] = shared_conf.default_value

    # parse `inject` annotation of registered modules
    if getattr(cls, 'from_config', None):
        cls_kwargs.update(cls.from_config(config, **kwargs))

    if getattr(config, 'inject', None):
        for k in config.inject:
            target_key = config[k]
            # optional dependency
            if target_key is None:
                continue

            if isinstance(target_key, dict) or hasattr(target_key, '__dict__'):
                # Inline description: {'name': <registered module>, ...}.
                # The remaining items override that module's global config
                # before it is created.
                if 'name' not in target_key.keys():
                    continue
                inject_name = str(target_key['name'])
                if inject_name not in global_config:
                    raise ValueError(
                        "Missing injection name {} and check it's name in cfg file".
                        format(k))
                target = global_config[inject_name]
                for i, v in target_key.items():
                    if i == 'name':
                        continue
                    target[i] = v
                if isinstance(target, SchemaDict):
                    cls_kwargs[k] = create(inject_name)
            elif isinstance(target_key, str):
                if target_key not in global_config:
                    raise ValueError("Missing injection config:", target_key)
                target = global_config[target_key]
                if isinstance(target, SchemaDict):
                    cls_kwargs[k] = create(target_key)
                elif hasattr(target, '__dict__'):  # serialized object
                    cls_kwargs[k] = target
            else:
                raise ValueError("Unsupported injection type:", target_key)
    # NOTE: values of reference types (e.g., list, dict) are handed to the
    # instance without copying, so a created module that mutates them also
    # mutates the global config.
    return cls(**cls_kwargs)
@ -0,0 +1,21 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from . import source |
||||
from . import transform |
||||
from . import reader |
||||
|
||||
from .source import * |
||||
from .transform import * |
||||
from .reader import * |
@ -0,0 +1,13 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
@ -0,0 +1,585 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
import copy |
||||
import math |
||||
import random |
||||
import numpy as np |
||||
from copy import deepcopy |
||||
from typing import List, Tuple |
||||
from collections import defaultdict |
||||
|
||||
from .chip_box_utils import nms, transform_chip_boxes2image_boxes |
||||
from .chip_box_utils import find_chips_to_cover_overlaped_boxes |
||||
from .chip_box_utils import transform_chip_box |
||||
from .chip_box_utils import intersection_over_box |
||||
|
||||
|
||||
class AnnoCropper(object):
    """Split image annotation records into multi-scale chip records and merge
    chip-level detections back into image-level results."""

    def __init__(self,
                 image_target_sizes: List[int],
                 valid_box_ratio_ranges: List[List[float]],
                 chip_target_size: int,
                 chip_target_stride: int,
                 use_neg_chip: bool=False,
                 max_neg_num_per_im: int=8,
                 max_per_img: int=-1,
                 nms_thresh: float=0.5):
        """
        Generate chips by chip_target_size and chip_target_stride.
        These two parameters just like kernel_size and stride in cnn.

        Each image has its raw size. After resizing, then get its target size.
        The resizing scale = target_size / raw_size.
        So are chips of the image.
        box_ratio = box_raw_size / image_raw_size = box_target_size / image_target_size
        The 'size' above mentioned is the size of long-side of image, box or chip.

        :param image_target_sizes: [2000, 1000]
        :param valid_box_ratio_ranges: [[-1, 0.1],[0.08, -1]]
        :param chip_target_size: 500
        :param chip_target_stride: 200
        :param use_neg_chip: whether to also produce chips without gt boxes
        :param max_neg_num_per_im: upper bound of sampled negative chips per image
        :param max_per_img: max detections kept per image after nms, <= 0 keeps all
        :param nms_thresh: threshold passed to nms when aggregating detections
        """
        self.target_sizes = image_target_sizes
        self.valid_box_ratio_ranges = valid_box_ratio_ranges
        assert len(self.target_sizes) == len(self.valid_box_ratio_ranges)
        self.scale_num = len(self.target_sizes)
        self.chip_target_size = chip_target_size  # is target size
        self.chip_target_stride = chip_target_stride  # is target stride
        self.use_neg_chip = use_neg_chip
        self.max_neg_num_per_im = max_neg_num_per_im
        self.max_per_img = max_per_img
        self.nms_thresh = nms_thresh

    def crop_anno_records(self, records: List[dict]):
        """
        The main logic:
        # foreach record(image):
        #   foreach scale:
        #     1 generate chips by chip size and stride for each scale
        #     2 get pos chips
        #     - validate boxes: current scale; h,w >= 1
        #     - find pos chips greedily by valid gt boxes in each scale
        #     - for every valid gt box, find its corresponding pos chips in each scale
        #     3 get neg chips
        #     - If given proposals, find neg boxes in them which are not in pos chips
        #     - If got neg boxes in last step, we find neg chips and assign neg boxes to neg chips such as 2.
        # 4 sample neg chips if too much each image
        #   transform this image-scale annotations to chips(pos chips&neg chips) annotations

        :param records, standard coco_record but with extra key `proposals`(Px4), which are predicted by stage1
                        model and maybe have neg boxes in them.
        :return: new_records, list of dict like
        {
            'im_file': 'fake_image1.jpg',
            'im_id': np.array([1]),  # new _global_chip_id as im_id
            'h': h,  # chip height
            'w': w,  # chip width
            'is_crowd': is_crowd,  # Nx1 -> Mx1
            'gt_class': gt_class,  # Nx1 -> Mx1
            'gt_bbox': gt_bbox,  # Nx4 -> Mx4, 4 represents [x1,y1,x2,y2]
            'gt_poly': gt_poly,  # [None]xN -> [None]xM
            'chip': [x1, y1, x2, y2]  # added
        }

        Attention:
        ------------------------------>x
        |
        |    (x1,y1)------
        |       |        |
        |       |        |
        |       |        |
        |       |        |
        |       |        |
        |       ----------
        |                 (x2,y2)
        |
        ↓
        y

        If we use [x1, y1, x2, y2] to represent boxes or chips,
        (x1,y1) is the left-top point which is in the box,
        but (x2,y2) is the right-bottom point which is not in the box.
        So x1 in [0, w-1], x2 in [1, w], y1 in [0, h-1], y2 in [1,h].
        And you can use x2-x1 to get width, and you can use image[y1:y2, x1:x2] to get the box area.
        """

        self.chip_records = []
        self._global_chip_id = 1
        for r in records:
            # element: (chip, boxes_idx), chip is [x1, y1, x2, y2],
            # boxes_idx is List[int]
            self._cur_im_pos_chips = []
            # element: (chip, neg_box_num)
            self._cur_im_neg_chips = []
            for scale_i in range(self.scale_num):
                self._get_current_scale_parameters(scale_i, r)

                # Cx4
                chips = self._create_chips(r['h'], r['w'], self._cur_scale)

                # dict: chipid->[box_id, ...]
                pos_chip2boxes_idx = self._get_valid_boxes_and_pos_chips(
                    r['gt_bbox'], chips)

                # dict: chipid->neg_box_num
                neg_chip2box_num = self._get_neg_boxes_and_chips(
                    chips,
                    list(pos_chip2boxes_idx.keys()), r.get('proposals', None))

                self._add_to_cur_im_chips(chips, pos_chip2boxes_idx,
                                          neg_chip2box_num)

            cur_image_records = self._trans_all_chips2annotations(r)
            self.chip_records.extend(cur_image_records)
        return self.chip_records

    def _add_to_cur_im_chips(self, chips, pos_chip2boxes_idx,
                             neg_chip2box_num):
        """Append the positive (and, if given, negative) chips of the current
        scale to the per-image accumulators."""
        for pos_chipid, boxes_idx in pos_chip2boxes_idx.items():
            chip = np.array(chips[pos_chipid])  # copy chips slice
            self._cur_im_pos_chips.append((chip, boxes_idx))

        if neg_chip2box_num is None:
            return

        for neg_chipid, neg_box_num in neg_chip2box_num.items():
            chip = np.array(chips[neg_chipid])
            self._cur_im_neg_chips.append((chip, neg_box_num))

    def _trans_all_chips2annotations(self, r):
        """Build the chip records of one image: all positive chips plus, when
        `use_neg_chip` is set, the sampled negative chips."""
        gt_bbox = r['gt_bbox']
        im_file = r['im_file']
        is_crowd = r['is_crowd']
        gt_class = r['gt_class']
        # gt_poly = r['gt_poly']   # [None]xN
        # remaining keys: im_id, h, w
        chip_records = self._trans_pos_chips2annotations(im_file, gt_bbox,
                                                         is_crowd, gt_class)

        if not self.use_neg_chip:
            return chip_records

        sampled_neg_chips = self._sample_neg_chips()
        neg_chip_records = self._trans_neg_chips2annotations(im_file,
                                                             sampled_neg_chips)
        chip_records.extend(neg_chip_records)
        return chip_records

    def _trans_pos_chips2annotations(self, im_file, gt_bbox, is_crowd,
                                     gt_class):
        """Create one record per positive chip of the current image; every
        record consumes one global chip id."""
        chip_records = []
        for chip, boxes_idx in self._cur_im_pos_chips:
            chip_bbox, final_boxes_idx = transform_chip_box(gt_bbox, boxes_idx,
                                                            chip)
            x1, y1, x2, y2 = chip
            chip_h = y2 - y1
            chip_w = x2 - x1
            rec = {
                'im_file': im_file,
                'im_id': np.array([self._global_chip_id]),
                'h': chip_h,
                'w': chip_w,
                'gt_bbox': chip_bbox,
                'is_crowd': is_crowd[final_boxes_idx].copy(),
                'gt_class': gt_class[final_boxes_idx].copy(),
                # 'gt_poly': [None] * len(final_boxes_idx),
                'chip': chip
            }
            self._global_chip_id += 1
            chip_records.append(rec)
        return chip_records

    def _sample_neg_chips(self):
        """Pick the negative chips to keep for the current image.

        At most min(pos_num + 2, max_neg_num_per_im) chips are kept. When
        there are more, the 1.5x that many chips with the highest neg box
        count are shuffled and the first ones are taken.
        """
        pos_num = len(self._cur_im_pos_chips)
        neg_num = len(self._cur_im_neg_chips)
        sample_num = min(pos_num + 2, self.max_neg_num_per_im)
        assert sample_num >= 1
        if neg_num <= sample_num:
            return self._cur_im_neg_chips

        candidate_num = int(sample_num * 1.5)
        candidate_neg_chips = sorted(
            self._cur_im_neg_chips, key=lambda x: -x[1])[:candidate_num]
        random.shuffle(candidate_neg_chips)
        sampled_neg_chips = candidate_neg_chips[:sample_num]
        return sampled_neg_chips

    def _trans_neg_chips2annotations(self,
                                     im_file: str,
                                     sampled_neg_chips: List[Tuple]):
        """Create one record with empty annotations per sampled negative
        chip; every record consumes one global chip id."""
        chip_records = []
        for chip, neg_box_num in sampled_neg_chips:
            x1, y1, x2, y2 = chip
            chip_h = y2 - y1
            chip_w = x2 - x1
            rec = {
                'im_file': im_file,
                'im_id': np.array([self._global_chip_id]),
                'h': chip_h,
                'w': chip_w,
                'gt_bbox': np.zeros(
                    (0, 4), dtype=np.float32),
                'is_crowd': np.zeros(
                    (0, 1), dtype=np.int32),
                'gt_class': np.zeros(
                    (0, 1), dtype=np.int32),
                # 'gt_poly': [],
                'chip': chip
            }
            self._global_chip_id += 1
            chip_records.append(rec)
        return chip_records

    def _get_current_scale_parameters(self, scale_i, r):
        """Cache image size, target size, scale and valid box ratio range of
        scale `scale_i` for record `r` on the instance."""
        im_size = max(r['h'], r['w'])
        im_target_size = self.target_sizes[scale_i]
        self._cur_im_size, self._cur_im_target_size = im_size, im_target_size
        self._cur_scale = self._get_current_scale(im_target_size, im_size)
        self._cur_valid_ratio_range = self.valid_box_ratio_ranges[scale_i]

    def _get_current_scale(self, im_target_size, im_size):
        """Return the resize factor from raw size to target size."""
        return im_target_size / im_size

    def _create_chips(self, h: int, w: int, scale: float):
        """
        Generate chips by chip_target_size and chip_target_stride.
        These two parameters just like kernel_size and stride in cnn.
        :return: chips, Cx4, xy in raw size dimension
        """
        chip_size = self.chip_target_size  # omit target for simplicity
        stride = self.chip_target_stride
        width = int(scale * w)
        height = int(scale * h)
        min_chip_location_diff = 20  # in target size

        assert chip_size >= stride
        chip_overlap = chip_size - stride

        def count_steps(length):
            span = length - chip_overlap
            if span % stride > min_chip_location_diff:
                # the part left over after the last full stride is large
                # enough: keep it as an extra chip
                return max(1, int(math.ceil(span / stride)))
            # the left-over part is small: drop it
            return max(1, int(math.floor(span / stride)))

        w_steps = count_steps(width)
        h_steps = count_steps(height)

        chips = list()
        for j in range(h_steps):
            for i in range(w_steps):
                x1 = i * stride
                y1 = j * stride
                x2 = min(x1 + chip_size, width)
                y2 = min(y1 + chip_size, height)
                chips.append([x1, y1, x2, y2])

        # check chip size
        for item in chips:
            if item[2] - item[0] > chip_size * 1.1 or item[3] - item[
                    1] > chip_size * 1.1:
                raise ValueError(item)
        # np.float was only an alias of the builtin float (float64) and has
        # been removed from NumPy; name the dtype explicitly.
        chips = np.array(chips, dtype=np.float64)

        raw_size_chips = chips / scale
        return raw_size_chips

    def _get_valid_boxes_and_pos_chips(self, gt_bbox, chips):
        """Filter the gt boxes valid for the current scale and map positive
        chips to the raw indices of the boxes assigned to them."""
        valid_ratio_range = self._cur_valid_ratio_range
        im_size = self._cur_im_size
        scale = self._cur_scale
        #   Nx4            N
        valid_boxes, valid_boxes_idx = self._validate_boxes(
            valid_ratio_range, im_size, gt_bbox, scale)
        # dict: chipid->[box_id, ...]
        pos_chip2boxes_idx = self._find_pos_chips(chips, valid_boxes,
                                                  valid_boxes_idx)
        return pos_chip2boxes_idx

    def _validate_boxes(self,
                        valid_ratio_range: List[float],
                        im_size: int,
                        gt_boxes: 'np.array of Nx4',
                        scale: float):
        """
        Keep the boxes whose long side relative to `im_size` lies in
        [low, high) and whose short side is at least 2 after scaling.
        A non-positive bound in `valid_ratio_range` means "unbounded".
        :return: valid_boxes: Nx4, valid_boxes_idx: N
        """
        ws = (gt_boxes[:, 2] - gt_boxes[:, 0]).astype(np.int32)
        hs = (gt_boxes[:, 3] - gt_boxes[:, 1]).astype(np.int32)
        maxs = np.maximum(ws, hs)
        box_ratio = maxs / im_size
        mins = np.minimum(ws, hs)
        target_mins = mins * scale

        low = valid_ratio_range[0] if valid_ratio_range[0] > 0 else 0
        # np.float has been removed from NumPy; float64 is what it aliased.
        high = valid_ratio_range[1] if valid_ratio_range[1] > 0 else np.finfo(
            np.float64).max

        valid_boxes_idx = np.nonzero((low <= box_ratio) & (box_ratio < high) &
                                     (target_mins >= 2))[0]
        valid_boxes = gt_boxes[valid_boxes_idx]
        return valid_boxes, valid_boxes_idx

    def _find_pos_chips(self,
                        chips: 'Cx4',
                        valid_boxes: 'Bx4',
                        valid_boxes_idx: 'B'):
        """
        :return: pos_chip2boxes_idx, dict: chipid->[box_id, ...]
        """
        iob = intersection_over_box(chips, valid_boxes)  # overlap, CxB

        # chips are chosen with threshold 1.0 ...
        iob_threshold_to_find_chips = 1.
        pos_chip_ids, _ = self._find_chips_to_cover_overlaped_boxes(
            iob, iob_threshold_to_find_chips)
        pos_chip_ids = set(pos_chip_ids)

        # ... but boxes are assigned to a chosen chip from 0.5 upwards
        iob_threshold_to_assign_box = 0.5
        pos_chip2boxes_idx = self._assign_boxes_to_pos_chips(
            iob, iob_threshold_to_assign_box, pos_chip_ids, valid_boxes_idx)
        return pos_chip2boxes_idx

    def _find_chips_to_cover_overlaped_boxes(self, iob, overlap_threshold):
        """Thin wrapper around the module-level helper of the same name."""
        return find_chips_to_cover_overlaped_boxes(iob, overlap_threshold)

    def _assign_boxes_to_pos_chips(self, iob, overlap_threshold, pos_chip_ids,
                                   valid_boxes_idx):
        """Map every positive chip to the raw gt indices of the boxes whose
        iob with that chip reaches `overlap_threshold`."""
        chip_ids, box_ids = np.nonzero(iob >= overlap_threshold)
        pos_chip2boxes_idx = defaultdict(list)
        for chip_id, box_id in zip(chip_ids, box_ids):
            if chip_id not in pos_chip_ids:
                continue
            raw_gt_box_idx = valid_boxes_idx[box_id]
            pos_chip2boxes_idx[chip_id].append(raw_gt_box_idx)
        return pos_chip2boxes_idx

    def _get_neg_boxes_and_chips(self,
                                 chips: 'Cx4',
                                 pos_chip_ids: 'D',
                                 proposals: 'Px4'):
        """
        :param chips: chips of the current scale
        :param pos_chip_ids: ids of the chips already chosen as positive
        :param proposals: proposal boxes of the image, may be None
        :return: neg_chip2box_num, None or dict: chipid->neg_box_num
        """
        if not self.use_neg_chip:
            return None

        # train proposals maybe None
        if proposals is None or len(proposals) < 1:
            return None

        valid_ratio_range = self._cur_valid_ratio_range
        im_size = self._cur_im_size
        scale = self._cur_scale

        valid_props, _ = self._validate_boxes(valid_ratio_range, im_size,
                                              proposals, scale)
        neg_boxes = self._find_neg_boxes(chips, pos_chip_ids, valid_props)
        neg_chip2box_num = self._find_neg_chips(chips, pos_chip_ids, neg_boxes)
        return neg_chip2box_num

    def _find_neg_boxes(self,
                        chips: 'Cx4',
                        pos_chip_ids: 'D',
                        valid_props: 'Px4'):
        """
        Proposals whose best iob with any positive chip is below 0.5.
        :return: neg_boxes: Nx4
        """
        if len(pos_chip_ids) == 0:
            return valid_props

        pos_chips = chips[pos_chip_ids]
        iob = intersection_over_box(pos_chips, valid_props)
        overlap_per_prop = np.max(iob, axis=0)
        non_overlap_props_idx = overlap_per_prop < 0.5
        neg_boxes = valid_props[non_overlap_props_idx]
        return neg_boxes

    def _find_neg_chips(self,
                        chips: 'Cx4',
                        pos_chip_ids: 'D',
                        neg_boxes: 'Nx4'):
        """
        :return: neg_chip2box_num, dict: chipid->neg_box_num
        """
        neg_chip_ids = np.setdiff1d(np.arange(len(chips)), pos_chip_ids)
        neg_chips = chips[neg_chip_ids]

        iob = intersection_over_box(neg_chips, neg_boxes)
        iob_threshold_to_find_chips = 0.7
        chosen_neg_chip_ids, chip_id2overlap_box_num = \
            self._find_chips_to_cover_overlaped_boxes(iob, iob_threshold_to_find_chips)

        neg_chipid2box_num = {}
        for cid in chosen_neg_chip_ids:
            box_num = chip_id2overlap_box_num[cid]
            # cid indexes neg_chips; translate back to an index into chips
            raw_chip_id = neg_chip_ids[cid]
            neg_chipid2box_num[raw_chip_id] = box_num
        return neg_chipid2box_num

    def crop_infer_anno_records(self, records: List[dict]):
        """
        transform image record to chips record
        :param records:
        :return: new_records, list of dict like
        {
            'im_file': 'fake_image1.jpg',
            'im_id': np.array([1]),  # new _global_chip_id as im_id
            'h': h,  # chip height
            'w': w,  # chip width
            'chip': [x1, y1, x2, y2]  # added
            'ori_im_h': ori_im_h  # added, origin image height
            'ori_im_w': ori_im_w  # added, origin image width
            'scale_i': 0  # added,
        }
        """
        self.chip_records = []
        self._global_chip_id = 1  # im_id start from 1
        self._global_chip_id2img_id = {}

        for r in records:
            for scale_i in range(self.scale_num):
                self._get_current_scale_parameters(scale_i, r)
                # Cx4
                chips = self._create_chips(r['h'], r['w'], self._cur_scale)
                cur_img_chip_record = self._get_chips_records(r, chips,
                                                              scale_i)
                self.chip_records.extend(cur_img_chip_record)

        return self.chip_records

    def _get_chips_records(self, rec, chips, scale_i):
        """Create one inference record per chip of image record `rec` and
        remember which original image id each global chip id belongs to."""
        cur_img_chip_records = []
        ori_im_h = rec["h"]
        ori_im_w = rec["w"]
        im_file = rec["im_file"]
        ori_im_id = rec["im_id"]
        for chip in chips:
            chip_rec = {}
            x1, y1, x2, y2 = chip
            chip_h = y2 - y1
            chip_w = x2 - x1
            chip_rec["im_file"] = im_file
            chip_rec["im_id"] = self._global_chip_id
            chip_rec["h"] = chip_h
            chip_rec["w"] = chip_w
            chip_rec["chip"] = chip
            chip_rec["ori_im_h"] = ori_im_h
            chip_rec["ori_im_w"] = ori_im_w
            chip_rec["scale_i"] = scale_i

            self._global_chip_id2img_id[self._global_chip_id] = int(ori_im_id)
            self._global_chip_id += 1
            cur_img_chip_records.append(chip_rec)

        return cur_img_chip_records

    def aggregate_chips_detections(self, results, records=None):
        """
        # 1. transform chip dets to image dets
        # 2. nms boxes per image;
        # 3. format output results
        :param results: list of per-batch dicts with keys 'bbox', 'bbox_num'
                        and 'im_id'; the input is deep-copied first
        :param records: chip records, defaults to the records created by the
                        last crop call
        :return: list of per-image result dicts
        """
        results = deepcopy(results)
        records = records if records else self.chip_records
        img_id2bbox = self._transform_chip2image_bboxes(results, records)
        nms_img_id2bbox = self._nms_dets(img_id2bbox)
        aggregate_results = self._reformat_results(nms_img_id2bbox)
        return aggregate_results

    def _transform_chip2image_bboxes(self, results, records):
        """Group the detections by original image id, expressed in image
        coordinates and filtered by the valid range of the chip's scale."""
        # 1. Transform chip dets to image dets;
        # 2. Filter valid range;
        # 3. Reformat and Aggregate chip dets to Get scale_cls_dets
        img_id2bbox = defaultdict(list)
        for result in results:
            bbox_locs = result['bbox']
            bbox_nums = result['bbox_num']
            if len(bbox_locs) == 1 and bbox_locs[0][
                    0] == -1:  # current batch has no detections
                # bbox_locs = array([[-1.]], dtype=float32); bbox_nums = [[1]]
                # MultiClassNMS output: If there is no detected boxes for all images, lod will be set to {1} and Out only contains one value which is -1.
                continue
            im_ids = result['im_id']  # replace with range(len(bbox_nums))

            last_bbox_num = 0
            for idx, im_id in enumerate(im_ids):

                cur_bbox_len = bbox_nums[idx]
                bboxes = bbox_locs[last_bbox_num:last_bbox_num + cur_bbox_len]
                last_bbox_num += cur_bbox_len
                # box: [num_id, score, xmin, ymin, xmax, ymax]
                if len(bboxes) == 0:  # current image has no detections
                    continue

                # im_id starts from 1, type is np.int64
                chip_rec = records[int(im_id) - 1]
                image_size = max(chip_rec["ori_im_h"], chip_rec["ori_im_w"])

                bboxes = transform_chip_boxes2image_boxes(
                    bboxes, chip_rec["chip"], chip_rec["ori_im_h"],
                    chip_rec["ori_im_w"])

                scale_i = chip_rec["scale_i"]
                cur_scale = self._get_current_scale(self.target_sizes[scale_i],
                                                    image_size)
                _, valid_boxes_idx = self._validate_boxes(
                    self.valid_box_ratio_ranges[scale_i], image_size,
                    bboxes[:, 2:], cur_scale)
                ori_img_id = self._global_chip_id2img_id[int(im_id)]

                img_id2bbox[ori_img_id].append(bboxes[valid_boxes_idx])

        return img_id2bbox

    def _nms_dets(self, img_id2bbox):
        """Replace, in place, the detection list of every image by its
        nms-ed (and optionally truncated) detections."""
        # 1. NMS on each image-class
        # 2. Limit number of detections to MAX_PER_IMAGE if requested
        max_per_img = self.max_per_img
        nms_thresh = self.nms_thresh

        for img_id in img_id2bbox:
            # list of np.array of shape [N, 6], 6 is [label, score, x1, y1, x2, y2]
            box = img_id2bbox[img_id]
            box = np.concatenate(box, axis=0)
            nms_dets = nms(box, nms_thresh)
            if max_per_img > 0:
                if len(nms_dets) > max_per_img:
                    # keep the max_per_img highest scores
                    keep = np.argsort(-nms_dets[:, 1])[:max_per_img]
                    nms_dets = nms_dets[keep]

            img_id2bbox[img_id] = nms_dets

        return img_id2bbox

    def _reformat_results(self, img_id2bbox):
        """reformat results"""
        im_ids = img_id2bbox.keys()
        results = []
        for img_id in im_ids:  # output by original im_id order
            if len(img_id2bbox[img_id]) == 0:
                bbox = np.array(
                    [[-1., 0., 0., 0., 0., 0.]])  # edge case: no detections
                bbox_num = np.array([0])
            else:
                # np.array of shape [N, 6], 6 is [label, score, x1, y1, x2, y2]
                bbox = img_id2bbox[img_id]
                bbox_num = np.array([len(bbox)])
            res = dict(
                im_id=np.array([[img_id]]), bbox=bbox, bbox_num=bbox_num)
            results.append(res)
        return results
@ -0,0 +1,170 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
import numpy as np |
||||
|
||||
|
||||
def bbox_area(boxes):
    """
    Area of every box.
    :param boxes: 2-D array whose first four columns are x1, y1, x2, y2
    :return: 1-D array holding (x2 - x1) * (y2 - y1) for each row
    """
    widths = boxes[:, 2] - boxes[:, 0]
    heights = boxes[:, 3] - boxes[:, 1]
    return widths * heights
||||
|
||||
|
||||
def intersection_over_box(chips, boxes):
    """
    Intersection area of every (chip, box) pair divided by the box area.
    :param chips: C
    :param boxes: B
    :return: iob, CxB (an all-zero float32 array when C or B is 0)
    """
    num_chips, num_boxes = chips.shape[0], boxes.shape[0]
    if num_chips == 0 or num_boxes == 0:
        return np.zeros([num_chips, num_boxes], dtype='float32')

    # Insert an axis so each chip broadcasts against every box.
    chips_col = np.expand_dims(chips, 1)
    top_left = np.maximum(chips_col[:, :, :2], boxes[:, :2])  # CxBx2
    bottom_right = np.minimum(chips_col[:, :, 2:], boxes[:, 2:])  # CxBx2

    # Negative extents mean the pair does not overlap; clamp them to zero.
    overlap_wh = np.clip(bottom_right - top_left, a_min=0, a_max=None)
    overlap_area = overlap_wh[:, :, 0] * overlap_wh[:, :, 1]  # CxB

    return overlap_area / np.expand_dims(bbox_area(boxes), 0)
||||
|
||||
|
||||
def clip_boxes(boxes, im_shape):
    """
    Clip boxes to image boundaries (the input array is modified in place).
    :param boxes: [N, 4]
    :param im_shape: tuple of 2, [h, w]
    :return: [N, 4], the same array object that was passed in
    """
    height, width = im_shape[0], im_shape[1]
    # (column, lower bound, upper bound) for x1, y1, x2, y2 respectively
    column_limits = (
        (0, 0, width - 1),
        (1, 0, height - 1),
        (2, 1, width),
        (3, 1, height),
    )
    for col, lower, upper in column_limits:
        boxes[:, col] = np.clip(boxes[:, col], lower, upper)
    return boxes
||||
|
||||
|
||||
def transform_chip_box(gt_bbox: 'Gx4', boxes_idx: 'B', chip: '4'):
    """
    Express the selected ground-truth boxes in the chip's coordinate frame.
    :param gt_bbox: Gx4 boxes in image coordinates
    :param boxes_idx: indices (B of them) of the boxes to transform
    :param chip: (x1, y1, x2, y2) of the chip
    :return: the clipped boxes whose integer width and height are both at
             least 2, and the matching entries of boxes_idx
    """
    boxes_idx = np.array(boxes_idx)
    local_boxes = gt_bbox[boxes_idx].copy()  # Bx4, copy keeps gt_bbox intact
    chip_x1, chip_y1, chip_x2, chip_y2 = chip

    # Shift so the chip's top-left corner becomes the origin.
    local_boxes[:, [0, 2]] -= chip_x1
    local_boxes[:, [1, 3]] -= chip_y1

    chip_hw = (chip_y2 - chip_y1, chip_x2 - chip_x1)
    local_boxes = clip_boxes(local_boxes, chip_hw)

    # Sizes are truncated to int32 before the minimum-size check.
    widths = (local_boxes[:, 2] - local_boxes[:, 0]).astype(np.int32)
    heights = (local_boxes[:, 3] - local_boxes[:, 1]).astype(np.int32)
    keep = (widths >= 2) & (heights >= 2)
    return local_boxes[keep], boxes_idx[keep]
||||
|
||||
|
||||
def find_chips_to_cover_overlaped_boxes(iob, overlap_threshold):
    """
    Greedily choose chips until every box that overlaps some chip is covered.
    :param iob: CxB array; entry (c, b) is compared against the threshold
    :param overlap_threshold: a (chip, box) pair counts when iob >= this value
    :return: (chosen chip ids in selection order,
              per-chip count of boxes meeting the threshold, length C)
    """
    chip_ids, box_ids = np.nonzero(iob >= overlap_threshold)
    # minlength keeps one slot per chip even for trailing chips with no box.
    chip_id2overlap_box_num = np.bincount(chip_ids, minlength=len(iob))

    chosen_chip_ids = []
    while box_ids.size > 0:
        # Take the chip that covers the most still-uncovered boxes.
        best_chip = np.argmax(np.bincount(chip_ids))
        assert best_chip not in chosen_chip_ids
        chosen_chip_ids.append(best_chip)

        covered_boxes = box_ids[chip_ids == best_chip]
        still_uncovered = ~np.isin(box_ids, covered_boxes)
        chip_ids = chip_ids[still_uncovered]
        box_ids = box_ids[still_uncovered]
    return chosen_chip_ids, chip_id2overlap_box_num
||||
|
||||
|
||||
def transform_chip_boxes2image_boxes(chip_boxes, chip, img_h, img_w):
    """
    Map detections from chip coordinates back to image coordinates.
    :param chip_boxes: sequence of rows [label, score, x1, y1, x2, y2] in
                       chip coordinates (layout implied by the sort on
                       column 1 and the shifts on columns 2..5)
    :param chip: (xmin, ymin, xmax, ymax) of the chip; only the origin is used
    :param img_h: image height used for clipping
    :param img_w: image width used for clipping
    :return: [N, 6] array sorted by descending score, with the coordinate
             columns shifted by the chip origin and clipped to the image
    """
    chip_boxes = np.array(sorted(chip_boxes, key=lambda item: -item[1]))
    xmin, ymin, _, _ = chip
    # Transform to origin image loc
    chip_boxes[:, 2] += xmin
    chip_boxes[:, 4] += xmin
    chip_boxes[:, 3] += ymin
    chip_boxes[:, 5] += ymin
    # clip_boxes works on columns 0..3 of whatever it receives, so hand it
    # only the coordinate columns; passing the full 6-column array would
    # clip label/score/x1/y1 and leave x2/y2 unclipped.
    chip_boxes[:, 2:6] = clip_boxes(chip_boxes[:, 2:6], (img_h, img_w))
    return chip_boxes
||||
|
||||
|
||||
def nms(dets, thresh):
    """Apply classic DPM-style greedy NMS.

    :param dets: [N, 6] array, rows are [label, score, x1, y1, x2, y2].
                 Only the score and coordinate columns are read, so
                 suppression is applied across labels.
    :param thresh: a box is suppressed when its IoU with a kept,
                   higher-scoring box is >= thresh
    :return: the surviving rows of dets, in their original row order
    """
    if dets.shape[0] == 0:
        return dets[[], :]
    scores = dets[:, 1]
    x1 = dets[:, 2]
    y1 = dets[:, 3]
    x2 = dets[:, 4]
    y2 = dets[:, 5]

    # Pixel-inclusive box areas (hence the +1).
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]

    ndets = dets.shape[0]
    # Plain bool instead of the `np.int` alias, which NumPy 1.24 removed.
    suppressed = np.zeros(ndets, dtype=bool)

    for rank in range(ndets):
        i = order[rank]
        if suppressed[i]:
            continue
        # Every lower-scoring box is compared with box i in one vectorised
        # step. Re-marking an already suppressed box changes nothing, so the
        # result equals the pairwise loop.
        rest = order[rank + 1:]
        xx1 = np.maximum(x1[i], x1[rest])
        yy1 = np.maximum(y1[i], y1[rest])
        xx2 = np.minimum(x2[i], x2[rest])
        yy2 = np.minimum(y2[i], y2[rest])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[rest] - inter)
        suppressed[rest[ovr >= thresh]] = True

    keep = np.where(~suppressed)[0]
    dets = dets[keep, :]
    return dets
@ -0,0 +1,302 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
import os |
||||
import traceback |
||||
import six |
||||
import sys |
||||
if sys.version_info >= (3, 0): |
||||
pass |
||||
else: |
||||
pass |
||||
import numpy as np |
||||
|
||||
from paddle.io import DataLoader, DistributedBatchSampler |
||||
from paddle.fluid.dataloader.collate import default_collate_fn |
||||
|
||||
from paddlers.models.ppdet.core.workspace import register |
||||
from . import transform |
||||
from .shm_utils import _get_shared_memory_size_in_M |
||||
|
||||
from paddlers.models.ppdet.utils.logger import setup_logger |
||||
logger = setup_logger('reader') |
||||
|
||||
MAIN_PID = os.getpid() |
||||
|
||||
|
||||
class Compose(object):
    """Chain of per-sample transform operators built from a config list.

    Args:
        transforms (list): list of dicts; each key is the name of an
            operator looked up on the ``transform`` module and each value
            is the keyword-argument dict used to construct it.
        num_classes (int): assigned to every constructed operator that has
            a ``num_classes`` attribute, default 80.
    """

    def __init__(self, transforms, num_classes=80):
        self.transforms = transforms
        self.transforms_cls = []
        for op_cfg in self.transforms:
            for op_name, op_kwargs in op_cfg.items():
                op = getattr(transform, op_name)(**op_kwargs)
                if hasattr(op, 'num_classes'):
                    op.num_classes = num_classes

                self.transforms_cls.append(op)

    def __call__(self, data):
        """Run ``data`` through every operator in order and return the result.

        A failing operator is logged together with its stack trace and the
        exception is then re-raised.
        """
        for op in self.transforms_cls:
            try:
                data = op(data)
            except Exception as e:
                stack_info = traceback.format_exc()
                message = ("fail to map sample transform [{}] "
                           "with error: {} and stack:\n{}".format(
                               op, e, str(stack_info)))
                logger.warning(message)
                raise e

        return data
||||
|
||||
|
||||
class BatchCompose(Compose):
    """Batch-level transform chain that also assembles the final batch.

    Args:
        transforms (list): batch transform configs, see ``Compose``.
        num_classes (int): forwarded to ``Compose``, default 80.
        collate_batch (bool): if True the batch is built with paddle's
            ``default_collate_fn``; otherwise the samples are gathered key
            by key and ground-truth style fields are kept as plain lists.
    """

    def __init__(self, transforms, num_classes=80, collate_batch=True):
        super(BatchCompose, self).__init__(transforms, num_classes)
        self.collate_batch = collate_batch

    def __call__(self, data):
        """Apply the batch transforms to a list of samples and batch them."""
        for op in self.transforms_cls:
            try:
                data = op(data)
            except Exception as e:
                stack_info = traceback.format_exc()
                message = ("fail to map batch transform [{}] "
                           "with error: {} and stack:\n{}".format(
                               op, e, str(stack_info)))
                logger.warning(message)
                raise e

        # Drop the keys the model does not need.
        for sample in data:
            for key in ('h', 'w', 'flipped'):
                if key in sample:
                    sample.pop(key)

        # A user-defined batch function would be plugged in here.
        if self.collate_batch:
            return default_collate_fn(data)

        batch_data = {}
        for key in data[0].keys():
            values = [sample[key] for sample in data]
            # Ground-truth style fields are left as lists instead of being
            # stacked into one array.
            keep_as_list = ('gt_' in key or 'is_crowd' in key or
                            'difficult' in key)
            if not keep_as_list:
                values = np.stack(values, axis=0)
            batch_data[key] = values
        return batch_data
||||
|
||||
|
||||
class BaseDataLoader(object):
    """
    Base DataLoader implementation for detection models.

    Calling an instance with a dataset prepares that dataset, builds a
    paddle ``DataLoader`` around it and returns the instance itself, which
    can then be iterated.

    Args:
        sample_transforms (list): a list of transforms to perform
                                  on each sample
        batch_transforms (list): a list of transforms to perform
                                 on batch
        batch_size (int): batch size for batch collating, default 1.
        shuffle (bool): whether to shuffle samples
        drop_last (bool): whether to drop the last incomplete batch,
                          default False
        num_classes (int): class number of dataset, default 80
        collate_batch (bool): whether to collate batch in dataloader.
            If set to True, the samples will collate into batch according
            to the batch size. Otherwise, the ground-truth will not collate,
            which is used when the number of ground-truth is different in
            samples.
        use_shared_memory (bool): whether to use shared memory to
                accelerate data loading, enable this only if you
                are sure that the shared memory size of your OS
                is larger than memory cost of input datas of model.
                Shared memory is switched off on Windows and macOS and
                whenever the detected shared memory size is below 1G.
                Default False.
        **kwargs: stored and later forwarded to ``dataset.set_kwargs``.
    """

    def __init__(self,
                 sample_transforms=[],
                 batch_transforms=[],
                 batch_size=1,
                 shuffle=False,
                 drop_last=False,
                 num_classes=80,
                 collate_batch=True,
                 use_shared_memory=False,
                 **kwargs):
        # per-sample transform pipeline
        self._sample_transforms = Compose(
            sample_transforms, num_classes=num_classes)

        # per-batch transform pipeline, later used as the collate function
        self._batch_transforms = BatchCompose(batch_transforms, num_classes,
                                              collate_batch)
        self.kwargs = kwargs
        self.use_shared_memory = use_shared_memory
        self.drop_last = drop_last
        self.shuffle = shuffle
        self.batch_size = batch_size

    def __call__(self,
                 dataset,
                 worker_num,
                 batch_sampler=None,
                 return_list=False):
        """Bind ``dataset`` to a paddle DataLoader and return ``self``."""
        self.dataset = dataset
        self.dataset.check_or_download_dataset()
        self.dataset.parse_dataset()
        # attach the sample transforms, then the extra dataset options
        self.dataset.set_transform(self._sample_transforms)
        self.dataset.set_kwargs(**self.kwargs)

        # fall back to a distributed batch sampler when none is supplied
        if batch_sampler is not None:
            self._batch_sampler = batch_sampler
        else:
            self._batch_sampler = DistributedBatchSampler(
                self.dataset,
                batch_size=self.batch_size,
                shuffle=self.shuffle,
                drop_last=self.drop_last)

        # DataLoader do not start sub-process in Windows and Mac
        # system, do not need to use shared memory
        shm_enabled = self.use_shared_memory and \
                      sys.platform not in ['win32', 'darwin']
        if shm_enabled:
            # require more than 1G (1024M) of shared memory
            shm_size = _get_shared_memory_size_in_M()
            if shm_size is not None and shm_size < 1024.:
                logger.warning("Shared memory size is less than 1G, "
                               "disable shared_memory in DataLoader")
                shm_enabled = False

        self.dataloader = DataLoader(
            dataset=self.dataset,
            batch_sampler=self._batch_sampler,
            collate_fn=self._batch_transforms,
            num_workers=worker_num,
            return_list=return_list,
            use_shared_memory=shm_enabled)
        self.loader = iter(self.dataloader)

        return self

    def __len__(self):
        """Number of batches, as reported by the batch sampler."""
        return len(self._batch_sampler)

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next batch; on exhaustion, rewind and re-raise."""
        try:
            return next(self.loader)
        except StopIteration:
            # Build a fresh iterator so the loader can be consumed again,
            # then let the StopIteration propagate to end this pass.
            self.loader = iter(self.dataloader)
            six.reraise(*sys.exc_info())

    def next(self):
        # python2 compatibility
        return self.__next__()
||||
|
||||
|
||||
@register
class TrainReader(BaseDataLoader):
    """Training reader: a BaseDataLoader that shuffles and drops the last
    incomplete batch by default."""
    __shared__ = ['num_classes']

    def __init__(self,
                 sample_transforms=[],
                 batch_transforms=[],
                 batch_size=1,
                 shuffle=True,
                 drop_last=True,
                 num_classes=80,
                 collate_batch=True,
                 **kwargs):
        super(TrainReader, self).__init__(
            sample_transforms=sample_transforms,
            batch_transforms=batch_transforms,
            batch_size=batch_size,
            shuffle=shuffle,
            drop_last=drop_last,
            num_classes=num_classes,
            collate_batch=collate_batch,
            **kwargs)
||||
|
||||
|
||||
@register
class EvalReader(BaseDataLoader):
    """Evaluation reader: no shuffling, drops the last incomplete batch by
    default. ``collate_batch`` is not an explicit parameter here; the base
    class default applies unless it is passed through ``kwargs``."""
    __shared__ = ['num_classes']

    def __init__(self,
                 sample_transforms=[],
                 batch_transforms=[],
                 batch_size=1,
                 shuffle=False,
                 drop_last=True,
                 num_classes=80,
                 **kwargs):
        super(EvalReader, self).__init__(
            sample_transforms=sample_transforms,
            batch_transforms=batch_transforms,
            batch_size=batch_size,
            shuffle=shuffle,
            drop_last=drop_last,
            num_classes=num_classes,
            **kwargs)
||||
|
||||
|
||||
@register
class TestReader(BaseDataLoader):
    """Inference reader: no shuffling and every batch is kept by default."""
    __shared__ = ['num_classes']

    def __init__(self,
                 sample_transforms=[],
                 batch_transforms=[],
                 batch_size=1,
                 shuffle=False,
                 drop_last=False,
                 num_classes=80,
                 **kwargs):
        super(TestReader, self).__init__(
            sample_transforms=sample_transforms,
            batch_transforms=batch_transforms,
            batch_size=batch_size,
            shuffle=shuffle,
            drop_last=drop_last,
            num_classes=num_classes,
            **kwargs)
||||
|
||||
|
||||
@register
class EvalMOTReader(BaseDataLoader):
    """Evaluation reader for MOT: defaults to a single class, no shuffling
    and keeping every batch."""
    __shared__ = ['num_classes']

    def __init__(self,
                 sample_transforms=[],
                 batch_transforms=[],
                 batch_size=1,
                 shuffle=False,
                 drop_last=False,
                 num_classes=1,
                 **kwargs):
        super(EvalMOTReader, self).__init__(
            sample_transforms=sample_transforms,
            batch_transforms=batch_transforms,
            batch_size=batch_size,
            shuffle=shuffle,
            drop_last=drop_last,
            num_classes=num_classes,
            **kwargs)
||||
|
||||
|
||||
@register
class TestMOTReader(BaseDataLoader):
    """Inference reader for MOT: defaults to a single class, no shuffling
    and keeping every batch."""
    __shared__ = ['num_classes']

    def __init__(self,
                 sample_transforms=[],
                 batch_transforms=[],
                 batch_size=1,
                 shuffle=False,
                 drop_last=False,
                 num_classes=1,
                 **kwargs):
        super(TestMOTReader, self).__init__(
            sample_transforms=sample_transforms,
            batch_transforms=batch_transforms,
            batch_size=batch_size,
            shuffle=shuffle,
            drop_last=drop_last,
            num_classes=num_classes,
            **kwargs)
@ -0,0 +1,67 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
import os |
||||
|
||||
SIZE_UNIT = ['K', 'M', 'G', 'T'] |
||||
SHM_QUERY_CMD = 'df -h' |
||||
SHM_KEY = 'shm' |
||||
SHM_DEFAULT_MOUNT = '/dev/shm' |
||||
|
||||
# [ shared memory size check ] |
||||
# In detection models, image/target data occupies a lot of memory, and |
||||
# will occupy lots of shared memory in multi-process DataLoader, we use |
||||
# following code to get shared memory size and perform a size check to |
||||
# disable shared memory use if shared memory size is not enough. |
||||
# Shared memory getting process as follows: |
||||
# 1. use `df -h` get all mount info |
||||
# 2. pick up spaces whose mount info contains 'shm' |
||||
# 3. if 'shm' space number is only 1, return its size |
||||
# 4. if there are multiple 'shm' space, try to find the default mount |
||||
#    directory '/dev/shm' on Linux-like systems, otherwise return the |
||||
# biggest space size. |
||||
|
||||
|
||||
def _parse_size_in_M(size_str):
    """Convert a size string such as '512K', '64M' or '2G' to megabytes.

    The last character is the unit and must be one of SIZE_UNIT; the rest
    of the string is parsed as a float.
    """
    unit = size_str[-1]
    assert unit in SIZE_UNIT, \
        "unknown shm size unit {}".format(unit)
    # Power of 1024 relative to megabytes: 'K' -> -1, 'M' -> 0, 'G' -> 1, ...
    exponent = SIZE_UNIT.index(unit) - 1
    magnitude = float(size_str[:-1])
    return magnitude * (1024 ** exponent)
||||
|
||||
|
||||
def _get_shared_memory_size_in_M():
    """Return the shared memory size in megabytes, or None if unknown.

    Runs SHM_QUERY_CMD, keeps the output rows that contain SHM_KEY and
    parses the fourth whitespace-separated field of the selected row with
    `_parse_size_in_M`.
    NOTE(review): in standard `df -h` output the fourth field is the
    "Avail" column rather than "Size" -- confirm this is the intended one.

    Row selection: a single matching row is used directly; with several,
    the one mounted at SHM_DEFAULT_MOUNT is preferred, otherwise the
    largest parsed value is returned.
    """
    try:
        # The context manager closes the pipe instead of leaking it.
        with os.popen(SHM_QUERY_CMD) as pipe:
            df_infos = pipe.readlines()
    except Exception:
        # Best effort: an unavailable command means "size unknown".
        # KeyboardInterrupt/SystemExit are deliberately not swallowed.
        return None

    shm_infos = []
    for df_info in df_infos:
        info = df_info.strip()
        if SHM_KEY in info:
            shm_infos.append(info.split())

    if not shm_infos:
        return None
    if len(shm_infos) == 1:
        return _parse_size_in_M(shm_infos[0][3])

    default_mount_infos = [
        si for si in shm_infos if si[-1] == SHM_DEFAULT_MOUNT
    ]
    if default_mount_infos:
        return _parse_size_in_M(default_mount_infos[0][3])
    return max(_parse_size_in_M(si[3]) for si in shm_infos)
@ -0,0 +1,29 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from . import coco |
||||
from . import voc |
||||
from . import widerface |
||||
from . import category |
||||
from . import keypoint_coco |
||||
from . import mot |
||||
from . import sniper_coco |
||||
|
||||
from .coco import * |
||||
from .voc import * |
||||
from .widerface import * |
||||
from .category import * |
||||
from .keypoint_coco import * |
||||
from .mot import * |
||||
from .sniper_coco import SniperCOCODataSet |
@ -0,0 +1,904 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
import os |
||||
|
||||
from paddlers.models.ppdet.data.source.voc import pascalvoc_label |
||||
from paddlers.models.ppdet.data.source.widerface import widerface_label |
||||
from paddlers.models.ppdet.utils.logger import setup_logger |
||||
logger = setup_logger(__name__) |
||||
|
||||
__all__ = ['get_categories'] |
||||
|
||||
|
||||
def _label_file_categories(anno_file):
    """Build (clsid2catid, catid2name) from a one-name-per-line label file.

    A leading 'background' entry is dropped. An empty file yields two
    empty dicts instead of raising IndexError.
    """
    with open(anno_file) as f:
        cats = [line.strip() for line in f]

    if cats and cats[0] == 'background':
        cats = cats[1:]

    clsid2catid = {i: i for i in range(len(cats))}
    catid2name = {i: name for i, name in enumerate(cats)}
    return clsid2catid, catid2name


def get_categories(metric_type, anno_file=None, arch=None):
    """
    Get class id to category id map and category id
    to category name map from annotation file.

    Args:
        metric_type (str): metric type, matched case-insensitively.
            Handled values: 'coco', 'rbox', 'snipercoco', 'voc', 'oid',
            'widerface', 'keypointtopdowncocoeval',
            'keypointtopdownmpiieval', 'mot', 'motdet', 'reid', 'kitti',
            'bdd100kmot' and 'mcmot'.
        anno_file (str): annotation file path. When it is missing or is not
            an existing file, the built-in default categories of the
            metric type are used.
        arch (str): when equal to 'keypoint_arch' the keypoint placeholder
            is returned without looking at metric_type.

    Returns:
        tuple: (clsid2catid, catid2name). For keypoint metrics/arch this is
            (None, {'id': 'keypoint'}).

    Raises:
        ValueError: if metric_type is not one of the handled values.
    """
    if arch == 'keypoint_arch':
        return (None, {'id': 'keypoint'})

    metric = metric_type.lower()

    if metric in ('coco', 'rbox', 'snipercoco'):
        if anno_file and os.path.isfile(anno_file):
            # lazy import pycocotools here
            from pycocotools.coco import COCO

            coco = COCO(anno_file)
            cats = coco.loadCats(coco.getCatIds())

            clsid2catid = {i: cat['id'] for i, cat in enumerate(cats)}
            catid2name = {cat['id']: cat['name'] for cat in cats}
            return clsid2catid, catid2name

        # anno file not exist, load default categories (DOTA for rbox,
        # COCO17 otherwise)
        if metric == 'rbox':
            return _dota_category()
        return _coco17_category()

    if metric == 'voc':
        if anno_file and os.path.isfile(anno_file):
            return _label_file_categories(anno_file)
        # anno file not exist, load default categories of
        # VOC all 20 categories
        return _vocall_category()

    if metric == 'oid':
        if anno_file and os.path.isfile(anno_file):
            logger.warning("only default categories support for OID19")
        return _oid19_category()

    if metric == 'widerface':
        return _widerface_category()

    if metric in ('keypointtopdowncocoeval', 'keypointtopdownmpiieval'):
        return (None, {'id': 'keypoint'})

    if metric in ('mot', 'motdet', 'reid'):
        if anno_file and os.path.isfile(anno_file):
            return _label_file_categories(anno_file)
        # anno file not exist, load default category 'pedestrian'.
        return _mot_category(category='pedestrian')

    if metric in ('kitti', 'bdd100kmot'):
        return _mot_category(category='vehicle')

    if metric == 'mcmot':
        if anno_file and os.path.isfile(anno_file):
            return _label_file_categories(anno_file)
        # anno file not exist, load default categories of visdrone all 10 categories
        return _visdrone_category()

    raise ValueError("unknown metric type {}".format(metric_type))
||||
|
||||
|
||||
def _mot_category(category='pedestrian'): |
||||
""" |
||||
Get class id to category id map and category id |
||||
to category name map of mot dataset |
||||
""" |
||||
label_map = {category: 0} |
||||
label_map = sorted(label_map.items(), key=lambda x: x[1]) |
||||
cats = [l[0] for l in label_map] |
||||
|
||||
clsid2catid = {i: i for i in range(len(cats))} |
||||
catid2name = {i: name for i, name in enumerate(cats)} |
||||
|
||||
return clsid2catid, catid2name |
||||
|
||||
|
||||
def _coco17_category(): |
||||
""" |
||||
Get class id to category id map and category id |
||||
to category name map of COCO2017 dataset |
||||
|
||||
""" |
||||
clsid2catid = { |
||||
1: 1, |
||||
2: 2, |
||||
3: 3, |
||||
4: 4, |
||||
5: 5, |
||||
6: 6, |
||||
7: 7, |
||||
8: 8, |
||||
9: 9, |
||||
10: 10, |
||||
11: 11, |
||||
12: 13, |
||||
13: 14, |
||||
14: 15, |
||||
15: 16, |
||||
16: 17, |
||||
17: 18, |
||||
18: 19, |
||||
19: 20, |
||||
20: 21, |
||||
21: 22, |
||||
22: 23, |
||||
23: 24, |
||||
24: 25, |
||||
25: 27, |
||||
26: 28, |
||||
27: 31, |
||||
28: 32, |
||||
29: 33, |
||||
30: 34, |
||||
31: 35, |
||||
32: 36, |
||||
33: 37, |
||||
34: 38, |
||||
35: 39, |
||||
36: 40, |
||||
37: 41, |
||||
38: 42, |
||||
39: 43, |
||||
40: 44, |
||||
41: 46, |
||||
42: 47, |
||||
43: 48, |
||||
44: 49, |
||||
45: 50, |
||||
46: 51, |
||||
47: 52, |
||||
48: 53, |
||||
49: 54, |
||||
50: 55, |
||||
51: 56, |
||||
52: 57, |
||||
53: 58, |
||||
54: 59, |
||||
55: 60, |
||||
56: 61, |
||||
57: 62, |
||||
58: 63, |
||||
59: 64, |
||||
60: 65, |
||||
61: 67, |
||||
62: 70, |
||||
63: 72, |
||||
64: 73, |
||||
65: 74, |
||||
66: 75, |
||||
67: 76, |
||||
68: 77, |
||||
69: 78, |
||||
70: 79, |
||||
71: 80, |
||||
72: 81, |
||||
73: 82, |
||||
74: 84, |
||||
75: 85, |
||||
76: 86, |
||||
77: 87, |
||||
78: 88, |
||||
79: 89, |
||||
80: 90 |
||||
} |
||||
|
||||
catid2name = { |
||||
0: 'background', |
||||
1: 'person', |
||||
2: 'bicycle', |
||||
3: 'car', |
||||
4: 'motorcycle', |
||||
5: 'airplane', |
||||
6: 'bus', |
||||
7: 'train', |
||||
8: 'truck', |
||||
9: 'boat', |
||||
10: 'traffic light', |
||||
11: 'fire hydrant', |
||||
13: 'stop sign', |
||||
14: 'parking meter', |
||||
15: 'bench', |
||||
16: 'bird', |
||||
17: 'cat', |
||||
18: 'dog', |
||||
19: 'horse', |
||||
20: 'sheep', |
||||
21: 'cow', |
||||
22: 'elephant', |
||||
23: 'bear', |
||||
24: 'zebra', |
||||
25: 'giraffe', |
||||
27: 'backpack', |
||||
28: 'umbrella', |
||||
31: 'handbag', |
||||
32: 'tie', |
||||
33: 'suitcase', |
||||
34: 'frisbee', |
||||
35: 'skis', |
||||
36: 'snowboard', |
||||
37: 'sports ball', |
||||
38: 'kite', |
||||
39: 'baseball bat', |
||||
40: 'baseball glove', |
||||
41: 'skateboard', |
||||
42: 'surfboard', |
||||
43: 'tennis racket', |
||||
44: 'bottle', |
||||
46: 'wine glass', |
||||
47: 'cup', |
||||
48: 'fork', |
||||
49: 'knife', |
||||
50: 'spoon', |
||||
51: 'bowl', |
||||
52: 'banana', |
||||
53: 'apple', |
||||
54: 'sandwich', |
||||
55: 'orange', |
||||
56: 'broccoli', |
||||
57: 'carrot', |
||||
58: 'hot dog', |
||||
59: 'pizza', |
||||
60: 'donut', |
||||
61: 'cake', |
||||
62: 'chair', |
||||
63: 'couch', |
||||
64: 'potted plant', |
||||
65: 'bed', |
||||
67: 'dining table', |
||||
70: 'toilet', |
||||
72: 'tv', |
||||
73: 'laptop', |
||||
74: 'mouse', |
||||
75: 'remote', |
||||
76: 'keyboard', |
||||
77: 'cell phone', |
||||
78: 'microwave', |
||||
79: 'oven', |
||||
80: 'toaster', |
||||
81: 'sink', |
||||
82: 'refrigerator', |
||||
84: 'book', |
||||
85: 'clock', |
||||
86: 'vase', |
||||
87: 'scissors', |
||||
88: 'teddy bear', |
||||
89: 'hair drier', |
||||
90: 'toothbrush' |
||||
} |
||||
|
||||
clsid2catid = {k - 1: v for k, v in clsid2catid.items()} |
||||
catid2name.pop(0) |
||||
|
||||
return clsid2catid, catid2name |
||||
|
||||
|
||||
def _dota_category(): |
||||
""" |
||||
Get class id to category id map and category id |
||||
to category name map of dota dataset |
||||
""" |
||||
catid2name = { |
||||
0: 'background', |
||||
1: 'plane', |
||||
2: 'baseball-diamond', |
||||
3: 'bridge', |
||||
4: 'ground-track-field', |
||||
5: 'small-vehicle', |
||||
6: 'large-vehicle', |
||||
7: 'ship', |
||||
8: 'tennis-court', |
||||
9: 'basketball-court', |
||||
10: 'storage-tank', |
||||
11: 'soccer-ball-field', |
||||
12: 'roundabout', |
||||
13: 'harbor', |
||||
14: 'swimming-pool', |
||||
15: 'helicopter' |
||||
} |
||||
catid2name.pop(0) |
||||
clsid2catid = {i: i + 1 for i in range(len(catid2name))} |
||||
return clsid2catid, catid2name |
||||
|
||||
|
||||
def _vocall_category():
    """
    Get class id to category id map and category id
    to category name map of mixup voc dataset.

    Names are the keys of the mapping returned by ``pascalvoc_label()``,
    ordered by their mapped values.
    """
    label_map = pascalvoc_label()
    names = sorted(label_map, key=label_map.get)

    clsid2catid = {idx: idx for idx, _ in enumerate(names)}
    catid2name = dict(enumerate(names))

    return clsid2catid, catid2name
||||
|
||||
|
||||
def _widerface_category():
    """
    Get class id to category id map and category id
    to category name map of the widerface dataset.

    Names are the keys of the mapping returned by ``widerface_label()``,
    ordered by their mapped values.
    """
    label_map = widerface_label()
    names = sorted(label_map, key=label_map.get)

    clsid2catid = {idx: idx for idx, _ in enumerate(names)}
    catid2name = dict(enumerate(names))

    return clsid2catid, catid2name
||||
|
||||
|
||||
def _oid19_category():
    """
    Build the id maps for the oid19 label set.

    Returns:
        tuple: ``(clsid2catid, catid2name)`` where ``clsid2catid`` maps the
            zero-based class ids 0..499 to category ids 1..500 and
            ``catid2name`` maps category ids 0..500 to names. Unlike the
            sibling helpers, the 'background' entry (id 0) is kept in
            ``catid2name``.
    """
    # Explicit keys are kept on purpose: the ids are part of the data and a
    # positional encoding would silently shift if one name were dropped.
    catid2name = {
        0: "background",
        1: "Infant bed",
        2: "Rose",
        3: "Flag",
        4: "Flashlight",
        5: "Sea turtle",
        6: "Camera",
        7: "Animal",
        8: "Glove",
        9: "Crocodile",
        10: "Cattle",
        11: "House",
        12: "Guacamole",
        13: "Penguin",
        14: "Vehicle registration plate",
        15: "Bench",
        16: "Ladybug",
        17: "Human nose",
        18: "Watermelon",
        19: "Flute",
        20: "Butterfly",
        21: "Washing machine",
        22: "Raccoon",
        23: "Segway",
        24: "Taco",
        25: "Jellyfish",
        26: "Cake",
        27: "Pen",
        28: "Cannon",
        29: "Bread",
        30: "Tree",
        31: "Shellfish",
        32: "Bed",
        33: "Hamster",
        34: "Hat",
        35: "Toaster",
        36: "Sombrero",
        37: "Tiara",
        38: "Bowl",
        39: "Dragonfly",
        40: "Moths and butterflies",
        41: "Antelope",
        42: "Vegetable",
        43: "Torch",
        44: "Building",
        45: "Power plugs and sockets",
        46: "Blender",
        47: "Billiard table",
        48: "Cutting board",
        49: "Bronze sculpture",
        50: "Turtle",
        51: "Broccoli",
        52: "Tiger",
        53: "Mirror",
        54: "Bear",
        55: "Zucchini",
        56: "Dress",
        57: "Volleyball",
        58: "Guitar",
        59: "Reptile",
        60: "Golf cart",
        61: "Tart",
        62: "Fedora",
        63: "Carnivore",
        64: "Car",
        65: "Lighthouse",
        66: "Coffeemaker",
        67: "Food processor",
        68: "Truck",
        69: "Bookcase",
        70: "Surfboard",
        71: "Footwear",
        72: "Bench",
        73: "Necklace",
        74: "Flower",
        75: "Radish",
        76: "Marine mammal",
        77: "Frying pan",
        78: "Tap",
        79: "Peach",
        80: "Knife",
        81: "Handbag",
        82: "Laptop",
        83: "Tent",
        84: "Ambulance",
        85: "Christmas tree",
        86: "Eagle",
        87: "Limousine",
        88: "Kitchen & dining room table",
        89: "Polar bear",
        90: "Tower",
        91: "Football",
        92: "Willow",
        93: "Human head",
        94: "Stop sign",
        95: "Banana",
        96: "Mixer",
        97: "Binoculars",
        98: "Dessert",
        99: "Bee",
        100: "Chair",
        101: "Wood-burning stove",
        102: "Flowerpot",
        103: "Beaker",
        104: "Oyster",
        105: "Woodpecker",
        106: "Harp",
        107: "Bathtub",
        108: "Wall clock",
        109: "Sports uniform",
        110: "Rhinoceros",
        111: "Beehive",
        112: "Cupboard",
        113: "Chicken",
        114: "Man",
        115: "Blue jay",
        116: "Cucumber",
        117: "Balloon",
        118: "Kite",
        119: "Fireplace",
        120: "Lantern",
        121: "Missile",
        122: "Book",
        123: "Spoon",
        124: "Grapefruit",
        125: "Squirrel",
        126: "Orange",
        127: "Coat",
        128: "Punching bag",
        129: "Zebra",
        130: "Billboard",
        131: "Bicycle",
        132: "Door handle",
        133: "Mechanical fan",
        134: "Ring binder",
        135: "Table",
        136: "Parrot",
        137: "Sock",
        138: "Vase",
        139: "Weapon",
        140: "Shotgun",
        141: "Glasses",
        142: "Seahorse",
        143: "Belt",
        144: "Watercraft",
        145: "Window",
        146: "Giraffe",
        147: "Lion",
        148: "Tire",
        149: "Vehicle",
        150: "Canoe",
        151: "Tie",
        152: "Shelf",
        153: "Picture frame",
        154: "Printer",
        155: "Human leg",
        156: "Boat",
        157: "Slow cooker",
        158: "Croissant",
        159: "Candle",
        160: "Pancake",
        161: "Pillow",
        162: "Coin",
        163: "Stretcher",
        164: "Sandal",
        165: "Woman",
        166: "Stairs",
        167: "Harpsichord",
        168: "Stool",
        169: "Bus",
        170: "Suitcase",
        171: "Human mouth",
        172: "Juice",
        173: "Skull",
        174: "Door",
        175: "Violin",
        176: "Chopsticks",
        177: "Digital clock",
        178: "Sunflower",
        179: "Leopard",
        180: "Bell pepper",
        181: "Harbor seal",
        182: "Snake",
        183: "Sewing machine",
        184: "Goose",
        185: "Helicopter",
        186: "Seat belt",
        187: "Coffee cup",
        188: "Microwave oven",
        189: "Hot dog",
        190: "Countertop",
        191: "Serving tray",
        192: "Dog bed",
        193: "Beer",
        194: "Sunglasses",
        195: "Golf ball",
        196: "Waffle",
        197: "Palm tree",
        198: "Trumpet",
        199: "Ruler",
        200: "Helmet",
        201: "Ladder",
        202: "Office building",
        203: "Tablet computer",
        204: "Toilet paper",
        205: "Pomegranate",
        206: "Skirt",
        207: "Gas stove",
        208: "Cookie",
        209: "Cart",
        210: "Raven",
        211: "Egg",
        212: "Burrito",
        213: "Goat",
        214: "Kitchen knife",
        215: "Skateboard",
        216: "Salt and pepper shakers",
        217: "Lynx",
        218: "Boot",
        219: "Platter",
        220: "Ski",
        221: "Swimwear",
        222: "Swimming pool",
        223: "Drinking straw",
        224: "Wrench",
        225: "Drum",
        226: "Ant",
        227: "Human ear",
        228: "Headphones",
        229: "Fountain",
        230: "Bird",
        231: "Jeans",
        232: "Television",
        233: "Crab",
        234: "Microphone",
        235: "Home appliance",
        236: "Snowplow",
        237: "Beetle",
        238: "Artichoke",
        239: "Jet ski",
        240: "Stationary bicycle",
        241: "Human hair",
        242: "Brown bear",
        243: "Starfish",
        244: "Fork",
        245: "Lobster",
        246: "Corded phone",
        247: "Drink",
        248: "Saucer",
        249: "Carrot",
        250: "Insect",
        251: "Clock",
        252: "Castle",
        253: "Tennis racket",
        254: "Ceiling fan",
        255: "Asparagus",
        256: "Jaguar",
        257: "Musical instrument",
        258: "Train",
        259: "Cat",
        260: "Rifle",
        261: "Dumbbell",
        262: "Mobile phone",
        263: "Taxi",
        264: "Shower",
        265: "Pitcher",
        266: "Lemon",
        267: "Invertebrate",
        268: "Turkey",
        269: "High heels",
        270: "Bust",
        271: "Elephant",
        272: "Scarf",
        273: "Barrel",
        274: "Trombone",
        275: "Pumpkin",
        276: "Box",
        277: "Tomato",
        278: "Frog",
        279: "Bidet",
        280: "Human face",
        281: "Houseplant",
        282: "Van",
        283: "Shark",
        284: "Ice cream",
        285: "Swim cap",
        286: "Falcon",
        287: "Ostrich",
        288: "Handgun",
        289: "Whiteboard",
        290: "Lizard",
        291: "Pasta",
        292: "Snowmobile",
        293: "Light bulb",
        294: "Window blind",
        295: "Muffin",
        296: "Pretzel",
        297: "Computer monitor",
        298: "Horn",
        299: "Furniture",
        300: "Sandwich",
        301: "Fox",
        302: "Convenience store",
        303: "Fish",
        304: "Fruit",
        305: "Earrings",
        306: "Curtain",
        307: "Grape",
        308: "Sofa bed",
        309: "Horse",
        310: "Luggage and bags",
        311: "Desk",
        312: "Crutch",
        313: "Bicycle helmet",
        314: "Tick",
        315: "Airplane",
        316: "Canary",
        317: "Spatula",
        318: "Watch",
        319: "Lily",
        320: "Kitchen appliance",
        321: "Filing cabinet",
        322: "Aircraft",
        323: "Cake stand",
        324: "Candy",
        325: "Sink",
        326: "Mouse",
        327: "Wine",
        328: "Wheelchair",
        329: "Goldfish",
        330: "Refrigerator",
        331: "French fries",
        332: "Drawer",
        333: "Treadmill",
        334: "Picnic basket",
        335: "Dice",
        336: "Cabbage",
        337: "Football helmet",
        338: "Pig",
        339: "Person",
        340: "Shorts",
        341: "Gondola",
        342: "Honeycomb",
        343: "Doughnut",
        344: "Chest of drawers",
        345: "Land vehicle",
        346: "Bat",
        347: "Monkey",
        348: "Dagger",
        349: "Tableware",
        350: "Human foot",
        351: "Mug",
        352: "Alarm clock",
        353: "Pressure cooker",
        354: "Human hand",
        355: "Tortoise",
        356: "Baseball glove",
        357: "Sword",
        358: "Pear",
        359: "Miniskirt",
        360: "Traffic sign",
        361: "Girl",
        362: "Roller skates",
        363: "Dinosaur",
        364: "Porch",
        365: "Human beard",
        366: "Submarine sandwich",
        367: "Screwdriver",
        368: "Strawberry",
        369: "Wine glass",
        370: "Seafood",
        371: "Racket",
        372: "Wheel",
        373: "Sea lion",
        374: "Toy",
        375: "Tea",
        376: "Tennis ball",
        377: "Waste container",
        378: "Mule",
        379: "Cricket ball",
        380: "Pineapple",
        381: "Coconut",
        382: "Doll",
        383: "Coffee table",
        384: "Snowman",
        385: "Lavender",
        386: "Shrimp",
        387: "Maple",
        388: "Cowboy hat",
        389: "Goggles",
        390: "Rugby ball",
        391: "Caterpillar",
        392: "Poster",
        393: "Rocket",
        394: "Organ",
        395: "Saxophone",
        396: "Traffic light",
        397: "Cocktail",
        398: "Plastic bag",
        399: "Squash",
        400: "Mushroom",
        401: "Hamburger",
        402: "Light switch",
        403: "Parachute",
        404: "Teddy bear",
        405: "Winter melon",
        406: "Deer",
        407: "Musical keyboard",
        408: "Plumbing fixture",
        409: "Scoreboard",
        410: "Baseball bat",
        411: "Envelope",
        412: "Adhesive tape",
        413: "Briefcase",
        414: "Paddle",
        415: "Bow and arrow",
        416: "Telephone",
        417: "Sheep",
        418: "Jacket",
        419: "Boy",
        420: "Pizza",
        421: "Otter",
        422: "Office supplies",
        423: "Couch",
        424: "Cello",
        425: "Bull",
        426: "Camel",
        427: "Ball",
        428: "Duck",
        429: "Whale",
        430: "Shirt",
        431: "Tank",
        432: "Motorcycle",
        433: "Accordion",
        434: "Owl",
        435: "Porcupine",
        436: "Sun hat",
        437: "Nail",
        438: "Scissors",
        439: "Swan",
        440: "Lamp",
        441: "Crown",
        442: "Piano",
        443: "Sculpture",
        444: "Cheetah",
        445: "Oboe",
        446: "Tin can",
        447: "Mango",
        448: "Tripod",
        449: "Oven",
        450: "Mouse",
        451: "Barge",
        452: "Coffee",
        453: "Snowboard",
        454: "Common fig",
        455: "Salad",
        456: "Marine invertebrates",
        457: "Umbrella",
        458: "Kangaroo",
        459: "Human arm",
        460: "Measuring cup",
        461: "Snail",
        462: "Loveseat",
        463: "Suit",
        464: "Teapot",
        465: "Bottle",
        466: "Alpaca",
        467: "Kettle",
        468: "Trousers",
        469: "Popcorn",
        470: "Centipede",
        471: "Spider",
        472: "Sparrow",
        473: "Plate",
        474: "Bagel",
        475: "Personal care",
        476: "Apple",
        477: "Brassiere",
        478: "Bathroom cabinet",
        479: "studio couch",
        480: "Computer keyboard",
        481: "Table tennis racket",
        482: "Sushi",
        483: "Cabinetry",
        484: "Street light",
        485: "Towel",
        486: "Nightstand",
        487: "Rabbit",
        488: "Dolphin",
        489: "Dog",
        490: "Jug",
        491: "Wok",
        492: "Fire hydrant",
        493: "Human eye",
        494: "Skyscraper",
        495: "Backpack",
        496: "Potato",
        497: "Paper towel",
        498: "Lifejacket",
        499: "Bicycle wheel",
        500: "Toilet",
    }

    # Class ids are zero-based; category ids are shifted by one so that
    # class 0 corresponds to category 1.
    clsid2catid = {}
    for cls_id in range(500):
        clsid2catid[cls_id] = cls_id + 1

    return clsid2catid, catid2name
||||
|
||||
|
||||
def _visdrone_category():
    """
    Build the id maps for the visdrone label set.

    Returns:
        tuple: ``(clsid2catid, catid2name)``; class id and category id are
            identical for ids 0..9 and ``catid2name`` maps each id to its
            name.
    """
    visdrone_names = (
        'pedestrian',
        'people',
        'bicycle',
        'car',
        'van',
        'truck',
        'tricycle',
        'awning-tricycle',
        'bus',
        'motor',
    )
    catid2name = dict(enumerate(visdrone_names))

    clsid2catid = {}
    for idx in range(10):
        clsid2catid[idx] = idx
    return clsid2catid, catid2name
@ -0,0 +1,251 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
import os |
||||
import numpy as np |
||||
from paddlers.models.ppdet.core.workspace import register, serializable |
||||
from .dataset import DetDataset |
||||
|
||||
from paddlers.models.ppdet.utils.logger import setup_logger |
||||
logger = setup_logger(__name__) |
||||
|
||||
|
||||
@register
@serializable
class COCODataSet(DetDataset):
    """
    Load dataset with COCO format.

    Args:
        dataset_dir (str): root directory for dataset.
        image_dir (str): directory for images.
        anno_path (str): coco annotation file path.
        data_fields (list): key name of data dictionary, at least have 'image'.
        sample_num (int): number of samples to load, -1 means all.
        load_crowd (bool): whether to load crowded ground-truth.
            False as default
        allow_empty (bool): whether to load empty entry. False as default
        empty_ratio (float): the ratio of empty record number to total
            record's, if empty_ratio is out of [0. ,1.), do not sample the
            records and use all the empty entries. 1. as default
    """

    def __init__(self,
                 dataset_dir=None,
                 image_dir=None,
                 anno_path=None,
                 data_fields=['image'],
                 sample_num=-1,
                 load_crowd=False,
                 allow_empty=False,
                 empty_ratio=1.):
        super(COCODataSet, self).__init__(dataset_dir, image_dir, anno_path,
                                          data_fields, sample_num)
        # Switched on by parse_dataset() when the annotation file has no
        # 'annotations' section.
        self.load_image_only = False
        self.load_semantic = False
        self.load_crowd = load_crowd
        self.allow_empty = allow_empty
        self.empty_ratio = empty_ratio

    def _sample_empty(self, records, num):
        """
        Sub-sample the empty records according to ``self.empty_ratio``.

        Args:
            records (list): records that carry no ground-truth box.
            num (int): number of non-empty records.

        Returns:
            list: ``records`` unchanged when ``empty_ratio`` is outside
                [0., 1.), otherwise a random sample of at most
                ``num * empty_ratio / (1 - empty_ratio)`` records.
        """
        # if empty_ratio is out of [0. ,1.), do not sample the records
        if self.empty_ratio < 0. or self.empty_ratio >= 1.:
            return records
        import random
        sample_num = min(
            int(num * self.empty_ratio / (1 - self.empty_ratio)), len(records))
        records = random.sample(records, sample_num)
        return records

    def parse_dataset(self):
        """
        Parse the COCO annotation file and store the records in
        ``self.roidbs``.

        Also sets ``self.catid2clsid`` (COCO category id -> contiguous class
        id) and ``self.cname2cid`` (category name -> class id). Images whose
        file does not exist or whose annotated size is negative are skipped
        with a warning.

        Raises:
            AssertionError: if the annotation path does not end with '.json'
                or no record could be loaded.
        """
        anno_path = os.path.join(self.dataset_dir, self.anno_path)
        image_dir = os.path.join(self.dataset_dir, self.image_dir)

        assert anno_path.endswith('.json'), \
            'invalid coco annotation file: ' + anno_path
        from pycocotools.coco import COCO
        coco = COCO(anno_path)
        img_ids = coco.getImgIds()
        img_ids.sort()
        cat_ids = coco.getCatIds()
        records = []
        empty_records = []
        ct = 0

        self.catid2clsid = {catid: i for i, catid in enumerate(cat_ids)}
        self.cname2cid = {
            coco.loadCats(catid)[0]['name']: clsid
            for catid, clsid in self.catid2clsid.items()
        }

        if 'annotations' not in coco.dataset:
            self.load_image_only = True
            logger.warning(
                'Annotation file: {} does not contains ground truth '
                'and load image information only.'.format(anno_path))

        for img_id in img_ids:
            img_anno = coco.loadImgs([img_id])[0]
            im_fname = img_anno['file_name']
            im_w = float(img_anno['width'])
            im_h = float(img_anno['height'])

            im_path = os.path.join(image_dir,
                                   im_fname) if image_dir else im_fname
            is_empty = False
            if not os.path.exists(im_path):
                logger.warning('Illegal image file: {}, and it will be '
                               'ignored'.format(im_path))
                continue

            if im_w < 0 or im_h < 0:
                logger.warning(
                    'Illegal width: {} or height: {} in annotation, '
                    'and im_id: {} will be ignored'.format(im_w, im_h, img_id))
                continue

            coco_rec = {
                'im_file': im_path,
                'im_id': np.array([img_id]),
                'h': im_h,
                'w': im_w,
            } if 'image' in self.data_fields else {}

            if not self.load_image_only:
                ins_anno_ids = coco.getAnnIds(
                    imgIds=[img_id],
                    iscrowd=None if self.load_crowd else False)
                instances = coco.loadAnns(ins_anno_ids)

                bboxes = []
                is_rbox_anno = False
                for inst in instances:
                    # check gt bbox
                    if inst.get('ignore', False):
                        continue
                    if 'bbox' not in inst:
                        continue
                    if not any(np.array(inst['bbox'])):
                        continue

                    # read rbox anno or not
                    # NOTE(review): the flag is overwritten per instance, so
                    # the last visited instance decides it for the whole
                    # image; annotations are presumably homogeneous per image.
                    is_rbox_anno = len(inst['bbox']) == 5
                    if is_rbox_anno:
                        xc, yc, box_w, box_h, angle = inst['bbox']
                        x1 = xc - box_w / 2.0
                        y1 = yc - box_h / 2.0
                        x2 = x1 + box_w
                        y2 = y1 + box_h
                    else:
                        x1, y1, box_w, box_h = inst['bbox']
                        x2 = x1 + box_w
                        y2 = y1 + box_h
                    eps = 1e-5
                    if inst['area'] > 0 and x2 - x1 > eps and y2 - y1 > eps:
                        inst['clean_bbox'] = [
                            round(float(x), 3) for x in [x1, y1, x2, y2]
                        ]
                        if is_rbox_anno:
                            inst['clean_rbox'] = [xc, yc, box_w, box_h, angle]
                        bboxes.append(inst)
                    else:
                        logger.warning(
                            'Found an invalid bbox in annotations: im_id: {}, '
                            'area: {} x1: {}, y1: {}, x2: {}, y2: {}.'.format(
                                img_id, float(inst['area']), x1, y1, x2, y2))

                num_bbox = len(bboxes)
                if num_bbox <= 0 and not self.allow_empty:
                    continue
                elif num_bbox <= 0:
                    is_empty = True

                gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32)
                if is_rbox_anno:
                    gt_rbox = np.zeros((num_bbox, 5), dtype=np.float32)
                gt_class = np.zeros((num_bbox, 1), dtype=np.int32)
                is_crowd = np.zeros((num_bbox, 1), dtype=np.int32)
                gt_poly = [None] * num_bbox

                has_segmentation = False
                # Indices of the boxes that survive the segmentation check.
                # Dropping is deferred until after the loop: removing items
                # from the lists while enumerating them skips the next box,
                # and np.delete returns a new array rather than editing in
                # place.
                keep = []
                for i, box in enumerate(bboxes):
                    catid = box['category_id']
                    gt_class[i][0] = self.catid2clsid[catid]
                    gt_bbox[i, :] = box['clean_bbox']
                    # xc, yc, w, h, theta
                    if is_rbox_anno:
                        gt_rbox[i, :] = box['clean_rbox']
                    is_crowd[i][0] = box['iscrowd']
                    drop_box = False
                    # check RLE format
                    if 'segmentation' in box and box['iscrowd'] == 1:
                        gt_poly[i] = [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]
                    elif 'segmentation' in box and box['segmentation']:
                        # dtype=object: polygons of different lengths form a
                        # ragged list that NumPy >= 1.24 refuses to convert
                        # otherwise.
                        seg_size = np.array(
                            box['segmentation'], dtype=object).size
                        if not seg_size > 0 and not self.allow_empty:
                            drop_box = True
                        else:
                            gt_poly[i] = box['segmentation']
                        has_segmentation = True
                    if not drop_box:
                        keep.append(i)

                if len(keep) < num_bbox:
                    # Shrink every per-box container with the same index set
                    # so that they stay aligned with each other.
                    keep_idx = np.asarray(keep, dtype=np.int64)
                    gt_bbox = gt_bbox[keep_idx]
                    gt_class = gt_class[keep_idx]
                    is_crowd = is_crowd[keep_idx]
                    if is_rbox_anno:
                        gt_rbox = gt_rbox[keep_idx]
                    gt_poly = [gt_poly[i] for i in keep]

                if has_segmentation and not any(
                        gt_poly) and not self.allow_empty:
                    continue

                if is_rbox_anno:
                    gt_rec = {
                        'is_crowd': is_crowd,
                        'gt_class': gt_class,
                        'gt_bbox': gt_bbox,
                        'gt_rbox': gt_rbox,
                        'gt_poly': gt_poly,
                    }
                else:
                    gt_rec = {
                        'is_crowd': is_crowd,
                        'gt_class': gt_class,
                        'gt_bbox': gt_bbox,
                        'gt_poly': gt_poly,
                    }

                # Only the fields requested through data_fields are exported.
                for k, v in gt_rec.items():
                    if k in self.data_fields:
                        coco_rec[k] = v

                # TODO: remove load_semantic
                if self.load_semantic and 'semantic' in self.data_fields:
                    # NOTE: im_fname[:-3] assumes a three-character file
                    # extension.
                    seg_path = os.path.join(self.dataset_dir, 'stuffthingmaps',
                                            'train2017', im_fname[:-3] + 'png')
                    coco_rec.update({'semantic': seg_path})

            logger.debug('Load file: {}, im_id: {}, h: {}, w: {}.'.format(
                im_path, img_id, im_h, im_w))
            if is_empty:
                empty_records.append(coco_rec)
            else:
                records.append(coco_rec)
            ct += 1
            if self.sample_num > 0 and ct >= self.sample_num:
                break
        assert ct > 0, 'not found any coco record in %s' % (anno_path)
        logger.debug('{} samples in file {}'.format(ct, anno_path))
        if self.allow_empty and len(empty_records) > 0:
            empty_records = self._sample_empty(empty_records, len(records))
            records += empty_records
        self.roidbs = records
@ -0,0 +1,197 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
import os |
||||
import numpy as np |
||||
|
||||
try: |
||||
from collections.abc import Sequence |
||||
except Exception: |
||||
from collections import Sequence |
||||
from paddle.io import Dataset |
||||
from paddlers.models.ppdet.core.workspace import register, serializable |
||||
from paddlers.models.ppdet.utils.download import get_dataset_path |
||||
import copy |
||||
|
||||
|
||||
@serializable
class DetDataset(Dataset):
    """
    Load detection dataset.

    Args:
        dataset_dir (str): root directory for dataset.
        image_dir (str): directory for images.
        anno_path (str): annotation file path.
        data_fields (list): key name of data dictionary, at least have 'image'.
        sample_num (int): number of samples to load, -1 means all.
        use_default_label (bool): whether to load default label list.
    """

    def __init__(self,
                 dataset_dir=None,
                 image_dir=None,
                 anno_path=None,
                 data_fields=['image'],
                 sample_num=-1,
                 use_default_label=None,
                 **kwargs):
        super(DetDataset, self).__init__()
        self.dataset_dir = dataset_dir if dataset_dir is not None else ''
        self.anno_path = anno_path
        self.image_dir = image_dir if image_dir is not None else ''
        self.data_fields = data_fields
        self.sample_num = sample_num
        self.use_default_label = use_default_label
        self._epoch = 0
        self._curr_iter = 0
        # Same defaults as set_kwargs(): -1 keeps every multi-sample
        # augmentation off, so __getitem__ no longer fails with an
        # AttributeError when set_kwargs() has not been called yet.
        self.mixup_epoch = -1
        self.cutmix_epoch = -1
        self.mosaic_epoch = -1

    def __len__(self, ):
        """Return the number of records in ``self.roidbs``."""
        return len(self.roidbs)

    def __getitem__(self, idx):
        """
        Return the transformed sample for ``idx``.

        While mixup or cutmix is active, one extra randomly drawn record is
        attached; while mosaic is active, three are. An augmentation is
        active when its ``*_epoch`` value is 0 or the current epoch is below
        it; mixup takes precedence over cutmix, which takes precedence over
        mosaic. Every record gets the current iteration counter under
        'curr_iter' before ``self.transform`` is applied.
        """
        # data batch
        roidb = copy.deepcopy(self.roidbs[idx])
        if self.mixup_epoch == 0 or self._epoch < self.mixup_epoch:
            n = len(self.roidbs)
            idx = np.random.randint(n)
            roidb = [roidb, copy.deepcopy(self.roidbs[idx])]
        elif self.cutmix_epoch == 0 or self._epoch < self.cutmix_epoch:
            n = len(self.roidbs)
            idx = np.random.randint(n)
            roidb = [roidb, copy.deepcopy(self.roidbs[idx])]
        elif self.mosaic_epoch == 0 or self._epoch < self.mosaic_epoch:
            n = len(self.roidbs)
            roidb = [roidb, ] + [
                copy.deepcopy(self.roidbs[np.random.randint(n)])
                for _ in range(3)
            ]
        if isinstance(roidb, Sequence):
            for r in roidb:
                r['curr_iter'] = self._curr_iter
        else:
            roidb['curr_iter'] = self._curr_iter
        self._curr_iter += 1

        return self.transform(roidb)

    def check_or_download_dataset(self):
        """Replace ``self.dataset_dir`` with the result of
        ``get_dataset_path``."""
        self.dataset_dir = get_dataset_path(self.dataset_dir, self.anno_path,
                                            self.image_dir)

    def set_kwargs(self, **kwargs):
        """Read 'mixup_epoch', 'cutmix_epoch' and 'mosaic_epoch' from
        ``kwargs``; each falls back to -1 when absent."""
        self.mixup_epoch = kwargs.get('mixup_epoch', -1)
        self.cutmix_epoch = kwargs.get('cutmix_epoch', -1)
        self.mosaic_epoch = kwargs.get('mosaic_epoch', -1)

    def set_transform(self, transform):
        """Set the callable applied to each sample in ``__getitem__``."""
        self.transform = transform

    def set_epoch(self, epoch_id):
        """Record the current epoch, used to decide which augmentation is
        active."""
        self._epoch = epoch_id

    def parse_dataset(self, ):
        """Populate the dataset records; must be implemented by
        subclasses."""
        raise NotImplementedError(
            "Need to implement parse_dataset method of Dataset")

    def get_anno(self):
        """Return the annotation path joined with ``dataset_dir``, or None
        when no annotation path is set."""
        if self.anno_path is None:
            return
        return os.path.join(self.dataset_dir, self.anno_path)
||||
|
||||
|
||||
def _is_valid_file(f, extensions=('.jpg', '.jpeg', '.png', '.bmp')):
    """
    Tell whether a file name ends with one of the given extensions.

    Args:
        f (str): file name or path; compared case-insensitively.
        extensions (tuple): accepted lower-case suffixes.

    Returns:
        bool: True if the lower-cased ``f`` ends with any suffix.
    """
    lowered = f.lower()
    return lowered.endswith(extensions)
||||
|
||||
|
||||
def _make_dataset(dir):
    """
    Collect the image files found under a directory tree.

    Args:
        dir (str): directory to scan; a leading '~' is expanded.

    Returns:
        list: paths of all files accepted by ``_is_valid_file``, gathered by
            walking the tree (following symlinks) with directories and file
            names visited in sorted order.

    Raises:
        ValueError: if ``dir`` is not an existing directory.
    """
    dir = os.path.expanduser(dir)
    if not os.path.isdir(dir):
        # Raising a bare string is itself a TypeError and loses the message,
        # so raise a real exception that carries it.
        raise ValueError('{} should be a dir'.format(dir))
    images = []
    for root, _, fnames in sorted(os.walk(dir, followlinks=True)):
        for fname in sorted(fnames):
            path = os.path.join(root, fname)
            if _is_valid_file(path):
                images.append(path)
    return images
||||
|
||||
|
||||
@register
@serializable
class ImageFolder(DetDataset):
    """
    Dataset built from image files or image directories, without
    annotations.

    Args:
        dataset_dir (str): root directory for dataset.
        image_dir (str|list): image file/directory path, or a list of them.
        anno_path (str): annotation file path.
        sample_num (int): number of samples to load, -1 means all.
        use_default_label (bool): whether to load default label list.
    """

    def __init__(self,
                 dataset_dir=None,
                 image_dir=None,
                 anno_path=None,
                 sample_num=-1,
                 use_default_label=None,
                 **kwargs):
        super(ImageFolder, self).__init__(
            dataset_dir,
            image_dir,
            anno_path,
            sample_num=sample_num,
            use_default_label=use_default_label)
        # Filled by _load_images(): image id -> image path.
        self._imid2path = {}
        self.roidbs = None
        self.sample_num = sample_num

    def check_or_download_dataset(self):
        """Resolve ``dataset_dir`` through ``get_dataset_path`` when it is
        set."""
        if self.dataset_dir:
            # NOTE: ImageFolder is only used for prediction, in
            #       infer mode, image_dir is set by set_images
            #       so we only check anno_path here
            self.dataset_dir = get_dataset_path(self.dataset_dir,
                                                self.anno_path, None)

    def parse_dataset(self, ):
        """Load the image records unless they have been loaded already."""
        if not self.roidbs:
            self.roidbs = self._load_images()

    def _parse(self):
        """
        Expand ``self.image_dir`` into a flat list of image paths.

        Directories are scanned with ``_make_dataset`` (after being joined
        with ``dataset_dir``); plain files are kept when
        ``_is_valid_file`` accepts them.
        """
        image_dir = self.image_dir
        # A str is itself a Sequence; without the explicit check a single
        # path would be iterated character by character.
        if isinstance(image_dir, str) or not isinstance(image_dir, Sequence):
            image_dir = [image_dir]
        images = []
        for im_dir in image_dir:
            if os.path.isdir(im_dir):
                im_dir = os.path.join(self.dataset_dir, im_dir)
                images.extend(_make_dataset(im_dir))
            elif os.path.isfile(im_dir) and _is_valid_file(im_dir):
                images.append(im_dir)
        return images

    def _load_images(self):
        """
        Build one record per image, numbered from 0 in discovery order.

        Returns:
            list: records of the form ``{'im_id': array([i]), 'im_file':
                path}``, at most ``sample_num`` of them when that is
                positive.

        Raises:
            AssertionError: if an image path is not a file or no image was
                found.
        """
        images = self._parse()
        ct = 0
        records = []
        for image in images:
            assert image != '' and os.path.isfile(image), \
                    "Image {} not found".format(image)
            if self.sample_num > 0 and ct >= self.sample_num:
                break
            rec = {'im_id': np.array([ct]), 'im_file': image}
            self._imid2path[ct] = image
            ct += 1
            records.append(rec)
        assert len(records) > 0, "No image file found"
        return records

    def get_imid2path(self):
        """Return the image id -> image path mapping."""
        return self._imid2path

    def set_images(self, images):
        """Replace the image source and reload the records from it."""
        self.image_dir = images
        self.roidbs = self._load_images()
@ -0,0 +1,669 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
""" |
||||
This code is based on https://github.com/open-mmlab/mmpose |
||||
""" |
||||
import os |
||||
import cv2 |
||||
import numpy as np |
||||
import json |
||||
import copy |
||||
import pycocotools |
||||
from pycocotools.coco import COCO |
||||
from .dataset import DetDataset |
||||
from paddlers.models.ppdet.core.workspace import register, serializable |
||||
|
||||
|
||||
@serializable
class KeypointBottomUpBaseDataset(DetDataset):
    """Base class for bottom-up datasets.

    All datasets should subclass it.
    All subclasses should overwrite:
        Methods:`_get_imganno`

    Args:
        dataset_dir (str): Root path to the dataset.
        anno_path (str): Relative path to the annotation file.
        image_dir (str): Path to a directory where images are held.
            Default: None.
        num_joints (int): keypoint numbers
        transform (composed(operators)): A sequence of data transforms.
        shard (list): [rank, worldsize], the distributed env params
        test_mode (bool): Store True when building test or
            validation dataset. Default: False.
    """

    def __init__(self,
                 dataset_dir,
                 image_dir,
                 anno_path,
                 num_joints,
                 transform=[],
                 shard=[0, 1],
                 test_mode=False):
        super().__init__(dataset_dir, image_dir, anno_path)
        self.image_info = {}
        self.ann_info = {}

        self.img_prefix = os.path.join(dataset_dir, image_dir)
        self.transform = transform
        self.test_mode = test_mode

        self.ann_info['num_joints'] = num_joints
        # Left empty here; __len__ reports its length.
        self.img_ids = []

    def parse_dataset(self):
        """No-op in the base class."""
        # This method used to be defined twice in the class body with
        # equivalent do-nothing bodies; only one definition is kept.
        return

    def __len__(self):
        """Get dataset length."""
        return len(self.img_ids)

    def _get_imganno(self, idx):
        """Get anno for a single image."""
        raise NotImplementedError

    def __getitem__(self, idx):
        """Prepare image for training given the index."""
        records = copy.deepcopy(self._get_imganno(idx))
        # cv2.imread returns BGR data, converted to RGB on the next line.
        records['image'] = cv2.imread(records['image_file'])
        records['image'] = cv2.cvtColor(records['image'], cv2.COLOR_BGR2RGB)
        records['mask'] = (records['mask'] + 0).astype('uint8')
        records = self.transform(records)
        return records
||||
|
||||
|
||||
@register
@serializable
class KeypointBottomUpCocoDataset(KeypointBottomUpBaseDataset):
    """Bottom-up pose-estimation dataset read from a COCO annotation file.

    Every sample is a dict carrying the image id, the image file path, an
    ignore mask, the joints of all kept persons and the original image
    size; the specified transforms are applied to it by the base class.

    COCO keypoint indexes::

        0: 'nose',
        1: 'left_eye',
        2: 'right_eye',
        3: 'left_ear',
        4: 'right_ear',
        5: 'left_shoulder',
        6: 'right_shoulder',
        7: 'left_elbow',
        8: 'right_elbow',
        9: 'left_wrist',
        10: 'right_wrist',
        11: 'left_hip',
        12: 'right_hip',
        13: 'left_knee',
        14: 'right_knee',
        15: 'left_ankle',
        16: 'right_ankle'

    Args:
        dataset_dir (str): Root path to the dataset.
        image_dir (str): Path to a directory where images are held.
        anno_path (str): Path of the annotation file, relative to
            ``dataset_dir``.
        num_joints (int): Number of keypoints.
        transform (composed(operators)): A sequence of data transforms.
        shard (list): [rank, worldsize], the distributed env params.
        test_mode (bool): Set True when building a test or validation
            dataset. Default: False.
    """

    def __init__(self,
                 dataset_dir,
                 image_dir,
                 anno_path,
                 num_joints,
                 transform=[],
                 shard=[0, 1],
                 test_mode=False):
        super().__init__(dataset_dir, image_dir, anno_path, num_joints,
                         transform, shard, test_mode)

        self.test_mode = test_mode
        self.shard = shard
        self.ann_file = os.path.join(dataset_dir, anno_path)

    def parse_dataset(self):
        """Load the COCO index and select this shard's image ids."""
        self.coco = COCO(self.ann_file)

        image_ids = self.coco.getImgIds()
        if not self.test_mode:
            # Outside test mode, keep only images that have annotations.
            image_ids = [
                image_id for image_id in image_ids
                if len(self.coco.getAnnIds(imgIds=image_id, iscrowd=None)) > 0
            ]

        # Every shard gets the same number of images; ids beyond
        # per_shard * worldsize are not assigned to any shard.
        rank, world_size = self.shard[0], self.shard[1]
        per_shard = int(len(image_ids) / world_size)
        self.img_ids = image_ids[per_shard * rank:per_shard * (rank + 1)]
        self.num_images = len(self.img_ids)
        self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs)
        self.dataset_name = 'coco'

        self.catid2clsid = {
            cat_id: cls_id
            for cls_id, cat_id in enumerate(self.coco.getCatIds())
        }
        print('=> num_images: {}'.format(self.num_images))

    @staticmethod
    def _get_mapping_id_name(imgs):
        """Build the two lookup tables between image ids and file names.

        Args:
            imgs (dict): dict of image info, keyed by image id; each value
                must provide a 'file_name' entry.

        Returns:
            tuple: Image name & id mapping dicts.

            - id2name (dict): Mapping image id to name.
            - name2id (dict): Mapping image name to id.
        """
        id2name = {
            image_id: info['file_name']
            for image_id, info in imgs.items()
        }
        name2id = {
            file_name: image_id
            for image_id, file_name in id2name.items()
        }
        return id2name, name2id

    def _get_imganno(self, idx):
        """Assemble the record of a single image.

        Args:
            idx (int): Position of the image in ``self.img_ids``.

        Returns:
            dict: Record with the keys 'im_id', 'image_file', 'mask',
            'joints' and 'im_shape'.
        """
        image_id = self.img_ids[idx]
        annotations = self.coco.loadAnns(
            self.coco.getAnnIds(imgIds=image_id))

        # The mask is computed from every annotation, before filtering.
        ignore_mask = self._get_mask(annotations, idx)
        kept = [
            ann for ann in annotations
            if ann['iscrowd'] == 0 or ann['num_keypoints'] > 0
        ]
        joints, orgsize = self._get_joints(kept, idx)

        return {
            'im_id': image_id,
            'image_file': os.path.join(self.img_prefix,
                                       self.id2name[image_id]),
            'mask': ignore_mask,
            'joints': joints,
            'im_shape': orgsize,
        }

    def _get_joints(self, anno, idx):
        """Collect the joints of all annotated objects in one image.

        Returns:
            tuple: ``(joints, orgsize)`` where ``joints`` is a float32 array
            of shape (len(anno), num_joints, 3) whose first two channels are
            divided by the image width and height, and ``orgsize`` is
            ``np.array([height, width])``.
        """
        num_joints = self.ann_info['num_joints']
        joints = np.zeros((len(anno), num_joints, 3), dtype=np.float32)

        for person, obj in enumerate(anno):
            keypoints = np.array(obj['keypoints']).reshape([-1, 3])
            joints[person, :num_joints, :3] = keypoints

        img_info = self.coco.loadImgs(self.img_ids[idx])[0]
        height, width = img_info['height'], img_info['width']
        joints[..., 0] /= width
        joints[..., 1] /= height

        return joints, np.array([height, width])

    def _get_mask(self, anno, idx):
        """Get ignore masks to mask out losses.

        Segmentations of crowd objects, and of non-crowd objects with zero
        keypoints, are accumulated; the returned boolean array is True where
        the accumulated value stays below 0.5.
        """
        img_info = self.coco.loadImgs(self.img_ids[idx])[0]
        height, width = img_info['height'], img_info['width']
        covered = np.zeros((height, width), dtype=np.float32)

        for obj in anno:
            if 'segmentation' not in obj:
                continue
            if obj['iscrowd']:
                rle = pycocotools.mask.frPyObjects(obj['segmentation'],
                                                   height, width)
                covered += pycocotools.mask.decode(rle)
            elif obj['num_keypoints'] == 0:
                rles = pycocotools.mask.frPyObjects(obj['segmentation'],
                                                    height, width)
                for rle in rles:
                    covered += pycocotools.mask.decode(rle)

        return covered < 0.5
||||
|
||||
|
||||
@register
@serializable
class KeypointBottomUpCrowdPoseDataset(KeypointBottomUpCocoDataset):
    """Bottom-up pose-estimation dataset for CrowdPose annotations.

    Reuses the COCO bottom-up dataset and only replaces the constructor and
    ``parse_dataset``.

    CrowdPose keypoint indexes::

        0: 'left_shoulder',
        1: 'right_shoulder',
        2: 'left_elbow',
        3: 'right_elbow',
        4: 'left_wrist',
        5: 'right_wrist',
        6: 'left_hip',
        7: 'right_hip',
        8: 'left_knee',
        9: 'right_knee',
        10: 'left_ankle',
        11: 'right_ankle',
        12: 'top_head',
        13: 'neck'

    Args:
        dataset_dir (str): Root path to the dataset.
        image_dir (str): Path to a directory where images are held.
        anno_path (str): Path of the annotation file, relative to
            ``dataset_dir``.
        num_joints (int): Number of keypoints.
        transform (composed(operators)): A sequence of data transforms.
        shard (list): [rank, worldsize], the distributed env params.
        test_mode (bool): Set True when building a test or validation
            dataset. Default: False.
    """

    def __init__(self,
                 dataset_dir,
                 image_dir,
                 anno_path,
                 num_joints,
                 transform=[],
                 shard=[0, 1],
                 test_mode=False):
        super().__init__(dataset_dir, image_dir, anno_path, num_joints,
                         transform, shard, test_mode)

        self.test_mode = test_mode
        self.shard = shard
        self.ann_file = os.path.join(dataset_dir, anno_path)

    def parse_dataset(self):
        """Load the annotation index and select this shard's image ids."""
        self.coco = COCO(self.ann_file)

        image_ids = self.coco.getImgIds()
        if not self.test_mode:
            # Outside test mode, keep only images that have annotations.
            image_ids = [
                image_id for image_id in image_ids
                if len(self.coco.getAnnIds(imgIds=image_id, iscrowd=None)) > 0
            ]

        # Every shard gets the same number of images; ids beyond
        # per_shard * worldsize are not assigned to any shard.
        rank, world_size = self.shard[0], self.shard[1]
        per_shard = int(len(image_ids) / world_size)
        self.img_ids = image_ids[per_shard * rank:per_shard * (rank + 1)]
        self.num_images = len(self.img_ids)
        self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs)

        self.dataset_name = 'crowdpose'
        print('=> num_images: {}'.format(self.num_images))
||||
|
||||
|
||||
@serializable
class KeypointTopDownBaseDataset(DetDataset):
    """Common base of the top-down keypoint datasets.

    Samples live in ``self.db`` (a list of dicts), which starts out empty;
    the subclasses in this file fill it in their ``parse_dataset``.
    ``_get_db`` is a placeholder that raises ``NotImplementedError``.

    Args:
        dataset_dir (str): Root path to the dataset.
        image_dir (str): Path to a directory where images are held.
        anno_path (str): Relative path to the annotation file.
        num_joints (int): Number of keypoints.
        transform (composed(operators)): A sequence of data transforms.
    """

    def __init__(self,
                 dataset_dir,
                 image_dir,
                 anno_path,
                 num_joints,
                 transform=[]):
        super().__init__(dataset_dir, image_dir, anno_path)
        self.image_info = {}
        self.ann_info = {'num_joints': num_joints}

        self.transform = transform
        self.img_prefix = os.path.join(dataset_dir, image_dir)

        self.db = []

    def __len__(self):
        """Return the number of samples stored in ``self.db``."""
        samples = self.db
        return len(samples)

    def _get_db(self):
        """Placeholder; always raises ``NotImplementedError``."""
        raise NotImplementedError()

    def __getitem__(self, idx):
        """Load, decode and transform the sample at position ``idx``."""
        # Deep-copy so that the stored entry in self.db is left untouched.
        sample = copy.deepcopy(self.db[idx])
        read_flags = cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
        bgr_image = cv2.imread(sample['image_file'], read_flags)
        # OpenCV decodes to BGR channel order; convert to RGB.
        sample['image'] = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
        # Samples without a score get the default score 1.
        sample.setdefault('score', 1)
        return self.transform(sample)
||||
|
||||
|
||||
@register
@serializable
class KeypointTopDownCocoDataset(KeypointTopDownBaseDataset):
    """COCO dataset for top-down pose estimation.

    Builds one sample per person box, either from the ground-truth
    annotations or from a detection result file, and lets the base class
    load the image and apply the specified transforms.

    COCO keypoint indexes:

        0: 'nose',
        1: 'left_eye',
        2: 'right_eye',
        3: 'left_ear',
        4: 'right_ear',
        5: 'left_shoulder',
        6: 'right_shoulder',
        7: 'left_elbow',
        8: 'right_elbow',
        9: 'left_wrist',
        10: 'right_wrist',
        11: 'left_hip',
        12: 'right_hip',
        13: 'left_knee',
        14: 'right_knee',
        15: 'left_ankle',
        16: 'right_ankle'

    Args:
        dataset_dir (str): Root path to the dataset.
        image_dir (str): Path to a directory where images are held.
        anno_path (str): Relative path to the annotation file.
        num_joints (int): Number of keypoints.
        trainsize (list): [w, h] image target size; only its ratio is used
            here, to fix the aspect ratio of the person boxes.
        transform (composed(operators)): A sequence of data transforms.
        bbox_file (str): Path to a detection bbox file, relative to
            ``dataset_dir``. Default: None.
        use_gt_bbox (bool): Whether to use ground truth bbox.
            Default: True.
        pixel_std (int): The pixel std of the scale.
            Default: 200.
        image_thre (float): The threshold to filter the detection box.
            Default: 0.0.
    """

    def __init__(self,
                 dataset_dir,
                 image_dir,
                 anno_path,
                 num_joints,
                 trainsize,
                 transform=[],
                 bbox_file=None,
                 use_gt_bbox=True,
                 pixel_std=200,
                 image_thre=0.0):
        super().__init__(dataset_dir, image_dir, anno_path, num_joints,
                         transform)

        self.bbox_file = bbox_file
        self.use_gt_bbox = use_gt_bbox
        self.trainsize = trainsize
        self.pixel_std = pixel_std
        self.image_thre = image_thre
        self.dataset_name = 'coco'

    def parse_dataset(self):
        """Fill ``self.db`` from ground truth or from detection results."""
        if self.use_gt_bbox:
            self.db = self._load_coco_keypoint_annotations()
        else:
            self.db = self._load_coco_person_detection_results()

    def _load_coco_keypoint_annotations(self):
        """Build one sample per valid ground-truth person annotation.

        Returns:
            list[dict]: Samples with the keys 'image_file', 'center',
            'scale', 'joints', 'joints_vis' and 'im_id'.
        """
        coco = COCO(self.get_anno())
        num_joints = self.ann_info['num_joints']
        gt_db = []
        for index in coco.getImgIds():
            im_ann = coco.loadImgs(index)[0]
            width = im_ann['width']
            height = im_ann['height']
            file_name = im_ann['file_name']
            im_id = int(im_ann["id"])

            annIds = coco.getAnnIds(imgIds=index, iscrowd=False)
            objs = coco.loadAnns(annIds)

            # Clip every box to the image and keep only non-degenerate ones.
            valid_objs = []
            for obj in objs:
                x, y, w, h = obj['bbox']
                x1 = np.max((0, x))
                y1 = np.max((0, y))
                x2 = np.min((width - 1, x1 + np.max((0, w - 1))))
                y2 = np.min((height - 1, y1 + np.max((0, h - 1))))
                if obj['area'] > 0 and x2 >= x1 and y2 >= y1:
                    obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
                    valid_objs.append(obj)

            rec = []
            for obj in valid_objs:
                # Skip objects whose keypoint list is all zeros.
                if max(obj['keypoints']) == 0:
                    continue

                # np.float was an alias of the builtin float (float64) and
                # was removed in NumPy 1.24; use the explicit dtype instead.
                joints = np.zeros((num_joints, 3), dtype=np.float64)
                joints_vis = np.zeros((num_joints, 3), dtype=np.float64)
                for ipt in range(num_joints):
                    joints[ipt, 0] = obj['keypoints'][ipt * 3 + 0]
                    joints[ipt, 1] = obj['keypoints'][ipt * 3 + 1]
                    joints[ipt, 2] = 0
                    # Visibility flags above 1 are clamped to 1.
                    t_vis = obj['keypoints'][ipt * 3 + 2]
                    if t_vis > 1:
                        t_vis = 1
                    joints_vis[ipt, 0] = t_vis
                    joints_vis[ipt, 1] = t_vis
                    joints_vis[ipt, 2] = 0

                center, scale = self._box2cs(obj['clean_bbox'][:4])
                rec.append({
                    'image_file': os.path.join(self.img_prefix, file_name),
                    'center': center,
                    'scale': scale,
                    'joints': joints,
                    'joints_vis': joints_vis,
                    'im_id': im_id,
                })
            gt_db.extend(rec)

        return gt_db

    def _box2cs(self, box):
        """Convert an ``[x, y, w, h]`` box to a center and a scale.

        The box is first enlarged along one side so that w / h equals
        ``trainsize[0] / trainsize[1]``; the scale is the enlarged size
        divided by ``pixel_std`` and, unless ``center[0]`` is -1,
        multiplied by 1.25.

        Returns:
            tuple: ``(center, scale)``, two float32 arrays of length 2.
        """
        x, y, w, h = box[:4]
        center = np.zeros((2), dtype=np.float32)
        center[0] = x + w * 0.5
        center[1] = y + h * 0.5
        aspect_ratio = self.trainsize[0] * 1.0 / self.trainsize[1]

        if w > aspect_ratio * h:
            h = w * 1.0 / aspect_ratio
        elif w < aspect_ratio * h:
            w = h * aspect_ratio
        scale = np.array(
            [w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std],
            dtype=np.float32)
        if center[0] != -1:
            scale = scale * 1.25

        return center, scale

    def _load_coco_person_detection_results(self):
        """Build one sample per detected person box from ``bbox_file``.

        Detections with ``category_id != 1`` or a score below
        ``image_thre`` are skipped. Joints are all zeros and the
        visibility array is all ones.

        Returns:
            list[dict] | None: The samples, or ``None`` when the file holds
            no detections at all.
        """
        bbox_file_path = os.path.join(self.dataset_dir, self.bbox_file)
        with open(bbox_file_path, 'r') as f:
            all_boxes = json.load(f)

        if not all_boxes:
            print('=> Load %s fail!' % bbox_file_path)
            return None

        num_joints = self.ann_info['num_joints']
        kpt_db = []
        for det_res in all_boxes:
            if det_res['category_id'] != 1:
                continue
            # Without an explicit file name, fall back to the zero-padded
            # 12-digit image id.
            file_name = det_res[
                'filename'] if 'filename' in det_res else '%012d.jpg' % det_res[
                    'image_id']
            img_name = os.path.join(self.img_prefix, file_name)
            box = det_res['bbox']
            score = det_res['score']
            im_id = int(det_res['image_id'])

            if score < self.image_thre:
                continue

            center, scale = self._box2cs(box)
            # np.float was removed in NumPy 1.24; float64 is the same dtype.
            joints = np.zeros((num_joints, 3), dtype=np.float64)
            joints_vis = np.ones((num_joints, 3), dtype=np.float64)
            kpt_db.append({
                'image_file': img_name,
                'im_id': im_id,
                'center': center,
                'scale': scale,
                'score': score,
                'joints': joints,
                'joints_vis': joints_vis,
            })

        return kpt_db
||||
|
||||
|
||||
@register
@serializable
class KeypointTopDownMPIIDataset(KeypointTopDownBaseDataset):
    """MPII dataset for topdown pose estimation.

    Reads a JSON annotation list and builds one sample per entry; the base
    class loads the image and applies the specified transforms.

    MPII keypoint indexes::

        0: 'right_ankle',
        1: 'right_knee',
        2: 'right_hip',
        3: 'left_hip',
        4: 'left_knee',
        5: 'left_ankle',
        6: 'pelvis',
        7: 'thorax',
        8: 'upper_neck',
        9: 'head_top',
        10: 'right_wrist',
        11: 'right_elbow',
        12: 'right_shoulder',
        13: 'left_shoulder',
        14: 'left_elbow',
        15: 'left_wrist',

    Args:
        dataset_dir (str): Root path to the dataset.
        image_dir (str): Path to a directory where images are held.
        anno_path (str): Relative path to the annotation file.
        num_joints (int): Number of keypoints.
        transform (composed(operators)): A sequence of data transforms.
    """

    def __init__(self,
                 dataset_dir,
                 image_dir,
                 anno_path,
                 num_joints,
                 transform=[]):
        super().__init__(dataset_dir, image_dir, anno_path, num_joints,
                         transform)

        self.dataset_name = 'mpii'

    def parse_dataset(self):
        """Read the annotation file and fill ``self.db``.

        Raises:
            AssertionError: If an entry's joint count differs from
                ``num_joints``.
        """
        with open(self.get_anno()) as anno_file:
            anno = json.load(anno_file)

        num_joints = self.ann_info['num_joints']
        gt_db = []
        for a in anno:
            image_name = a['image']
            # Without an explicit id, use the numeric file stem as image id.
            im_id = a['image_id'] if 'image_id' in a else int(
                os.path.splitext(image_name)[0])

            # np.float was an alias of the builtin float (float64) and was
            # removed in NumPy 1.24; use the explicit dtype instead.
            c = np.array(a['center'], dtype=np.float64)
            s = np.array([a['scale'], a['scale']], dtype=np.float64)

            # Adjust center/scale slightly to avoid cropping limbs
            if c[0] != -1:
                c[1] = c[1] + 15 * s[1]
                s = s * 1.25
            # NOTE(review): the "- 1" here and on the joints below looks
            # like a 1-based to 0-based conversion - confirm against the
            # annotation format.
            c = c - 1

            joints = np.zeros((num_joints, 3), dtype=np.float64)
            joints_vis = np.zeros((num_joints, 3), dtype=np.float64)
            if 'joints' in a:
                joints_ = np.array(a['joints'])
                joints_[:, 0:2] = joints_[:, 0:2] - 1
                joints_vis_ = np.array(a['joints_vis'])
                assert len(joints_) == self.ann_info[
                    'num_joints'], 'joint num diff: {} vs {}'.format(
                        len(joints_), self.ann_info['num_joints'])

                joints[:, 0:2] = joints_[:, 0:2]
                joints_vis[:, 0] = joints_vis_[:]
                joints_vis[:, 1] = joints_vis_[:]

            gt_db.append({
                'image_file': os.path.join(self.img_prefix, image_name),
                'im_id': im_id,
                'center': c,
                'scale': s,
                'joints': joints,
                'joints_vis': joints_vis
            })
        print("number length: {}".format(len(gt_db)))
        self.db = gt_db
@ -0,0 +1,636 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
import os |
||||
import sys |
||||
import cv2 |
||||
import glob |
||||
import numpy as np |
||||
from collections import OrderedDict, defaultdict |
||||
try: |
||||
from collections.abc import Sequence |
||||
except Exception: |
||||
from collections import Sequence |
||||
from .dataset import DetDataset, _make_dataset, _is_valid_file |
||||
from paddlers.models.ppdet.core.workspace import register, serializable |
||||
from paddlers.models.ppdet.utils.logger import setup_logger |
||||
logger = setup_logger(__name__) |
||||
|
||||
|
||||
@register
@serializable
class MOTDataSet(DetDataset):
    """
    Load dataset with MOT format, only support single class MOT.

    Args:
        dataset_dir (str): root directory for dataset.
        image_lists (str|list): mot data image lists, multi-source mot dataset.
        data_fields (list): key name of data dictionary, at least have 'image'.
        sample_num (int): number of samples to load, -1 means all.

    Notes:
        MOT datasets root directory following this:
            dataset/mot
            |——————image_lists
            |        |——————caltech.train
            |        |——————caltech.val
            |        |——————mot16.train
            |        |——————mot17.train
            |        ......
            |——————Caltech
            |——————MOT17
            |——————......

        All the MOT datasets have the following structure:
            Caltech
            |——————images
            |        └——————00001.jpg
            |        |—————— ...
            |        └——————0000N.jpg
            └——————labels_with_ids
                        └——————00001.txt
                        |—————— ...
                        └——————0000N.txt
            or

            MOT17
            |——————images
            |        └——————train
            |        └——————test
            └——————labels_with_ids
                        └——————train
    """

    def __init__(self,
                 dataset_dir=None,
                 image_lists=[],
                 data_fields=['image'],
                 sample_num=-1):
        super(MOTDataSet, self).__init__(
            dataset_dir=dataset_dir,
            data_fields=data_fields,
            sample_num=sample_num)
        self.dataset_dir = dataset_dir
        self.image_lists = image_lists
        # A single list name is accepted as a plain string.
        if isinstance(self.image_lists, str):
            self.image_lists = [self.image_lists]
        self.roidbs = None
        self.cname2cid = None

    def get_anno(self):
        """Return the 'label_list.txt' path of the first sub-dataset.

        Returns ``None`` when ``image_lists`` is empty.
        """
        if self.image_lists == []:
            return
        # only used to get categories and metric
        # only check first data, but the label_list of all data should be same.
        first_mot_data = self.image_lists[0].split('.')[0]
        anno_file = os.path.join(self.dataset_dir, first_mot_data,
                                 'label_list.txt')
        return anno_file

    def parse_dataset(self):
        """Read every image list and build ``self.roidbs``.

        Also sets the per-dataset bookkeeping attributes (``img_files``,
        ``label_files``, ``img_start_index``, ``tid_num``,
        ``tid_start_index``, ``num_identities_dict``,
        ``num_imgs_each_data``, ``total_imgs``) and ``self.cname2cid``.

        Raises:
            AssertionError: If the 'image_lists' directory or a list file
                is missing, or if no record could be loaded.
        """
        self.img_files = OrderedDict()
        self.img_start_index = OrderedDict()
        self.label_files = OrderedDict()
        self.tid_num = OrderedDict()
        self.tid_start_index = OrderedDict()

        img_index = 0
        for data_name in self.image_lists:
            # check every data image list
            image_lists_dir = os.path.join(self.dataset_dir, 'image_lists')
            assert os.path.isdir(image_lists_dir), \
                "The {} is not a directory.".format(image_lists_dir)

            list_path = os.path.join(image_lists_dir, data_name)
            assert os.path.exists(list_path), \
                "The list path {} does not exist.".format(list_path)

            # record img_files, filter out empty ones. Blank lines must be
            # dropped BEFORE joining with dataset_dir: once joined, the
            # path is never empty and the filter would let them through.
            with open(list_path, 'r') as file:
                rel_paths = [line.strip() for line in file]
            self.img_files[data_name] = [
                os.path.join(self.dataset_dir, rel_path)
                for rel_path in rel_paths if rel_path
            ]

            self.img_start_index[data_name] = img_index
            img_index += len(self.img_files[data_name])

            # record label_files
            self.label_files[data_name] = [
                x.replace('images', 'labels_with_ids').replace(
                    '.png', '.txt').replace('.jpg', '.txt')
                for x in self.img_files[data_name]
            ]

        # Largest identity value (label column 1) of every sub-dataset.
        for data_name, label_paths in self.label_files.items():
            max_index = -1
            for lp in label_paths:
                lb = np.loadtxt(lp)
                if len(lb) < 1:
                    continue
                if len(lb.shape) < 2:
                    # A file with a single row is loaded as a 1-D array.
                    img_max = lb[1]
                else:
                    img_max = np.max(lb[:, 1])
                if img_max > max_index:
                    max_index = img_max
            self.tid_num[data_name] = int(max_index + 1)

        # Offset of every sub-dataset's identities in the merged id space.
        last_index = 0
        for k, v in self.tid_num.items():
            self.tid_start_index[k] = last_index
            last_index += v

        self.num_identities_dict = defaultdict(int)
        self.num_identities_dict[0] = int(last_index + 1)  # single class
        self.num_imgs_each_data = [len(x) for x in self.img_files.values()]
        self.total_imgs = sum(self.num_imgs_each_data)

        logger.info('MOT dataset summary: ')
        logger.info(self.tid_num)
        logger.info('Total images: {}'.format(self.total_imgs))
        logger.info('Image start index: {}'.format(self.img_start_index))
        logger.info('Total identities: {}'.format(self.num_identities_dict[0]))
        logger.info('Identity start index: {}'.format(self.tid_start_index))

        records = []
        cname2cid = mot_label()

        data_names = list(self.label_files.keys())
        for img_index in range(self.total_imgs):
            # The sub-dataset owning this global index is the last one
            # whose start index does not exceed it.
            for i, start in enumerate(self.img_start_index.values()):
                if img_index >= start:
                    data_name = data_names[i]
                    start_index = start
            img_file = self.img_files[data_name][img_index - start_index]
            lbl_file = self.label_files[data_name][img_index - start_index]

            if not os.path.exists(img_file):
                logger.warning(
                    'Illegal image file: {}, and it will be ignored'.format(
                        img_file))
                continue
            if not os.path.isfile(lbl_file):
                logger.warning(
                    'Illegal label file: {}, and it will be ignored'.format(
                        lbl_file))
                continue

            labels = np.loadtxt(lbl_file, dtype=np.float32).reshape(-1, 6)
            # each row in labels (N, 6) is [gt_class, gt_identity, cx, cy, w, h]

            cx, cy = labels[:, 2], labels[:, 3]
            w, h = labels[:, 4], labels[:, 5]
            gt_bbox = np.stack((cx, cy, w, h)).T.astype('float32')
            gt_class = labels[:, 0:1].astype('int32')
            gt_score = np.ones((len(labels), 1)).astype('float32')
            gt_ide = labels[:, 1:2].astype('int32')
            # Shift identities above -1 into the merged id space.
            gt_ide[gt_ide > -1] += self.tid_start_index[data_name]

            mot_rec = {
                'im_file': img_file,
                'im_id': img_index,
            } if 'image' in self.data_fields else {}

            gt_rec = {
                'gt_class': gt_class,
                'gt_score': gt_score,
                'gt_bbox': gt_bbox,
                'gt_ide': gt_ide,
            }

            for k, v in gt_rec.items():
                if k in self.data_fields:
                    mot_rec[k] = v

            records.append(mot_rec)
            # NOTE(review): img_index is 0-based, so this stops only after
            # the image with index == sample_num has been added - confirm
            # whether loading sample_num + 1 images is intended.
            if self.sample_num > 0 and img_index >= self.sample_num:
                break
        assert len(records) > 0, 'not found any mot record in %s' % (
            self.image_lists)
        self.roidbs, self.cname2cid = records, cname2cid
||||
|
||||
|
||||
@register
@serializable
class MCMOTDataSet(DetDataset):
    """
    Load dataset with MOT format, support multi-class MOT.

    Args:
        dataset_dir (str): root directory for dataset.
        image_lists (list(str)): mcmot data image lists, multi-source mcmot dataset.
        data_fields (list): key name of data dictionary, at least have 'image'.
        label_list (str): file name of the label list inside the first
            sub-dataset directory; each line is a category name and its line
            number becomes the class index. When None, or when the file does
            not exist, the VisDrone default labels are used.
        sample_num (int): number of samples to load, -1 means all.

    Notes:
        MCMOT datasets root directory following this:
            dataset/mot
            |——————image_lists
            |        |——————visdrone_mcmot.train
            |        |——————visdrone_mcmot.val
            visdrone_mcmot
            |——————images
            |        └——————train
            |        └——————val
            └——————labels_with_ids
                        └——————train
    """

    def __init__(self,
                 dataset_dir=None,
                 image_lists=[],
                 data_fields=['image'],
                 label_list=None,
                 sample_num=-1):
        super(MCMOTDataSet, self).__init__(
            dataset_dir=dataset_dir,
            data_fields=data_fields,
            sample_num=sample_num)
        self.dataset_dir = dataset_dir
        self.image_lists = image_lists
        # A single list name is accepted as a plain string.
        if isinstance(self.image_lists, str):
            self.image_lists = [self.image_lists]
        self.label_list = label_list
        self.roidbs = None
        self.cname2cid = None

    def get_anno(self):
        """Return the 'label_list.txt' path of the first sub-dataset.

        Returns ``None`` when ``image_lists`` is empty.
        """
        if self.image_lists == []:
            return
        # only used to get categories and metric
        # only check first data, but the label_list of all data should be same.
        first_mot_data = self.image_lists[0].split('.')[0]
        anno_file = os.path.join(self.dataset_dir, first_mot_data,
                                 'label_list.txt')
        return anno_file

    def parse_dataset(self):
        """Read every image list and build ``self.roidbs``.

        Also sets the per-dataset bookkeeping attributes (``img_files``,
        ``label_files``, ``img_start_index``, ``tid_num``,
        ``tid_start_idx_of_cls_ids``, ``num_identities_dict``,
        ``num_imgs_each_data``, ``total_imgs``) and ``self.cname2cid``.

        Raises:
            AssertionError: If the 'image_lists' directory or a list file
                is missing, or if no record could be loaded.
        """
        self.img_files = OrderedDict()
        self.img_start_index = OrderedDict()
        self.label_files = OrderedDict()
        self.tid_num = OrderedDict()
        self.tid_start_idx_of_cls_ids = defaultdict(dict)  # for MCMOT

        img_index = 0
        for data_name in self.image_lists:
            # check every data image list
            image_lists_dir = os.path.join(self.dataset_dir, 'image_lists')
            assert os.path.isdir(image_lists_dir), \
                "The {} is not a directory.".format(image_lists_dir)

            list_path = os.path.join(image_lists_dir, data_name)
            assert os.path.exists(list_path), \
                "The list path {} does not exist.".format(list_path)

            # record img_files, filter out empty ones. Blank lines must be
            # dropped BEFORE joining with dataset_dir: once joined, the
            # path is never empty and the filter would let them through.
            with open(list_path, 'r') as file:
                rel_paths = [line.strip() for line in file]
            self.img_files[data_name] = [
                os.path.join(self.dataset_dir, rel_path)
                for rel_path in rel_paths if rel_path
            ]

            self.img_start_index[data_name] = img_index
            img_index += len(self.img_files[data_name])

            # record label_files
            self.label_files[data_name] = [
                x.replace('images', 'labels_with_ids').replace(
                    '.png', '.txt').replace('.jpg', '.txt')
                for x in self.img_files[data_name]
            ]

        for data_name, label_paths in self.label_files.items():
            # using max_ids_dict rather than max_index
            max_ids_dict = defaultdict(int)
            for lp in label_paths:
                lb = np.loadtxt(lp)
                if len(lb) < 1:
                    continue
                lb = lb.reshape(-1, 6)
                for item in lb:
                    # item[0]: cls_id
                    # item[1]: track id
                    # Looking the class up in the defaultdict also creates
                    # its entry (value 0), so every class that occurs gets
                    # a start index below.
                    cls_id = int(item[0])
                    if item[1] > max_ids_dict[cls_id]:
                        max_ids_dict[cls_id] = int(item[1])
            # track id number
            self.tid_num[data_name] = max_ids_dict

        # Per class, offset of every sub-dataset's track ids in the merged
        # id space.
        last_idx_dict = defaultdict(int)
        for k, v in self.tid_num.items():  # each sub dataset
            for cls_id, id_num in v.items():  # v is a max_ids_dict
                self.tid_start_idx_of_cls_ids[k][cls_id] = last_idx_dict[
                    cls_id]
                last_idx_dict[cls_id] += id_num

        self.num_identities_dict = defaultdict(int)
        for k, v in last_idx_dict.items():
            self.num_identities_dict[k] = int(v)  # total ids of each category

        self.num_imgs_each_data = [len(x) for x in self.img_files.values()]
        self.total_imgs = sum(self.num_imgs_each_data)

        # cname2cid and cid2cname
        cname2cid = {}
        if self.label_list is not None:
            # if use label_list for multi source mix dataset,
            # please make sure label_list in the first sub_dataset at least.
            sub_dataset = self.image_lists[0].split('.')[0]
            label_path = os.path.join(self.dataset_dir, sub_dataset,
                                      self.label_list)
            if not os.path.exists(label_path):
                logger.info(
                    "Note: label_list {} does not exists, use VisDrone 10 classes labels as default.".
                    format(label_path))
                cname2cid = visdrone_mcmot_label()
            else:
                with open(label_path, 'r') as fr:
                    for label_id, line in enumerate(fr.readlines()):
                        cname2cid[line.strip()] = label_id
        else:
            cname2cid = visdrone_mcmot_label()

        cid2cname = {v: k for (k, v) in cname2cid.items()}

        logger.info('MCMOT dataset summary: ')
        logger.info(self.tid_num)
        logger.info('Total images: {}'.format(self.total_imgs))
        logger.info('Image start index: {}'.format(self.img_start_index))

        logger.info('Total identities of each category: ')
        num_identities_dict = sorted(
            self.num_identities_dict.items(), key=lambda x: x[0])
        total_IDs_all_cats = 0
        for (k, v) in num_identities_dict:
            logger.info('Category {} [{}] has {} IDs.'.format(k, cid2cname[k],
                                                              v))
            total_IDs_all_cats += v
        logger.info('Total identities of all categories: {}'.format(
            total_IDs_all_cats))

        logger.info('Identity start index of each category: ')
        for k, v in self.tid_start_idx_of_cls_ids.items():
            sorted_v = sorted(v.items(), key=lambda x: x[0])
            for (cls_id, start_idx) in sorted_v:
                logger.info('Start index of dataset {} category {:d} is {:d}'
                            .format(k, cls_id, start_idx))

        records = []
        data_names = list(self.label_files.keys())
        for img_index in range(self.total_imgs):
            # The sub-dataset owning this global index is the last one
            # whose start index does not exceed it.
            for i, start in enumerate(self.img_start_index.values()):
                if img_index >= start:
                    data_name = data_names[i]
                    start_index = start
            img_file = self.img_files[data_name][img_index - start_index]
            lbl_file = self.label_files[data_name][img_index - start_index]

            if not os.path.exists(img_file):
                logger.warning(
                    'Illegal image file: {}, and it will be ignored'.format(
                        img_file))
                continue
            if not os.path.isfile(lbl_file):
                logger.warning(
                    'Illegal label file: {}, and it will be ignored'.format(
                        lbl_file))
                continue

            labels = np.loadtxt(lbl_file, dtype=np.float32).reshape(-1, 6)
            # each row in labels (N, 6) is [gt_class, gt_identity, cx, cy, w, h]

            cx, cy = labels[:, 2], labels[:, 3]
            w, h = labels[:, 4], labels[:, 5]
            gt_bbox = np.stack((cx, cy, w, h)).T.astype('float32')
            gt_class = labels[:, 0:1].astype('int32')
            gt_score = np.ones((len(labels), 1)).astype('float32')
            gt_ide = labels[:, 1:2].astype('int32')
            for i, _ in enumerate(gt_ide):
                if gt_ide[i] > -1:
                    # Index the scalar explicitly: int() on a 1-element
                    # array is deprecated since NumPy 1.25.
                    cls_id = int(gt_class[i, 0])
                    start_idx = self.tid_start_idx_of_cls_ids[data_name][
                        cls_id]
                    gt_ide[i] += start_idx

            mot_rec = {
                'im_file': img_file,
                'im_id': img_index,
            } if 'image' in self.data_fields else {}

            gt_rec = {
                'gt_class': gt_class,
                'gt_score': gt_score,
                'gt_bbox': gt_bbox,
                'gt_ide': gt_ide,
            }

            for k, v in gt_rec.items():
                if k in self.data_fields:
                    mot_rec[k] = v

            records.append(mot_rec)
            # NOTE(review): img_index is 0-based, so this stops only after
            # the image with index == sample_num has been added - confirm
            # whether loading sample_num + 1 images is intended.
            if self.sample_num > 0 and img_index >= self.sample_num:
                break
        assert len(records) > 0, 'not found any mot record in %s' % (
            self.image_lists)
        self.roidbs, self.cname2cid = records, cname2cid
||||
|
||||
|
||||
@register
@serializable
class MOTImageFolder(DetDataset):
    """
    Load MOT inference inputs from an image folder or from a video file.

    When a video file is given, its frames are first dumped to PNG files
    with `video2frames` and those frames become the dataset records.

    Args:
        video_file (str): path of the video file, default None.
        frame_rate (int): frame rate of the video, use cv2 VideoCapture if not set.
        dataset_dir (str): root directory for dataset.
        data_root (str): stored on the instance; not used by this class itself.
        image_dir (str|list): image directory/file, or a list of them.
        sample_num (int): maximum number of records to load; values <= 0
            mean no limit.
        keep_ori_im (bool): whether to keep original image, default False.
            Set True when used during MOT model inference while saving
            images or video, or used in DeepSORT.
    """

    def __init__(self,
                 video_file=None,
                 frame_rate=-1,
                 dataset_dir=None,
                 data_root=None,
                 image_dir=None,
                 sample_num=-1,
                 keep_ori_im=False,
                 **kwargs):
        super(MOTImageFolder, self).__init__(
            dataset_dir, image_dir, sample_num=sample_num)
        self.video_file = video_file
        self.data_root = data_root
        self.keep_ori_im = keep_ori_im
        self._imid2path = {}
        self.roidbs = None
        self.frame_rate = frame_rate

    def check_or_download_dataset(self):
        """No-op: inference inputs are never downloaded."""
        return

    def parse_dataset(self, ):
        """Populate `self.roidbs` once, from images or from the video."""
        if not self.roidbs:
            if self.video_file is None:
                self.frame_rate = 30  # set as default if infer image folder
                self.roidbs = self._load_images()
            else:
                self.roidbs = self._load_video_images()

    def _load_video_images(self):
        """Extract the video frames to disk and build one record per frame."""
        if self.frame_rate == -1:
            # if frame_rate is not set for video, use cv2.VideoCapture
            cap = cv2.VideoCapture(self.video_file)
            try:
                self.frame_rate = int(cap.get(cv2.CAP_PROP_FPS))
            finally:
                # The capture is only needed to read the FPS; release the
                # underlying handle instead of leaving it to the GC.
                cap.release()

        extension = self.video_file.split('.')[-1]
        output_path = self.video_file.replace('.{}'.format(extension), '')
        frames_path = video2frames(self.video_file, output_path,
                                   self.frame_rate)
        self.video_frames = sorted(
            glob.glob(os.path.join(frames_path, '*.png')))

        self.video_length = len(self.video_frames)
        logger.info('Length of the video: {:d} frames.'.format(
            self.video_length))
        return self._build_records(self.video_frames)

    def _find_images(self):
        """Collect image paths from `self.image_dir` (a path or a list of paths)."""
        image_dir = self.image_dir
        # A plain string is itself a Sequence; wrap it so that it is treated
        # as ONE path instead of being iterated character by character.
        if isinstance(image_dir, str) or not isinstance(image_dir, Sequence):
            image_dir = [image_dir]
        images = []
        for im_dir in image_dir:
            if os.path.isdir(im_dir):
                im_dir = os.path.join(self.dataset_dir, im_dir)
                images.extend(_make_dataset(im_dir))
            elif os.path.isfile(im_dir) and _is_valid_file(im_dir):
                images.append(im_dir)
        return images

    def _load_images(self):
        """Build one record per image found by `_find_images`."""
        return self._build_records(self._find_images())

    def _build_records(self, images):
        """Turn an ordered list of image paths into inference records.

        Each record holds `im_id` (a 1-element array with the running index)
        and `im_file`, plus `keep_ori_im` when that option is enabled.
        """
        # Start from a clean id -> path table so that entries from a previous
        # set_images()/set_video() call cannot survive into this one.
        self._imid2path = {}
        records = []
        for image in images:
            assert image != '' and os.path.isfile(image), \
                "Image {} not found".format(image)
            ct = len(records)
            if self.sample_num > 0 and ct >= self.sample_num:
                break
            rec = {'im_id': np.array([ct]), 'im_file': image}
            if self.keep_ori_im:
                rec.update({'keep_ori_im': 1})
            self._imid2path[ct] = image
            records.append(rec)
        assert len(records) > 0, "No image file found"
        return records

    def get_imid2path(self):
        """Return the mapping from record id to image path."""
        return self._imid2path

    def set_images(self, images):
        """Replace the image source and reload the records."""
        self.image_dir = images
        self.roidbs = self._load_images()

    def set_video(self, video_file, frame_rate):
        """Replace the video source and reload the records from its frames."""
        # update video_file and frame_rate by command line of tools/infer_mot.py
        self.video_file = video_file
        self.frame_rate = frame_rate
        assert os.path.isfile(self.video_file) and _is_valid_video(self.video_file), \
            "wrong or unsupported file format: {}".format(self.video_file)
        self.roidbs = self._load_video_images()
||||
|
||||
|
||||
def _is_valid_video(f, extensions=('.mp4', '.avi', '.mov', '.rmvb', 'flv')): |
||||
return f.lower().endswith(extensions) |
||||
|
||||
|
||||
def video2frames(video_path, outpath, frame_rate, **kargs):
    """Dump the frames of a video to PNG files using the `ffmpeg` executable.

    Frames are written as `%08d.png` into `<outpath>/<video base name>`,
    where the base name is the part of the file name before its first dot.

    Args:
        video_path (str): path of the input video.
        outpath (str): directory under which the frame folder is created.
        frame_rate (int): value passed to ffmpeg's `-r` option.
        **kargs: extra `option value` pairs appended to the ffmpeg command
            line after the output pattern, each key and value as one argument.

    Returns:
        str: the directory that contains the extracted frames.

    Raises:
        RuntimeError: if ffmpeg cannot be started or exits with a non-zero
            status.
    """
    # Imported locally so this function does not depend on a file-level import.
    import subprocess

    vid_name = os.path.basename(video_path).split('.')[0]
    out_full_path = os.path.join(outpath, vid_name)
    os.makedirs(out_full_path, exist_ok=True)

    # frame file name pattern
    outformat = os.path.join(out_full_path, '%08d.png')

    # Pass the arguments as a list (no shell) so that paths containing
    # spaces or shell metacharacters are handed to ffmpeg verbatim.
    cmd = [
        'ffmpeg', '-y', '-loglevel', 'error', '-i', video_path, '-r',
        str(frame_rate), '-f', 'image2', outformat
    ]
    for k, v in kargs.items():
        cmd.extend([str(k), str(v)])

    try:
        returncode = subprocess.call(cmd)
    except OSError as e:
        # e.g. the ffmpeg executable is not installed / not on PATH
        raise RuntimeError('ffmpeg process video: {} error'.format(
            video_path)) from e
    if returncode != 0:
        raise RuntimeError('ffmpeg process video: {} error'.format(video_path))

    sys.stdout.flush()
    return out_full_path
||||
|
||||
|
||||
def mot_label():
    """Return the category-name -> class-id mapping for single-class MOT."""
    return {'person': 0}
||||
|
||||
|
||||
def visdrone_mcmot_label():
    """Return the category-name -> class-id mapping of the 10 VisDrone MCMOT classes."""
    # The position of a name in this tuple is its class id.
    class_names = (
        'pedestrian',
        'people',
        'bicycle',
        'car',
        'van',
        'truck',
        'tricycle',
        'awning-tricycle',
        'bus',
        'motor', )
    return {name: cls_id for cls_id, name in enumerate(class_names)}
@ -0,0 +1,191 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
import os |
||||
import cv2 |
||||
import json |
||||
import copy |
||||
import numpy as np |
||||
|
||||
try: |
||||
from collections.abc import Sequence |
||||
except Exception: |
||||
from collections import Sequence |
||||
|
||||
from paddlers.models.ppdet.core.workspace import register, serializable |
||||
from paddlers.models.ppdet.data.crop_utils.annotation_cropper import AnnoCropper |
||||
from .coco import COCODataSet |
||||
from .dataset import _make_dataset, _is_valid_file |
||||
from paddlers.models.ppdet.utils.logger import setup_logger |
||||
|
||||
logger = setup_logger('sniper_coco_dataset') |
||||
|
||||
|
||||
@register
@serializable
class SniperCOCODataSet(COCODataSet):
    """COCO-format dataset whose records are cut into chips by `AnnoCropper`.

    The first nine constructor arguments up to `empty_ratio` are forwarded
    to `COCODataSet` (except `proposals_file`, which is kept here); the
    remaining ones are stored and later handed to `AnnoCropper` by
    `init_anno_cropper`.

    Args:
        proposals_file (str): optional JSON file with a list of entries that
            each have an "image_id" and a "bbox" given as [x, y, w, h].
        is_trainset (bool): selects `crop_anno_records` (True) or
            `crop_infer_anno_records` (False) when generating chips, and
            whether proposals are parsed and merged.
    """

    # NOTE(review): the list defaults below are shared between calls; they are
    # kept as-is to leave the signature untouched and are only stored or
    # forwarded by this class, never mutated in it.
    def __init__(self,
                 dataset_dir=None,
                 image_dir=None,
                 anno_path=None,
                 proposals_file=None,
                 data_fields=['image'],
                 sample_num=-1,
                 load_crowd=False,
                 allow_empty=True,
                 empty_ratio=1.,
                 is_trainset=True,
                 image_target_sizes=[2000, 1000],
                 valid_box_ratio_ranges=[[-1, 0.1], [0.08, -1]],
                 chip_target_size=500,
                 chip_target_stride=200,
                 use_neg_chip=False,
                 max_neg_num_per_im=8,
                 max_per_img=-1,
                 nms_thresh=0.5):
        super(SniperCOCODataSet, self).__init__(
            dataset_dir=dataset_dir,
            image_dir=image_dir,
            anno_path=anno_path,
            data_fields=data_fields,
            sample_num=sample_num,
            load_crowd=load_crowd,
            allow_empty=allow_empty,
            empty_ratio=empty_ratio)
        self.proposals_file = proposals_file
        self.proposals = None
        self.anno_cropper = None
        self.is_trainset = is_trainset
        self.image_target_sizes = image_target_sizes
        self.valid_box_ratio_ranges = valid_box_ratio_ranges
        self.chip_target_size = chip_target_size
        self.chip_target_stride = chip_target_stride
        self.use_neg_chip = use_neg_chip
        self.max_neg_num_per_im = max_neg_num_per_im
        self.max_per_img = max_per_img
        self.nms_thresh = nms_thresh

    def parse_dataset(self):
        """Load the records (if not set yet) and replace them with chip records.

        The un-cropped records are kept as a deep copy in `self.ori_roidbs`.
        """
        if not hasattr(self, "roidbs"):
            super(SniperCOCODataSet, self).parse_dataset()
        if self.is_trainset:
            self._parse_proposals()
            self._merge_anno_proposals()
        self.ori_roidbs = copy.deepcopy(self.roidbs)
        self.init_anno_cropper()
        self.roidbs = self.generate_chips_roidbs(self.roidbs, self.is_trainset)

    def set_proposals_file(self, file_path):
        """Set the proposals JSON file used by the next `parse_dataset` call."""
        self.proposals_file = file_path

    def init_anno_cropper(self):
        """Create `self.anno_cropper` from the stored chip settings."""
        logger.info("Init AnnoCropper...")
        self.anno_cropper = AnnoCropper(
            image_target_sizes=self.image_target_sizes,
            valid_box_ratio_ranges=self.valid_box_ratio_ranges,
            chip_target_size=self.chip_target_size,
            chip_target_stride=self.chip_target_stride,
            use_neg_chip=self.use_neg_chip,
            max_neg_num_per_im=self.max_neg_num_per_im,
            max_per_img=self.max_per_img,
            nms_thresh=self.nms_thresh)

    def generate_chips_roidbs(self, roidbs, is_trainset):
        """Crop `roidbs` into chip records with the train or infer cropper."""
        if is_trainset:
            roidbs = self.anno_cropper.crop_anno_records(roidbs)
        else:
            roidbs = self.anno_cropper.crop_infer_anno_records(roidbs)
        return roidbs

    def _parse_proposals(self):
        """Read `proposals_file` into `self.proposals`.

        The result maps image_id to a list of [x1, y1, x2, y2] boxes,
        converted from the file's [x, y, w, h] boxes.
        """
        if self.proposals_file:
            self.proposals = {}
            logger.info("Parse proposals file:{}".format(self.proposals_file))
            with open(self.proposals_file, 'r') as f:
                proposals = json.load(f)
            for prop in proposals:
                x, y, w, h = prop["bbox"]
                self.proposals.setdefault(prop["image_id"], []).append(
                    [x, y, x + w, y + h])

    def _merge_anno_proposals(self):
        """Attach a float32 "proposals" array to every record, in place."""
        assert self.roidbs
        if self.proposals and len(self.proposals.keys()) > 0:
            logger.info("merge proposals to annos")
            # Records are mutated in place, so no write-back into the list is
            # needed (and the builtin `id` is no longer shadowed).
            for record in self.roidbs:
                image_id = int(record["im_id"])
                if image_id not in self.proposals:
                    logger.info("image id :{} no proposals".format(image_id))
                record["proposals"] = np.array(
                    self.proposals.get(image_id, []), dtype=np.float32)

    def get_ori_roidbs(self):
        """Return the un-cropped records, or None before `parse_dataset` ran."""
        return getattr(self, "ori_roidbs", None)

    def get_roidbs(self):
        """Return the records, parsing the dataset first if none are set."""
        if not hasattr(self, "roidbs"):
            self.parse_dataset()
        return self.roidbs

    def set_roidbs(self, roidbs):
        """Replace the records."""
        self.roidbs = roidbs

    def check_or_download_dataset(self):
        """No-op: nothing is downloaded for this dataset."""
        return

    def _parse(self):
        """Collect image paths from `self.image_dir` (a path or a list of paths)."""
        image_dir = self.image_dir
        # A plain string is itself a Sequence; wrap it so that it is treated
        # as ONE path instead of being iterated character by character.
        if isinstance(image_dir, str) or not isinstance(image_dir, Sequence):
            image_dir = [image_dir]
        images = []
        for im_dir in image_dir:
            if os.path.isdir(im_dir):
                im_dir = os.path.join(self.dataset_dir, im_dir)
                images.extend(_make_dataset(im_dir))
            elif os.path.isfile(im_dir) and _is_valid_file(im_dir):
                images.append(im_dir)
        return images

    def _load_images(self):
        """Build one record (id, path, height, width) per image found by `_parse`.

        Raises:
            ValueError: if an image file exists but cannot be decoded.
        """
        images = self._parse()
        ct = 0
        records = []
        for image in images:
            assert image != '' and os.path.isfile(image), \
                "Image {} not found".format(image)
            if self.sample_num > 0 and ct >= self.sample_num:
                break
            im = cv2.imread(image)
            if im is None:
                # cv2.imread signals an undecodable file by returning None;
                # fail with the file name instead of an opaque AttributeError.
                raise ValueError("Image {} cannot be read".format(image))
            h, w = im.shape[:2]
            rec = {'im_id': np.array([ct]), 'im_file': image, "h": h, "w": w}
            self._imid2path[ct] = image
            ct += 1
            records.append(rec)
        assert len(records) > 0, "No image file found"
        return records

    def get_imid2path(self):
        """Return the mapping from record id to image path."""
        return self._imid2path

    def set_images(self, images):
        """Replace the image source, reset the id table and reload the records."""
        self._imid2path = {}
        self.image_dir = images
        self.roidbs = self._load_images()
@ -0,0 +1,231 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
import os |
||||
import numpy as np |
||||
|
||||
import xml.etree.ElementTree as ET |
||||
|
||||
from paddlers.models.ppdet.core.workspace import register, serializable |
||||
|
||||
from .dataset import DetDataset |
||||
|
||||
from paddlers.models.ppdet.utils.logger import setup_logger |
||||
logger = setup_logger(__name__) |
||||
|
||||
|
||||
@register
@serializable
class VOCDataSet(DetDataset):
    """
    Load dataset with PascalVOC format.

    Notes:
    `anno_path` must contains xml file and image file path for annotations.

    Args:
        dataset_dir (str): root directory for dataset.
        image_dir (str): directory for images.
        anno_path (str): voc annotation file path.
        data_fields (list): key name of data dictionary, at least have 'image'.
        sample_num (int): number of samples to load, -1 means all.
        label_list (str): if use_default_label is False, will load
            mapping between category and class index.
        allow_empty (bool): whether to load empty entry. False as default
        empty_ratio (float): the ratio of empty record number to total
            record's, if empty_ratio is out of [0. ,1.), do not sample the
            records and use all the empty entries. 1. as default
    """

    # NOTE(review): the list default of `data_fields` is shared between calls;
    # it is kept to leave the signature untouched and is not mutated here.
    def __init__(self,
                 dataset_dir=None,
                 image_dir=None,
                 anno_path=None,
                 data_fields=['image'],
                 sample_num=-1,
                 label_list=None,
                 allow_empty=False,
                 empty_ratio=1.):
        super(VOCDataSet, self).__init__(
            dataset_dir=dataset_dir,
            image_dir=image_dir,
            anno_path=anno_path,
            data_fields=data_fields,
            sample_num=sample_num)
        self.label_list = label_list
        self.allow_empty = allow_empty
        self.empty_ratio = empty_ratio

    def _sample_empty(self, records, num):
        """Randomly keep a subset of the empty `records`.

        The subset size is `num * empty_ratio / (1 - empty_ratio)`, capped at
        `len(records)`, where `num` is the number of non-empty records.
        """
        # if empty_ratio is out of [0. ,1.), do not sample the records
        if self.empty_ratio < 0. or self.empty_ratio >= 1.:
            return records
        import random
        sample_num = min(
            int(num * self.empty_ratio / (1 - self.empty_ratio)), len(records))
        records = random.sample(records, sample_num)
        return records

    def parse_dataset(self, ):
        """Read the annotation list and fill `self.roidbs` / `self.cname2cid`.

        Each non-blank line of `anno_path` holds an image path and an xml
        path, both relative to the image directory. Entries whose image or
        xml file is missing, or whose annotated size is negative, are skipped
        with a warning; boxes are clipped to the image and dropped when empty.
        """
        anno_path = os.path.join(self.dataset_dir, self.anno_path)
        image_dir = os.path.join(self.dataset_dir, self.image_dir)

        # mapping category name to class id
        # first_class:0, second_class:1, ...
        records = []
        empty_records = []
        ct = 0
        if self.label_list:
            label_path = os.path.join(self.dataset_dir, self.label_list)
            if not os.path.exists(label_path):
                raise ValueError("label_list {} does not exists".format(
                    label_path))
            with open(label_path, 'r') as fr:
                cname2cid = {
                    line.strip(): label_id
                    for label_id, line in enumerate(fr)
                }
        else:
            cname2cid = pascalvoc_label()

        with open(anno_path, 'r') as fr:
            for line in fr:
                fields = line.strip().split()[:2]
                if not fields:
                    # A blank line (e.g. a trailing newline) carries no entry;
                    # skip it rather than failing on the unpacking below.
                    continue
                img_file, xml_file = [
                    os.path.join(image_dir, x) for x in fields
                ]
                if not os.path.exists(img_file):
                    logger.warning(
                        'Illegal image file: {}, and it will be ignored'.
                        format(img_file))
                    continue
                if not os.path.isfile(xml_file):
                    logger.warning(
                        'Illegal xml file: {}, and it will be ignored'.format(
                            xml_file))
                    continue
                tree = ET.parse(xml_file)
                # Use the <id> of the xml when present, else the running index.
                id_node = tree.find('id')
                if id_node is None:
                    im_id = np.array([ct])
                else:
                    im_id = np.array([int(id_node.text)])

                objs = tree.findall('object')
                size_node = tree.find('size')
                im_w = float(size_node.find('width').text)
                im_h = float(size_node.find('height').text)
                if im_w < 0 or im_h < 0:
                    logger.warning(
                        'Illegal width: {} or height: {} in annotation, '
                        'and {} will be ignored'.format(im_w, im_h, xml_file))
                    continue

                # Arrays are sized for all objects and trimmed to the `i`
                # valid ones after the loop.
                num_bbox, i = len(objs), 0
                gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32)
                gt_class = np.zeros((num_bbox, 1), dtype=np.int32)
                gt_score = np.zeros((num_bbox, 1), dtype=np.float32)
                difficult = np.zeros((num_bbox, 1), dtype=np.int32)
                for obj in objs:
                    cname = obj.find('name').text

                    # user dataset may not contain difficult field
                    _difficult = obj.find('difficult')
                    _difficult = int(
                        _difficult.text) if _difficult is not None else 0

                    bndbox = obj.find('bndbox')
                    x1 = float(bndbox.find('xmin').text)
                    y1 = float(bndbox.find('ymin').text)
                    x2 = float(bndbox.find('xmax').text)
                    y2 = float(bndbox.find('ymax').text)
                    # clip the box to the image
                    x1 = max(0, x1)
                    y1 = max(0, y1)
                    x2 = min(im_w - 1, x2)
                    y2 = min(im_h - 1, y2)
                    if x2 > x1 and y2 > y1:
                        gt_bbox[i, :] = [x1, y1, x2, y2]
                        gt_class[i, 0] = cname2cid[cname]
                        gt_score[i, 0] = 1.
                        difficult[i, 0] = _difficult
                        i += 1
                    else:
                        logger.warning(
                            'Found an invalid bbox in annotations: xml_file: {}'
                            ', x1: {}, y1: {}, x2: {}, y2: {}.'.format(
                                xml_file, x1, y1, x2, y2))
                gt_bbox = gt_bbox[:i, :]
                gt_class = gt_class[:i, :]
                gt_score = gt_score[:i, :]
                difficult = difficult[:i, :]

                voc_rec = {
                    'im_file': img_file,
                    'im_id': im_id,
                    'h': im_h,
                    'w': im_w
                } if 'image' in self.data_fields else {}

                gt_rec = {
                    'gt_class': gt_class,
                    'gt_score': gt_score,
                    'gt_bbox': gt_bbox,
                    'difficult': difficult
                }
                # only export the ground-truth fields that were requested
                for k, v in gt_rec.items():
                    if k in self.data_fields:
                        voc_rec[k] = v

                if len(objs) == 0:
                    empty_records.append(voc_rec)
                else:
                    records.append(voc_rec)

                ct += 1
                if self.sample_num > 0 and ct >= self.sample_num:
                    break
        assert ct > 0, 'not found any voc record in %s' % (self.anno_path)
        logger.debug('{} samples in file {}'.format(ct, anno_path))
        if self.allow_empty and len(empty_records) > 0:
            empty_records = self._sample_empty(empty_records, len(records))
            records += empty_records
        self.roidbs, self.cname2cid = records, cname2cid

    def get_label_list(self):
        """Return the label list path joined onto the dataset directory."""
        return os.path.join(self.dataset_dir, self.label_list)
||||
|
||||
|
||||
def pascalvoc_label():
    """Return the category-name -> class-id mapping of the 20 PascalVOC classes."""
    # The position of a name in this tuple is its class id.
    class_names = (
        'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat',
        'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person',
        'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor', )
    return {name: cls_id for cls_id, name in enumerate(class_names)}
@ -0,0 +1,180 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
import os |
||||
import numpy as np |
||||
|
||||
from paddlers.models.ppdet.core.workspace import register, serializable |
||||
from .dataset import DetDataset |
||||
|
||||
from paddlers.models.ppdet.utils.logger import setup_logger |
||||
logger = setup_logger(__name__) |
||||
|
||||
|
||||
@register
@serializable
class WIDERFaceDataSet(DetDataset):
    """
    Load WiderFace records with 'anno_path'

    Args:
        dataset_dir (str): root directory for dataset.
        image_dir (str): directory for images.
        anno_path (str): WiderFace annotation data.
        data_fields (list): key name of data dictionary, at least have 'image'.
        sample_num (int): number of samples to load, -1 means all.
        with_lmk (bool): whether to load face landmark keypoint labels.
    """

    # NOTE(review): the list default of `data_fields` is shared between calls;
    # it is kept to leave the signature untouched and is not mutated here.
    def __init__(self,
                 dataset_dir=None,
                 image_dir=None,
                 anno_path=None,
                 data_fields=['image'],
                 sample_num=-1,
                 with_lmk=False):
        super(WIDERFaceDataSet, self).__init__(
            dataset_dir=dataset_dir,
            image_dir=image_dir,
            anno_path=anno_path,
            data_fields=data_fields,
            sample_num=sample_num,
            with_lmk=with_lmk)
        self.anno_path = anno_path
        self.sample_num = sample_num
        self.roidbs = None
        self.cname2cid = None
        self.with_lmk = with_lmk

    def parse_dataset(self):
        """Parse the annotation file and fill `self.roidbs` / `self.cname2cid`."""
        anno_path = os.path.join(self.dataset_dir, self.anno_path)
        image_dir = os.path.join(self.dataset_dir, self.image_dir)

        records = []
        ct = 0
        file_lists = self._load_file_list(anno_path)
        cname2cid = widerface_label()

        for item in file_lists:
            # item[0] is the image name, item[1:] are its boxes; every box is
            # [bbox] or, with landmarks, [bbox, landmarks, ignore_flag].
            im_fname = item[0]
            boxes = item[1:]
            im_id = np.array([ct])
            gt_bbox = np.zeros((len(boxes), 4), dtype=np.float32)
            gt_class = np.zeros((len(boxes), 1), dtype=np.int32)
            gt_lmk_labels = np.zeros((len(boxes), 10), dtype=np.float32)
            lmk_ignore_flag = np.zeros((len(boxes), 1), dtype=np.int32)
            for box_idx, box in enumerate(boxes):
                gt_bbox[box_idx] = box[0]
                if self.with_lmk:
                    gt_lmk_labels[box_idx] = box[1]
                    lmk_ignore_flag[box_idx] = box[2]
            im_fname = os.path.join(image_dir,
                                    im_fname) if image_dir else im_fname
            widerface_rec = {
                'im_file': im_fname,
                'im_id': im_id,
            } if 'image' in self.data_fields else {}
            gt_rec = {
                'gt_bbox': gt_bbox,
                'gt_class': gt_class,
            }
            # only export the ground-truth fields that were requested
            for k, v in gt_rec.items():
                if k in self.data_fields:
                    widerface_rec[k] = v
            # NOTE(review): the nesting of this block was reconstructed from a
            # source whose indentation was lost — confirm it is loop-external.
            if self.with_lmk:
                widerface_rec['gt_keypoint'] = gt_lmk_labels
                widerface_rec['keypoint_ignore'] = lmk_ignore_flag

            # Every item holds at least its image name, so it is always kept.
            records.append(widerface_rec)

            ct += 1
            if self.sample_num > 0 and ct >= self.sample_num:
                break
        assert len(records) > 0, 'not found any widerface in %s' % (anno_path)
        logger.debug('{} samples in file {}'.format(ct, anno_path))
        self.roidbs, self.cname2cid = records, cname2cid

    def _load_file_list(self, input_txt):
        """Group the lines of a WiderFace annotation file by image.

        A line without spaces whose name ends in a known image extension
        starts a new image; a line with space-separated fields is a box
        `xmin ymin w h ...` of the current image.

        Returns:
            list: one list per image, `[image_line, box, box, ...]`, where a
            box is `[[xmin, ymin, xmax, ymax]]` plus, when `with_lmk` is set,
            the 10 landmark coordinates and the landmark flag.
        """
        with open(input_txt, 'r') as f_dir:
            lines_input_txt = f_dir.readlines()

        file_dict = {}
        num_class = 0
        exts = ['jpg', 'jpeg', 'png', 'bmp']
        exts += [ext.upper() for ext in exts]
        for i, raw_line in enumerate(lines_input_txt):
            line_txt = raw_line.strip('\n\t\r')
            split_str = line_txt.split(' ')
            if len(split_str) == 1:
                img_file_name = os.path.split(split_str[0])[1]
                split_txt = img_file_name.split('.')
                if len(split_txt) < 2:
                    continue
                elif split_txt[-1] in exts:
                    # the very first line keeps key 0; later images advance it
                    if i != 0:
                        num_class += 1
                    file_dict[num_class] = [line_txt]
            else:
                if len(line_txt) <= 6:
                    continue
                xmin = float(split_str[0])
                ymin = float(split_str[1])
                w = float(split_str[2])
                h = float(split_str[3])
                # Filter out wrong labels
                if w < 0 or h < 0:
                    logger.warning('Illegal box with w: {}, h: {} in '
                                   'img: {}, and it will be ignored'.format(
                                       w, h, file_dict[num_class][0]))
                    continue
                xmin = max(0, xmin)
                ymin = max(0, ymin)
                result_boxs = [[xmin, ymin, xmin + w, ymin + h]]
                if self.with_lmk:
                    # The message pieces are joined with explicit spaces;
                    # the check itself counts space-separated fields.
                    assert len(split_str) > 18, \
                        'When `with_lmk=True`, the number ' \
                        'of characters per line in the annotation file ' \
                        'should exceed 18.'
                    # The five (x, y) pairs sit at fields 5/6, 8/9, 11/12,
                    # 14/15 and 17/18; the field after each pair is skipped.
                    gt_lmk_label = []
                    for field in range(5, 19, 3):
                        gt_lmk_label.append(float(split_str[field]))
                        gt_lmk_label.append(float(split_str[field + 1]))
                    # flag is 0 when the first landmark x is -1, else 1
                    lmk_ignore_flag = 0 if gt_lmk_label[0] == -1 else 1
                    result_boxs.append(gt_lmk_label)
                    result_boxs.append(lmk_ignore_flag)
                file_dict[num_class].append(result_boxs)

        return list(file_dict.values())
||||
|
||||
|
||||
def widerface_label():
    """Return the category-name -> class-id mapping for WiderFace (one class)."""
    return {'face': 0}
@ -0,0 +1,28 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from . import operators |
||||
from . import batch_operators |
||||
from . import keypoint_operators |
||||
from . import mot_operators |
||||
|
||||
from .operators import * |
||||
from .batch_operators import * |
||||
from .keypoint_operators import * |
||||
from .mot_operators import * |
||||
|
||||
__all__ = [] |
||||
__all__ += registered_ops |
||||
__all__ += keypoint_operators.__all__ |
||||
__all__ += mot_operators.__all__ |
@ -0,0 +1,270 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
# The code is based on: |
||||
# https://github.com/open-mmlab/mmdetection/blob/master/mmdet/core/bbox/assigners/atss_assigner.py |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
import numpy as np |
||||
from paddlers.models.ppdet.utils.logger import setup_logger |
||||
logger = setup_logger(__name__) |
||||
|
||||
|
||||
def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False, eps=1e-6):
    """Calculate overlap between two set of bboxes.

    If ``is_aligned`` is ``False``, then calculate the overlaps between each
    bbox of bboxes1 and bboxes2, otherwise the overlaps between each aligned
    pair of bboxes1 and bboxes2.

    Args:
        bboxes1 (np.ndarray): shape (B, m, 4) in <x1, y1, x2, y2> format or empty.
        bboxes2 (np.ndarray): shape (B, n, 4) in <x1, y1, x2, y2> format or empty.
            B indicates the batch dim, in shape (B1, B2, ..., Bn).
            If ``is_aligned`` is ``True``, then m and n must be equal.
        mode (str): "iou" (intersection over union), "iof" (intersection
            over the area of bboxes1) or "giou" (generalized IoU).
        is_aligned (bool, optional): If True, then m and n must be equal.
            Default False.
        eps (float, optional): Lower bound applied to the denominators for
            numerical stability. Default 1e-6.

    Returns:
        np.ndarray: shape (B, m, n) if ``is_aligned`` is False else shape
        (B, m). When either input has no boxes the result has zero elements.
    """
    assert mode in ['iou', 'iof', 'giou'], 'Unsupported mode {}'.format(mode)
    # Either the boxes are empty or the length of the boxes' last dimension is 4
    assert (bboxes1.shape[-1] == 4 or bboxes1.shape[0] == 0)
    assert (bboxes2.shape[-1] == 4 or bboxes2.shape[0] == 0)

    # Batch dim must be the same
    # Batch dim: (B1, B2, ... Bn)
    assert bboxes1.shape[:-2] == bboxes2.shape[:-2]
    batch_shape = bboxes1.shape[:-2]

    rows = bboxes1.shape[-2] if bboxes1.shape[0] > 0 else 0
    cols = bboxes2.shape[-2] if bboxes2.shape[0] > 0 else 0
    if is_aligned:
        assert rows == cols

    if rows * cols == 0:
        # The result has zero elements here, so only its shape matters;
        # build it deterministically instead of through the random generator.
        if is_aligned:
            return np.zeros(batch_shape + (rows, ))
        else:
            return np.zeros(batch_shape + (rows, cols))

    area1 = (bboxes1[..., 2] - bboxes1[..., 0]) * (
        bboxes1[..., 3] - bboxes1[..., 1])
    area2 = (bboxes2[..., 2] - bboxes2[..., 0]) * (
        bboxes2[..., 3] - bboxes2[..., 1])

    if is_aligned:
        lt = np.maximum(bboxes1[..., :2], bboxes2[..., :2])  # [B, rows, 2]
        rb = np.minimum(bboxes1[..., 2:], bboxes2[..., 2:])  # [B, rows, 2]

        wh = (rb - lt).clip(min=0)  # [B, rows, 2]
        overlap = wh[..., 0] * wh[..., 1]

        if mode in ['iou', 'giou']:
            union = area1 + area2 - overlap
        else:
            # "iof": normalise by the area of bboxes1 only
            union = area1
        if mode == 'giou':
            enclosed_lt = np.minimum(bboxes1[..., :2], bboxes2[..., :2])
            enclosed_rb = np.maximum(bboxes1[..., 2:], bboxes2[..., 2:])
    else:
        lt = np.maximum(bboxes1[..., :, None, :2],
                        bboxes2[..., None, :, :2])  # [B, rows, cols, 2]
        rb = np.minimum(bboxes1[..., :, None, 2:],
                        bboxes2[..., None, :, 2:])  # [B, rows, cols, 2]

        wh = (rb - lt).clip(min=0)  # [B, rows, cols, 2]
        overlap = wh[..., 0] * wh[..., 1]

        if mode in ['iou', 'giou']:
            union = area1[..., None] + area2[..., None, :] - overlap
        else:
            # "iof": normalise by the area of bboxes1 only
            union = area1[..., None]
        if mode == 'giou':
            enclosed_lt = np.minimum(bboxes1[..., :, None, :2],
                                     bboxes2[..., None, :, :2])
            enclosed_rb = np.maximum(bboxes1[..., :, None, 2:],
                                     bboxes2[..., None, :, 2:])

    # Kept as a 1-element array (not a scalar) so that the result dtype
    # stays exactly what it was.
    eps = np.array([eps])
    union = np.maximum(union, eps)
    ious = overlap / union
    if mode in ['iou', 'iof']:
        return ious
    # calculate gious: subtract the share of the enclosing box not covered
    # by the union
    enclose_wh = (enclosed_rb - enclosed_lt).clip(min=0)
    enclose_area = enclose_wh[..., 0] * enclose_wh[..., 1]
    enclose_area = np.maximum(enclose_area, eps)
    gious = ious - (enclose_area - union) / enclose_area
    return gious
||||
|
||||
|
||||
def topk_(input, k, axis=1, largest=True):
    """Select the ``k`` largest or smallest entries of a 2-D array along an axis.

    Args:
        input (np.ndarray): 2-D array to select from.
        k (int): Number of entries to keep along ``axis``. Values greater
            than or equal to the axis length keep every entry.
        axis (int): ``0`` selects within each column; any other value
            selects within each row. Default 1.
        largest (bool): Keep the largest entries when True, the smallest
            otherwise. Default True.

    Returns:
        tuple(np.ndarray, np.ndarray): ``(values, indices)``.
            For ``axis == 0`` both have shape ``(k, cols)`` and are ordered
            best-first (descending when ``largest``, ascending otherwise).
            Otherwise both have shape ``(rows, k)`` and are ordered by
            ascending value.
    """
    x = -input if largest else input
    # np.argpartition requires kth < size along the axis. When every entry
    # is requested a full argsort gives an equivalent selection and also
    # copes with an empty axis.
    if k < x.shape[axis]:
        order = np.argpartition(x, k, axis=axis)
    else:
        order = np.argsort(x, axis=axis)

    if axis == 0:
        col_index = np.arange(input.shape[1])
        topk_index = order[0:k, :]
        # Rank on the (possibly negated) key so the best entry comes first,
        # but gather the returned values from the untouched input so they
        # keep their original sign.
        rank = np.argsort(x[topk_index, col_index], axis=0)
        topk_index_sort = topk_index[rank, col_index]
        topk_data_sort = input[topk_index_sort, col_index]
    else:
        row_index = np.arange(x.shape[0])[:, None]
        topk_index = order[:, 0:k]
        topk_data = input[row_index, topk_index]
        rank = np.argsort(topk_data, axis=axis)
        topk_data_sort = topk_data[row_index, rank]
        topk_index_sort = topk_index[row_index, rank]

    return topk_data_sort, topk_index_sort
||||
|
||||
|
||||
class ATSSAssigner(object):
    """Assign a ground-truth box or background to each candidate box.

    Every box is assigned ``0`` or a positive integer:

    - 0: negative sample, no assigned gt
    - positive integer: positive sample, index (1-based) of assigned gt

    Args:
        topk (int): number of boxes selected per pyramid level for each gt.
    """

    def __init__(self, topk=9):
        self.topk = topk

    def __call__(self,
                 bboxes,
                 num_level_bboxes,
                 gt_bboxes,
                 gt_bboxes_ignore=None,
                 gt_labels=None):
        """Assign gt to bboxes.

        The assignment is done in the following steps:

        1. compute iou between all bbox (bbox of all pyramid levels) and gt
        2. compute center distance between all bbox and gt
        3. on each pyramid level, for each gt, select k bbox whose centers
           are closest to the gt center, so k*l candidates per gt in total
        4. take the iou of these candidates and use mean + std as the
           per-gt iou threshold
        5. keep the candidates whose iou is greater than or equal to the
           threshold as positive
        6. keep only positives whose center lies inside the gt

        Args:
            bboxes (np.array): Bounding boxes to be assigned, shape (n, 4).
                Extra trailing columns are ignored.
            num_level_bboxes (List): number of bboxes in each level.
            gt_bboxes (np.array): Groundtruth boxes, shape (k, 4).
            gt_bboxes_ignore (np.array, optional): Accepted for interface
                compatibility; not used by this implementation.
            gt_labels (np.array, optional): Accepted for interface
                compatibility; not used by this implementation.

        Returns:
            tuple(np.array, np.array): ``assigned_gt_inds`` with shape (n, )
            (0 for background, otherwise 1-based gt index) and
            ``max_overlaps`` with shape (n, ) (``-inf`` for boxes that were
            not selected for any gt, all zeros when there are no gts or no
            boxes).
        """
        bboxes = bboxes[:, :4]
        num_gt, num_bboxes = gt_bboxes.shape[0], bboxes.shape[0]

        # assign 0 (background) by default
        assigned_gt_inds = np.zeros((num_bboxes, ), dtype=np.int64)

        if num_gt == 0 or num_bboxes == 0:
            # No ground truth or no boxes: everything stays background.
            return assigned_gt_inds, np.zeros((num_bboxes, ))

        # compute iou between all bbox and gt
        overlaps = bbox_overlaps(bboxes, gt_bboxes)

        # compute center distance between all bbox and gt
        gt_cx = (gt_bboxes[:, 0] + gt_bboxes[:, 2]) / 2.0
        gt_cy = (gt_bboxes[:, 1] + gt_bboxes[:, 3]) / 2.0
        gt_points = np.stack((gt_cx, gt_cy), axis=1)

        bboxes_cx = (bboxes[:, 0] + bboxes[:, 2]) / 2.0
        bboxes_cy = (bboxes[:, 1] + bboxes[:, 3]) / 2.0
        bboxes_points = np.stack((bboxes_cx, bboxes_cy), axis=1)

        distances = np.sqrt(
            np.power((bboxes_points[:, None, :] - gt_points[None, :, :]), 2)
            .sum(-1))

        # Selecting candidates based on the center distance
        candidate_idxs = []
        start_idx = 0
        for bboxes_per_level in num_level_bboxes:
            end_idx = start_idx + bboxes_per_level
            # A level without boxes contributes no candidates.
            if bboxes_per_level > 0:
                selectable_k = min(self.topk, bboxes_per_level)
                _, topk_idxs_per_level = topk_(
                    distances[start_idx:end_idx, :],
                    selectable_k,
                    axis=0,
                    largest=False)
                # Convert level-local row indices to global box indices.
                candidate_idxs.append(topk_idxs_per_level + start_idx)
            start_idx = end_idx
        candidate_idxs = np.concatenate(candidate_idxs, axis=0)

        # Column c of candidate_idxs holds the candidates of gt c.
        gt_cols = np.arange(num_gt)

        # mean + std of the candidate ious is the per-gt threshold
        candidate_overlaps = overlaps[candidate_idxs, gt_cols]
        overlaps_thr_per_gt = (
            candidate_overlaps.mean(0) + candidate_overlaps.std(0))
        is_pos = candidate_overlaps >= overlaps_thr_per_gt[None, :]

        # limit the positive sample's center in gt: distances from the
        # candidate center to the left/top/right/bottom gt sides
        cand_cx = bboxes_cx[candidate_idxs]
        cand_cy = bboxes_cy[candidate_idxs]
        l_ = cand_cx - gt_bboxes[:, 0]
        t_ = cand_cy - gt_bboxes[:, 1]
        r_ = gt_bboxes[:, 2] - cand_cx
        b_ = gt_bboxes[:, 3] - cand_cy
        is_in_gts = np.stack([l_, t_, r_, b_], axis=1).min(axis=1) > 0.01
        is_pos = is_pos & is_in_gts

        # if an anchor box is assigned to multiple gts,
        # the one with the highest IoU will be selected.
        overlaps_inf = np.full_like(overlaps, -np.inf)
        pos_box = candidate_idxs[is_pos]
        pos_gt = np.broadcast_to(gt_cols, candidate_idxs.shape)[is_pos]
        overlaps_inf[pos_box, pos_gt] = overlaps[pos_box, pos_gt]

        max_overlaps = overlaps_inf.max(axis=1)
        argmax_overlaps = overlaps_inf.argmax(axis=1)
        matched = max_overlaps != -np.inf
        assigned_gt_inds[matched] = argmax_overlaps[matched] + 1

        return assigned_gt_inds, max_overlaps
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,86 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
# The code is based on: |
||||
# https://github.com/dvlab-research/GridMask/blob/master/detection_grid/maskrcnn_benchmark/data/transforms/grid.py |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import print_function |
||||
from __future__ import division |
||||
|
||||
import numpy as np |
||||
from PIL import Image |
||||
|
||||
|
||||
class Gridmask(object):
    """GridMask augmentation: blank out a regular grid of stripes in an image.

    Args:
        use_h (bool): zero out horizontal bands (rows) of the mask.
        use_w (bool): zero out vertical bands (columns) of the mask.
        rotate (int): exclusive upper bound of the random integer angle the
            mask is rotated by (passed to ``PIL.Image.rotate``).
        offset (bool): when True, positions where the final mask is 0 are
            filled with uniform noise in [-1, 1) instead of zeros.
        ratio (float): band thickness as a fraction of the grid period.
        mode (int): when 1, the mask is inverted before it is applied.
        prob (float): probability of applying the mask once ``curr_iter``
            has reached ``upper_iter``.
        upper_iter (int): iteration at which the probability stops growing.
    """

    def __init__(self,
                 use_h=True,
                 use_w=True,
                 rotate=1,
                 offset=False,
                 ratio=0.5,
                 mode=1,
                 prob=0.7,
                 upper_iter=360000):
        super(Gridmask, self).__init__()
        self.use_h = use_h
        self.use_w = use_w
        self.rotate = rotate
        self.offset = offset
        self.ratio = ratio
        self.mode = mode
        self.prob = prob
        self.st_prob = prob
        self.upper_iter = upper_iter

    def __call__(self, x, curr_iter):
        """Apply the grid mask to ``x`` with an iteration-dependent probability.

        Args:
            x (np.ndarray): image of shape (h, w, c).
            curr_iter (int): current training iteration; the probability
                grows linearly with it up to ``upper_iter``.

        Returns:
            np.ndarray: ``x`` itself when the augmentation is skipped,
            otherwise a masked array with the shape and dtype of ``x``.
        """
        self.prob = self.st_prob * min(1, 1.0 * curr_iter / self.upper_iter)
        if np.random.rand() > self.prob:
            return x
        h, w, _ = x.shape
        if h < 3:
            # The grid period is drawn from [2, h); there is no valid
            # period for such a small image, so leave it untouched.
            return x
        # Build the mask on a 1.5x canvas so that a centre crop is still
        # fully covered after rotation.
        hh = int(1.5 * h)
        ww = int(1.5 * w)
        d = np.random.randint(2, h)
        self.l = min(max(int(d * self.ratio + 0.5), 1), d - 1)
        mask = np.ones((hh, ww), np.float32)
        st_h = np.random.randint(d)
        st_w = np.random.randint(d)
        if self.use_h:
            for i in range(hh // d):
                s = d * i + st_h
                t = min(s + self.l, hh)
                mask[s:t, :] = 0
        if self.use_w:
            for i in range(ww // d):
                s = d * i + st_w
                t = min(s + self.l, ww)
                mask[:, s:t] = 0

        r = np.random.randint(self.rotate)
        if r != 0:
            # A zero angle leaves the mask unchanged, so the PIL round trip
            # is only needed for an actual rotation.
            mask = np.asarray(Image.fromarray(np.uint8(mask)).rotate(r))
        top = (hh - h) // 2
        left = (ww - w) // 2
        mask = mask[top:top + h, left:left + w].astype(np.float32)

        if self.mode == 1:
            mask = 1 - mask
        mask = np.expand_dims(mask, axis=-1)
        if self.offset:
            offset = (2 * (np.random.rand(h, w) - 0.5)).astype(np.float32)
            # Give the noise a channel axis so it broadcasts against the
            # (h, w, 1) mask and the (h, w, c) image.
            offset = np.expand_dims(offset, axis=-1)
            x = (x * mask + offset * (1 - mask)).astype(x.dtype)
        else:
            x = (x * mask).astype(x.dtype)

        return x
@ -0,0 +1,868 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
# function: |
||||
# operators to process sample, |
||||
# eg: decode/resize/crop image |
||||
|
||||
from __future__ import absolute_import |
||||
|
||||
try: |
||||
from collections.abc import Sequence |
||||
except Exception: |
||||
from collections import Sequence |
||||
|
||||
import cv2 |
||||
import numpy as np |
||||
import math |
||||
import copy |
||||
|
||||
from ...modeling.keypoint_utils import get_affine_mat_kernel, warp_affine_joints, get_affine_transform, affine_transform, get_warp_matrix |
||||
from paddlers.models.ppdet.core.workspace import serializable |
||||
from paddlers.models.ppdet.utils.logger import setup_logger |
||||
# Logger obtained from setup_logger under this module's name.
logger = setup_logger(__name__)

# NOTE(review): not referenced anywhere in the visible part of this file;
# confirm whether it is still needed.
registered_ops = []

# Names exported by a star-import of this module.
__all__ = [
    'RandomAffine',
    'KeyPointFlip',
    'TagGenerate',
    'ToHeatmaps',
    'NormalizePermute',
    'EvalAffine',
    'RandomFlipHalfBodyTransform',
    'TopDownAffine',
    'ToHeatmapsTopDown',
    'ToHeatmapsTopDown_DARK',
    'ToHeatmapsTopDown_UDP',
    'TopDownEvalAffine',
    'AugmentationbyInformantionDropping',
]
||||
|
||||
|
||||
def register_keypointop(cls):
    """Class decorator for keypoint operators.

    Passes ``cls`` through ``serializable`` and returns the result.
    """
    registered = serializable(cls)
    return registered
||||
|
||||
|
||||
@register_keypointop |
||||
class KeyPointFlip(object):
    """Randomly mirror the image, its masks and its keypoints horizontally.

    When the flip is applied, left and right keypoints are exchanged through
    ``flip_permutation`` and the x coordinate is reflected for every heatmap
    scale. Keypoints that end up outside the heatmap get their third column
    set to 0.

    Args:
        flip_permutation (list[17]): the left-right exchange order list
            corresponding to [0,1,2,...,16]
        hmsize (list[2]): heatmap sizes of the different output scales
        flip_prob (float): probability of flipping the sample
        records (dict): holds 'image', 'mask' (one per scale) and 'joints'
            (one array per scale)

    Returns:
        records (dict): the same dict with 'image', 'mask' and 'joints'
            updated

    """

    def __init__(self, flip_permutation, hmsize, flip_prob=0.5):
        super(KeyPointFlip, self).__init__()
        assert isinstance(flip_permutation, Sequence)
        self.flip_permutation = flip_permutation
        self.flip_prob = flip_prob
        self.hmsize = hmsize

    def __call__(self, records):
        image = records['image']
        kpts_lst = records['joints']
        mask_lst = records['mask']
        if np.random.random() < self.flip_prob:
            image = image[:, ::-1]
            for level, size in enumerate(self.hmsize):
                if level < len(mask_lst):
                    mask_lst[level] = mask_lst[level][:, ::-1]
                kpts = kpts_lst[level]
                # (people, joints, 3) arrays are permuted along the joint
                # axis; anything else along the first axis.
                if kpts.ndim == 3:
                    kpts = kpts[:, self.flip_permutation]
                else:
                    kpts = kpts[self.flip_permutation]
                kpts[..., 0] = size - kpts[..., 0]
                kpts = kpts.astype(np.int64)
                xs = kpts[..., 0]
                ys = kpts[..., 1]
                outside = (xs >= size) | (ys >= size) | (xs < 0) | (ys < 0)
                kpts[outside, 2] = 0
                kpts_lst[level] = kpts
        records['image'] = image
        records['joints'] = kpts_lst
        records['mask'] = mask_lst
        return records
||||
|
||||
|
||||
@register_keypointop |
||||
class RandomAffine(object):
    """Apply a random affine transform to image, mask and keypoints.

    Produces a rotated, scaled and shifted training image together with one
    warped mask and keypoint array per heatmap scale.

    Args:
        max_degree (float): the max absolute rotation in degrees; the angle
            is drawn from [-max_degree, max_degree]
        scale (list[2]): the [min, max] range the scale factor is drawn from
        max_shift (float): the max absolute shift ratio; the shift is drawn
            from [-max_shift*roi_size, max_shift*roi_size)
        hmsize (list[2]): heatmap sizes of the different output scales
        trainsize (int): side length of the square output image
        scale_type (str): which image side defines the base scale, chosen
            between 'short' and 'long'
        records (dict): holds 'image', 'mask' and 'joints'

    Returns:
        records (dict): 'image' warped to (trainsize, trainsize); 'joints'
            and 'mask' replaced by lists with one entry per heatmap scale

    """

    def __init__(self,
                 max_degree=30,
                 scale=[0.75, 1.5],
                 max_shift=0.2,
                 hmsize=[128, 256],
                 trainsize=512,
                 scale_type='short'):
        super(RandomAffine, self).__init__()
        self.max_degree = max_degree
        self.min_scale = scale[0]
        self.max_scale = scale[1]
        self.max_shift = max_shift
        self.hmsize = hmsize
        self.trainsize = trainsize
        self.scale_type = scale_type

    def _get_affine_matrix(self, center, scale, res, rot=0):
        """Generate the 3x3 transformation matrix.

        Maps a square region of side ``scale`` centred at ``center`` onto an
        output of resolution ``res`` (indexed as ``res[0]`` for rows and
        ``res[1]`` for columns), optionally rotated by ``rot`` degrees
        around the output centre.
        """
        h = scale
        t = np.zeros((3, 3), dtype=np.float32)
        t[0, 0] = float(res[1]) / h
        t[1, 1] = float(res[0]) / h
        t[0, 2] = res[1] * (-float(center[0]) / h + .5)
        t[1, 2] = res[0] * (-float(center[1]) / h + .5)
        t[2, 2] = 1
        if rot != 0:
            rot = -rot  # To match direction of rotation from cropping
            rot_mat = np.zeros((3, 3), dtype=np.float32)
            rot_rad = rot * np.pi / 180
            sn, cs = np.sin(rot_rad), np.cos(rot_rad)
            rot_mat[0, :2] = [cs, -sn]
            rot_mat[1, :2] = [sn, cs]
            rot_mat[2, 2] = 1
            # Need to rotate around center
            t_mat = np.eye(3)
            t_mat[0, 2] = -res[1] / 2
            t_mat[1, 2] = -res[0] / 2
            t_inv = t_mat.copy()
            t_inv[:2, 2] *= -1
            t = np.dot(t_inv, np.dot(rot_mat, np.dot(t_mat, t)))
        return t

    def __call__(self, records):
        image = records['image']
        keypoints = records['joints']
        heatmap_mask = records['mask']

        degree = (np.random.random() * 2 - 1) * self.max_degree
        shape = np.array(image.shape[:2][::-1])  # (w, h)
        center = np.array(shape / 2)

        aug_scale = np.random.random() * (self.max_scale - self.min_scale
                                          ) + self.min_scale
        if self.scale_type == 'long':
            scale = max(shape[0], shape[1]) / 1.0
        elif self.scale_type == 'short':
            scale = min(shape[0], shape[1]) / 1.0
        else:
            raise ValueError('Unknown scale type: {}'.format(self.scale_type))
        roi_size = aug_scale * scale

        dx = 0
        dy = 0
        # randint needs integer bounds with low < high; a range that
        # truncates to zero simply means no shift.
        max_offset = int(self.max_shift * roi_size)
        if self.max_shift > 0 and max_offset > 0:
            dx = np.random.randint(-max_offset, max_offset)
            dy = np.random.randint(-max_offset, max_offset)
        center += np.array([dx, dy])

        # Work on copies so the caller's arrays are not rescaled in place.
        keypoints = keypoints.copy()
        keypoints[..., :2] *= shape
        if heatmap_mask is not None:
            heatmap_mask = heatmap_mask * 255
        kpts_lst = []
        mask_lst = []

        image_affine_mat = self._get_affine_matrix(
            center, roi_size, (self.trainsize, self.trainsize), degree)[:2]
        image = cv2.warpAffine(
            image,
            image_affine_mat, (self.trainsize, self.trainsize),
            flags=cv2.INTER_LINEAR)
        for hmsize in self.hmsize:
            kpts = keypoints.copy()
            mask_affine_mat = self._get_affine_matrix(
                center, roi_size, (hmsize, hmsize), degree)[:2]
            mask = None
            if heatmap_mask is not None:
                mask = cv2.warpAffine(heatmap_mask, mask_affine_mat,
                                      (hmsize, hmsize))
                mask = ((mask / 255) > 0.5).astype(np.float32)
            kpts[..., 0:2] = warp_affine_joints(kpts[..., 0:2].copy(),
                                                mask_affine_mat)
            # Keypoints warped outside the heatmap get column 2 cleared.
            kpts[np.trunc(kpts[..., 0]) >= hmsize, 2] = 0
            kpts[np.trunc(kpts[..., 1]) >= hmsize, 2] = 0
            kpts[np.trunc(kpts[..., 0]) < 0, 2] = 0
            kpts[np.trunc(kpts[..., 1]) < 0, 2] = 0
            kpts_lst.append(kpts)
            mask_lst.append(mask)
        records['image'] = image
        records['joints'] = kpts_lst
        records['mask'] = mask_lst
        return records
||||
|
||||
|
||||
@register_keypointop |
||||
class EvalAffine(object):
    """Resize the image (and mask, if present) with an affine warp for eval.

    The transform and the output size both come from
    ``get_affine_mat_kernel(h, w, size, inv=False)``.

    Args:
        size (int): target size handed to ``get_affine_mat_kernel``
        stride (int): stored on the instance; not used by ``__call__``
        records (dict): holds 'image' and optionally 'mask' and 'joints'

    Returns:
        records (dict): 'image' (and 'mask' when given) warped; 'joints'
            removed when it was present

    """

    def __init__(self, size, stride=64):
        super(EvalAffine, self).__init__()
        self.size = size
        self.stride = stride

    def __call__(self, records):
        image = records['image']
        mask = records['mask'] if 'mask' in records else None
        height, width, _ = image.shape
        trans, size_resized = get_affine_mat_kernel(
            height, width, self.size, inv=False)
        warped_image = cv2.warpAffine(image, trans, size_resized)
        if mask is not None:
            records['mask'] = cv2.warpAffine(mask, trans, size_resized)
        if 'joints' in records:
            del records['joints']
        records['image'] = warped_image
        return records
||||
|
||||
|
||||
@register_keypointop |
||||
class NormalizePermute(object):
    """Convert an HWC image to float32 CHW and normalize it per channel.

    Args:
        mean (list): per-channel mean subtracted from the image
        std (list): per-channel standard deviation the image is divided by
        is_scale (bool): divide the image by 255 before normalizing
    """

    def __init__(self,
                 mean=[123.675, 116.28, 103.53],
                 std=[58.395, 57.120, 57.375],
                 is_scale=True):
        super(NormalizePermute, self).__init__()
        self.mean = mean
        self.std = std
        self.is_scale = is_scale

    def __call__(self, records):
        chw = records['image'].astype(np.float32)
        if self.is_scale:
            chw /= 255.
        chw = chw.transpose((2, 0, 1))
        mean = np.array(self.mean, dtype=np.float32)
        inv_std = 1. / np.array(self.std, dtype=np.float32)
        # Only as many channels as all three sequences share are touched.
        count = min(len(chw), len(mean), len(inv_std))
        chw[:count] -= mean[:count, None, None]
        chw[:count] *= inv_std[:count, None, None]
        records['image'] = chw
        return records
||||
|
||||
|
||||
@register_keypointop |
||||
class TagGenerate(object):
    """Record ground-truth keypoint coordinates as a tag map.

    Args:
        num_joints (int): number of keypoints per person
        max_people (int): number of people slots in the tag map
        records (dict): holds 'joints', a list whose first entry is an array
            indexed as [person, joint, (x, y, visibility)]

    Returns:
        records (dict): 'tagmap' added and 'joints' removed

    """

    def __init__(self, num_joints, max_people=30):
        super(TagGenerate, self).__init__()
        self.max_people = max_people
        self.num_joints = num_joints

    def __call__(self, records):
        kpts = records['joints'][0]
        tagmap = np.zeros(
            (self.max_people, self.num_joints, 4), dtype=np.int64)
        hits = np.nonzero(kpts[..., 2] > 0)
        person_idx, joint_idx = hits[0], hits[1]
        visible = kpts[hits]
        # tagmap is [p, j, 4], where the last dim is (joint id, y, x, flag)
        tagmap[person_idx, joint_idx, 3] = 1
        tagmap[person_idx, joint_idx, 2] = visible[..., 0]  # x
        tagmap[person_idx, joint_idx, 1] = visible[..., 1]  # y
        tagmap[person_idx, joint_idx, 0] = joint_idx
        records['tagmap'] = tagmap
        del records['joints']
        return records
||||
|
||||
|
||||
@register_keypointop |
||||
class ToHeatmaps(object):
    """Generate gaussian keypoint heatmaps for the heatmap loss.

    Args:
        num_joints (int): number of keypoints per person
        hmsize (list[2]): heatmap sizes of the different output scales
        sigma (float): std of the gaussian kernel. Defaults to
            ``hmsize[0] // 64`` but never less than 1. Integral values are
            expected (e.g. ``2`` or ``2.0``).
        records (dict): holds 'joints' and 'mask', one entry per scale

    Returns:
        records (dict): 'heatmap_gt{i}x' and 'mask_{i}x' added for each
            scale i (1-based); 'mask' removed

    """

    def __init__(self, num_joints, hmsize, sigma=None):
        super(ToHeatmaps, self).__init__()
        self.num_joints = num_joints
        self.hmsize = np.array(hmsize)
        if sigma is None:
            # A zero sigma would divide by zero below and yield NaNs.
            sigma = max(hmsize[0] // 64, 1)
        self.sigma = sigma

        # Precompute one gaussian patch covering +-3 sigma around its peak.
        r = 6 * sigma + 3
        x = np.arange(0, r, 1, np.float32)
        y = x[:, None]
        x0, y0 = 3 * sigma + 1, 3 * sigma + 1
        self.gaussian = np.exp(-((x - x0)**2 + (y - y0)**2) / (2 * sigma**2))

    def __call__(self, records):
        kpts_lst = records['joints']
        mask_lst = records['mask']
        for idx, hmsize in enumerate(self.hmsize):
            mask = mask_lst[idx]
            kpts = kpts_lst[idx]
            heatmaps = np.zeros((self.num_joints, hmsize, hmsize))
            inds = np.where(kpts[..., 2] > 0)
            # The joint id is the last index axis, which also works for
            # keypoint arrays without a leading person axis.
            joint_ids = inds[-1]
            visible = kpts[inds].astype(np.int64)[..., :2]
            # Patch corners in heatmap coordinates. Cast to int so they can
            # be used as slice bounds even when sigma is a float.
            ul = np.round(visible - 3 * self.sigma - 1).astype(np.int64)
            br = np.round(visible + 3 * self.sigma + 2).astype(np.int64)
            # Source window inside the gaussian patch ...
            sul = np.maximum(0, -ul)
            sbr = np.minimum(hmsize, br) - ul
            # ... and destination window inside the heatmap.
            dul = np.clip(ul, 0, hmsize - 1)
            dbr = np.clip(br, 0, hmsize)
            for i, (vx, vy) in enumerate(visible):
                if vx < 0 or vy < 0 or vx >= hmsize or vy >= hmsize:
                    continue
                dx1, dy1 = dul[i]
                dx2, dy2 = dbr[i]
                sx1, sy1 = sul[i]
                sx2, sy2 = sbr[i]
                # Overlapping gaussians keep the element-wise maximum.
                heatmaps[joint_ids[i], dy1:dy2, dx1:dx2] = np.maximum(
                    self.gaussian[sy1:sy2, sx1:sx2],
                    heatmaps[joint_ids[i], dy1:dy2, dx1:dx2])
            records['heatmap_gt{}x'.format(idx + 1)] = heatmaps
            records['mask_{}x'.format(idx + 1)] = mask
        del records['mask']
        return records
||||
|
||||
|
||||
@register_keypointop |
||||
class RandomFlipHalfBodyTransform(object):
    """Random flip, scale, rotation and half-body augmentation parameters.

    The image and joints are flipped here; scale and rotation are only
    sampled and written to ``records`` for a later operator to apply.

    Args:
        trainsize (list): [w, h], image target size
        upper_body_ids (list): the upper body joint ids
        flip_pairs (list): the left-right joint pairs exchanged on flip
        pixel_std (int): the pixel std of the scale
        scale (float): the scale jitter factor
        rot (int): the rotation jitter factor
        num_joints_half_body (int): visible-joint count that must be
            exceeded before the half body transform is considered
        prob_half_body (float): probability of the half body transform
        flip (bool): whether random flipping is enabled
        rot_prob (float): probability of sampling a non-zero rotation

    Returns:
        records (dict): 'image', 'joints', 'joints_vis', 'center', 'scale'
            updated and 'rotate' added

    """

    def __init__(self,
                 trainsize,
                 upper_body_ids,
                 flip_pairs,
                 pixel_std,
                 scale=0.35,
                 rot=40,
                 num_joints_half_body=8,
                 prob_half_body=0.3,
                 flip=True,
                 rot_prob=0.6):
        super(RandomFlipHalfBodyTransform, self).__init__()
        self.trainsize = trainsize
        self.upper_body_ids = upper_body_ids
        self.flip_pairs = flip_pairs
        self.pixel_std = pixel_std
        self.scale = scale
        self.rot = rot
        self.num_joints_half_body = num_joints_half_body
        self.prob_half_body = prob_half_body
        self.flip = flip
        self.aspect_ratio = trainsize[0] * 1.0 / trainsize[1]
        self.rot_prob = rot_prob

    def halfbody_transform(self, joints, joints_vis):
        """Return (center, scale) of a box around the upper or lower body.

        Returns (None, None) when fewer than two joints are selected.
        """
        upper = []
        lower = []
        for joint_id in range(joints.shape[0]):
            if not joints_vis[joint_id][0] > 0:
                continue
            if joint_id in self.upper_body_ids:
                upper.append(joints[joint_id])
            else:
                lower.append(joints[joint_id])

        if np.random.randn() < 0.5 and len(upper) > 2:
            chosen = upper
        elif len(lower) > 2:
            chosen = lower
        else:
            chosen = upper
        if len(chosen) < 2:
            return None, None

        chosen = np.array(chosen, dtype=np.float32)
        center = chosen.mean(axis=0)[:2]
        left_top = np.amin(chosen, axis=0)
        right_bottom = np.amax(chosen, axis=0)
        w = right_bottom[0] - left_top[0]
        h = right_bottom[1] - left_top[1]
        # Grow the shorter side so the box matches the training aspect ratio.
        if w > self.aspect_ratio * h:
            h = w * 1.0 / self.aspect_ratio
        elif w < self.aspect_ratio * h:
            w = h * self.aspect_ratio
        scale = np.array(
            [w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std],
            dtype=np.float32)
        return center, scale * 1.5

    def flip_joints(self, joints, joints_vis, width, matched_parts):
        """Mirror joint x coordinates and swap each left-right pair in place."""
        joints[:, 0] = width - joints[:, 0] - 1
        for pair in matched_parts:
            first, second = pair[0], pair[1]
            # Fancy indexing on the right-hand side copies both rows before
            # either is overwritten.
            joints[[first, second]] = joints[[second, first]]
            joints_vis[[first, second]] = joints_vis[[second, first]]
        return joints * joints_vis, joints_vis

    def __call__(self, records):
        image = records['image']
        joints = records['joints']
        joints_vis = records['joints_vis']
        c = records['center']
        s = records['scale']

        if np.sum(joints_vis[:, 0]) > self.num_joints_half_body:
            if np.random.rand() < self.prob_half_body:
                c_half_body, s_half_body = self.halfbody_transform(
                    joints, joints_vis)
                if c_half_body is not None and s_half_body is not None:
                    c, s = c_half_body, s_half_body

        sf = self.scale
        rf = self.rot
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        if np.random.random() <= self.rot_prob:
            r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)
        else:
            r = 0

        if self.flip and np.random.random() <= 0.5:
            image = image[:, ::-1, :]
            joints, joints_vis = self.flip_joints(
                joints, joints_vis, image.shape[1], self.flip_pairs)
            c[0] = image.shape[1] - c[0] - 1

        records['image'] = image
        records['joints'] = joints
        records['joints_vis'] = joints_vis
        records['center'] = c
        records['scale'] = s
        records['rotate'] = r
        return records
||||
|
||||
|
||||
@register_keypointop |
||||
class AugmentationbyInformantionDropping(object):
    """AID: Augmentation by Informantion Dropping. Please refer
        to https://arxiv.org/abs/2008.07139

    Args:
        trainsize (list): training image size; only ``trainsize[0]`` is
            used, to scale the patch offset and radius
        prob_cutout (float): The probability of the Cutout augmentation.
        offset_factor (float): Offset factor of cutout center.
        num_patch (int): Number of patches to be cutout.
        records (dict): holds 'image', 'joints' and 'joints_vis'

    Returns:
        records (dict): the same dict with 'image' updated

    """

    def __init__(self,
                 trainsize,
                 prob_cutout=0.0,
                 offset_factor=0.2,
                 num_patch=1):
        self.prob_cutout = prob_cutout
        self.offset_factor = offset_factor
        self.num_patch = num_patch
        self.trainsize = trainsize

    def _cutout(self, img, joints, joints_vis):
        """Zero out ``num_patch`` circular patches near random visible joints.

        Returns ``img`` unchanged when no joint is marked visible.
        """
        height, width, _ = img.shape
        # Row indices of joints with any positive visibility entry. This
        # does not change between patches, so compute it once.
        vis_idx, _ = np.where(joints_vis > 0)
        if vis_idx.size == 0:
            # Nothing to centre a patch on.
            return img
        img = img.reshape((height * width, -1))
        feat_x_int = np.arange(0, width)
        feat_y_int = np.arange(0, height)
        feat_x_int, feat_y_int = np.meshgrid(feat_x_int, feat_y_int)
        feat_x_int = feat_x_int.reshape((-1, ))
        feat_y_int = feat_y_int.reshape((-1, ))
        for _ in range(self.num_patch):
            occlusion_joint_id = np.random.choice(vis_idx)
            center = joints[occlusion_joint_id, 0:2]
            offset = np.random.randn(2) * self.trainsize[
                0] * self.offset_factor
            center = center + offset
            radius = np.random.uniform(0.1, 0.2) * self.trainsize[0]
            x_offset = (center[0] - feat_x_int) / radius
            y_offset = (center[1] - feat_y_int) / radius
            dis = x_offset**2 + y_offset**2
            # Pixels inside the unit circle (in radius units) are dropped.
            drop_pos = np.where((dis <= 1) & (dis >= 0))[0]
            img[drop_pos, :] = 0
        img = img.reshape((height, width, -1))
        return img

    def __call__(self, records):
        img = records['image']
        joints = records['joints']
        joints_vis = records['joints_vis']
        if np.random.rand() < self.prob_cutout:
            img = self._cutout(img, joints, joints_vis)
        records['image'] = img
        return records
||||
|
||||
|
||||
@register_keypointop |
||||
class TopDownAffine(object):
    """Warp the image and its joints to the training size.

    Args:
        trainsize (list): [w, h], the standard size used to train
        use_udp (bool): whether to use Unbiased Data Processing.
        records (dict): holds 'image', 'joints', 'joints_vis', 'center',
            'scale' and optionally 'rotate'

    Returns:
        records (dict): 'image' and 'joints' replaced by their warped
            versions

    """

    def __init__(self, trainsize, use_udp=False):
        self.trainsize = trainsize
        self.use_udp = use_udp

    def __call__(self, records):
        image = records['image']
        joints = records['joints']
        joints_vis = records['joints_vis']
        rot = records['rotate'] if "rotate" in records else 0
        if self.use_udp:
            trans = get_warp_matrix(
                rot, records['center'] * 2.0,
                [self.trainsize[0] - 1.0, self.trainsize[1] - 1.0],
                records['scale'] * 200.0)
        else:
            trans = get_affine_transform(records['center'], records['scale'] *
                                         200, rot, self.trainsize)

        out_size = (int(self.trainsize[0]), int(self.trainsize[1]))
        image = cv2.warpAffine(image, trans, out_size, flags=cv2.INTER_LINEAR)

        if self.use_udp:
            # UDP warps every joint at once.
            joints[:, 0:2] = warp_affine_joints(joints[:, 0:2].copy(), trans)
        else:
            # Otherwise only joints with a positive visibility are moved.
            for i in range(joints.shape[0]):
                if not joints_vis[i, 0] > 0.0:
                    continue
                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

        records['image'] = image
        records['joints'] = joints
        return records
||||
|
||||
|
||||
@register_keypointop
class TopDownEvalAffine(object):
    """Warp the whole image to the training resolution for evaluation.

    Args:
        trainsize (list): [w, h], the standard size used to train
        use_udp (bool): whether to use Unbiased Data Processing.

    Calling the instance with ``records`` (a dict holding 'image' and
    'im_shape') returns the same dict with the warped 'image'. No rotation
    is applied; centre and scale are derived from the reversed 'im_shape'.
    """

    def __init__(self, trainsize, use_udp=False):
        self.trainsize = trainsize
        self.use_udp = use_udp

    def __call__(self, records):
        image = records['image']
        rot = 0
        # 'im_shape' is reversed before use -- presumably (h, w) -> (w, h).
        imshape = records['im_shape'][::-1]
        center = imshape / 2.
        scale = imshape

        if self.use_udp:
            trans = get_warp_matrix(
                rot, center * 2.0,
                [self.trainsize[0] - 1.0, self.trainsize[1] - 1.0], scale)
        else:
            trans = get_affine_transform(center, scale, rot, self.trainsize)

        # The warp itself is identical for both matrix flavours.
        out_size = (int(self.trainsize[0]), int(self.trainsize[1]))
        records['image'] = cv2.warpAffine(
            image, trans, out_size, flags=cv2.INTER_LINEAR)

        return records
||||
|
||||
|
||||
@register_keypointop
class ToHeatmapsTopDown(object):
    """Generate Gaussian keypoint heatmaps for a heatmap loss.

    Args:
        hmsize (list): [w, h] output heatmap's size
        sigma (float): the std of the generated Gaussian kernel

    Calling the instance with ``records`` (a dict holding 'image', 'joints'
    and 'joints_vis') stores 'target' and 'target_weight' in it, deletes
    'joints' and 'joints_vis', and returns the same dict.
    """

    def __init__(self, hmsize, sigma):
        super(ToHeatmapsTopDown, self).__init__()
        self.hmsize = np.array(hmsize)
        self.sigma = sigma

    def __call__(self, records):
        """refer to
        https://github.com/leoxiaobin/deep-high-resolution-net.pytorch
        Copyright (c) Microsoft, under the MIT License.
        """
        joints = records['joints']
        joints_vis = records['joints_vis']
        num_joints = joints.shape[0]
        img = records['image']
        image_size = np.array([img.shape[1], img.shape[0]])  # [w, h]

        target_weight = np.ones((num_joints, 1), dtype=np.float32)
        target_weight[:, 0] = joints_vis[:, 0]

        hm_w, hm_h = self.hmsize[0], self.hmsize[1]
        target = np.zeros((num_joints, hm_h, hm_w), dtype=np.float32)
        tmp_size = self.sigma * 3
        feat_stride = image_size / self.hmsize

        for joint_id in range(num_joints):
            mu_x = int(joints[joint_id][0] + 0.5) / feat_stride[0]
            mu_y = int(joints[joint_id][1] + 0.5) / feat_stride[1]
            # Bounding box of the Gaussian patch in heatmap coordinates.
            left, top = int(mu_x - tmp_size), int(mu_y - tmp_size)
            right = int(mu_x + tmp_size + 1)
            bottom = int(mu_y + tmp_size + 1)
            if left >= hm_w or top >= hm_h or right < 0 or bottom < 0:
                # The patch lies entirely outside the heatmap: the joint
                # gets zero weight and its heatmap stays all-zero.
                target_weight[joint_id] = 0
                continue

            # Generate gaussian
            size = 2 * tmp_size + 1
            x = np.arange(0, size, 1, np.float32)
            y = x[:, np.newaxis]
            x0 = y0 = size // 2
            # The gaussian is not normalized, we want the center value to
            # equal 1
            g = np.exp(-((x - x0)**2 + (y - y0)**2) / (2 * self.sigma**2))

            # Usable gaussian range
            g_cols = slice(max(0, -left), min(right, hm_w) - left)
            g_rows = slice(max(0, -top), min(bottom, hm_h) - top)
            # Image range
            hm_cols = slice(max(0, left), min(right, hm_w))
            hm_rows = slice(max(0, top), min(bottom, hm_h))

            if target_weight[joint_id] > 0.5:
                target[joint_id][hm_rows, hm_cols] = g[g_rows, g_cols]

        records['target'] = target
        records['target_weight'] = target_weight
        del records['joints'], records['joints_vis']

        return records
||||
|
||||
|
||||
@register_keypointop
class ToHeatmapsTopDown_DARK(object):
    """Generate full-resolution Gaussian keypoint heatmaps for a heatmap loss.

    Unlike a patch-based generator, each heatmap is evaluated over the whole
    grid and centred on the un-rounded keypoint position.

    Args:
        hmsize (list): [w, h] output heatmap's size
        sigma (float): the std of the generated Gaussian kernel

    Calling the instance with ``records`` (a dict holding 'image', 'joints'
    and 'joints_vis') stores 'target' and 'target_weight' in it, deletes
    'joints' and 'joints_vis', and returns the same dict.
    """

    def __init__(self, hmsize, sigma):
        super(ToHeatmapsTopDown_DARK, self).__init__()
        self.hmsize = np.array(hmsize)
        self.sigma = sigma

    def __call__(self, records):
        joints = records['joints']
        joints_vis = records['joints_vis']
        num_joints = joints.shape[0]
        img = records['image']
        image_size = np.array([img.shape[1], img.shape[0]])  # [w, h]

        target_weight = np.ones((num_joints, 1), dtype=np.float32)
        target_weight[:, 0] = joints_vis[:, 0]

        hm_w, hm_h = self.hmsize[0], self.hmsize[1]
        target = np.zeros((num_joints, hm_h, hm_w), dtype=np.float32)
        tmp_size = self.sigma * 3
        feat_stride = image_size / self.hmsize

        # The coordinate grids do not depend on the joint, so build them once.
        grid_x = np.arange(0, hm_w, 1, np.float32)
        grid_y = np.arange(0, hm_h, 1, np.float32)[:, np.newaxis]
        denom = 2 * self.sigma**2

        for joint_id in range(num_joints):
            mu_x = joints[joint_id][0] / feat_stride[0]
            mu_y = joints[joint_id][1] / feat_stride[1]
            # Check that any part of the gaussian is in-bounds
            left, top = int(mu_x - tmp_size), int(mu_y - tmp_size)
            right = int(mu_x + tmp_size + 1)
            bottom = int(mu_y + tmp_size + 1)
            if left >= hm_w or top >= hm_h or right < 0 or bottom < 0:
                # Entirely outside: zero weight, heatmap left all-zero.
                target_weight[joint_id] = 0
                continue

            if target_weight[joint_id] > 0.5:
                target[joint_id] = np.exp(-(
                    (grid_x - mu_x)**2 + (grid_y - mu_y)**2) / denom)

        records['target'] = target
        records['target_weight'] = target_weight
        del records['joints'], records['joints_vis']

        return records
||||
|
||||
|
||||
@register_keypointop
class ToHeatmapsTopDown_UDP(object):
    """This code is based on:
        https://github.com/HuangJunJie2017/UDP-Pose/blob/master/deep-high-resolution-net.pytorch/lib/dataset/JointsDataset.py

    Generate the Gaussian keypoint heatmaps used by a heatmap loss.
    ref: Huang et al. The Devil is in the Details: Delving into Unbiased
    Data Processing for Human Pose Estimation (CVPR 2020).

    Args:
        hmsize (list): [w, h] output heatmap's size
        sigma (float): the std of the generated Gaussian kernel

    Calling the instance with ``records`` (a dict holding 'image', 'joints'
    and 'joints_vis') stores 'target' and 'target_weight' in it, deletes
    'joints' and 'joints_vis', and returns the same dict.
    """

    def __init__(self, hmsize, sigma):
        super(ToHeatmapsTopDown_UDP, self).__init__()
        self.hmsize = np.array(hmsize)
        self.sigma = sigma

    def __call__(self, records):
        joints = records['joints']
        joints_vis = records['joints_vis']
        num_joints = joints.shape[0]
        img = records['image']
        image_size = np.array([img.shape[1], img.shape[0]])  # [w, h]

        target_weight = np.ones((num_joints, 1), dtype=np.float32)
        target_weight[:, 0] = joints_vis[:, 0]

        hm_w, hm_h = self.hmsize[0], self.hmsize[1]
        target = np.zeros((num_joints, hm_h, hm_w), dtype=np.float32)
        tmp_size = self.sigma * 3
        size = 2 * tmp_size + 1
        x = np.arange(0, size, 1, np.float32)
        y = x[:, None]
        # The stride is measured between the first and last pixel centres.
        feat_stride = (image_size - 1.0) / (self.hmsize - 1.0)

        for joint_id in range(num_joints):
            # Exact (sub-pixel) and rounded positions on the heatmap grid.
            exact_x = joints[joint_id][0] / feat_stride[0]
            exact_y = joints[joint_id][1] / feat_stride[1]
            mu_x = int(exact_x + 0.5)
            mu_y = int(exact_y + 0.5)
            # Check that any part of the gaussian is in-bounds
            left, top = int(mu_x - tmp_size), int(mu_y - tmp_size)
            right = int(mu_x + tmp_size + 1)
            bottom = int(mu_y + tmp_size + 1)
            if left >= hm_w or top >= hm_h or right < 0 or bottom < 0:
                # Entirely outside: zero weight, heatmap left all-zero.
                target_weight[joint_id] = 0
                continue

            # Shift the kernel centre by the rounding residual so the peak
            # sits at the exact keypoint position.
            x0 = y0 = size // 2
            x0 += exact_x - mu_x
            y0 += exact_y - mu_y
            g = np.exp(-((x - x0)**2 + (y - y0)**2) / (2 * self.sigma**2))

            # Usable gaussian range
            g_cols = slice(max(0, -left), min(right, hm_w) - left)
            g_rows = slice(max(0, -top), min(bottom, hm_h) - top)
            # Image range
            hm_cols = slice(max(0, left), min(right, hm_w))
            hm_rows = slice(max(0, top), min(bottom, hm_h))

            if target_weight[joint_id] > 0.5:
                target[joint_id][hm_rows, hm_cols] = g[g_rows, g_cols]

        records['target'] = target
        records['target_weight'] = target_weight
        del records['joints'], records['joints_vis']

        return records
@ -0,0 +1,628 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
try: |
||||
from collections.abc import Sequence |
||||
except Exception: |
||||
from collections import Sequence |
||||
from numbers import Integral |
||||
|
||||
import cv2 |
||||
import copy |
||||
import numpy as np |
||||
import random |
||||
import math |
||||
|
||||
from .operators import BaseOperator, register_op |
||||
from .batch_operators import Gt2TTFTarget |
||||
from paddlers.models.ppdet.modeling.bbox_utils import bbox_iou_np_expand |
||||
from paddlers.models.ppdet.utils.logger import setup_logger |
||||
from .op_helper import gaussian_radius |
||||
logger = setup_logger(__name__) |
||||
|
||||
__all__ = [ |
||||
'RGBReverse', 'LetterBoxResize', 'MOTRandomAffine', 'Gt2JDETargetThres', |
||||
'Gt2JDETargetMax', 'Gt2FairMOTTarget' |
||||
] |
||||
|
||||
|
||||
@register_op
class RGBReverse(BaseOperator):
    """Reverse the channel order of the image (RGB to BGR, or BGR to RGB).

    Sensitive to MOTRandomAffine.
    """

    def __init__(self):
        super(RGBReverse, self).__init__()

    def apply(self, sample, context=None):
        # Reverse the last axis, then copy so the result is contiguous in
        # memory rather than a negatively-strided view.
        reversed_view = sample['image'][:, :, ::-1]
        sample['image'] = np.ascontiguousarray(reversed_view)
        return sample
||||
|
||||
|
||||
@register_op
class LetterBoxResize(BaseOperator):
    def __init__(self, target_size):
        """
        Resize image to target size, convert normalized xywh to pixel xyxy
        format ([x_center, y_center, width, height] -> [x0, y0, x1, y1]).
        Args:
            target_size (int|list): image target size; a single integer is
                expanded to a square [size, size].
        """
        super(LetterBoxResize, self).__init__()
        if not isinstance(target_size, (Integral, Sequence)):
            raise TypeError(
                "Type of target_size is invalid. Must be Integer or List or Tuple, now is {}".
                format(type(target_size)))
        self.target_size = ([target_size, target_size] if isinstance(
            target_size, Integral) else target_size)

    def apply_image(self, img, height, width, color=(127.5, 127.5, 127.5)):
        """Resize ``img`` keeping its aspect ratio, then pad to the target.

        Returns:
            tuple: (padded image, resize ratio, horizontal pad, vertical pad),
                where the pads are the un-rounded per-side amounts.
        """
        src_h, src_w = img.shape[:2]
        # The smaller ratio guarantees the resized image fits in the target.
        ratio = min(float(height) / src_h, float(width) / src_w)
        resized_wh = (round(src_w * ratio), round(src_h * ratio))
        padw = (width - resized_wh[0]) / 2
        padh = (height - resized_wh[1]) / 2
        # The -/+0.1 nudges split an odd padding total between the two sides.
        top = round(padh - 0.1)
        bottom = round(padh + 0.1)
        left = round(padw - 0.1)
        right = round(padw + 0.1)

        resized = cv2.resize(img, resized_wh, interpolation=cv2.INTER_AREA)
        padded = cv2.copyMakeBorder(
            resized, top, bottom, left, right, cv2.BORDER_CONSTANT,
            value=color)
        return padded, ratio, padw, padh

    def apply_bbox(self, bbox0, h, w, ratio, padw, padh):
        """Convert centre/size boxes to corner boxes in the padded image."""
        scale_x = ratio * w
        scale_y = ratio * h
        half_w = bbox0[:, 2] / 2
        half_h = bbox0[:, 3] / 2
        bboxes = bbox0.copy()
        bboxes[:, 0] = scale_x * (bbox0[:, 0] - half_w) + padw
        bboxes[:, 1] = scale_y * (bbox0[:, 1] - half_h) + padh
        bboxes[:, 2] = scale_x * (bbox0[:, 0] + half_w) + padw
        bboxes[:, 3] = scale_y * (bbox0[:, 1] + half_h) + padh
        return bboxes

    def apply(self, sample, context=None):
        """ Resize the image numpy.
        """
        im = sample['image']
        h, w = sample['im_shape']
        if not isinstance(im, np.ndarray):
            raise TypeError("{}: image type is not numpy.".format(self))
        if len(im.shape) != 3:
            from PIL import UnidentifiedImageError
            raise UnidentifiedImageError(
                '{}: image is not 3-dimensional.'.format(self))

        # apply image
        height, width = self.target_size
        letterboxed, ratio, padw, padh = self.apply_image(
            im, height=height, width=width)
        sample['image'] = letterboxed
        sample['im_shape'] = np.asarray(
            (round(h * ratio), round(w * ratio)), dtype=np.float32)
        sample['scale_factor'] = np.asarray([ratio, ratio], dtype=np.float32)

        # apply bbox
        has_boxes = 'gt_bbox' in sample and len(sample['gt_bbox']) > 0
        if has_boxes:
            sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'], h, w, ratio,
                                                padw, padh)
        return sample
||||
|
||||
|
||||
@register_op
class MOTRandomAffine(BaseOperator):
    """
    Affine transform to image and coords to achieve the rotate, scale and
    shift effect for training image.

    Args:
        degrees (list[2]): the rotate range to apply, transform range is [min, max]
        translate (list[2]): the translate range to apply, transform range is [min, max]
        scale (list[2]): the scale range to apply, transform range is [min, max]
        shear (list[2]): the shear range to apply, transform range is [min, max]
        borderValue (list[3]): value used in case of a constant border when applying
            the perspective transformation
        reject_outside (bool): reject warped bounding bboxes outside of image

    Returns:
        records(dict): contain the image and coords after transformed

    """

    def __init__(self,
                 degrees=(-5, 5),
                 translate=(0.10, 0.10),
                 scale=(0.50, 1.20),
                 shear=(-2, 2),
                 borderValue=(127.5, 127.5, 127.5),
                 reject_outside=True):
        super(MOTRandomAffine, self).__init__()
        self.degrees = degrees
        self.translate = translate
        self.scale = scale
        self.shear = shear
        self.borderValue = borderValue
        self.reject_outside = reject_outside

    def apply(self, sample, context=None):
        """Warp the image and its boxes with one random affine transform.

        The warped image and boxes are written back only when at least one
        warped box passes the size / area / aspect-ratio filter. In every
        other case (no boxes in the sample, or none survive) the sample is
        returned unchanged, so image and annotations always stay consistent.

        Returns:
            dict: the (possibly updated) ``sample``; never ``None``.
        """
        # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4
        border = 0  # width of added border (optional)

        img = sample['image']
        height, width = img.shape[0], img.shape[1]

        # Rotation and Scale
        R = np.eye(3)
        a = random.random() * (self.degrees[1] - self.degrees[0]
                               ) + self.degrees[0]
        s = random.random() * (self.scale[1] - self.scale[0]) + self.scale[0]
        R[:2] = cv2.getRotationMatrix2D(
            angle=a, center=(width / 2, height / 2), scale=s)

        # Translation
        # NOTE(review): x is scaled by the image height and y by the width.
        # This looks swapped but is kept as-is to leave the augmentation
        # distribution unchanged -- confirm before altering.
        T = np.eye(3)
        T[0, 2] = (
            random.random() * 2 - 1
        ) * self.translate[0] * height + border  # x translation (pixels)
        T[1, 2] = (
            random.random() * 2 - 1
        ) * self.translate[1] * width + border  # y translation (pixels)

        # Shear
        S = np.eye(3)
        S[0, 1] = math.tan((random.random() *
                            (self.shear[1] - self.shear[0]) + self.shear[0]) *
                           math.pi / 180)  # x shear (deg)
        S[1, 0] = math.tan((random.random() *
                            (self.shear[1] - self.shear[0]) + self.shear[0]) *
                           math.pi / 180)  # y shear (deg)

        M = S @ T @ R  # Combined rotation matrix. ORDER IS IMPORTANT HERE!!
        imw = cv2.warpPerspective(
            img,
            M,
            dsize=(width, height),
            flags=cv2.INTER_LINEAR,
            borderValue=self.borderValue)  # BGR order borderValue

        if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
            targets = sample['gt_bbox']
            n = targets.shape[0]
            points = targets.copy()
            area0 = (points[:, 2] - points[:, 0]) * (
                points[:, 3] - points[:, 1])

            # warp points: all four corners of every box
            xy = np.ones((n * 4, 3))
            xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(
                n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
            xy = (xy @ M.T)[:, :2].reshape(n, 8)

            # create new boxes: axis-aligned hull of the warped corners
            x = xy[:, [0, 2, 4, 6]]
            y = xy[:, [1, 3, 5, 7]]
            xy = np.concatenate(
                (x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T

            # apply angle-based reduction
            radians = a * math.pi / 180
            reduction = max(abs(math.sin(radians)),
                            abs(math.cos(radians)))**0.5
            x = (xy[:, 2] + xy[:, 0]) / 2
            y = (xy[:, 3] + xy[:, 1]) / 2
            w = (xy[:, 2] - xy[:, 0]) * reduction
            h = (xy[:, 3] - xy[:, 1]) * reduction
            xy = np.concatenate(
                (x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T

            # reject warped points outside of image
            if self.reject_outside:
                np.clip(xy[:, 0], 0, width, out=xy[:, 0])
                np.clip(xy[:, 2], 0, width, out=xy[:, 2])
                np.clip(xy[:, 1], 0, height, out=xy[:, 1])
                np.clip(xy[:, 3], 0, height, out=xy[:, 3])
            w = xy[:, 2] - xy[:, 0]
            h = xy[:, 3] - xy[:, 1]
            area = w * h
            ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16))
            # Keep boxes that are still reasonably large, retain more than
            # 10% of their original area and are not extremely elongated.
            keep = (w > 4) & (h > 4) & (area / (area0 + 1e-16) > 0.1) & (
                ar < 10)

            if keep.any():
                sample['gt_bbox'] = xy[keep].astype(sample['gt_bbox'].dtype)
                sample['gt_class'] = sample['gt_class'][keep]
                if 'difficult' in sample:
                    sample['difficult'] = sample['difficult'][keep]
                if 'gt_ide' in sample:
                    sample['gt_ide'] = sample['gt_ide'][keep]
                if 'is_crowd' in sample:
                    sample['is_crowd'] = sample['is_crowd'][keep]
                sample['image'] = imw

        # Always hand the sample back; previously a sample without boxes
        # fell off the end of the method and produced None.
        return sample
||||
|
||||
|
||||
@register_op
class Gt2JDETargetThres(BaseOperator):
    """
    Generate JDE targets by ground truth data when training
    Args:
        anchors (list): anchors of JDE model
        anchor_masks (list): anchor_masks of JDE model
        downsample_ratios (list): downsample ratios of JDE model
        ide_thresh (float): thresh of identity, higher is ground truth
        fg_thresh (float): thresh of foreground, higher is foreground
        bg_thresh (float): thresh of background, lower is background
        num_classes (int): number of classes
    """
    # The docstring must be the first statement of the class body to be
    # picked up as __doc__, so __shared__ comes after it.
    __shared__ = ['num_classes']

    def __init__(self,
                 anchors,
                 anchor_masks,
                 downsample_ratios,
                 ide_thresh=0.5,
                 fg_thresh=0.5,
                 bg_thresh=0.4,
                 num_classes=1):
        super(Gt2JDETargetThres, self).__init__()
        self.anchors = anchors
        self.anchor_masks = anchor_masks
        self.downsample_ratios = downsample_ratios
        self.ide_thresh = ide_thresh
        self.fg_thresh = fg_thresh
        self.bg_thresh = bg_thresh
        self.num_classes = num_classes

    def generate_anchor(self, nGh, nGw, anchor_hw):
        """Build the dense anchor tensor of shape [nA, 4, nGh, nGw].

        Channels 0-1 hold the grid-cell coordinates and channels 2-3 repeat
        the two per-anchor size values of ``anchor_hw`` over the grid.
        """
        nA = len(anchor_hw)
        yy, xx = np.meshgrid(np.arange(nGh), np.arange(nGw))

        mesh = np.stack([xx.T, yy.T], axis=0)  # [2, nGh, nGw]
        mesh = np.repeat(mesh[None, :], nA, axis=0)  # [nA, 2, nGh, nGw]

        anchor_offset_mesh = anchor_hw[:, :, None][:, :, :, None]
        anchor_offset_mesh = np.repeat(anchor_offset_mesh, nGh, axis=-2)
        anchor_offset_mesh = np.repeat(anchor_offset_mesh, nGw, axis=-1)

        anchor_mesh = np.concatenate(
            [mesh, anchor_offset_mesh], axis=1)  # [nA, 4, nGh, nGw]
        return anchor_mesh

    def encode_delta(self, gt_box_list, fg_anchor_list):
        """Encode ground-truth boxes as regression deltas w.r.t. anchors.

        Both inputs are indexed as [x, y, w, h] columns; centre offsets are
        normalised by the anchor size and sizes are log-ratios.
        """
        px, py, pw, ph = fg_anchor_list[:, 0], fg_anchor_list[:, 1], \
                         fg_anchor_list[:, 2], fg_anchor_list[:, 3]
        gx, gy, gw, gh = gt_box_list[:, 0], gt_box_list[:, 1], \
                         gt_box_list[:, 2], gt_box_list[:, 3]
        dx = (gx - px) / pw
        dy = (gy - py) / ph
        dw = np.log(gw / pw)
        dh = np.log(gh / ph)
        return np.stack([dx, dy, dw, dh], axis=1)

    def pad_box(self, sample, num_max):
        """Zero-pad the per-box annotations of ``sample`` to ``num_max`` rows.

        'gt_bbox' is required; 'gt_score', 'difficult', 'is_crowd' and
        'gt_ide' are padded (and flattened from column 0) when present.
        The sample is modified in place and returned.
        """
        assert 'gt_bbox' in sample
        bbox = sample['gt_bbox']
        gt_num = len(bbox)
        pad_bbox = np.zeros((num_max, 4), dtype=np.float32)
        if gt_num > 0:
            pad_bbox[:gt_num, :] = bbox[:gt_num, :]
        sample['gt_bbox'] = pad_bbox

        # Optional per-box fields and the dtype each one is padded with.
        optional_fields = (('gt_score', np.float32), ('difficult', np.int32),
                           ('is_crowd', np.int32), ('gt_ide', np.int32))
        for key, dtype in optional_fields:
            if key not in sample:
                continue
            padded = np.zeros((num_max, ), dtype=dtype)
            if gt_num > 0:
                padded[:gt_num] = sample[key][:gt_num, 0]
            sample[key] = padded
        return sample

    def __call__(self, samples, context=None):
        """Attach 'tbox{i}', 'tconf{i}' and 'tide{i}' targets to each sample.

        One target triple is produced per (anchors, downsample ratio) level.
        Afterwards 'gt_class' is removed and the remaining annotations are
        padded to the largest box count in the batch.

        Returns:
            list: the same ``samples`` list, modified in place.
        """
        assert len(self.anchor_masks) == len(self.downsample_ratios), \
            "anchor_masks', and 'downsample_ratios' should have same length."
        # NOTE(review): axes 1 and 2 are read as height and width, i.e. the
        # image is presumably already CHW here -- confirm against the
        # pipeline order.
        h, w = samples[0]['image'].shape[1:3]

        num_max = 0
        for sample in samples:
            num_max = max(num_max, len(sample['gt_bbox']))

        for sample in samples:
            gt_bbox = sample['gt_bbox']
            gt_ide = sample['gt_ide']
            for i, (anchor_hw, downsample_ratio
                    ) in enumerate(zip(self.anchors, self.downsample_ratios)):
                anchor_hw = np.array(
                    anchor_hw, dtype=np.float32) / downsample_ratio
                nA = len(anchor_hw)
                nGh, nGw = int(h / downsample_ratio), int(w / downsample_ratio)
                tbox = np.zeros((nA, nGh, nGw, 4), dtype=np.float32)
                tconf = np.zeros((nA, nGh, nGw), dtype=np.float32)
                tid = -np.ones((nA, nGh, nGw, 1), dtype=np.float32)

                # Scale the boxes to grid units and keep centres on the grid.
                gxy, gwh = gt_bbox[:, 0:2].copy(), gt_bbox[:, 2:4].copy()
                gxy[:, 0] = gxy[:, 0] * nGw
                gxy[:, 1] = gxy[:, 1] * nGh
                gwh[:, 0] = gwh[:, 0] * nGw
                gwh[:, 1] = gwh[:, 1] * nGh
                gxy[:, 0] = np.clip(gxy[:, 0], 0, nGw - 1)
                gxy[:, 1] = np.clip(gxy[:, 1], 0, nGh - 1)
                tboxes = np.concatenate([gxy, gwh], axis=1)

                anchor_mesh = self.generate_anchor(nGh, nGw, anchor_hw)

                anchor_list = np.transpose(anchor_mesh,
                                           (0, 2, 3, 1)).reshape(-1, 4)
                iou_pdist = bbox_iou_np_expand(
                    anchor_list, tboxes, x1y1x2y2=False)

                # Best-matching ground-truth box for every anchor.
                iou_max = np.max(iou_pdist, axis=1)
                max_gt_index = np.argmax(iou_pdist, axis=1)

                iou_map = iou_max.reshape(nA, nGh, nGw)
                gt_index_map = max_gt_index.reshape(nA, nGh, nGw)

                id_index = iou_map > self.ide_thresh
                fg_index = iou_map > self.fg_thresh
                bg_index = iou_map < self.bg_thresh
                ign_index = (iou_map < self.fg_thresh) * (
                    iou_map > self.bg_thresh)
                # 1 = foreground, 0 = background, -1 = ignored in between.
                tconf[fg_index] = 1
                tconf[bg_index] = 0
                tconf[ign_index] = -1

                gt_index = gt_index_map[fg_index]
                gt_box_list = tboxes[gt_index]
                gt_id_list = gt_ide[gt_index_map[id_index]]

                if np.sum(fg_index) > 0:
                    tid[id_index] = gt_id_list

                    fg_anchor_list = anchor_list.reshape(nA, nGh, nGw,
                                                         4)[fg_index]
                    delta_target = self.encode_delta(gt_box_list,
                                                     fg_anchor_list)
                    tbox[fg_index] = delta_target

                sample['tbox{}'.format(i)] = tbox
                sample['tconf{}'.format(i)] = tconf
                sample['tide{}'.format(i)] = tid
            sample.pop('gt_class')
            sample = self.pad_box(sample, num_max)
        return samples
||||
|
||||
|
||||
@register_op
class Gt2JDETargetMax(BaseOperator):
    """
    Generate JDE targets by ground truth data when evaluating
    Args:
        anchors (list): anchors of JDE model
        anchor_masks (list): anchor_masks of JDE model
        downsample_ratios (list): downsample ratios of JDE model
        max_iou_thresh (float): iou thresh for high quality anchor
        num_classes (int): number of classes
    """
    # The docstring must be the first statement of the class body to be
    # picked up as __doc__, so __shared__ comes after it.
    __shared__ = ['num_classes']

    def __init__(self,
                 anchors,
                 anchor_masks,
                 downsample_ratios,
                 max_iou_thresh=0.60,
                 num_classes=1):
        super(Gt2JDETargetMax, self).__init__()
        self.anchors = anchors
        self.anchor_masks = anchor_masks
        self.downsample_ratios = downsample_ratios
        self.max_iou_thresh = max_iou_thresh
        self.num_classes = num_classes

    def __call__(self, samples, context=None):
        """Attach 'tbox{i}', 'tconf{i}' and 'tide{i}' targets to each sample.

        For every (anchors, downsample ratio) level each ground-truth box is
        matched to its best anchor by width/height IoU; matches above
        ``max_iou_thresh`` are written into the target tensors.

        Returns:
            list: the same ``samples`` list, modified in place.
        """
        assert len(self.anchor_masks) == len(self.downsample_ratios), \
            "anchor_masks', and 'downsample_ratios' should have same length."
        # NOTE(review): axes 1 and 2 are read as height and width, i.e. the
        # image is presumably already CHW here -- confirm against the
        # pipeline order.
        h, w = samples[0]['image'].shape[1:3]
        for sample in samples:
            gt_bbox = sample['gt_bbox']
            gt_ide = sample['gt_ide']
            for i, (anchor_hw, downsample_ratio
                    ) in enumerate(zip(self.anchors, self.downsample_ratios)):
                anchor_hw = np.array(
                    anchor_hw, dtype=np.float32) / downsample_ratio
                nA = len(anchor_hw)
                nGh, nGw = int(h / downsample_ratio), int(w / downsample_ratio)
                tbox = np.zeros((nA, nGh, nGw, 4), dtype=np.float32)
                tconf = np.zeros((nA, nGh, nGw), dtype=np.float32)
                tid = -np.ones((nA, nGh, nGw, 1), dtype=np.float32)

                # Scale the boxes to grid units; gi/gj are the grid cells
                # containing each (clipped) box centre.
                gxy, gwh = gt_bbox[:, 0:2].copy(), gt_bbox[:, 2:4].copy()
                gxy[:, 0] = gxy[:, 0] * nGw
                gxy[:, 1] = gxy[:, 1] * nGh
                gwh[:, 0] = gwh[:, 0] * nGw
                gwh[:, 1] = gwh[:, 1] * nGh
                gi = np.clip(gxy[:, 0], 0, nGw - 1).astype(int)
                gj = np.clip(gxy[:, 1], 0, nGh - 1).astype(int)

                # iou of targets-anchors (using wh only)
                box1 = gwh
                box2 = anchor_hw[:, None, :]
                inter_area = np.minimum(box1, box2).prod(2)
                iou = inter_area / (
                    box1.prod(1) + box2.prod(2) - inter_area + 1e-16)

                # Select best iou_pred and anchor
                iou_best = iou.max(0)  # best anchor [0-2] for each target
                a = np.argmax(iou, axis=0)

                # Select best unique target-anchor combinations
                iou_order = np.argsort(-iou_best)  # best to worst

                # Unique anchor selection
                u = np.stack((gi, gj, a), 0)[:, iou_order]
                _, first_unique = np.unique(u, axis=1, return_index=True)
                mask = iou_order[first_unique]
                # best anchor must share significant commonality (iou) with target
                # TODO: examine arbitrary threshold
                idx = mask[iou_best[mask] > self.max_iou_thresh]

                if len(idx) > 0:
                    a_i, gj_i, gi_i = a[idx], gj[idx], gi[idx]
                    t_box = gt_bbox[idx]
                    t_id = gt_ide[idx]
                    if len(t_box.shape) == 1:
                        t_box = t_box.reshape(1, 4)

                    gxy, gwh = t_box[:, 0:2].copy(), t_box[:, 2:4].copy()
                    gxy[:, 0] = gxy[:, 0] * nGw
                    gxy[:, 1] = gxy[:, 1] * nGh
                    gwh[:, 0] = gwh[:, 0] * nGw
                    gwh[:, 1] = gwh[:, 1] * nGh

                    # XY coordinates: offset of the centre inside its cell
                    tbox[:, :, :, 0:2][a_i, gj_i, gi_i] = gxy - gxy.astype(int)
                    # Width and height in yolo method
                    tbox[:, :, :, 2:4][a_i, gj_i, gi_i] = np.log(
                        gwh / anchor_hw[a_i])
                    tconf[a_i, gj_i, gi_i] = 1
                    tid[a_i, gj_i, gi_i] = t_id

                sample['tbox{}'.format(i)] = tbox
                sample['tconf{}'.format(i)] = tconf
                sample['tide{}'.format(i)] = tid
        # Return the batch like the training-time operator does; without
        # this the operator yielded None to its caller.
        return samples
||||
|
||||
|
||||
class Gt2FairMOTTarget(Gt2TTFTarget):
    """
    Generate FairMOT targets by ground truth data.
    Difference between Gt2FairMOTTarget and Gt2TTFTarget are:
        1. the gaussian kernel radius to generate a heatmap.
        2. the targets needed during training.

    Args:
        num_classes(int): the number of classes.
        down_ratio(int): the down ratio from images to heatmap, 4 by default.
        max_objs(int): the maximum number of ground truth objects in a image, 500 by default.
    """
    # The docstring must be the first statement of the class body to be
    # picked up as __doc__, so __shared__ comes after it.
    __shared__ = ['num_classes']

    def __init__(self, num_classes=1, down_ratio=4, max_objs=500):
        # NOTE(review): super() is anchored at Gt2TTFTarget, so the parent's
        # own __init__ is skipped and the next class in the MRO is
        # initialised instead -- presumably intentional; confirm.
        super(Gt2TTFTarget, self).__init__()
        self.down_ratio = down_ratio
        self.num_classes = num_classes
        self.max_objs = max_objs

    def __call__(self, samples, context=None):
        """Replace the ground-truth annotations of each sample by FairMOT targets.

        Adds 'heatmap', 'index', 'offset', 'size', 'index_mask', 'reid' and
        'bbox_xys' (plus 'cls_id_map' and 'cls_tr_ids' when
        ``num_classes > 1``) and removes the raw 'gt_*', 'is_crowd' and
        'difficult' entries.

        Returns:
            list: the same ``samples`` list, modified in place.
        """
        for b_id, sample in enumerate(samples):
            # NOTE(review): axes 1 and 2 are read as height and width, i.e.
            # the image is presumably already CHW here -- confirm.
            output_h = sample['image'].shape[1] // self.down_ratio
            output_w = sample['image'].shape[2] // self.down_ratio

            heatmap = np.zeros(
                (self.num_classes, output_h, output_w), dtype='float32')
            bbox_size = np.zeros((self.max_objs, 4), dtype=np.float32)
            center_offset = np.zeros((self.max_objs, 2), dtype=np.float32)
            index = np.zeros((self.max_objs, ), dtype=np.int64)
            index_mask = np.zeros((self.max_objs, ), dtype=np.int32)
            reid = np.zeros((self.max_objs, ), dtype=np.int64)
            bbox_xys = np.zeros((self.max_objs, 4), dtype=np.float32)
            if self.num_classes > 1:
                # each category corresponds to a set of track ids
                cls_tr_ids = np.zeros(
                    (self.num_classes, output_h, output_w), dtype=np.int64)
                cls_id_map = np.full((output_h, output_w), -1, dtype=np.int64)

            gt_bbox = sample['gt_bbox']
            gt_class = sample['gt_class']
            gt_ide = sample['gt_ide']

            for k in range(len(gt_bbox)):
                cls_id = gt_class[k][0]
                # Indexing one row returns a view when gt_bbox is a numpy
                # array, so the scaling and clipping below then also modify
                # sample['gt_bbox']; that entry is popped at the end.
                bbox = gt_bbox[k]
                ide = gt_ide[k][0]
                bbox[[0, 2]] = bbox[[0, 2]] * output_w
                bbox[[1, 3]] = bbox[[1, 3]] * output_h
                # Corner-format box computed from the un-clipped centre.
                bbox_amodal = copy.deepcopy(bbox)
                bbox_amodal[0] = bbox_amodal[0] - bbox_amodal[2] / 2.
                bbox_amodal[1] = bbox_amodal[1] - bbox_amodal[3] / 2.
                bbox_amodal[2] = bbox_amodal[0] + bbox_amodal[2]
                bbox_amodal[3] = bbox_amodal[1] + bbox_amodal[3]
                bbox[0] = np.clip(bbox[0], 0, output_w - 1)
                bbox[1] = np.clip(bbox[1], 0, output_h - 1)
                h = bbox[3]
                w = bbox[2]

                # Corner-format box computed from the clipped centre.
                bbox_xy = copy.deepcopy(bbox)
                bbox_xy[0] = bbox_xy[0] - bbox_xy[2] / 2
                bbox_xy[1] = bbox_xy[1] - bbox_xy[3] / 2
                bbox_xy[2] = bbox_xy[0] + bbox_xy[2]
                bbox_xy[3] = bbox_xy[1] + bbox_xy[3]

                if h > 0 and w > 0:
                    radius = gaussian_radius((math.ceil(h), math.ceil(w)), 0.7)
                    radius = max(0, int(radius))
                    ct = np.array([bbox[0], bbox[1]], dtype=np.float32)
                    ct_int = ct.astype(np.int32)
                    self.draw_truncate_gaussian(heatmap[cls_id], ct_int,
                                                radius, radius)
                    # Distances from the centre to the four amodal box edges.
                    bbox_size[k] = ct[0] - bbox_amodal[0], ct[1] - bbox_amodal[1], \
                                   bbox_amodal[2] - ct[0], bbox_amodal[3] - ct[1]

                    # Flattened (row-major) position of the centre cell.
                    index[k] = ct_int[1] * output_w + ct_int[0]
                    center_offset[k] = ct - ct_int
                    index_mask[k] = 1
                    reid[k] = ide
                    bbox_xys[k] = bbox_xy
                    if self.num_classes > 1:
                        cls_id_map[ct_int[1], ct_int[0]] = cls_id
                        cls_tr_ids[cls_id][ct_int[1]][ct_int[0]] = ide - 1
                        # track id start from 0

            sample['heatmap'] = heatmap
            sample['index'] = index
            sample['offset'] = center_offset
            sample['size'] = bbox_size
            sample['index_mask'] = index_mask
            sample['reid'] = reid
            if self.num_classes > 1:
                sample['cls_id_map'] = cls_id_map
                sample['cls_tr_ids'] = cls_tr_ids
            sample['bbox_xys'] = bbox_xys
            sample.pop('is_crowd', None)
            sample.pop('difficult', None)
            sample.pop('gt_class', None)
            sample.pop('gt_bbox', None)
            sample.pop('gt_score', None)
            sample.pop('gt_ide', None)
        return samples
@ -0,0 +1,498 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
# this file contains helper methods for BBOX processing |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
import numpy as np |
||||
import random |
||||
import math |
||||
import cv2 |
||||
|
||||
|
||||
def meet_emit_constraint(src_bbox, sample_bbox):
    """Tell whether the center of ``src_bbox`` falls inside ``sample_bbox``.

    Both boxes are read as ``[xmin, ymin, xmax, ymax]``. The borders of
    ``sample_bbox`` are inclusive.

    Returns:
        bool: True if the center point lies within the sample box.
    """
    mid_x = (src_bbox[2] + src_bbox[0]) / 2
    mid_y = (src_bbox[3] + src_bbox[1]) / 2
    return bool(sample_bbox[0] <= mid_x <= sample_bbox[2] and
                sample_bbox[1] <= mid_y <= sample_bbox[3])
||||
|
||||
|
||||
def clip_bbox(src_bbox):
    """Clamp the first four entries of ``src_bbox`` to ``[0.0, 1.0]`` in place.

    Returns:
        The same ``src_bbox`` object, after mutation.
    """
    for slot in range(4):
        src_bbox[slot] = max(min(src_bbox[slot], 1.0), 0.0)
    return src_bbox
||||
|
||||
|
||||
def bbox_area(src_bbox):
    """Return the area of an ``[xmin, ymin, xmax, ymax]`` box.

    A box whose max coordinate is smaller than its min coordinate on either
    axis is treated as empty and yields ``0.``.
    """
    inverted = src_bbox[2] < src_bbox[0] or src_bbox[3] < src_bbox[1]
    if inverted:
        return 0.
    return (src_bbox[2] - src_bbox[0]) * (src_bbox[3] - src_bbox[1])
||||
|
||||
|
||||
def is_overlap(object_bbox, sample_bbox):
    """Return True when the two ``[xmin, ymin, xmax, ymax]`` boxes overlap.

    Boxes that merely touch along an edge or corner do not count as
    overlapping.
    """
    apart = (object_bbox[0] >= sample_bbox[2] or
             object_bbox[2] <= sample_bbox[0] or
             object_bbox[1] >= sample_bbox[3] or
             object_bbox[3] <= sample_bbox[1])
    return not apart
||||
|
||||
|
||||
def filter_and_process(sample_bbox,
                       bboxes,
                       labels,
                       scores=None,
                       keypoints=None):
    """Keep the boxes that belong to a sampled crop and re-express them in it.

    A box is kept when its center lies inside ``sample_bbox``, it overlaps
    the crop, and its area is still positive after being converted to crop
    coordinates and clipped to ``[0, 1]``.

    Args:
        sample_bbox: crop window as ``[xmin, ymin, xmax, ymax]``.
        bboxes: sequence of boxes; only the first four entries are used.
        labels: per-box labels; ``labels[i][0]`` is copied for kept boxes.
        scores: optional per-box scores; ``scores[i][0]`` is copied.
        keypoints: optional pair ``(points, ignore_flags)``. ``points[i]`` is
            a flat sequence whose even slots are rescaled with the crop width
            and odd slots with the crop height. It is modified in place.

    Returns:
        ``(bboxes, labels, scores)`` as numpy arrays, plus a fourth element
        ``(keypoints, kp_ignore)`` when ``keypoints`` was given.
    """
    left, top = sample_bbox[0], sample_bbox[1]
    crop_w = sample_bbox[2] - left
    crop_h = sample_bbox[3] - top

    kept_boxes, kept_labels, kept_scores = [], [], []
    kept_kps, kept_kp_ignore = [], []
    for idx in range(len(bboxes)):
        gt_box = [bboxes[idx][0], bboxes[idx][1], bboxes[idx][2],
                  bboxes[idx][3]]
        if not meet_emit_constraint(gt_box, sample_bbox):
            continue
        if not is_overlap(gt_box, sample_bbox):
            continue

        # Express the box relative to the crop, then clamp it to the crop.
        rel_box = clip_bbox([
            (gt_box[0] - left) / crop_w,
            (gt_box[1] - top) / crop_h,
            (gt_box[2] - left) / crop_w,
            (gt_box[3] - top) / crop_h,
        ])
        if not bbox_area(rel_box) > 0:
            continue

        kept_boxes.append(rel_box)
        kept_labels.append([labels[idx][0]])
        if scores is not None:
            kept_scores.append([scores[idx][0]])
        if keypoints is not None:
            points = keypoints[0][idx]
            for slot in range(len(points)):
                # Odd slots use the vertical origin/extent, even slots the
                # horizontal one.
                if slot % 2:
                    origin, extent = top, crop_h
                else:
                    origin, extent = left, crop_w
                points[slot] = (points[slot] - origin) / extent
                points[slot] = max(min(points[slot], 1.0), 0.0)
            kept_kps.append(points)
            kept_kp_ignore.append(keypoints[1][idx])

    bboxes = np.array(kept_boxes)
    labels = np.array(kept_labels)
    scores = np.array(kept_scores)
    if keypoints is None:
        return bboxes, labels, scores
    return bboxes, labels, scores, (np.array(kept_kps),
                                    np.array(kept_kp_ignore))
||||
|
||||
|
||||
def bbox_area_sampling(bboxes, labels, scores, target_size, min_size):
    """Drop boxes whose area at ``target_size`` is below ``min_size ** 2``.

    Each box's width and height (``bbox[2] - bbox[0]``, ``bbox[3] - bbox[1]``)
    are scaled by ``target_size`` before the area comparison.

    Returns:
        ``(bboxes, labels, scores)`` as numpy arrays holding only the
        surviving entries. ``scores`` is empty when the input scores were
        ``None`` or had size 0.
    """
    kept_boxes, kept_labels, kept_scores = [], [], []
    for idx, box in enumerate(bboxes):
        scaled_w = float((box[2] - box[0]) * target_size)
        scaled_h = float((box[3] - box[1]) * target_size)
        if scaled_w * scaled_h < float(min_size * min_size):
            continue
        kept_boxes.append(box)
        kept_labels.append(labels[idx])
        if scores is not None and scores.size != 0:
            kept_scores.append(scores[idx])
    return np.array(kept_boxes), np.array(kept_labels), np.array(kept_scores)
||||
|
||||
|
||||
def generate_sample_bbox(sampler):
    """Draw a random crop window in normalized ``[0, 1]`` coordinates.

    ``sampler[2:4]`` is the (min, max) range for the scale and
    ``sampler[4:6]`` the (min, max) range for the aspect ratio. The ratio is
    clamped to ``[scale**2, 1 / scale**2]`` before the box sides are derived.

    Returns:
        list: ``[xmin, ymin, xmax, ymax]``.
    """
    scale = np.random.uniform(sampler[2], sampler[3])
    ratio = np.random.uniform(sampler[4], sampler[5])
    ratio = max(ratio, (scale**2.0))
    ratio = min(ratio, 1 / (scale**2.0))

    box_w = scale * (ratio**0.5)
    box_h = scale / (ratio**0.5)

    # Pick the top-left corner so that the window stays within [0, 1].
    xmin = np.random.uniform(0, 1 - box_w)
    ymin = np.random.uniform(0, 1 - box_h)
    return [xmin, ymin, xmin + box_w, ymin + box_h]
||||
|
||||
|
||||
def generate_sample_bbox_square(sampler, image_width, image_height):
    """Draw a random normalized crop window that is square in pixel units.

    Scale and aspect ratio are sampled as in ``generate_sample_bbox``
    (``sampler[2:4]`` and ``sampler[4:6]``). Afterwards one side is
    recomputed from the other using the image's width/height ratio, so that
    ``width * image_width == height * image_height``.

    Returns:
        list: ``[xmin, ymin, xmax, ymax]`` in normalized coordinates.
    """
    scale = np.random.uniform(sampler[2], sampler[3])
    ratio = np.random.uniform(sampler[4], sampler[5])
    ratio = max(ratio, (scale**2.0))
    ratio = min(ratio, 1 / (scale**2.0))

    box_w = scale * (ratio**0.5)
    box_h = scale / (ratio**0.5)
    if image_height < image_width:
        box_w = box_h * image_height / image_width
    else:
        box_h = box_w * image_width / image_height

    xmin = np.random.uniform(0, 1 - box_w)
    ymin = np.random.uniform(0, 1 - box_h)
    return [xmin, ymin, xmin + box_w, ymin + box_h]
||||
|
||||
|
||||
def data_anchor_sampling(bbox_labels, image_width, image_height, scale_array,
                         resize_width):
    """Sample a square crop window around one randomly chosen ground-truth box.

    Args:
        bbox_labels: sequence of boxes; entries 0..3 of the chosen box are
            read as normalized xmin, ymin, xmax, ymax (they are multiplied
            by the image size below).
        image_width: image width used to convert to pixel units.
        image_height: image height used to convert to pixel units.
        scale_array: sequence of scales; the box area is compared against
            the squares of consecutive entries, so it is expected to be
            ascending. NOTE(review): ordering is assumed, confirm with caller.
        resize_width: factor applied to the box width when deriving the
            crop size.

    Returns:
        ``[xmin, ymin, xmax, ymax]`` of the sampled window, normalized by
        the image size (values may fall outside ``[0, 1]``), or the integer
        ``0`` when ``bbox_labels`` is empty.
    """
    num_gt = len(bbox_labels)
    # np.random.randint range: [low, high)
    rand_idx = np.random.randint(0, num_gt) if num_gt != 0 else 0

    if num_gt != 0:
        norm_xmin = bbox_labels[rand_idx][0]
        norm_ymin = bbox_labels[rand_idx][1]
        norm_xmax = bbox_labels[rand_idx][2]
        norm_ymax = bbox_labels[rand_idx][3]

        # Convert the chosen box to pixel units.
        xmin = norm_xmin * image_width
        ymin = norm_ymin * image_height
        wid = image_width * (norm_xmax - norm_xmin)
        hei = image_height * (norm_ymax - norm_ymin)
        range_size = 0

        # Find the interval (scale[i]^2, scale[i+1]^2) that contains the
        # box area; range_size is the index of the interval's upper scale.
        area = wid * hei
        for scale_ind in range(0, len(scale_array) - 1):
            if area > scale_array[scale_ind] ** 2 and area < \
                    scale_array[scale_ind + 1] ** 2:
                range_size = scale_ind + 1
                break

        # Boxes larger than the second-to-last scale are pinned to that
        # index, overriding the interval search above.
        if area > scale_array[len(scale_array) - 2]**2:
            range_size = len(scale_array) - 2

        scale_choose = 0.0
        if range_size == 0:
            rand_idx_size = 0
        else:
            # np.random.randint range: [low, high)
            rng_rand_size = np.random.randint(0, range_size + 1)
            # The draw is already below range_size + 1, so the modulo
            # leaves it unchanged.
            rand_idx_size = rng_rand_size % (range_size + 1)

        # Target scale is drawn with the stdlib ``random`` module (not
        # ``np.random``) from [scale / 2, 2 * scale]; for the top index the
        # upper bound is additionally capped by 2 * sqrt(box area).
        if rand_idx_size == range_size:
            min_resize_val = scale_array[rand_idx_size] / 2.0
            max_resize_val = min(2.0 * scale_array[rand_idx_size],
                                 2 * math.sqrt(wid * hei))
            scale_choose = random.uniform(min_resize_val, max_resize_val)
        else:
            min_resize_val = scale_array[rand_idx_size] / 2.0
            max_resize_val = 2.0 * scale_array[rand_idx_size]
            scale_choose = random.uniform(min_resize_val, max_resize_val)

        # Side length (in pixels) of the square crop window.
        sample_bbox_size = wid * resize_width / scale_choose

        w_off_orig = 0.0
        h_off_orig = 0.0
        if sample_bbox_size < max(image_height, image_width):
            # Per axis: when the box fits inside the window, any offset that
            # keeps the whole box covered is allowed; otherwise the window
            # is placed somewhere inside the box.
            if wid <= sample_bbox_size:
                w_off_orig = np.random.uniform(xmin + wid - sample_bbox_size,
                                               xmin)
            else:
                w_off_orig = np.random.uniform(xmin,
                                               xmin + wid - sample_bbox_size)

            if hei <= sample_bbox_size:
                h_off_orig = np.random.uniform(ymin + hei - sample_bbox_size,
                                               ymin)
            else:
                h_off_orig = np.random.uniform(ymin,
                                               ymin + hei - sample_bbox_size)

        else:
            # Window is at least as large as the longer image side: offsets
            # are drawn between (image size - window size) and 0.
            w_off_orig = np.random.uniform(image_width - sample_bbox_size, 0.0)
            h_off_orig = np.random.uniform(image_height - sample_bbox_size,
                                           0.0)

        w_off_orig = math.floor(w_off_orig)
        h_off_orig = math.floor(h_off_orig)

        # Figure out top left coordinates.
        w_off = float(w_off_orig / image_width)
        h_off = float(h_off_orig / image_height)

        sampled_bbox = [
            w_off, h_off, w_off + float(sample_bbox_size / image_width),
            h_off + float(sample_bbox_size / image_height)
        ]
        return sampled_bbox
    else:
        # No ground-truth boxes: signal "nothing sampled" with 0.
        return 0
||||
|
||||
|
||||
def jaccard_overlap(sample_bbox, object_bbox):
    """Compute the intersection-over-union of two boxes.

    Both boxes are ``[xmin, ymin, xmax, ymax]``. Boxes that are disjoint or
    only touch along a border yield the integer ``0``.
    """
    no_contact = (sample_bbox[0] >= object_bbox[2] or
                  sample_bbox[2] <= object_bbox[0] or
                  sample_bbox[1] >= object_bbox[3] or
                  sample_bbox[3] <= object_bbox[1])
    if no_contact:
        return 0

    inter_w = min(sample_bbox[2], object_bbox[2]) - max(sample_bbox[0],
                                                        object_bbox[0])
    inter_h = min(sample_bbox[3], object_bbox[3]) - max(sample_bbox[1],
                                                        object_bbox[1])
    inter_area = inter_w * inter_h
    union_area = bbox_area(sample_bbox) + bbox_area(object_bbox) - inter_area
    return inter_area / union_area
||||
|
||||
|
||||
def intersect_bbox(bbox1, bbox2):
    """Return the intersection of two ``[xmin, ymin, xmax, ymax]`` boxes.

    When the boxes are strictly separated on either axis the result is
    ``[0.0, 0.0, 0.0, 0.0]``. Boxes that touch produce a degenerate
    (zero-width or zero-height) intersection box.
    """
    separated = (bbox2[0] > bbox1[2] or bbox2[2] < bbox1[0] or
                 bbox2[1] > bbox1[3] or bbox2[3] < bbox1[1])
    if separated:
        return [0.0, 0.0, 0.0, 0.0]
    return [
        max(bbox1[0], bbox2[0]),
        max(bbox1[1], bbox2[1]),
        min(bbox1[2], bbox2[2]),
        min(bbox1[3], bbox2[3]),
    ]
||||
|
||||
|
||||
def bbox_coverage(bbox1, bbox2):
    """Return the fraction of ``bbox1``'s area that is covered by ``bbox2``.

    Yields ``0.`` when the intersection has no positive area.
    """
    shared_area = bbox_area(intersect_bbox(bbox1, bbox2))
    if not shared_area > 0:
        return 0.
    return shared_area / bbox_area(bbox1)
||||
|
||||
|
||||
def satisfy_sample_constraint(sampler,
                              sample_bbox,
                              gt_bboxes,
                              satisfy_all=False):
    """Check a sampled window against the sampler's IoU limits.

    ``sampler[6]`` is the minimum and ``sampler[7]`` the maximum allowed
    Jaccard overlap; a value of 0 disables that limit. If both are 0 the
    window is accepted unconditionally.

    Args:
        sampler: sampler description, indexed as described above.
        sample_bbox: candidate window ``[xmin, ymin, xmax, ymax]``.
        gt_bboxes: ground-truth boxes; the first four entries are used.
        satisfy_all: when false, one conforming ground-truth box is enough;
            when true, every ground-truth box must conform.

    Returns:
        ``True``/``False``, or the result of ``np.all`` over the per-box
        verdicts when ``satisfy_all`` is set.
    """
    min_iou, max_iou = sampler[6], sampler[7]
    if min_iou == 0 and max_iou == 0:
        return True

    verdicts = []
    for gt in gt_bboxes:
        iou = jaccard_overlap(sample_bbox, [gt[0], gt[1], gt[2], gt[3]])
        below = min_iou != 0 and iou < min_iou
        conforms = not (below or (max_iou != 0 and iou > max_iou))
        verdicts.append(conforms)
        if conforms and not satisfy_all:
            return True

    return np.all(verdicts) if satisfy_all else False
||||
|
||||
|
||||
def satisfy_sample_constraint_coverage(sampler, sample_bbox, gt_bboxes):
    """Check a sampled window against IoU and object-coverage limits.

    ``sampler[6]``/``sampler[7]`` are the min/max Jaccard overlap and
    ``sampler[8]``/``sampler[9]`` the min/max object coverage; a value of 0
    disables the individual limit. With all four at 0 the window is accepted.

    Returns:
        bool: True once some ground-truth box passes the enabled checks.
    """
    use_iou = not (sampler[6] == 0 and sampler[7] == 0)
    use_coverage = not (sampler[8] == 0 and sampler[9] == 0)
    if not use_iou and not use_coverage:
        return True

    # NOTE(review): ``found`` is never reset between boxes. Once a box has
    # passed the overlap test, a coverage failure (on that or a later box)
    # skips the early return but the function still ends up returning True.
    found = False
    for gt in gt_bboxes:
        object_bbox = [gt[0], gt[1], gt[2], gt[3]]
        if use_iou:
            iou = jaccard_overlap(sample_bbox, object_bbox)
            if (sampler[6] != 0 and iou < sampler[6]) or \
                    (sampler[7] != 0 and iou > sampler[7]):
                continue
            found = True
        if use_coverage:
            coverage = bbox_coverage(object_bbox, sample_bbox)
            if (sampler[8] != 0 and coverage < sampler[8]) or \
                    (sampler[9] != 0 and coverage > sampler[9]):
                continue
            found = True
        if found:
            return True
    return found
||||
|
||||
|
||||
def crop_image_sampling(img, sample_bbox, image_width, image_height,
                        target_size):
    """Cut the window ``sample_bbox`` out of ``img`` and resize it to a square.

    The window may extend beyond the image; the part that falls outside is
    left as zeros in the output canvas.

    Args:
        img: source image array, indexed as ``img[y, x]``; it is copied into
            a 3-channel canvas.
        sample_bbox: window ``[xmin, ymin, xmax, ymax]`` in coordinates
            normalized by the image size (values outside [0, 1] allowed).
        image_width: image width used to convert the window to pixels.
        image_height: image height used to convert the window to pixels.
        target_size: side length of the returned square image.

    Returns:
        The cropped, zero-padded window resized to
        ``(target_size, target_size)`` with ``cv2.INTER_AREA``.
    """
    # no clipping here
    xmin = int(sample_bbox[0] * image_width)
    xmax = int(sample_bbox[2] * image_width)
    ymin = int(sample_bbox[1] * image_height)
    ymax = int(sample_bbox[3] * image_height)

    w_off = xmin
    h_off = ymin
    width = xmax - xmin
    height = ymax - ymin
    # "cross" is the part of the window that lies inside the image.
    cross_xmin = max(0.0, float(w_off))
    cross_ymin = max(0.0, float(h_off))
    cross_xmax = min(float(w_off + width - 1.0), float(image_width))
    cross_ymax = min(float(h_off + height - 1.0), float(image_height))
    cross_width = cross_xmax - cross_xmin
    cross_height = cross_ymax - cross_ymin

    # "roi" is where that part lands inside the output canvas: a negative
    # window offset shifts the paste position by the same amount.
    roi_xmin = 0 if w_off >= 0 else abs(w_off)
    roi_ymin = 0 if h_off >= 0 else abs(h_off)
    roi_width = cross_width
    roi_height = cross_height

    roi_y1 = int(roi_ymin)
    roi_y2 = int(roi_ymin + roi_height)
    roi_x1 = int(roi_xmin)
    roi_x2 = int(roi_xmin + roi_width)

    cross_y1 = int(cross_ymin)
    cross_y2 = int(cross_ymin + cross_height)
    cross_x1 = int(cross_xmin)
    cross_x2 = int(cross_xmin + cross_width)

    # Zero canvas of the full window size; only the in-image part is filled.
    sample_img = np.zeros((height, width, 3))
    sample_img[roi_y1: roi_y2, roi_x1: roi_x2] = \
        img[cross_y1: cross_y2, cross_x1: cross_x2]

    sample_img = cv2.resize(
        sample_img, (target_size, target_size), interpolation=cv2.INTER_AREA)

    return sample_img
||||
|
||||
|
||||
def is_poly(segm):
    """Return True if ``segm`` is a list and False if it is a dict.

    Any other type trips an assertion.
    """
    as_list = isinstance(segm, list)
    assert as_list or isinstance(segm, dict), \
        "Invalid segm type: {}".format(type(segm))
    return as_list
||||
|
||||
|
||||
def gaussian_radius(bbox_size, min_overlap):
    """Return the smallest of three candidate Gaussian radii for a box.

    Args:
        bbox_size: ``(height, width)`` of the box.
        min_overlap: overlap ratio used in the three quadratic expressions.

    Returns:
        The minimum of the three radii computed below.
    """
    height, width = bbox_size
    side_sum = height + width

    # Candidate 1
    c1 = width * height * (1 - min_overlap) / (1 + min_overlap)
    root1 = np.sqrt(side_sum**2 - 4 * 1 * c1)
    radius1 = (side_sum + root1) / (2 * 1)

    # Candidate 2
    b2 = 2 * side_sum
    c2 = (1 - min_overlap) * width * height
    root2 = np.sqrt(b2**2 - 4 * 4 * c2)
    radius2 = (b2 + root2) / 2

    # Candidate 3
    a3 = 4 * min_overlap
    b3 = -2 * min_overlap * side_sum
    c3 = (min_overlap - 1) * width * height
    root3 = np.sqrt(b3**2 - 4 * a3 * c3)
    radius3 = (b3 + root3) / 2

    return min(radius1, radius2, radius3)
||||
|
||||
|
||||
def draw_gaussian(heatmap, center, radius, k=1, delte=6):
    """Stamp a 2-D Gaussian onto ``heatmap`` in place, keeping the maximum.

    Args:
        heatmap: 2-D array (``heatmap[y, x]``) that is modified in place.
        center: ``(x, y)`` position of the peak.
        radius: kernel radius; the kernel is ``2 * radius + 1`` wide.
        k: factor applied to the kernel before the element-wise maximum.
        delte: divisor turning the kernel diameter into sigma.
    """
    diameter = 2 * radius + 1
    sigma = diameter / delte
    kernel = gaussian2D((diameter, diameter), sigma_x=sigma, sigma_y=sigma)

    cx, cy = center
    rows, cols = heatmap.shape[0:2]

    # How far the kernel may extend in each direction without leaving the map.
    left = min(cx, radius)
    right = min(cols - cx, radius + 1)
    top = min(cy, radius)
    bottom = min(rows - cy, radius + 1)

    region = heatmap[cy - top:cy + bottom, cx - left:cx + right]
    patch = kernel[radius - top:radius + bottom, radius - left:radius + right]
    np.maximum(region, patch * k, out=region)
||||
|
||||
|
||||
def gaussian2D(shape, sigma_x=1, sigma_y=1):
    """Build an unnormalized 2-D Gaussian kernel with its peak at the center.

    Args:
        shape: ``(rows, cols)`` of the kernel.
        sigma_x: standard deviation along the column axis.
        sigma_y: standard deviation along the row axis.

    Returns:
        Array of the requested shape. Entries below machine epsilon times
        the peak value are set to 0.
    """
    half_rows, half_cols = [(extent - 1.) / 2. for extent in shape]
    ys, xs = np.ogrid[-half_rows:half_rows + 1, -half_cols:half_cols + 1]

    exponent = xs * xs / (2 * sigma_x * sigma_x) + ys * ys / (2 * sigma_y *
                                                              sigma_y)
    kernel = np.exp(-exponent)
    negligible = kernel < np.finfo(kernel.dtype).eps * kernel.max()
    kernel[negligible] = 0
    return kernel
||||
|
||||
|
||||
def draw_umich_gaussian(heatmap, center, radius, k=1):
    """
    draw_umich_gaussian, refer to https://github.com/xingyizhou/CenterNet/blob/master/src/lib/utils/image.py#L126

    Stamps a Gaussian (sigma = diameter / 6) centered at ``center`` onto
    ``heatmap`` in place via element-wise maximum, and returns ``heatmap``.
    Nothing is written when the clipped region is empty.
    """
    diameter = 2 * radius + 1
    kernel = gaussian2D(
        (diameter, diameter), sigma_x=diameter / 6, sigma_y=diameter / 6)

    cx = int(center[0])
    cy = int(center[1])
    rows, cols = heatmap.shape[0:2]

    left = min(cx, radius)
    right = min(cols - cx, radius + 1)
    top = min(cy, radius)
    bottom = min(rows - cy, radius + 1)

    region = heatmap[cy - top:cy + bottom, cx - left:cx + right]
    patch = kernel[radius - top:radius + bottom, radius - left:radius + right]
    if min(patch.shape) > 0 and min(region.shape) > 0:
        np.maximum(region, patch * k, out=region)
    return heatmap
||||
|
||||
|
||||
def get_border(border, size):
    """Halve ``border`` (integer division by 1, 2, 4, ...) until it fits.

    Returns the first ``border // i`` for which
    ``size - border // i > border // i``.
    """
    divisor = 1
    margin = border
    while size - margin <= margin:
        divisor *= 2
        margin = border // divisor
    return margin
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,30 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from . import trainer |
||||
from .trainer import * |
||||
|
||||
from . import callbacks |
||||
from .callbacks import * |
||||
|
||||
from . import env |
||||
from .env import * |
||||
|
||||
__all__ = trainer.__all__ \ |
||||
+ callbacks.__all__ \ |
||||
+ env.__all__ |
||||
|
||||
from . import tracker |
||||
from .tracker import * |
||||
__all__ = __all__ + tracker.__all__ |
@ -0,0 +1,340 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
import os |
||||
import sys |
||||
import datetime |
||||
import six |
||||
import copy |
||||
import json |
||||
|
||||
import paddle |
||||
import paddle.distributed as dist |
||||
|
||||
from paddlers.models.ppdet.utils.checkpoint import save_model |
||||
from paddlers.models.ppdet.metrics import get_infer_results |
||||
|
||||
from paddlers.models.ppdet.utils.logger import setup_logger |
||||
logger = setup_logger('ppdet.engine') |
||||
|
||||
__all__ = [ |
||||
'Callback', 'ComposeCallback', 'LogPrinter', 'Checkpointer', |
||||
'VisualDLWriter', 'SniperProposalsGenerator' |
||||
] |
||||
|
||||
|
||||
class Callback(object):
    """Base class for engine callbacks.

    Stores the object it is attached to as ``self.model``. Every hook is a
    no-op here; subclasses override the ones they need. Each hook receives a
    ``status`` argument.
    """

    def __init__(self, model):
        self.model = model

    def on_step_begin(self, status):
        """No-op hook."""

    def on_step_end(self, status):
        """No-op hook."""

    def on_epoch_begin(self, status):
        """No-op hook."""

    def on_epoch_end(self, status):
        """No-op hook."""

    def on_train_begin(self, status):
        """No-op hook."""

    def on_train_end(self, status):
        """No-op hook."""
||||
|
||||
|
||||
class ComposeCallback(object):
    """Fan a hook call out to a list of callbacks, in order.

    ``None`` entries are dropped; every remaining entry must be a
    ``Callback`` instance.
    """

    def __init__(self, callbacks):
        kept = []
        for cb in list(callbacks):
            if cb is None:
                continue
            assert isinstance(
                cb, Callback), "callback should be subclass of Callback"
            kept.append(cb)
        self._callbacks = kept

    def _broadcast(self, hook_name, status):
        # Invoke the named hook on each registered callback.
        for cb in self._callbacks:
            getattr(cb, hook_name)(status)

    def on_step_begin(self, status):
        self._broadcast('on_step_begin', status)

    def on_step_end(self, status):
        self._broadcast('on_step_end', status)

    def on_epoch_begin(self, status):
        self._broadcast('on_epoch_begin', status)

    def on_epoch_end(self, status):
        self._broadcast('on_epoch_end', status)

    def on_train_begin(self, status):
        self._broadcast('on_train_begin', status)

    def on_train_end(self, status):
        self._broadcast('on_train_end', status)
||||
|
||||
|
||||
class LogPrinter(Callback):
    """Callback that logs training progress and evaluation throughput.

    Output is produced only when the world size is below 2 or the current
    rank is 0.
    """

    def __init__(self, model):
        super(LogPrinter, self).__init__(model)

    def on_step_end(self, status):
        """Log a progress line every ``cfg.log_iter`` training steps, and an
        iteration counter every 100 evaluation steps."""
        if not (dist.get_world_size() < 2 or dist.get_rank() == 0):
            return

        mode = status['mode']
        if mode == 'train':
            epoch_id = status['epoch_id']
            step_id = status['step_id']
            steps_per_epoch = status['steps_per_epoch']
            training_staus = status['training_staus']
            batch_time = status['batch_time']
            data_time = status['data_time']

            total_epochs = self.model.cfg.epoch
            reader_key = '{}Reader'.format(mode.capitalize())
            batch_size = self.model.cfg[reader_key]['batch_size']

            meters = training_staus.log()
            # Pad the step counter to the width of steps_per_epoch.
            step_spec = ':' + str(len(str(steps_per_epoch))) + 'd'
            if step_id % self.model.cfg.log_iter == 0:
                steps_left = (total_epochs - epoch_id
                              ) * steps_per_epoch - step_id
                seconds_left = steps_left * batch_time.global_avg
                eta_text = str(datetime.timedelta(seconds=int(seconds_left)))
                throughput = float(batch_size) / batch_time.avg
                template = ' '.join([
                    'Epoch: [{}]',
                    '[{' + step_spec + '}/{}]',
                    'learning_rate: {lr:.6f}',
                    '{meters}',
                    'eta: {eta}',
                    'batch_cost: {btime}',
                    'data_cost: {dtime}',
                    'ips: {ips:.4f} images/s',
                ])
                logger.info(
                    template.format(
                        epoch_id,
                        step_id,
                        steps_per_epoch,
                        lr=status['learning_rate'],
                        meters=meters,
                        eta=eta_text,
                        btime=str(batch_time),
                        dtime=str(data_time),
                        ips=throughput))
        if mode == 'eval':
            step_id = status['step_id']
            if step_id % 100 == 0:
                logger.info("Eval iter: {}".format(step_id))

    def on_epoch_end(self, status):
        """After evaluation, log the sample count and average FPS."""
        if not (dist.get_world_size() < 2 or dist.get_rank() == 0):
            return
        if status['mode'] != 'eval':
            return
        sample_num = status['sample_num']
        cost_time = status['cost_time']
        logger.info('Total sample number: {}, averge FPS: {}'.format(
            sample_num, sample_num / cost_time))
||||
|
||||
|
||||
class Checkpointer(Callback):
    """Callback that saves model weights at epoch boundaries.

    During training it snapshots every ``cfg.snapshot_epoch`` epochs and at
    the final epoch. During evaluation, when ``status['save_best_model']``
    is set, it saves ``best_model`` whenever the first metric value improves.
    """

    def __init__(self, model):
        super(Checkpointer, self).__init__(model)
        cfg = self.model.cfg
        self.best_ap = 0.
        self.save_dir = os.path.join(cfg.save_dir, cfg.filename)
        # Prefer the student network when the wrapped model exposes one.
        network = self.model.model
        if hasattr(network, 'student_model'):
            self.weight = network.student_model
        else:
            self.weight = network

    def on_epoch_end(self, status):
        # Checkpointer only performed during training
        mode = status['mode']
        epoch_id = status['epoch_id']
        if not (dist.get_world_size() < 2 or dist.get_rank() == 0):
            return

        weight = None
        save_name = None
        if mode == 'train':
            end_epoch = self.model.cfg.epoch
            is_final = epoch_id == end_epoch - 1
            if (epoch_id + 1) % self.model.cfg.snapshot_epoch == 0 or is_final:
                save_name = "model_final" if is_final else str(epoch_id)
                weight = self.weight
        elif mode == 'eval':
            if 'save_best_model' in status and status['save_best_model']:
                for metric in self.model._metrics:
                    map_res = metric.get_results()
                    # Pick the result family to compare: bbox, then
                    # keypoint, otherwise mask.
                    key = next((name for name in ('bbox', 'keypoint')
                                if name in map_res), 'mask')
                    if key not in map_res:
                        logger.warning("Evaluation results empty, this may be due to " \
                                    "training iterations being too few or not " \
                                    "loading the correct weights.")
                        return
                    if map_res[key][0] > self.best_ap:
                        self.best_ap = map_res[key][0]
                        save_name = 'best_model'
                        weight = self.weight
                    logger.info("Best test {} ap is {:0.3f}.".format(
                        key, self.best_ap))
        if weight:
            save_model(weight, self.model.optimizer, self.save_dir,
                       save_name, epoch_id + 1)
||||
|
||||
|
||||
class WiferFaceEval(Callback):
    """Callback that runs every metric on the model and then exits.

    At epoch begin each metric's ``update`` is called with the underlying
    network, after which the process terminates via ``sys.exit()``.
    """

    def __init__(self, model):
        super(WiferFaceEval, self).__init__(model)

    def on_epoch_begin(self, status):
        in_eval_mode = self.model.mode == 'eval'
        assert in_eval_mode, \
            "WiferFaceEval can only be set during evaluation"
        for face_metric in self.model._metrics:
            face_metric.update(self.model.model)
        # Evaluation is complete at this point; stop the process.
        sys.exit()
||||
|
||||
|
||||
class VisualDLWriter(Callback):
    """
    Use VisualDL to log data or image
    """

    def __init__(self, model):
        super(VisualDLWriter, self).__init__(model)

        assert six.PY3, "VisualDL requires Python >= 3.5"
        # visualdl is imported here so it is only needed when this callback
        # is actually used.
        try:
            from visualdl import LogWriter
        except Exception as e:
            logger.error('visualdl not found, plaese install visualdl. '
                         'for example: `pip install visualdl`.')
            raise e
        log_dir = model.cfg.get('vdl_log_dir', 'vdl_log_dir/scalar')
        self.vdl_writer = LogWriter(log_dir)
        self.vdl_loss_step = 0
        self.vdl_mAP_step = 0
        self.vdl_image_step = 0
        self.vdl_image_frame = 0

    def on_step_end(self, status):
        """Write training scalars, or original/result images in test mode."""
        mode = status['mode']
        if not (dist.get_world_size() < 2 or dist.get_rank() == 0):
            return

        if mode == 'train':
            meters = status['training_staus'].get()
            for loss_name, loss_value in meters.items():
                self.vdl_writer.add_scalar(loss_name, loss_value,
                                           self.vdl_loss_step)
            self.vdl_loss_step += 1
        elif mode == 'test':
            ori_image = status['original_image']
            result_image = status['result_image']
            self.vdl_writer.add_image(
                "original/frame_{}".format(self.vdl_image_frame), ori_image,
                self.vdl_image_step)
            self.vdl_writer.add_image(
                "result/frame_{}".format(self.vdl_image_frame), result_image,
                self.vdl_image_step)
            self.vdl_image_step += 1
            # each frame can display ten pictures at most.
            if self.vdl_image_step % 10 == 0:
                self.vdl_image_step = 0
                self.vdl_image_frame += 1

    def on_epoch_end(self, status):
        """After evaluation, write the first value of each metric result."""
        mode = status['mode']
        if not (dist.get_world_size() < 2 or dist.get_rank() == 0):
            return
        if mode != 'eval':
            return
        for metric in self.model._metrics:
            for key, map_value in metric.get_results().items():
                self.vdl_writer.add_scalar("{}-mAP".format(key), map_value[0],
                                           self.vdl_mAP_step)
        self.vdl_mAP_step += 1
||||
|
||||
|
||||
class SniperProposalsGenerator(Callback):
    """Callback that dumps detection proposals to JSON when training ends.

    A deep copy of the model's dataset is re-cropped for inference, the
    network is run over it, chip-level detections are aggregated, and the
    resulting bbox records are written to ``cfg.proposals_path``.
    """

    def __init__(self, model):
        super(SniperProposalsGenerator, self).__init__(model)
        ori_dataset = self.model.dataset
        self.dataset = self._create_new_dataset(ori_dataset)
        self.loader = self.model.loader
        self.cfg = self.model.cfg
        self.infer_model = self.model.model

    def _create_new_dataset(self, ori_dataset):
        """Return a deep copy of ``ori_dataset`` set up with inference roidbs."""
        dataset = copy.deepcopy(ori_dataset)
        # init anno_cropper
        dataset.init_anno_cropper()
        # generate infer roidbs
        ori_roidbs = dataset.get_ori_roidbs()
        roidbs = dataset.anno_cropper.crop_infer_anno_records(ori_roidbs)
        # set new roidbs
        dataset.set_roidbs(roidbs)

        return dataset

    def _eval_with_loader(self, loader):
        """Run the network over ``loader`` without gradients.

        Returns:
            list: one output dict per batch. The ``im_shape``,
            ``scale_factor`` and ``im_id`` entries are copied from the input
            batch, and every value exposing ``numpy()`` is converted.
        """
        results = []
        with paddle.no_grad():
            self.infer_model.eval()
            for step_id, data in enumerate(loader):
                outs = self.infer_model(data)
                for key in ['im_shape', 'scale_factor', 'im_id']:
                    outs[key] = data[key]
                for key, value in outs.items():
                    if hasattr(value, 'numpy'):
                        outs[key] = value.numpy()

                results.append(outs)

        return results

    def on_train_end(self, status):
        """Generate proposals with the trained model and save them as JSON."""
        self.loader.dataset = self.dataset
        results = self._eval_with_loader(self.loader)
        results = self.dataset.anno_cropper.aggregate_chips_detections(results)
        # sniper
        proposals = []
        clsid2catid = {v: k for k, v in self.dataset.catid2clsid.items()}
        for outs in results:
            batch_res = get_infer_results(outs, clsid2catid)
            start = 0
            for i, im_id in enumerate(outs['im_id']):
                bbox_num = outs['bbox_num']
                end = start + bbox_num[i]
                bbox_res = batch_res['bbox'][start:end] \
                    if 'bbox' in batch_res else None
                if bbox_res:
                    proposals += bbox_res
                # Advance to the next image's slice. Without this every
                # image would be sliced from offset 0, duplicating the first
                # image's boxes and mis-slicing the rest.
                start = end
        logger.info("save proposals in {}".format(self.cfg.proposals_path))
        with open(self.cfg.proposals_path, 'w') as f:
            json.dump(proposals, f)
@ -0,0 +1,50 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
import os |
||||
import random |
||||
import numpy as np |
||||
|
||||
import paddle |
||||
from paddle.distributed import fleet |
||||
|
||||
__all__ = ['init_parallel_env', 'set_random_seed', 'init_fleet_env'] |
||||
|
||||
|
||||
def init_fleet_env(find_unused_parameters=False):
    """Initialize ``paddle.distributed.fleet`` in collective mode.

    Args:
        find_unused_parameters (bool): value assigned to the distributed
            strategy's ``find_unused_parameters`` field.
    """
    dist_strategy = fleet.DistributedStrategy()
    dist_strategy.find_unused_parameters = find_unused_parameters
    fleet.init(is_collective=True, strategy=dist_strategy)
||||
|
||||
|
||||
def init_parallel_env():
    """Seed the Python/NumPy RNGs per trainer and start the parallel env.

    When both ``PADDLE_TRAINER_ID`` and ``PADDLE_TRAINERS_NUM`` are present
    in the environment, ``random`` and ``np.random`` are seeded with
    ``99 + trainer_id``. ``paddle.distributed.init_parallel_env()`` is called
    in every case.
    """
    environ = os.environ
    launched = ('PADDLE_TRAINER_ID' in environ and
                'PADDLE_TRAINERS_NUM' in environ)
    if launched:
        seed = 99 + int(environ['PADDLE_TRAINER_ID'])
        random.seed(seed)
        np.random.seed(seed)

    paddle.distributed.init_parallel_env()
||||
|
||||
|
||||
def set_random_seed(seed):
    """Seed paddle, Python's ``random`` and NumPy with the same value."""
    for seeder in (paddle.seed, random.seed, np.random.seed):
        seeder(seed)
@ -0,0 +1,177 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
import os |
||||
import yaml |
||||
from collections import OrderedDict |
||||
|
||||
import paddle |
||||
from paddlers.models.ppdet.data.source.category import get_categories |
||||
|
||||
from paddlers.models.ppdet.utils.logger import setup_logger |
||||
logger = setup_logger('ppdet.engine') |
||||
|
||||
# Global dictionary
# Maps an architecture keyword to the value exported as `min_subgraph_size`
# in the inference config. `_dump_infer_config` iterates this dict in order
# and uses the FIRST key that is a substring of config['architecture'], so
# the ordering of entries matters (e.g. 'HigherHRNet' is listed before
# 'HRNet').
# NOTE(review): the name suggests these are TensorRT minimum subgraph sizes
# -- confirm against the deployment code that consumes infer_cfg.yml.
TRT_MIN_SUBGRAPH = {
    'YOLO': 3,
    'SSD': 60,
    'RCNN': 40,
    'RetinaNet': 40,
    'S2ANet': 80,
    'EfficientDet': 40,
    'Face': 3,
    'TTFNet': 60,
    'FCOS': 16,
    'SOLOv2': 60,
    'HigherHRNet': 3,
    'HRNet': 3,
    'DeepSORT': 3,
    'JDE': 10,
    'FairMOT': 5,
    'GFL': 16,
    'PicoDet': 3,
    'CenterNet': 5,
    'TOOD': 5,
}

# Architectures for which `_dump_infer_config` passes 'keypoint_arch' to the
# reader parser (compared by exact match against config['architecture']).
KEYPOINT_ARCH = ['HigherHRNet', 'TopDownHRNet']
# Architectures for which `_dump_infer_config` exports tracker parameters and
# reads the TestMOTReader / TestMOTDataset config entries (exact match).
MOT_ARCH = ['DeepSORT', 'JDE', 'FairMOT']
||||
|
||||
|
||||
def _prune_input_spec(input_spec, program, targets):
    """Keep only the input specs whose variables survive pruning of ``program``.

    The program is cloned and pruned down to ``targets``; every entry of
    ``input_spec[0]`` whose name can still be looked up in the pruned
    program's global block is kept.

    Args:
        input_spec: Sequence whose first element is a mapping of input name
            to spec. Only ``input_spec[0]`` is read.
        program: Static program object providing ``clone()``, ``_prune()``
            and ``global_block()``.
        targets: Forwarded to ``program._prune(targets=...)``.

    Returns:
        list: A one-element list holding a dict with the surviving
        ``name -> spec`` entries.
    """
    # try to prune static program to figure out pruned input spec
    # so we perform following operations in static mode
    paddle.enable_static()
    try:
        pruned_input_spec = [{}]
        pruned_program = program.clone()._prune(targets=targets)
        global_block = pruned_program.global_block()
        for name, spec in input_spec[0].items():
            try:
                # Only the lookup matters: it raises when the variable was
                # pruned away. The returned variable itself is not needed.
                global_block.var(name)
            except Exception:
                # Best-effort by design: inputs that no longer exist in the
                # pruned program are silently dropped.
                continue
            pruned_input_spec[0][name] = spec
    finally:
        # Always return to dynamic mode, even if cloning/pruning fails, so an
        # error here does not leave the whole process stuck in static mode.
        paddle.disable_static()
    return pruned_input_spec
||||
|
||||
|
||||
def _parse_reader(reader_cfg, dataset_cfg, metric, arch, image_shape):
    """Translate a reader config into the exported preprocess list and labels.

    Args:
        reader_cfg: Mapping with a ``'sample_transforms'`` list and optional
            ``'fuse_normalize'`` / ``'batch_transforms'`` entries.
        dataset_cfg: Object providing ``get_anno()``.
        metric: Forwarded to ``get_categories``.
        arch: Forwarded to ``get_categories``.
        image_shape: Indexable shape; ``image_shape[1]`` is compared with -1
            and ``image_shape[1:]`` becomes the ``Resize`` target size.

    Returns:
        tuple: ``(preprocess_list, label_list)`` where ``preprocess_list`` is
        a list of dicts each carrying a ``'type'`` key, and ``label_list`` is
        the stringified category names.
    """
    anno_file = dataset_cfg.get_anno()
    _, catid2name = get_categories(metric, anno_file, arch)
    label_list = [str(cat) for cat in catid2name.values()]

    skip_normalize = reader_cfg.get('fuse_normalize', False)
    preprocess_list = []
    # The first sample transform is skipped, exactly as in the original slice.
    for transform in reader_cfg['sample_transforms'][1:]:
        for op_name, op_args in transform.items():
            if op_name == 'Resize' and int(image_shape[1]) != -1:
                # NOTE: this writes into the reader config in place.
                op_args['target_size'] = image_shape[1:]
            if skip_normalize and op_name == 'NormalizeImage':
                continue
            op_cfg = {'type': op_name}
            op_cfg.update(op_args)
            preprocess_list.append(op_cfg)

    for batch_transform in reader_cfg.get('batch_transforms', None) or []:
        for op_name, op_args in batch_transform.items():
            # for deploy/infer, use PadStride(stride) instead PadBatch(pad_to_stride)
            if op_name == 'PadBatch':
                preprocess_list.append({
                    'type': 'PadStride',
                    'stride': op_args['pad_to_stride']
                })
                break

    return preprocess_list, label_list
||||
|
||||
|
||||
def _parse_tracker(tracker_cfg): |
||||
tracker_params = {} |
||||
for k, v in tracker_cfg.items(): |
||||
tracker_params.update({k: v}) |
||||
return tracker_params |
||||
|
||||
|
||||
def _dump_infer_config(config, path, image_shape, model):
    """Write the inference (deploy) YAML config for an exported model.

    Args:
        config: Configuration mapping. Reads ``'metric'``, ``'architecture'``,
            the architecture's own section, the reader/dataset sections and,
            depending on the architecture, tracker or ``'PicoHead'`` sections.
        path (str): Destination file path of the YAML file.
        image_shape: Indexable shape. ``image_shape[2] == -1`` marks dynamic
            shape, and ``image_shape[1:]`` is forwarded to ``_parse_reader``.
            NOTE(review): presumably [batch, C, H, W] -- confirm with caller.
        model: Unused here; kept for interface compatibility.
    """
    from paddlers.models.ppdet.core.config.yaml_helpers import setup_orderdict
    setup_orderdict()
    use_dynamic_shape = image_shape[2] == -1
    infer_cfg = OrderedDict({
        'mode': 'fluid',
        'draw_threshold': 0.5,
        'metric': config['metric'],
        'use_dynamic_shape': use_dynamic_shape
    })
    infer_arch = config['architecture']

    if infer_arch in MOT_ARCH:
        if infer_arch == 'DeepSORT':
            tracker_cfg = config['DeepSORTTracker']
        else:
            tracker_cfg = config['JDETracker']
        infer_cfg['tracker'] = _parse_tracker(tracker_cfg)

    # The first TRT_MIN_SUBGRAPH key that is a substring of the architecture
    # name decides the exported arch and min_subgraph_size.
    for arch, min_subgraph_size in TRT_MIN_SUBGRAPH.items():
        if arch in infer_arch:
            infer_cfg['arch'] = arch
            infer_cfg['min_subgraph_size'] = min_subgraph_size
            break
    else:
        logger.error(
            'Architecture: {} is not supported for exporting model now.\n'.
            format(infer_arch) +
            'Please set TRT_MIN_SUBGRAPH in ppdet/engine/export_utils.py')
        # NOTE(review): this terminates the process immediately with exit
        # status 0 even though it is an error path. Kept as-is because
        # callers may rely on it -- consider raising instead.
        os._exit(0)

    arch_cfg = config[infer_arch]
    if 'mask_head' in arch_cfg and arch_cfg['mask_head']:
        infer_cfg['mask'] = True

    label_arch = 'detection_arch'
    if infer_arch in KEYPOINT_ARCH:
        label_arch = 'keypoint_arch'

    if infer_arch in MOT_ARCH:
        label_arch = 'mot_arch'
        reader_cfg = config['TestMOTReader']
        dataset_cfg = config['TestMOTDataset']
    else:
        reader_cfg = config['TestReader']
        dataset_cfg = config['TestDataset']

    infer_cfg['Preprocess'], infer_cfg['label_list'] = _parse_reader(
        reader_cfg, dataset_cfg, config['metric'], label_arch, image_shape[1:])

    if infer_arch == 'PicoDet':
        infer_cfg['NMS'] = config['PicoHead']['nms']
        # In order to speed up the prediction, the threshold of nms
        # is adjusted here, which can be changed in infer_cfg.yml
        # (the writes below go to config['PicoHead']['nms']; presumably that
        # is the same object just stored in infer_cfg['NMS'] -- verify).
        config['PicoHead']['nms']["score_threshold"] = 0.3
        config['PicoHead']['nms']["nms_threshold"] = 0.5
        infer_cfg['fpn_stride'] = config['PicoHead']['fpn_stride']

    # Use a context manager so the file is flushed and closed deterministically
    # instead of relying on garbage collection of an anonymous handle.
    with open(path, 'w') as f:
        yaml.dump(infer_cfg, f)
    logger.info("Export inference config file to {}".format(path))
@ -0,0 +1,538 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
import os |
||||
import cv2 |
||||
import glob |
||||
import re |
||||
import paddle |
||||
import numpy as np |
||||
import os.path as osp |
||||
from collections import defaultdict |
||||
|
||||
from paddlers.models.ppdet.core.workspace import create |
||||
from paddlers.models.ppdet.utils.checkpoint import load_weight, load_pretrain_weight |
||||
from paddlers.models.ppdet.modeling.mot.utils import Detection, get_crops, scale_coords, clip_box |
||||
from paddlers.models.ppdet.modeling.mot.utils import MOTTimer, load_det_results, write_mot_results, save_vis_results |
||||
|
||||
from paddlers.models.ppdet.metrics import Metric, MOTMetric, KITTIMOTMetric |
||||
from paddlers.models.ppdet.metrics import MCMOTMetric |
||||
|
||||
from .callbacks import Callback, ComposeCallback |
||||
|
||||
from paddlers.models.ppdet.utils.logger import setup_logger |
||||
logger = setup_logger(__name__) |
||||
|
||||
__all__ = ['Tracker'] |
||||
|
||||
|
||||
class Tracker(object):
    """Run multi-object-tracking (MOT) evaluation or prediction for one model.

    The model named by ``cfg.architecture`` is built with ``create`` and the
    ``EvalMOTDataset`` / ``TestMOTDataset`` entry of ``cfg`` is selected
    according to ``mode``.

    Args:
        cfg: Configuration object, indexed by key and read by attribute.
        mode (str): ``'eval'`` or ``'test'`` (case-insensitive).
    """

    def __init__(self, cfg, mode='eval'):
        self.cfg = cfg
        assert mode.lower() in ['test', 'eval'], \
            "mode should be 'test' or 'eval'"
        self.mode = mode.lower()
        self.optimizer = None

        # build MOT data loader
        self.dataset = cfg['{}MOTDataset'.format(self.mode.capitalize())]

        # build model
        self.model = create(cfg.architecture)

        self.status = {}
        self.start_epoch = 0

        # initial default callbacks
        self._init_callbacks()

        # initial default metrics
        self._init_metrics()
        self._reset_metrics()

    def _init_callbacks(self):
        """Start with no callbacks; see :meth:`register_callbacks`."""
        self._callbacks = []
        self._compose_callback = None

    def _init_metrics(self):
        """Create the metric list matching ``cfg.metric`` (empty in test mode)."""
        if self.mode in ['test']:
            self._metrics = []
            return

        if self.cfg.metric == 'MOT':
            self._metrics = [MOTMetric(), ]
        elif self.cfg.metric == 'MCMOT':
            self._metrics = [MCMOTMetric(self.cfg.num_classes), ]
        elif self.cfg.metric == 'KITTI':
            self._metrics = [KITTIMOTMetric(), ]
        else:
            logger.warning("Metric not support for metric type {}".format(
                self.cfg.metric))
            self._metrics = []

    def _reset_metrics(self):
        """Call ``reset()`` on every registered metric."""
        for metric in self._metrics:
            metric.reset()

    def register_callbacks(self, callbacks):
        """Append callbacks (``None`` entries are ignored) and rebuild the composer.

        Args:
            callbacks: Iterable of ``Callback`` instances or ``None``.
        """
        callbacks = [h for h in list(callbacks) if h is not None]
        for c in callbacks:
            # The message used to talk about metrics (copy-paste slip).
            assert isinstance(c, Callback), \
                "callbacks should be instances of subclass of Callback"
        self._callbacks.extend(callbacks)
        self._compose_callback = ComposeCallback(self._callbacks)

    def register_metrics(self, metrics):
        """Append metrics (``None`` entries are ignored).

        Args:
            metrics: Iterable of ``Metric`` instances or ``None``.
        """
        metrics = [m for m in list(metrics) if m is not None]
        for m in metrics:
            assert isinstance(m, Metric), \
                "metrics should be instances of subclass of Metric"
        self._metrics.extend(metrics)

    def load_weights_jde(self, weights):
        """Load ``weights`` into the whole model."""
        load_weight(self.model, weights, self.optimizer)

    def load_weights_sde(self, det_weights, reid_weights):
        """Load detector and ReID weights; the detector is optional.

        When the model has no detector only the ReID weights are loaded.
        """
        if self.model.detector:
            load_weight(self.model.detector, det_weights)
            load_weight(self.model.reid, reid_weights)
        else:
            load_weight(self.model.reid, reid_weights, self.optimizer)

    @staticmethod
    def _seq_name_from_path(path):
        """Return the last path component of ``path`` up to its first dot.

        Trailing separators are ignored, so ``'data/seq1/'`` yields ``'seq1'``
        rather than an empty name.
        """
        return os.path.basename(os.path.normpath(path)).split('.')[0]

    @staticmethod
    def _read_frame_rate(seqinfo, default=30):
        """Read the integer ``frameRate`` value from a ``seqinfo.ini`` file.

        Returns ``default`` (with a warning) when no ``frameRate=<int>`` entry
        is found, independent of where the key sits in the file.
        """
        with open(seqinfo) as f:
            meta_info = f.read()
        match = re.search(r'frameRate\s*=\s*(\d+)', meta_info)
        if match is None:
            logger.warning('No frameRate found in {}, use {} instead.'.format(
                seqinfo, default))
            return default
        return int(match.group(1))

    @staticmethod
    def _frames_to_video(save_dir, seq):
        """Encode ``save_dir/%05d.jpg`` into ``save_dir/../<seq>_vis.mp4``.

        ffmpeg is invoked with an argument list (no shell), so paths that
        contain spaces or shell metacharacters are passed through verbatim.

        Returns:
            The output video path on success, ``None`` when ffmpeg could not
            be started or exited with a non-zero status.
        """
        import subprocess

        output_video_path = os.path.join(save_dir, '..',
                                         '{}_vis.mp4'.format(seq))
        cmd = [
            'ffmpeg', '-f', 'image2', '-i', '{}/%05d.jpg'.format(save_dir),
            output_video_path
        ]
        try:
            returncode = subprocess.call(cmd)
        except OSError as e:
            # Video export stays best-effort, as it was with os.system.
            logger.warning('Failed to run ffmpeg: {}'.format(e))
            return None
        if returncode != 0:
            logger.warning('ffmpeg exited with code {} for {}.'.format(
                returncode, output_video_path))
            return None
        return output_video_path

    def _eval_seq_jde(self,
                      dataloader,
                      save_dir=None,
                      show_image=False,
                      frame_rate=30,
                      draw_threshold=0):
        """Track one sequence with a joint detection/embedding model.

        Args:
            dataloader: Iterable yielding one batch per frame.
            save_dir (str|None): Directory for visualisations; created if set.
            show_image (bool): Forwarded to ``save_vis_results``.
            frame_rate (int): Scales ``tracker.max_time_lost`` relative to 30.
            draw_threshold (float): Accepted for interface symmetry with
                :meth:`_eval_seq_sde`; not used by this method.

        Returns:
            tuple: ``(results, frame_count, timer.average_time, timer.calls)``
            where ``results`` maps class id to a list of
            ``(frame_id + 1, tlwhs, scores, ids)`` tuples.
        """
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)
        tracker = self.model.tracker
        tracker.max_time_lost = int(frame_rate / 30.0 * tracker.track_buffer)

        timer = MOTTimer()
        frame_id = 0
        self.status['mode'] = 'track'
        self.model.eval()
        results = defaultdict(list)  # support single class and multi classes

        for step_id, data in enumerate(dataloader):
            self.status['step_id'] = step_id
            if frame_id % 40 == 0:
                logger.info('Processing frame {} ({:.2f} fps)'.format(
                    frame_id, 1. / max(1e-5, timer.average_time)))
            # forward
            timer.tic()
            pred_dets, pred_embs = self.model(data)

            pred_dets, pred_embs = pred_dets.numpy(), pred_embs.numpy()
            online_targets_dict = self.model.tracker.update(pred_dets,
                                                            pred_embs)
            online_tlwhs = defaultdict(list)
            online_scores = defaultdict(list)
            online_ids = defaultdict(list)
            for cls_id in range(self.cfg.num_classes):
                online_targets = online_targets_dict[cls_id]
                for t in online_targets:
                    tlwh = t.tlwh
                    tid = t.track_id
                    tscore = t.score
                    # Drop boxes that are too small (w * h).
                    if tlwh[2] * tlwh[3] <= tracker.min_box_area:
                        continue
                    # Drop boxes whose w / h ratio exceeds the limit.
                    if tracker.vertical_ratio > 0 and tlwh[2] / tlwh[
                            3] > tracker.vertical_ratio:
                        continue
                    online_tlwhs[cls_id].append(tlwh)
                    online_ids[cls_id].append(tid)
                    online_scores[cls_id].append(tscore)
                # save results
                results[cls_id].append(
                    (frame_id + 1, online_tlwhs[cls_id], online_scores[cls_id],
                     online_ids[cls_id]))

            timer.toc()
            save_vis_results(data, frame_id, online_ids, online_tlwhs,
                             online_scores, timer.average_time, show_image,
                             save_dir, self.cfg.num_classes)
            frame_id += 1

        return results, frame_id, timer.average_time, timer.calls

    def _eval_seq_sde(self,
                      dataloader,
                      save_dir=None,
                      show_image=False,
                      frame_rate=30,
                      seq_name='',
                      scaled=False,
                      det_file='',
                      draw_threshold=0):
        """Track one sequence with separate detection and ReID stages.

        Detections come from ``self.model.detector`` when it is set, otherwise
        from ``det_file`` via ``load_det_results``.

        Args:
            dataloader: Iterable yielding one batch per frame.
            save_dir (str|None): Directory for visualisations; created if set.
            show_image (bool): Forwarded to ``save_vis_results``.
            frame_rate (int): Not used by this method.
            seq_name (str): Not used by this method.
            scaled (bool): Whether detector boxes are already in original
                image coordinates; if False they are passed through
                ``scale_coords``.
            det_file (str): Detection result file, used without a detector.
            draw_threshold (float): Tracks scoring below it are not reported.

        Returns:
            tuple: ``(results, frame_count, timer.average_time, timer.calls)``
            where ``results[0]`` is a list of
            ``(frame_id + 1, tlwhs, scores, ids)`` tuples.
        """
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)
        use_detector = bool(self.model.detector)

        timer = MOTTimer()
        results = defaultdict(list)
        frame_id = 0
        self.status['mode'] = 'track'
        self.model.eval()
        self.model.reid.eval()
        if not use_detector:
            dets_list = load_det_results(det_file, len(dataloader))
            logger.info('Finish loading detection results file {}.'.format(
                det_file))

        for step_id, data in enumerate(dataloader):
            self.status['step_id'] = step_id
            if frame_id % 40 == 0:
                logger.info('Processing frame {} ({:.2f} fps)'.format(
                    frame_id, 1. / max(1e-5, timer.average_time)))

            ori_image = data['ori_image']  # [bs, H, W, 3]
            ori_image_shape = data['ori_image'].shape[1:3]
            # ori_image_shape: [H, W]

            input_shape = data['image'].shape[2:]
            # input_shape: [h, w], before data transforms, set in model config

            im_shape = data['im_shape'][0].numpy()
            # im_shape: [new_h, new_w], after data transforms
            scale_factor = data['scale_factor'][0].numpy()

            empty_detections = False
            # when it has no detected bboxes, will not inference reid model
            # and if visualize, use original image instead

            # forward
            timer.tic()
            if not use_detector:
                dets = dets_list[frame_id]
                bbox_tlwh = np.array(dets['bbox'], dtype='float32')
                if bbox_tlwh.shape[0] > 0:
                    # detector outputs: pred_cls_ids, pred_scores, pred_bboxes
                    pred_cls_ids = np.array(dets['cls_id'], dtype='float32')
                    pred_scores = np.array(dets['score'], dtype='float32')
                    # tlwh -> xyxy
                    pred_bboxes = np.concatenate(
                        (bbox_tlwh[:, 0:2],
                         bbox_tlwh[:, 2:4] + bbox_tlwh[:, 0:2]),
                        axis=1)
                else:
                    logger.warning(
                        'Frame {} has not object, try to modify score threshold.'.
                        format(frame_id))
                    empty_detections = True
            else:
                outs = self.model.detector(data)
                outs['bbox'] = outs['bbox'].numpy()
                outs['bbox_num'] = outs['bbox_num'].numpy()

                if outs['bbox_num'] > 0 and not empty_detections:
                    # detector outputs: pred_cls_ids, pred_scores, pred_bboxes
                    pred_cls_ids = outs['bbox'][:, 0:1]
                    pred_scores = outs['bbox'][:, 1:2]
                    if not scaled:
                        # Note: scaled=False only in JDE YOLOv3 or other detectors
                        # with LetterBoxResize and JDEBBoxPostProcess.
                        #
                        # 'scaled' means whether the coords after detector outputs
                        # have been scaled back to the original image, set True
                        # in general detector, set False in JDE YOLOv3.
                        pred_bboxes = scale_coords(outs['bbox'][:, 2:],
                                                   input_shape, im_shape,
                                                   scale_factor)
                    else:
                        pred_bboxes = outs['bbox'][:, 2:]
                else:
                    logger.warning(
                        'Frame {} has not detected object, try to modify score threshold.'.
                        format(frame_id))
                    empty_detections = True

            if not empty_detections:
                pred_xyxys, keep_idx = clip_box(pred_bboxes, ori_image_shape)
                if len(keep_idx[0]) == 0:
                    logger.warning(
                        'Frame {} has not detected object left after clip_box.'.
                        format(frame_id))
                    empty_detections = True

            if empty_detections:
                timer.toc()
                # if visualize, use original image instead
                online_ids, online_tlwhs, online_scores = None, None, None
                save_vis_results(data, frame_id, online_ids, online_tlwhs,
                                 online_scores, timer.average_time, show_image,
                                 save_dir, self.cfg.num_classes)
                frame_id += 1
                # thus will not inference reid model
                continue

            pred_scores = pred_scores[keep_idx[0]]
            pred_cls_ids = pred_cls_ids[keep_idx[0]]
            # xyxy -> tlwh
            pred_tlwhs = np.concatenate(
                (pred_xyxys[:, 0:2],
                 pred_xyxys[:, 2:4] - pred_xyxys[:, 0:2] + 1),
                axis=1)
            pred_dets = np.concatenate(
                (pred_tlwhs, pred_scores, pred_cls_ids), axis=1)

            tracker = self.model.tracker
            crops = get_crops(
                pred_xyxys,
                ori_image,
                w=tracker.input_size[0],
                h=tracker.input_size[1])
            crops = paddle.to_tensor(crops)

            data.update({'crops': crops})
            pred_embs = self.model(data).numpy()

            tracker.predict()
            online_targets = tracker.update(pred_dets, pred_embs)

            online_tlwhs, online_scores, online_ids = [], [], []
            for t in online_targets:
                if not t.is_confirmed() or t.time_since_update > 1:
                    continue
                tlwh = t.to_tlwh()
                tscore = t.score
                tid = t.track_id
                if tscore < draw_threshold:
                    continue
                if tlwh[2] * tlwh[3] <= tracker.min_box_area:
                    continue
                if tracker.vertical_ratio > 0 and tlwh[2] / tlwh[
                        3] > tracker.vertical_ratio:
                    continue
                online_tlwhs.append(tlwh)
                online_scores.append(tscore)
                online_ids.append(tid)
            timer.toc()

            # save results
            results[0].append(
                (frame_id + 1, online_tlwhs, online_scores, online_ids))
            save_vis_results(data, frame_id, online_ids, online_tlwhs,
                             online_scores, timer.average_time, show_image,
                             save_dir, self.cfg.num_classes)
            frame_id += 1

        return results, frame_id, timer.average_time, timer.calls

    def mot_evaluate(self,
                     data_root,
                     seqs,
                     output_dir,
                     data_type='mot',
                     model_type='JDE',
                     save_images=False,
                     save_videos=False,
                     show_image=False,
                     scaled=False,
                     det_results_dir=''):
        """Track every sequence in ``seqs`` and update/log the metrics.

        Args:
            data_root (str): Directory containing one sub-directory per
                sequence.
            seqs: Iterable of sequence names.
            output_dir (str): Root for ``mot_results`` and ``mot_outputs``.
            data_type (str): ``'mot'``, ``'mcmot'`` or ``'kitti'``.
            model_type (str): ``'JDE'``, ``'DeepSORT'`` or ``'FairMOT'``.
            save_images (bool): Keep per-frame visualisations.
            save_videos (bool): Additionally encode them with ffmpeg.
            show_image (bool): Forwarded to the per-sequence routine.
            scaled (bool): Forwarded to :meth:`_eval_seq_sde`.
            det_results_dir (str): Directory with ``<seq>.txt`` detection
                files, used by DeepSORT.
        """
        os.makedirs(output_dir, exist_ok=True)
        result_root = os.path.join(output_dir, 'mot_results')
        os.makedirs(result_root, exist_ok=True)
        assert data_type in ['mot', 'mcmot', 'kitti'], \
            "data_type should be 'mot', 'mcmot' or 'kitti'"
        assert model_type in ['JDE', 'DeepSORT', 'FairMOT'], \
            "model_type should be 'JDE', 'DeepSORT' or 'FairMOT'"

        # run tracking
        n_frame = 0
        timer_avgs, timer_calls = [], []
        for seq in seqs:
            infer_dir = os.path.join(data_root, seq)
            if not os.path.exists(infer_dir) or not os.path.isdir(infer_dir):
                logger.warning("Seq {} error, {} has no images.".format(
                    seq, infer_dir))
                continue
            if os.path.exists(os.path.join(infer_dir, 'img1')):
                infer_dir = os.path.join(infer_dir, 'img1')

            frame_rate = 30
            seqinfo = os.path.join(data_root, seq, 'seqinfo.ini')
            if os.path.exists(seqinfo):
                frame_rate = self._read_frame_rate(seqinfo, frame_rate)

            save_dir = os.path.join(
                output_dir, 'mot_outputs',
                seq) if save_images or save_videos else None
            logger.info('start seq: {}'.format(seq))

            self.dataset.set_images(self.get_infer_images(infer_dir))
            dataloader = create('EvalMOTReader')(self.dataset, 0)

            result_filename = os.path.join(result_root, '{}.txt'.format(seq))

            with paddle.no_grad():
                if model_type in ['JDE', 'FairMOT']:
                    results, nf, ta, tc = self._eval_seq_jde(
                        dataloader,
                        save_dir=save_dir,
                        show_image=show_image,
                        frame_rate=frame_rate)
                elif model_type in ['DeepSORT']:
                    results, nf, ta, tc = self._eval_seq_sde(
                        dataloader,
                        save_dir=save_dir,
                        show_image=show_image,
                        frame_rate=frame_rate,
                        seq_name=seq,
                        scaled=scaled,
                        det_file=os.path.join(det_results_dir,
                                              '{}.txt'.format(seq)))
                else:
                    raise ValueError(model_type)

            write_mot_results(result_filename, results, data_type,
                              self.cfg.num_classes)
            n_frame += nf
            timer_avgs.append(ta)
            timer_calls.append(tc)

            if save_videos:
                output_video_path = self._frames_to_video(save_dir, seq)
                if output_video_path:
                    logger.info('Save video in {}.'.format(output_video_path))

            logger.info('Evaluate seq: {}'.format(seq))
            # update metrics
            for metric in self._metrics:
                metric.update(data_root, seq, data_type, result_root,
                              result_filename)

        timer_avgs = np.asarray(timer_avgs)
        timer_calls = np.asarray(timer_calls)
        # Total time is the call-weighted sum of per-sequence averages.
        all_time = np.dot(timer_avgs, timer_calls)
        avg_time = all_time / np.sum(timer_calls)
        logger.info('Time elapsed: {:.2f} seconds, FPS: {:.2f}'.format(
            all_time, 1.0 / avg_time))

        # accumulate metric to log out
        for metric in self._metrics:
            metric.accumulate()
            metric.log()
        # reset metric states for metric may performed multiple times
        self._reset_metrics()

    def get_infer_images(self, infer_dir):
        """Return the sorted image paths found directly inside ``infer_dir``.

        Files are matched by the extensions jpg/jpeg/png/bmp in all-lower or
        all-upper case.

        Raises:
            AssertionError: If ``infer_dir`` is not a directory or holds no
                matching image.
        """
        assert infer_dir is not None and os.path.isdir(infer_dir), \
            "{} is not a directory".format(infer_dir)
        exts = ['jpg', 'jpeg', 'png', 'bmp']
        exts += [ext.upper() for ext in exts]
        # A set removes duplicates on case-insensitive file systems.
        images = set()
        for ext in exts:
            images.update(glob.glob('{}/*.{}'.format(infer_dir, ext)))
        images = sorted(images)
        assert len(images) > 0, "no image found in {}".format(infer_dir)
        logger.info("Found {} inference images in total.".format(len(images)))
        return images

    def mot_predict_seq(self,
                        video_file,
                        frame_rate,
                        image_dir,
                        output_dir,
                        data_type='mot',
                        model_type='JDE',
                        save_images=False,
                        save_videos=True,
                        show_image=False,
                        scaled=False,
                        det_results_dir='',
                        draw_threshold=0.5):
        """Track a single video file or image directory and write the results.

        Args:
            video_file (str|None): Video to track; takes precedence over
                ``image_dir``.
            frame_rate (int): Frame rate; ``-1`` means use
                ``self.dataset.frame_rate``.
            image_dir (str|None): Directory of frames (an ``img1``
                sub-directory is used when present).
            output_dir (str): Root for ``mot_results`` and ``mot_outputs``.
            data_type (str): ``'mot'``, ``'mcmot'`` or ``'kitti'``.
            model_type (str): ``'JDE'``, ``'DeepSORT'`` or ``'FairMOT'``.
            save_images (bool): Keep per-frame visualisations.
            save_videos (bool): Additionally encode them with ffmpeg.
            show_image (bool): Forwarded to the per-sequence routine.
            scaled (bool): Forwarded to :meth:`_eval_seq_sde`.
            det_results_dir (str): Directory with ``<seq>.txt`` detection
                files, used by DeepSORT.
            draw_threshold (float): Forwarded to the per-sequence routine.
        """
        assert video_file is not None or image_dir is not None, \
            "--video_file or --image_dir should be set."
        assert video_file is None or os.path.isfile(video_file), \
            "{} is not a file".format(video_file)
        assert image_dir is None or os.path.isdir(image_dir), \
            "{} is not a directory".format(image_dir)

        os.makedirs(output_dir, exist_ok=True)
        result_root = os.path.join(output_dir, 'mot_results')
        os.makedirs(result_root, exist_ok=True)
        assert data_type in ['mot', 'mcmot', 'kitti'], \
            "data_type should be 'mot', 'mcmot' or 'kitti'"
        assert model_type in ['JDE', 'DeepSORT', 'FairMOT'], \
            "model_type should be 'JDE', 'DeepSORT' or 'FairMOT'"

        # run tracking
        if video_file:
            seq = self._seq_name_from_path(video_file)
            self.dataset.set_video(video_file, frame_rate)
            logger.info('Starting tracking video {}'.format(video_file))
        elif image_dir:
            # Derived before the optional 'img1' descent so the sequence is
            # named after the user-supplied directory.
            seq = self._seq_name_from_path(image_dir)
            if os.path.exists(os.path.join(image_dir, 'img1')):
                image_dir = os.path.join(image_dir, 'img1')
            images = [
                '{}/{}'.format(image_dir, x) for x in os.listdir(image_dir)
            ]
            images.sort()
            self.dataset.set_images(images)
            logger.info('Starting tracking folder {}, found {} images'.format(
                image_dir, len(images)))
        else:
            raise ValueError('--video_file or --image_dir should be set.')

        save_dir = os.path.join(output_dir, 'mot_outputs',
                                seq) if save_images or save_videos else None

        dataloader = create('TestMOTReader')(self.dataset, 0)
        result_filename = os.path.join(result_root, '{}.txt'.format(seq))
        if frame_rate == -1:
            frame_rate = self.dataset.frame_rate

        with paddle.no_grad():
            if model_type in ['JDE', 'FairMOT']:
                results, nf, ta, tc = self._eval_seq_jde(
                    dataloader,
                    save_dir=save_dir,
                    show_image=show_image,
                    frame_rate=frame_rate,
                    draw_threshold=draw_threshold)
            elif model_type in ['DeepSORT']:
                results, nf, ta, tc = self._eval_seq_sde(
                    dataloader,
                    save_dir=save_dir,
                    show_image=show_image,
                    frame_rate=frame_rate,
                    seq_name=seq,
                    scaled=scaled,
                    det_file=os.path.join(det_results_dir,
                                          '{}.txt'.format(seq)),
                    draw_threshold=draw_threshold)
            else:
                raise ValueError(model_type)

        if save_videos:
            output_video_path = self._frames_to_video(save_dir, seq)
            if output_video_path:
                logger.info('Save video in {}'.format(output_video_path))

        write_mot_results(result_filename, results, data_type,
                          self.cfg.num_classes)
@ -0,0 +1,742 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
import os |
||||
import sys |
||||
import copy |
||||
import time |
||||
|
||||
import numpy as np |
||||
import typing |
||||
from PIL import Image, ImageOps |
||||
|
||||
import paddle |
||||
import paddle.distributed as dist |
||||
from paddle.distributed import fleet |
||||
from paddle import amp |
||||
from paddle.static import InputSpec |
||||
from paddlers.models.ppdet.optimizer import ModelEMA |
||||
|
||||
from paddlers.models.ppdet.core.workspace import create |
||||
from paddlers.models.ppdet.modeling.architectures.meta_arch import BaseArch |
||||
from paddlers.models.ppdet.utils.checkpoint import load_weight, load_pretrain_weight |
||||
from paddlers.models.ppdet.utils.visualizer import visualize_results, save_result |
||||
from paddlers.models.ppdet.metrics import Metric, COCOMetric, VOCMetric, WiderFaceMetric, get_infer_results, KeyPointTopDownCOCOEval, KeyPointTopDownMPIIEval |
||||
from paddlers.models.ppdet.metrics import RBoxMetric, JDEDetMetric, SNIPERCOCOMetric |
||||
from paddlers.models.ppdet.data.source.sniper_coco import SniperCOCODataSet |
||||
from paddlers.models.ppdet.data.source.category import get_categories |
||||
from paddlers.models.ppdet.utils import stats |
||||
from paddlers.models.ppdet.utils import profiler |
||||
|
||||
from .callbacks import Callback, ComposeCallback, LogPrinter, Checkpointer, WiferFaceEval, VisualDLWriter, SniperProposalsGenerator |
||||
from .export_utils import _dump_infer_config, _prune_input_spec |
||||
|
||||
from paddlers.models.ppdet.utils.logger import setup_logger |
||||
logger = setup_logger('ppdet.engine') |
||||
|
||||
__all__ = ['Trainer'] |
||||
|
||||
# Architectures for which Trainer selects the '<Mode>MOTDataset' config entry
# in eval/test mode (compared by exact match against cfg.architecture).
MOT_ARCH = ['DeepSORT', 'JDE', 'FairMOT']
||||
|
||||
|
||||
class Trainer(object): |
||||
def __init__(self, cfg, mode='train'):
    """Build dataset, loader, model, optional EMA and optimizer from ``cfg``.

    Args:
        cfg: Configuration object, indexed by key and read by attribute.
            Some entries are written during construction (JDE/FairMOT
            identity counts, ``collate_batch`` for the VOC metric).
        mode (str): ``'train'``, ``'eval'`` or ``'test'`` (case-insensitive).

    Note:
        Statement order matters: the embedding-head config entries are
        filled in from the dataset before the model is created, and the
        optimizer is built from the length of the train loader.
    """
    self.cfg = cfg
    assert mode.lower() in ['train', 'eval', 'test'], \
        "mode should be 'train', 'eval' or 'test'"
    self.mode = mode.lower()
    self.optimizer = None
    self.is_loaded_weights = False

    # build data loader
    # MOT architectures use a dedicated '<Mode>MOTDataset' entry for
    # eval/test; everything else uses '<Mode>Dataset'.
    if cfg.architecture in MOT_ARCH and self.mode in ['eval', 'test']:
        self.dataset = cfg['{}MOTDataset'.format(self.mode.capitalize())]
    else:
        self.dataset = cfg['{}Dataset'.format(self.mode.capitalize())]

    # Training DeepSORT is rejected: log the error and exit with status 1.
    if cfg.architecture == 'DeepSORT' and self.mode == 'train':
        logger.error('DeepSORT has no need of training on mot dataset.')
        sys.exit(1)

    if self.mode == 'train':
        self.loader = create('{}Reader'.format(self.mode.capitalize()))(
            self.dataset, cfg.worker_num)

    # The identity counts come from the training dataset and are written
    # into the config before the model is created below.
    if cfg.architecture == 'JDE' and self.mode == 'train':
        cfg['JDEEmbeddingHead'][
            'num_identities'] = self.dataset.num_identities_dict[0]
        # JDE only support single class MOT now.

    if cfg.architecture == 'FairMOT' and self.mode == 'train':
        cfg['FairMOTEmbeddingHead'][
            'num_identities_dict'] = self.dataset.num_identities_dict
        # FairMOT support single class and multi-class MOT now.

    # build model
    # A model supplied under cfg['model'] is used as-is and flagged as
    # already having its weights loaded.
    if 'model' not in self.cfg:
        self.model = create(cfg.architecture)
    else:
        self.model = self.cfg.model
        self.is_loaded_weights = True

    #normalize params for deploy
    self.model.load_meanstd(cfg['TestReader']['sample_transforms'])

    # Optional EMA wrapper around the model (defaults: decay 0.9998,
    # cycle_epoch -1).
    self.use_ema = ('use_ema' in cfg and cfg['use_ema'])
    if self.use_ema:
        ema_decay = self.cfg.get('ema_decay', 0.9998)
        cycle_epoch = self.cfg.get('cycle_epoch', -1)
        self.ema = ModelEMA(
            self.model,
            decay=ema_decay,
            use_thres_step=True,
            cycle_epoch=cycle_epoch)

    # EvalDataset build with BatchSampler to evaluate in single device
    # TODO: multi-device evaluate
    if self.mode == 'eval':
        self._eval_batch_sampler = paddle.io.BatchSampler(
            self.dataset, batch_size=self.cfg.EvalReader['batch_size'])
        reader_name = '{}Reader'.format(self.mode.capitalize())
        # If metric is VOC, need to be set collate_batch=False.
        if cfg.metric == 'VOC':
            cfg[reader_name]['collate_batch'] = False
        self.loader = create(reader_name)(self.dataset, cfg.worker_num,
                                          self._eval_batch_sampler)
    # TestDataset build after user set images, skip loader creation here

    # build optimizer in train mode
    if self.mode == 'train':
        steps_per_epoch = len(self.loader)
        self.lr = create('LearningRate')(steps_per_epoch)
        self.optimizer = create('OptimizerBuilder')(self.lr, self.model)

        if self.cfg.get('unstructured_prune'):
            self.pruner = create('UnstructuredPruner')(self.model,
                                                       steps_per_epoch)

    # Distributed world size and rank of this process.
    self._nranks = dist.get_world_size()
    self._local_rank = dist.get_rank()

    self.status = {}

    self.start_epoch = 0
    # end_epoch falls back to 0 when the config has no 'epoch' entry.
    self.end_epoch = 0 if 'epoch' not in cfg else cfg.epoch

    # initial default callbacks
    self._init_callbacks()

    # initial default metrics
    self._init_metrics()
    self._reset_metrics()
||||
|
||||
def _init_callbacks(self): |
||||
if self.mode == 'train': |
||||
self._callbacks = [LogPrinter(self), Checkpointer(self)] |
||||
if self.cfg.get('use_vdl', False): |
||||
self._callbacks.append(VisualDLWriter(self)) |
||||
if self.cfg.get('save_proposals', False): |
||||
self._callbacks.append(SniperProposalsGenerator(self)) |
||||
self._compose_callback = ComposeCallback(self._callbacks) |
||||
elif self.mode == 'eval': |
||||
self._callbacks = [LogPrinter(self)] |
||||
if self.cfg.metric == 'WiderFace': |
||||
self._callbacks.append(WiferFaceEval(self)) |
||||
self._compose_callback = ComposeCallback(self._callbacks) |
||||
elif self.mode == 'test' and self.cfg.get('use_vdl', False): |
||||
self._callbacks = [VisualDLWriter(self)] |
||||
self._compose_callback = ComposeCallback(self._callbacks) |
||||
else: |
||||
self._callbacks = [] |
||||
self._compose_callback = None |
||||
|
||||
def _init_metrics(self, validate=False): |
||||
if self.mode == 'test' or (self.mode == 'train' and not validate): |
||||
self._metrics = [] |
||||
return |
||||
classwise = self.cfg['classwise'] if 'classwise' in self.cfg else False |
||||
if self.cfg.metric == 'COCO' or self.cfg.metric == "SNIPERCOCO": |
||||
# TODO: bias should be unified |
||||
bias = self.cfg['bias'] if 'bias' in self.cfg else 0 |
||||
output_eval = self.cfg['output_eval'] \ |
||||
if 'output_eval' in self.cfg else None |
||||
save_prediction_only = self.cfg.get('save_prediction_only', False) |
||||
|
||||
# pass clsid2catid info to metric instance to avoid multiple loading |
||||
# annotation file |
||||
clsid2catid = {v: k for k, v in self.dataset.catid2clsid.items()} \ |
||||
if self.mode == 'eval' else None |
||||
|
||||
# when do validation in train, annotation file should be get from |
||||
# EvalReader instead of self.dataset(which is TrainReader) |
||||
anno_file = self.dataset.get_anno() |
||||
dataset = self.dataset |
||||
if self.mode == 'train' and validate: |
||||
eval_dataset = self.cfg['EvalDataset'] |
||||
eval_dataset.check_or_download_dataset() |
||||
anno_file = eval_dataset.get_anno() |
||||
dataset = eval_dataset |
||||
|
||||
IouType = self.cfg['IouType'] if 'IouType' in self.cfg else 'bbox' |
||||
if self.cfg.metric == "COCO": |
||||
self._metrics = [ |
||||
COCOMetric( |
||||
anno_file=anno_file, |
||||
clsid2catid=clsid2catid, |
||||
classwise=classwise, |
||||
output_eval=output_eval, |
||||
bias=bias, |
||||
IouType=IouType, |
||||
save_prediction_only=save_prediction_only) |
||||
] |
||||
elif self.cfg.metric == "SNIPERCOCO": # sniper |
||||
self._metrics = [ |
||||
SNIPERCOCOMetric( |
||||
anno_file=anno_file, |
||||
dataset=dataset, |
||||
clsid2catid=clsid2catid, |
||||
classwise=classwise, |
||||
output_eval=output_eval, |
||||
bias=bias, |
||||
IouType=IouType, |
||||
save_prediction_only=save_prediction_only) |
||||
] |
||||
elif self.cfg.metric == 'RBOX': |
||||
# TODO: bias should be unified |
||||
bias = self.cfg['bias'] if 'bias' in self.cfg else 0 |
||||
output_eval = self.cfg['output_eval'] \ |
||||
if 'output_eval' in self.cfg else None |
||||
save_prediction_only = self.cfg.get('save_prediction_only', False) |
||||
|
||||
# pass clsid2catid info to metric instance to avoid multiple loading |
||||
# annotation file |
||||
clsid2catid = {v: k for k, v in self.dataset.catid2clsid.items()} \ |
||||
if self.mode == 'eval' else None |
||||
|
||||
# when do validation in train, annotation file should be get from |
||||
# EvalReader instead of self.dataset(which is TrainReader) |
||||
anno_file = self.dataset.get_anno() |
||||
if self.mode == 'train' and validate: |
||||
eval_dataset = self.cfg['EvalDataset'] |
||||
eval_dataset.check_or_download_dataset() |
||||
anno_file = eval_dataset.get_anno() |
||||
|
||||
self._metrics = [ |
||||
RBoxMetric( |
||||
anno_file=anno_file, |
||||
clsid2catid=clsid2catid, |
||||
classwise=classwise, |
||||
output_eval=output_eval, |
||||
bias=bias, |
||||
save_prediction_only=save_prediction_only) |
||||
] |
||||
elif self.cfg.metric == 'VOC': |
||||
self._metrics = [ |
||||
VOCMetric( |
||||
label_list=self.dataset.get_label_list(), |
||||
class_num=self.cfg.num_classes, |
||||
map_type=self.cfg.map_type, |
||||
classwise=classwise) |
||||
] |
||||
elif self.cfg.metric == 'WiderFace': |
||||
multi_scale = self.cfg.multi_scale_eval if 'multi_scale_eval' in self.cfg else True |
||||
self._metrics = [ |
||||
WiderFaceMetric( |
||||
image_dir=os.path.join(self.dataset.dataset_dir, |
||||
self.dataset.image_dir), |
||||
anno_file=self.dataset.get_anno(), |
||||
multi_scale=multi_scale) |
||||
] |
||||
elif self.cfg.metric == 'KeyPointTopDownCOCOEval': |
||||
eval_dataset = self.cfg['EvalDataset'] |
||||
eval_dataset.check_or_download_dataset() |
||||
anno_file = eval_dataset.get_anno() |
||||
save_prediction_only = self.cfg.get('save_prediction_only', False) |
||||
self._metrics = [ |
||||
KeyPointTopDownCOCOEval( |
||||
anno_file, |
||||
len(eval_dataset), |
||||
self.cfg.num_joints, |
||||
self.cfg.save_dir, |
||||
save_prediction_only=save_prediction_only) |
||||
] |
||||
elif self.cfg.metric == 'KeyPointTopDownMPIIEval': |
||||
eval_dataset = self.cfg['EvalDataset'] |
||||
eval_dataset.check_or_download_dataset() |
||||
anno_file = eval_dataset.get_anno() |
||||
save_prediction_only = self.cfg.get('save_prediction_only', False) |
||||
self._metrics = [ |
||||
KeyPointTopDownMPIIEval( |
||||
anno_file, |
||||
len(eval_dataset), |
||||
self.cfg.num_joints, |
||||
self.cfg.save_dir, |
||||
save_prediction_only=save_prediction_only) |
||||
] |
||||
elif self.cfg.metric == 'MOTDet': |
||||
self._metrics = [JDEDetMetric(), ] |
||||
else: |
||||
logger.warning("Metric not support for metric type {}".format( |
||||
self.cfg.metric)) |
||||
self._metrics = [] |
||||
|
||||
def _reset_metrics(self): |
||||
for metric in self._metrics: |
||||
metric.reset() |
||||
|
||||
def register_callbacks(self, callbacks):
    """Append extra callbacks and rebuild the composed callback.

    Args:
        callbacks (iterable): Callback instances to register; ``None``
            entries are ignored.

    Raises:
        AssertionError: If a non-``None`` entry is not a ``Callback``.
    """
    callbacks = [c for c in callbacks if c is not None]
    for c in callbacks:
        # The message previously described metrics (copy-paste from
        # register_metrics); it now names what is actually checked.
        assert isinstance(c, Callback), \
            "callbacks should be instances of subclass of Callback"
    self._callbacks.extend(callbacks)
    self._compose_callback = ComposeCallback(self._callbacks)
||||
|
||||
def register_metrics(self, metrics):
    """Append extra metrics to ``self._metrics``.

    Args:
        metrics (iterable): Metric instances to register; ``None``
            entries are ignored.

    Raises:
        AssertionError: If a non-``None`` entry is not a ``Metric``.
    """
    kept = list(filter(lambda item: item is not None, list(metrics)))
    # Validate everything before mutating the registered list.
    for candidate in kept:
        assert isinstance(candidate, Metric), \
            "metrics shoule be instances of subclass of Metric"
    self._metrics.extend(kept)
||||
|
||||
def load_weights(self, weights):
    """Load pretrained weights into the model unless already loaded.

    Does nothing when ``self.is_loaded_weights`` is truthy. Otherwise
    resets ``self.start_epoch`` to 0 and loads ``weights`` into
    ``self.model``.

    Args:
        weights: Weights reference forwarded to ``load_pretrain_weight``.
    """
    if not self.is_loaded_weights:
        self.start_epoch = 0
        load_pretrain_weight(self.model, weights)
        logger.debug("Load weights {} to start training".format(weights))
||||
|
||||
def load_weights_sde(self, det_weights, reid_weights):
    """Load detector (when present) and ReID weights into the model.

    Args:
        det_weights: Weights for ``self.model.detector``; only used when
            the detector attribute is truthy.
        reid_weights: Weights for ``self.model.reid``; always loaded.
    """
    # The detector is optional; when it exists it is loaded first.
    if self.model.detector:
        load_weight(self.model.detector, det_weights)
    load_weight(self.model.reid, reid_weights)
||||
|
||||
def resume_weights(self, weights):
    """Resume model and optimizer state and record the start epoch.

    ``self.start_epoch`` is set to the value returned by ``load_weight``.

    Args:
        weights: Checkpoint reference forwarded to ``load_weight``.
    """
    # support Distill resume weights
    if hasattr(self.model, 'student_model'):
        resume_target = self.model.student_model
    else:
        resume_target = self.model
    self.start_epoch = load_weight(resume_target, weights, self.optimizer)
    logger.debug("Resume weights of epoch {}".format(self.start_epoch))
||||
|
||||
def train(self, validate=False):
    """Run the training loop from ``self.start_epoch`` to ``cfg.epoch``.

    Args:
        validate (bool): When True, evaluation on ``cfg.EvalDataset`` is
            run every ``cfg.snapshot_epoch`` epochs and on the epoch
            before ``self.end_epoch``, on a single process or rank 0.

    Raises:
        AssertionError: If the trainer is not in 'train' mode.
    """
    assert self.mode == 'train', "Model not in 'train' mode"
    metrics_initialized = False

    use_sync_bn = (
        getattr(self.cfg, 'norm_type', None) in [None, 'sync_bn'] and
        self.cfg.use_gpu and self._nranks > 1)
    if use_sync_bn:
        self.model = BaseArch.convert_sync_batchnorm(self.model)

    # `model` is the (possibly wrapped) object used for forward passes;
    # `self.model` stays the underlying network.
    model = self.model
    if self.cfg.get('fleet', False):
        model = fleet.distributed_model(model)
        self.optimizer = fleet.distributed_optimizer(self.optimizer)
    elif self._nranks > 1:
        find_unused_parameters = self.cfg.get('find_unused_parameters',
                                              False)
        model = paddle.DataParallel(
            self.model, find_unused_parameters=find_unused_parameters)

    # initial fp16
    if self.cfg.get('fp16', False):
        scaler = amp.GradScaler(
            enable=self.cfg.use_gpu, init_loss_scaling=1024)

    self.status.update({
        'epoch_id': self.start_epoch,
        'step_id': 0,
        'steps_per_epoch': len(self.loader)
    })

    self.status['batch_time'] = stats.SmoothedValue(
        self.cfg.log_iter, fmt='{avg:.4f}')
    self.status['data_time'] = stats.SmoothedValue(
        self.cfg.log_iter, fmt='{avg:.4f}')
    self.status['training_staus'] = stats.TrainingStats(self.cfg.log_iter)

    if self.cfg.get('print_flops', False):
        reader_name = '{}Reader'.format(self.mode.capitalize())
        flops_loader = create(reader_name)(self.dataset,
                                           self.cfg.worker_num)
        self._flops(flops_loader)
    profiler_options = self.cfg.get('profiler_options', None)

    self._compose_callback.on_train_begin(self.status)

    for epoch_id in range(self.start_epoch, self.cfg.epoch):
        self.status['mode'] = 'train'
        self.status['epoch_id'] = epoch_id
        self._compose_callback.on_epoch_begin(self.status)
        self.loader.dataset.set_epoch(epoch_id)
        model.train()
        step_start = time.time()
        for step_id, data in enumerate(self.loader):
            self.status['data_time'].update(time.time() - step_start)
            self.status['step_id'] = step_id
            profiler.add_profiler_step(profiler_options)
            self._compose_callback.on_step_begin(self.status)
            data['epoch_id'] = epoch_id

            if self.cfg.get('fp16', False):
                with amp.auto_cast(enable=self.cfg.use_gpu):
                    # model forward
                    outputs = model(data)
                    loss = outputs['loss']

                # model backward
                scaled_loss = scaler.scale(loss)
                scaled_loss.backward()
                # in dygraph mode, optimizer.minimize is equal to optimizer.step
                scaler.minimize(self.optimizer, scaled_loss)
            else:
                # model forward
                outputs = model(data)
                loss = outputs['loss']
                # model backward
                loss.backward()
                self.optimizer.step()

            # Read the learning rate before the scheduler advances.
            curr_lr = self.optimizer.get_lr()
            self.lr.step()
            if self.cfg.get('unstructured_prune'):
                self.pruner.step()
            self.optimizer.clear_grad()
            self.status['learning_rate'] = curr_lr

            if self._nranks < 2 or self._local_rank == 0:
                self.status['training_staus'].update(outputs)

            self.status['batch_time'].update(time.time() - step_start)
            self._compose_callback.on_step_end(self.status)
            if self.use_ema:
                self.ema.update(self.model)
            step_start = time.time()

        # apply ema weight on model
        if self.use_ema:
            origin_weights = copy.deepcopy(self.model.state_dict())
            self.model.set_dict(self.ema.apply())
        if self.cfg.get('unstructured_prune'):
            self.pruner.update_params()

        self._compose_callback.on_epoch_end(self.status)

        run_validation = (
            validate and
            (self._nranks < 2 or self._local_rank == 0) and
            ((epoch_id + 1) % self.cfg.snapshot_epoch == 0 or
             epoch_id == self.end_epoch - 1))
        if run_validation:
            if not hasattr(self, '_eval_loader'):
                # build evaluation dataset and loader
                self._eval_dataset = self.cfg.EvalDataset
                self._eval_batch_sampler = paddle.io.BatchSampler(
                    self._eval_dataset,
                    batch_size=self.cfg.EvalReader['batch_size'])
                # If metric is VOC, need to be set collate_batch=False.
                if self.cfg.metric == 'VOC':
                    self.cfg['EvalReader']['collate_batch'] = False
                self._eval_loader = create('EvalReader')(
                    self._eval_dataset,
                    self.cfg.worker_num,
                    batch_sampler=self._eval_batch_sampler)
            # if validation in training is enabled, metrics should be re-init
            # the flag makes sure this code will only execute once
            if not metrics_initialized:
                metrics_initialized = True
                self._init_metrics(validate=validate)
                self._reset_metrics()
            with paddle.no_grad():
                self.status['save_best_model'] = True
                self._eval_with_loader(self._eval_loader)

        # restore origin weight on model
        if self.use_ema:
            self.model.set_dict(origin_weights)

    self._compose_callback.on_train_end(self.status)
||||
|
||||
def _eval_with_loader(self, loader): |
||||
sample_num = 0 |
||||
tic = time.time() |
||||
self._compose_callback.on_epoch_begin(self.status) |
||||
self.status['mode'] = 'eval' |
||||
self.model.eval() |
||||
if self.cfg.get('print_flops', False): |
||||
flops_loader = create('{}Reader'.format(self.mode.capitalize()))( |
||||
self.dataset, self.cfg.worker_num, self._eval_batch_sampler) |
||||
self._flops(flops_loader) |
||||
for step_id, data in enumerate(loader): |
||||
self.status['step_id'] = step_id |
||||
self._compose_callback.on_step_begin(self.status) |
||||
# forward |
||||
outs = self.model(data) |
||||
|
||||
# update metrics |
||||
for metric in self._metrics: |
||||
metric.update(data, outs) |
||||
|
||||
# multi-scale inputs: all inputs have same im_id |
||||
if isinstance(data, typing.Sequence): |
||||
sample_num += data[0]['im_id'].numpy().shape[0] |
||||
else: |
||||
sample_num += data['im_id'].numpy().shape[0] |
||||
self._compose_callback.on_step_end(self.status) |
||||
|
||||
self.status['sample_num'] = sample_num |
||||
self.status['cost_time'] = time.time() - tic |
||||
|
||||
# accumulate metric to log out |
||||
for metric in self._metrics: |
||||
metric.accumulate() |
||||
metric.log() |
||||
self._compose_callback.on_epoch_end(self.status) |
||||
# reset metric states for metric may performed multiple times |
||||
self._reset_metrics() |
||||
|
||||
def evaluate(self):
    """Evaluate the model on ``self.loader`` inside ``paddle.no_grad()``."""
    no_grad_guard = paddle.no_grad()
    with no_grad_guard:
        self._eval_with_loader(self.loader)
||||
|
||||
def predict(self,
            images,
            draw_threshold=0.5,
            output_dir='output',
            save_txt=False):
    """Run inference on ``images`` and save the visualized results.

    Args:
        images: Images handed to ``self.dataset.set_images``.
        draw_threshold (float): Threshold forwarded to the visualization
            and text-saving helpers.
        output_dir (str): Directory the result images are written to.
        save_txt (bool): When True, also save a ``.txt`` result file next
            to each saved image.
    """
    self.dataset.set_images(images)
    loader = create('TestReader')(self.dataset, 0)

    imid2path = self.dataset.get_imid2path()

    anno_file = self.dataset.get_anno()
    clsid2catid, catid2name = get_categories(
        self.cfg.metric, anno_file=anno_file)

    # Run Infer
    self.status['mode'] = 'test'
    self.model.eval()
    if self.cfg.get('print_flops', False):
        flops_loader = create('TestReader')(self.dataset, 0)
        self._flops(flops_loader)

    batch_outputs = []
    for step_id, data in enumerate(loader):
        self.status['step_id'] = step_id
        # forward
        outs = self.model(data)

        # Copy the image meta info from the (first) input into the outputs.
        meta_source = data[0] if isinstance(data, typing.Sequence) else data
        for key in ('im_shape', 'scale_factor', 'im_id'):
            outs[key] = meta_source[key]
        # Convert every value exposing `.numpy()` in place.
        for key, value in outs.items():
            if hasattr(value, 'numpy'):
                outs[key] = value.numpy()
        batch_outputs.append(outs)

    # sniper
    if type(self.dataset) == SniperCOCODataSet:
        batch_outputs = \
            self.dataset.anno_cropper.aggregate_chips_detections(
                batch_outputs)

    result_kinds = ('bbox', 'mask', 'segm', 'keypoint')
    for outs in batch_outputs:
        batch_res = get_infer_results(outs, clsid2catid)
        # Read after get_infer_results, which may rewrite outs['bbox_num'].
        bbox_num = outs['bbox_num']

        start = 0
        for i, im_id in enumerate(outs['im_id']):
            image_path = imid2path[int(im_id)]
            image = Image.open(image_path).convert('RGB')
            image = ImageOps.exif_transpose(image)
            self.status['original_image'] = np.array(image.copy())

            end = start + bbox_num[i]
            # Slice out this image's share of each available result kind.
            per_image = {
                kind: (batch_res[kind][start:end]
                       if kind in batch_res else None)
                for kind in result_kinds
            }
            bbox_res = per_image['bbox']
            keypoint_res = per_image['keypoint']
            image = visualize_results(
                image, bbox_res, per_image['mask'], per_image['segm'],
                keypoint_res,
                int(im_id), catid2name, draw_threshold)
            self.status['result_image'] = np.array(image.copy())
            if self._compose_callback:
                self._compose_callback.on_step_end(self.status)
            # save image with detection
            save_name = self._get_save_image_name(output_dir, image_path)
            logger.info("Detection bbox results save in {}".format(
                save_name))
            image.save(save_name, quality=95)
            if save_txt:
                save_path = os.path.splitext(save_name)[0] + '.txt'
                txt_record = {"im_id": im_id}
                if bbox_res:
                    txt_record["bbox_res"] = bbox_res
                if keypoint_res:
                    txt_record["keypoint_res"] = keypoint_res
                save_result(save_path, txt_record, catid2name,
                            draw_threshold)
            start = end
||||
|
||||
def _get_save_image_name(self, output_dir, image_path): |
||||
""" |
||||
Get save image name from source image path. |
||||
""" |
||||
if not os.path.exists(output_dir): |
||||
os.makedirs(output_dir) |
||||
image_name = os.path.split(image_path)[-1] |
||||
name, ext = os.path.splitext(image_name) |
||||
return os.path.join(output_dir, "{}".format(name)) + ext |
||||
|
||||
def _get_infer_cfg_and_input_spec(self, save_dir, prune_input=True):
    """Dump ``infer_cfg.yml`` and build the export input specification.

    Args:
        save_dir (str): Directory ``infer_cfg.yml`` is written into.
        prune_input (bool): When True, convert the model with
            ``paddle.jit.to_static`` and prune the input spec against the
            resulting program; otherwise no static model is built.

    Returns:
        tuple: ``(static_model, pruned_input_spec)``; ``static_model`` is
        ``None`` when ``prune_input`` is False.
    """
    if self.cfg.architecture in MOT_ARCH:
        test_reader_name = 'TestMOTReader'
    else:
        test_reader_name = 'TestReader'

    image_shape = None
    reader_cfg = self.cfg[test_reader_name]
    if 'inputs_def' in reader_cfg:
        image_shape = reader_cfg['inputs_def'].get('image_shape', None)
    # set image_shape=[None, 3, -1, -1] as default
    if image_shape is None:
        image_shape = [None, 3, -1, -1]

    if len(image_shape) == 3:
        # A 3-element shape gets a leading batch dimension of None.
        image_shape = [None] + image_shape
        batch_dim = None
    else:
        batch_dim = image_shape[0]
    im_shape = [batch_dim, 2]
    scale_factor = [batch_dim, 2]

    if hasattr(self.model, 'deploy'):
        self.model.deploy = True
    if hasattr(self.model, 'fuse_norm'):
        self.model.fuse_norm = self.cfg['TestReader'].get('fuse_normalize',
                                                          False)

    # Save infer cfg
    infer_cfg_path = os.path.join(save_dir, 'infer_cfg.yml')
    _dump_infer_config(self.cfg, infer_cfg_path, image_shape, self.model)

    input_spec = [{
        "image": InputSpec(
            shape=image_shape, name='image'),
        "im_shape": InputSpec(
            shape=im_shape, name='im_shape'),
        "scale_factor": InputSpec(
            shape=scale_factor, name='scale_factor')
    }]
    if self.cfg.architecture == 'DeepSORT':
        input_spec[0].update({
            "crops": InputSpec(
                shape=[None, 3, 192, 64], name='crops')
        })

    static_model = None
    pruned_input_spec = input_spec
    if prune_input:
        static_model = paddle.jit.to_static(
            self.model, input_spec=input_spec)
        # NOTE: dy2st do not pruned program, but jit.save will prune program
        # input spec, prune input spec here and save with pruned input spec
        pruned_input_spec = _prune_input_spec(
            input_spec, static_model.forward.main_program,
            static_model.forward.outputs)

    # TODO: Hard code, delete it when support prune input_spec.
    if self.cfg.architecture == 'PicoDet':
        pruned_input_spec = [{
            "image": InputSpec(
                shape=image_shape, name='image')
        }]

    return static_model, pruned_input_spec
||||
|
||||
def export(self, output_dir='output_inference'):
    """Export the model for inference under ``output_dir``.

    The model is saved into a sub-directory named after the config file
    (``self.cfg.filename`` without directory and extension).

    Args:
        output_dir (str): Root directory for exported models.
    """
    self.model.eval()
    config_basename = os.path.basename(self.cfg.filename)
    model_name = os.path.splitext(config_basename)[0]
    save_dir = os.path.join(output_dir, model_name)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    static_model, pruned_input_spec = self._get_infer_cfg_and_input_spec(
        save_dir)

    # dy2st and save model
    model_prefix = os.path.join(save_dir, 'model')
    is_qat = 'slim' in self.cfg and self.cfg['slim_type'] == 'QAT'
    if is_qat:
        self.cfg.slim.save_quantized_model(
            self.model, model_prefix, input_spec=pruned_input_spec)
    else:
        paddle.jit.save(
            static_model, model_prefix, input_spec=pruned_input_spec)
    logger.info("Export model and saved in {}".format(save_dir))
||||
|
||||
def post_quant(self, output_dir='output_inference'):
    """Feed batches through the model, then save the quantized model.

    The model is saved into a sub-directory of ``output_dir`` named after
    the config file (``self.cfg.filename`` without directory/extension).

    Args:
        output_dir (str): Root directory for exported models.
    """
    config_basename = os.path.basename(self.cfg.filename)
    save_dir = os.path.join(output_dir, os.path.splitext(config_basename)[0])
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # NOTE(review): the break fires after batch index `quant_batch_num`
    # has been processed, so quant_batch_num + 1 batches are fed when the
    # loader is long enough -- confirm this is intended.
    for batch_idx, batch in enumerate(self.loader):
        self.model(batch)
        if batch_idx == int(self.cfg.get('quant_batch_num', 10)):
            break

    # TODO: support prune input_spec
    _, pruned_input_spec = self._get_infer_cfg_and_input_spec(
        save_dir, prune_input=False)

    self.cfg.slim.save_quantized_model(
        self.model,
        os.path.join(save_dir, 'model'),
        input_spec=pruned_input_spec)
    logger.info("Export Post-Quant model and saved in {}".format(save_dir))
||||
|
||||
def _flops(self, loader):
    """Log the model FLOPs measured on the first sample from ``loader``.

    This is a best-effort diagnostic: when ``paddleslim`` cannot be
    imported, or the loader yields no batch, a warning is logged and the
    method returns without computing anything.

    Args:
        loader: Iterable of batches; the first batch must provide
            ``'image'``, ``'im_shape'`` and ``'scale_factor'`` entries.
    """
    self.model.eval()
    try:
        import paddleslim  # noqa: F401 -- availability probe only
    except Exception:
        logger.warning(
            'Unable to calculate flops, please install paddleslim, for example: `pip install paddleslim`'
        )
        return

    from paddleslim.analysis import dygraph_flops

    # Only the first batch is needed; an empty loader used to crash below
    # with a TypeError on `None[...]`.
    input_data = next(iter(loader), None)
    if input_data is None:
        logger.warning(
            'Unable to calculate flops, the data loader yielded no batch.')
        return

    # Keep a single sample, with the leading dimension restored.
    image = input_data['image'][0].unsqueeze(0)
    input_spec = [{
        "image": image,
        "im_shape": input_data['im_shape'][0].unsqueeze(0),
        "scale_factor": input_data['scale_factor'][0].unsqueeze(0)
    }]
    # Use a separate name for the result instead of rebinding the function.
    gflops = dygraph_flops(self.model, input_spec) / (1000**3)
    logger.info(" Model FLOPs : {:.6f}G. (image shape is {})".format(
        gflops, image.shape))
@ -0,0 +1,29 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from . import metrics
from . import keypoint_metrics

from .metrics import *
from .keypoint_metrics import *

from . import mot_metrics
from .mot_metrics import *

from . import mcmot_metrics
from .mcmot_metrics import *

# Build __all__ once from every submodule. It used to be reassigned after
# each import group, so only the last assignment (metrics + mcmot_metrics)
# survived and the keypoint_metrics / mot_metrics names were dropped from
# `from <package> import *`.
__all__ = (metrics.__all__ + keypoint_metrics.__all__ +
           mot_metrics.__all__ + mcmot_metrics.__all__)
@ -0,0 +1,184 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
import os |
||||
import sys |
||||
import numpy as np |
||||
import itertools |
||||
|
||||
from paddlers.models.ppdet.metrics.json_results import get_det_res, get_det_poly_res, get_seg_res, get_solov2_segm_res, get_keypoint_res |
||||
from paddlers.models.ppdet.metrics.map_utils import draw_pr_curve |
||||
|
||||
from paddlers.models.ppdet.utils.logger import setup_logger |
||||
logger = setup_logger(__name__) |
||||
|
||||
|
||||
def get_infer_results(outs, catid, bias=0):
    """
    Convert raw inference outputs into per-type result collections.

    The returned dictionary holds one entry per result type present in
    ``outs`` ('bbox', 'mask', 'segm', 'keypoint').

    Args:
        outs (dict): Inference outputs; must contain 'im_id'.
        catid: Class-id to category-id mapping forwarded to the helpers.
        bias: Offset forwarded to the bbox helpers.

    Raises:
        ValueError: If ``outs`` is None or empty.

    Note:
        When 'keypoint' is present, ``outs['bbox_num']`` is overwritten
        with the number of keypoint results.
    """
    if outs is None or len(outs) == 0:
        raise ValueError(
            'The number of valid detection result if zero. Please use reasonable model and check input data.'
        )

    im_id = outs['im_id']
    infer_res = {}

    if 'bbox' in outs:
        bboxes = outs['bbox']
        # Rows with more than 6 values are handled by the polygon helper.
        is_poly = len(bboxes) > 0 and len(bboxes[0]) > 6
        convert = get_det_poly_res if is_poly else get_det_res
        infer_res['bbox'] = convert(
            bboxes, outs['bbox_num'], im_id, catid, bias=bias)

    if 'mask' in outs:
        # mask post process
        infer_res['mask'] = get_seg_res(outs['mask'], outs['bbox'],
                                        outs['bbox_num'], im_id, catid)

    if 'segm' in outs:
        infer_res['segm'] = get_solov2_segm_res(outs, im_id, catid)

    if 'keypoint' in outs:
        keypoint_res = get_keypoint_res(outs, im_id)
        infer_res['keypoint'] = keypoint_res
        outs['bbox_num'] = [len(keypoint_res)]

    return infer_res
||||
|
||||
|
||||
def cocoapi_eval(jsonfile,
                 style,
                 coco_gt=None,
                 anno_file=None,
                 max_dets=(100, 300, 1000),
                 classwise=False,
                 sigmas=None,
                 use_area=True):
    """
    Evaluate a result json file with the COCO API and return its stats.

    Args:
        jsonfile (str): Evaluation json file, eg: bbox.json, mask.json.
        style (str): COCOeval style, can be `bbox` , `segm` , `proposal`, `keypoints` and `keypoints_crowd`.
        coco_gt: An already loaded COCO ground-truth object, eg:
            coco_gt = COCO(anno_file). When None it is built from
            `anno_file`.
        anno_file (str): COCO annotations file.
        max_dets (tuple): COCO evaluation maxDets (used for `proposal`).
        classwise (bool): Whether per-category AP and draw P-R Curve or not.
        sigmas (nparray): keypoint labelling sigmas.
        use_area (bool): If gt annotations (eg. CrowdPose, AIC)
            do not have 'area', please set use_area=False.

    Returns:
        The ``stats`` attribute of the COCOeval object.

    Raises:
        AssertionError: If both `coco_gt` and `anno_file` are None.
    """
    assert coco_gt is not None or anno_file is not None
    if style == 'keypoints_crowd':
        #please install xtcocotools==1.6
        from xtcocotools.coco import COCO
        from xtcocotools.cocoeval import COCOeval
    else:
        from pycocotools.coco import COCO
        from pycocotools.cocoeval import COCOeval

    if coco_gt is None:
        coco_gt = COCO(anno_file)
    logger.info("Start evaluate...")
    coco_dt = coco_gt.loadRes(jsonfile)
    if style == 'proposal':
        coco_eval = COCOeval(coco_gt, coco_dt, 'bbox')
        coco_eval.params.useCats = 0
        coco_eval.params.maxDets = list(max_dets)
    elif style == 'keypoints_crowd':
        coco_eval = COCOeval(coco_gt, coco_dt, style, sigmas, use_area)
    else:
        coco_eval = COCOeval(coco_gt, coco_dt, style)
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()
    if classwise:
        # Compute per-category AP and PR curve
        try:
            from terminaltables import AsciiTable
        except Exception as e:
            logger.error(
                'terminaltables not found, plaese install terminaltables. '
                'for example: `pip install terminaltables`.')
            raise e
        precisions = coco_eval.eval['precision']
        cat_ids = coco_gt.getCatIds()
        # precision: (iou, recall, cls, area range, max dets)
        assert len(cat_ids) == precisions.shape[2]
        results_per_category = []
        for idx, catId in enumerate(cat_ids):
            # area range index 0: all area ranges
            # max dets index -1: typically 100 per image
            nm = coco_gt.loadCats(catId)[0]
            precision = precisions[:, :, idx, 0, -1]
            precision = precision[precision > -1]
            if precision.size:
                ap = np.mean(precision)
            else:
                ap = float('nan')
            results_per_category.append(
                (str(nm["name"]), '{:0.3f}'.format(float(ap))))
            # Use the last maxDets entry, as for the AP above. The former
            # hard-coded index 2 is the same entry when there are three
            # maxDets values but is out of range when there are fewer.
            pr_array = precisions[0, :, idx, 0, -1]
            recall_array = np.arange(0.0, 1.01, 0.01)
            draw_pr_curve(
                pr_array,
                recall_array,
                out_dir=style + '_pr_curve',
                file_name='{}_precision_recall_curve.jpg'.format(nm["name"]))

        # Lay the (category, AP) pairs out in up to three column pairs.
        num_columns = min(6, len(results_per_category) * 2)
        results_flatten = list(itertools.chain(*results_per_category))
        headers = ['category', 'AP'] * (num_columns // 2)
        results_2d = itertools.zip_longest(
            *[results_flatten[i::num_columns] for i in range(num_columns)])
        table_data = [headers]
        table_data += [result for result in results_2d]
        table = AsciiTable(table_data)
        logger.info('Per-category of {} AP: \n{}'.format(style, table.table))
        logger.info("per-category PR curve has output to {} folder.".format(
            style + '_pr_curve'))
    # flush coco evaluation result
    sys.stdout.flush()
    return coco_eval.stats
||||
|
||||
|
||||
def json_eval_results(metric, json_directory, dataset):
    """
    Run cocoapi eval on existing proposal.json, bbox.json and mask.json.

    Files that do not exist are reported through the logger and skipped.

    Args:
        metric (str): Must be 'COCO'.
        json_directory: Directory holding the json files; when falsy the
            bare file names are used as paths.
        dataset: Object whose ``get_anno()`` gives the annotation file.
    """
    assert metric == 'COCO'
    anno_file = dataset.get_anno()
    json_paths = ['proposal.json', 'bbox.json', 'mask.json']
    if json_directory:
        assert os.path.exists(
            json_directory), "The json directory:{} does not exist".format(
                json_directory)
        json_paths = [
            os.path.join(str(json_directory), file_name)
            for file_name in json_paths
        ]

    # Each json file is evaluated with its matching COCOeval style.
    eval_styles = ['proposal', 'bbox', 'segm']
    for json_path, eval_style in zip(json_paths, eval_styles):
        if not os.path.exists(json_path):
            logger.info("{} not exists!".format(json_path))
            continue
        cocoapi_eval(json_path, eval_style, anno_file=anno_file)
@ -0,0 +1,149 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
import six |
||||
import numpy as np |
||||
|
||||
|
||||
def get_det_res(bboxes, bbox_nums, image_id, label_to_cat_id_map, bias=0):
    """Convert flat detection rows into per-detection result dicts.

    Each row of ``bboxes`` unpacks to (class id, score, xmin, ymin, xmax,
    ymax). Rows are consumed image by image according to ``bbox_nums``;
    rows with a negative class id are dropped. The emitted 'bbox' is
    ``[xmin, ymin, w, h]`` with ``bias`` added to width and height.

    Returns:
        list[dict]: Dicts with 'image_id', 'category_id', 'bbox', 'score'.
    """
    det_res = []
    offset = 0  # index of the current image's first row in `bboxes`
    for img_idx, det_nums in enumerate(bbox_nums):
        cur_image_id = int(image_id[img_idx][0])
        for row_idx in range(offset, offset + det_nums):
            num_id, score, xmin, ymin, xmax, ymax = bboxes[row_idx].tolist()
            if int(num_id) < 0:
                continue
            det_res.append({
                'image_id': cur_image_id,
                'category_id': label_to_cat_id_map[int(num_id)],
                'bbox': [
                    xmin, ymin, xmax - xmin + bias, ymax - ymin + bias
                ],
                'score': score
            })
        offset += det_nums
    return det_res
||||
|
||||
|
||||
def get_det_poly_res(bboxes, bbox_nums, image_id, label_to_cat_id_map, bias=0):
    """Flatten batched quadrilateral detections into a list of result dicts.

    Args:
        bboxes: Indexable collection of detection rows. Each row must expose
            ``tolist()`` yielding
            ``[label, score, x1, y1, x2, y2, x3, y3, x4, y4]``.
        bbox_nums: Number of rows in ``bboxes`` that belong to each image,
            in image order.
        image_id: Per-image ids; ``image_id[i][0]`` is read for image ``i``.
        label_to_cat_id_map: Mapping from predicted label index to the
            category id written to the result.
        bias: Accepted for signature parity with ``get_det_res``; it is not
            used by this function.

    Returns:
        list[dict]: One dict per kept detection with the keys ``image_id``,
        ``category_id``, ``bbox`` (the eight corner coordinates) and
        ``score``. Rows with a negative label are skipped.
    """
    records = []
    cursor = 0  # position in the flat `bboxes` sequence across all images
    for img_idx, count in enumerate(bbox_nums):
        img_id = int(image_id[img_idx][0])
        for _ in range(count):
            row = bboxes[cursor].tolist()
            cursor += 1
            label, score, x1, y1, x2, y2, x3, y3, x4, y4 = row
            label = int(label)
            if label < 0:
                continue
            records.append({
                'image_id': img_id,
                'category_id': label_to_cat_id_map[label],
                'bbox': [x1, y1, x2, y2, x3, y3, x4, y4],
                'score': score
            })
    return records
||||
|
||||
|
||||
def get_seg_res(masks, bboxes, mask_nums, image_id, label_to_cat_id_map):
    """Convert batched instance masks into RLE-encoded result dicts.

    Args:
        masks: Indexable collection of per-instance masks; each is cast to
            ``uint8`` and indexed as a 2-D array.
        bboxes: Indexable collection of detection rows aligned with
            ``masks``; column 0 is read as the label, column 1 as the score.
        mask_nums: Number of instances that belong to each image, in order.
        image_id: Per-image ids; ``image_id[i][0]`` is read for image ``i``.
        label_to_cat_id_map: Mapping from predicted label index to the
            category id written to the result.

    Returns:
        list[dict]: One dict per kept instance with the keys ``image_id``,
        ``category_id``, ``segmentation`` (RLE dict) and ``score``.
        Instances whose label equals -1 are skipped.
    """
    import pycocotools.mask as mask_util

    records = []
    cursor = 0  # position in the flat `masks`/`bboxes` sequences
    for img_idx, count in enumerate(mask_nums):
        img_id = int(image_id[img_idx][0])
        for _ in range(count):
            binary_mask = masks[cursor].astype(np.uint8)
            score = float(bboxes[cursor][1])
            label = int(bboxes[cursor][0])
            cursor += 1
            if label == -1:
                continue
            category = label_to_cat_id_map[label]
            # encode() receives an H x W x 1 Fortran-ordered uint8 array and
            # returns one RLE per channel; take the only one.
            fortran_mask = np.array(
                binary_mask[:, :, None], order="F", dtype="uint8")
            rle = mask_util.encode(fortran_mask)[0]
            # On Python 3 the counts are decoded from bytes to text.
            if six.PY3 and 'counts' in rle:
                rle['counts'] = rle['counts'].decode("utf8")
            records.append({
                'image_id': img_id,
                'category_id': category,
                'segmentation': rle,
                'score': score
            })
    return records
||||
|
||||
|
||||
def get_solov2_segm_res(results, image_id, num_id_to_cat_id_map):
    """Convert SOLOv2 outputs for one image into RLE-encoded result dicts.

    Args:
        results: Dict with the keys ``'segm'`` (array of masks, cast to
            ``uint8``), ``'cate_label'`` and ``'cate_score'``, all indexed
            by instance.
        image_id: Image ids; only ``image_id[0][0]`` is read.
        num_id_to_cat_id_map: Mapping from predicted class index to the
            category id written to the result.

    Returns:
        list[dict] | None: ``None`` when ``results['segm']`` holds no masks,
        otherwise one dict per emitted instance with the keys ``image_id``,
        ``category_id``, ``segmentation`` (RLE dict) and ``score``.
    """
    import pycocotools.mask as mask_util

    masks = results['segm'].astype(np.uint8)
    labels = results['cate_label']
    scores = results['cate_score']
    num_masks = masks.shape[0]
    im_id = int(image_id[0][0])
    if num_masks == 0:
        return None

    segm_res = []
    # NOTE(review): the last entry is intentionally not emitted. Presumably
    # the SOLOv2 head appends a placeholder instance so its outputs are never
    # empty -- confirm against the head implementation before changing this.
    for inst in range(num_masks - 1):
        cat_id = num_id_to_cat_id_map[int(labels[inst])]
        score = float(scores[inst])
        fortran_mask = np.array(masks[inst][:, :, np.newaxis], order='F')
        rle = mask_util.encode(fortran_mask)[0]
        rle['counts'] = rle['counts'].decode('utf8')
        segm_res.append({
            'image_id': im_id,
            'category_id': cat_id,
            'segmentation': rle,
            'score': score
        })
    return segm_res
||||
|
||||
|
||||
def get_keypoint_res(results, im_id):
    """Convert keypoint predictions into annotation dicts with bbox and area.

    Args:
        results: Dict whose ``'keypoint'`` entry is indexable per image and
            yields a ``(kpts, scores)`` pair for each image.
        im_id: Array of image ids; ``im_id[idx].item()`` is used as the id
            of image ``idx``.

    Returns:
        list[dict]: One dict per predicted instance with the keys
        ``image_id``, ``category_id`` (always 1), ``keypoints`` (flattened
        list), ``score``, ``area`` and ``bbox`` (``[x0, y0, w, h]``). The box
        is the tight extent of the x/y values read at stride 3 from the
        flattened keypoints.
    """
    annotations = []
    predictions = results['keypoint']
    for img_idx in range(im_id.shape[0]):
        image_id = im_id[img_idx].item()
        kpts, scores = predictions[img_idx]
        for kpt, score in zip(kpts, scores):
            flat = kpt.flatten()
            ann = {
                'image_id': image_id,
                'category_id': 1,  # XXX hard code
                'keypoints': flat.tolist(),
                'score': float(score)
            }
            # Flattened layout is read as (x, y, <third value>) triples.
            xs = flat[0::3]
            ys = flat[1::3]
            x_lo = np.min(xs).item()
            x_hi = np.max(xs).item()
            y_lo = np.min(ys).item()
            y_hi = np.max(ys).item()
            ann['area'] = (x_hi - x_lo) * (y_hi - y_lo)
            ann['bbox'] = [x_lo, y_lo, x_hi - x_lo, y_hi - y_lo]
            annotations.append(ann)
    return annotations
@ -0,0 +1,401 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
import os |
||||
import json |
||||
from collections import defaultdict, OrderedDict |
||||
import numpy as np |
||||
from pycocotools.coco import COCO |
||||
from pycocotools.cocoeval import COCOeval |
||||
from ..modeling.keypoint_utils import oks_nms |
||||
from scipy.io import loadmat, savemat |
||||
from paddlers.models.ppdet.utils.logger import setup_logger |
||||
logger = setup_logger(__name__) |
||||
|
||||
__all__ = ['KeyPointTopDownCOCOEval', 'KeyPointTopDownMPIIEval'] |
||||
|
||||
|
||||
class KeyPointTopDownCOCOEval(object):
    """Collect top-down keypoint predictions and evaluate them with COCO.

    refer to
        https://github.com/leoxiaobin/deep-high-resolution-net.pytorch
        Copyright (c) Microsoft, under the MIT License.

    Args:
        anno_file (str): Path of the COCO annotation file, loaded with
            ``pycocotools.coco.COCO``.
        num_samples (int): Number of rows preallocated for predictions.
        num_joints (int): Number of keypoints per instance.
        output_eval (str): Directory that receives
            ``keypoints_results.json``.
        iou_type (str): Stored on the instance; evaluation itself always
            runs with the ``'keypoints'`` type.
        in_vis_thre (float): Only keypoint confidences above this value are
            averaged when rescoring an instance.
        oks_thre (float): Threshold handed to ``oks_nms``.
        save_prediction_only (bool): When True, predictions are written to
            disk but no evaluation is run.
    """

    def __init__(self,
                 anno_file,
                 num_samples,
                 num_joints,
                 output_eval,
                 iou_type='keypoints',
                 in_vis_thre=0.2,
                 oks_thre=0.9,
                 save_prediction_only=False):
        super(KeyPointTopDownCOCOEval, self).__init__()
        self.coco = COCO(anno_file)
        self.num_samples = num_samples
        self.num_joints = num_joints
        self.iou_type = iou_type
        self.in_vis_thre = in_vis_thre
        self.oks_thre = oks_thre
        self.output_eval = output_eval
        self.res_file = os.path.join(output_eval, "keypoints_results.json")
        self.save_prediction_only = save_prediction_only
        self.reset()

    def reset(self):
        """Reallocate the prediction buffers and clear evaluation results."""
        self.results = {
            'all_preds': np.zeros(
                (self.num_samples, self.num_joints, 3), dtype=np.float32),
            # columns: center x/y, scale x/y, area, score
            'all_boxes': np.zeros((self.num_samples, 6)),
            'image_path': []
        }
        self.eval_results = {}
        self.idx = 0

    def update(self, inputs, outputs):
        """Store one batch of predictions at the current write position.

        Args:
            inputs (dict): Batch inputs; reads ``'image'`` (for the batch
                size), ``'center'``, ``'scale'``, ``'score'`` and
                ``'im_id'``, the last four via ``.numpy()``.
            outputs (dict): Model outputs; ``outputs['keypoint'][0]`` must
                unpack into ``(kpts, _)``.
        """
        kpts, _ = outputs['keypoint'][0]

        num_images = inputs['image'].shape[0]
        rows = slice(self.idx, self.idx + num_images)
        scale = inputs['scale'].numpy()
        all_boxes = self.results['all_boxes']

        self.results['all_preds'][rows, :, 0:3] = kpts[:, :, 0:3]
        all_boxes[rows, 0:2] = inputs['center'].numpy()[:, 0:2]
        all_boxes[rows, 2:4] = scale[:, 0:2]
        all_boxes[rows, 4] = np.prod(scale * 200, 1)
        all_boxes[rows, 5] = np.squeeze(inputs['score'].numpy())
        self.results['image_path'].extend(inputs['im_id'].numpy())

        self.idx += num_images

    def _write_coco_keypoint_results(self, keypoints):
        """Serialize the per-image keypoints to ``self.res_file`` as JSON."""
        data_pack = [{
            'cat_id': 1,
            'cls': 'person',
            'ann_type': 'keypoints',
            'keypoints': keypoints
        }]
        results = self._coco_keypoint_results_one_category_kernel(data_pack[0])
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(self.output_eval, exist_ok=True)
        with open(self.res_file, 'w') as f:
            json.dump(results, f, sort_keys=True, indent=4)
        logger.info(f'The keypoint result is saved to {self.res_file}.')

        # Re-read the file to verify it parses. The handle is closed
        # deterministically instead of being left to the garbage collector.
        try:
            with open(self.res_file) as f:
                json.load(f)
        except ValueError:
            # Best-effort repair inherited from the reference implementation:
            # replace the last line with a closing bracket.
            with open(self.res_file, 'r') as f:
                content = f.readlines()
            content[-1] = ']'
            with open(self.res_file, 'w') as f:
                f.writelines(content)

    def _coco_keypoint_results_one_category_kernel(self, data_pack):
        """Flatten per-image keypoint records into COCO result dicts.

        Args:
            data_pack (dict): Must contain ``'cat_id'`` and ``'keypoints'``,
                the latter a sequence of per-image lists of instance dicts
                with the keys ``keypoints``, ``image``, ``score``,
                ``center`` and ``scale``.

        Returns:
            list[dict]: One result dict per instance; images without
            instances contribute nothing.
        """
        cat_id = data_pack['cat_id']
        cat_results = []

        for img_kpts in data_pack['keypoints']:
            if len(img_kpts) == 0:
                continue

            stacked = np.array([inst['keypoints'] for inst in img_kpts])
            stacked = stacked.reshape(stacked.shape[0], -1)

            cat_results.extend({
                'image_id': inst['image'],
                'category_id': cat_id,
                'keypoints': flat.tolist(),
                'score': inst['score'],
                'center': list(inst['center']),
                'scale': list(inst['scale'])
            } for inst, flat in zip(img_kpts, stacked))

        return cat_results

    def get_final_results(self, preds, all_boxes, img_path):
        """Rescore instances, apply OKS NMS per image and write the results.

        Args:
            preds (np.ndarray): Keypoints indexed as
                ``[sample, joint, (x, y, score)]``.
            all_boxes (np.ndarray): Per-sample rows of
                ``[center_x, center_y, scale_x, scale_y, area, score]``.
            img_path (sequence): Per-sample image ids, converted with
                ``int``.
        """
        # image x person x (keypoints)
        kpts = defaultdict(list)
        for idx, kpt in enumerate(preds):
            box = all_boxes[idx]
            image = int(img_path[idx])
            kpts[image].append({
                'keypoints': kpt,
                'center': box[0:2],
                'scale': box[2:4],
                'area': box[4],
                'score': box[5],
                'image': image
            })

        # rescoring and oks nms
        num_joints = preds.shape[1]
        in_vis_thre = self.in_vis_thre
        oks_thre = self.oks_thre
        oks_nmsed_kpts = []
        for img_kpts in kpts.values():
            for n_p in img_kpts:
                box_score = n_p['score']
                kpt_score = 0
                valid_num = 0
                for n_jt in range(num_joints):
                    t_s = n_p['keypoints'][n_jt][2]
                    if t_s > in_vis_thre:
                        kpt_score = kpt_score + t_s
                        valid_num = valid_num + 1
                if valid_num != 0:
                    kpt_score = kpt_score / valid_num
                # rescoring: mean confident-keypoint score times box score
                n_p['score'] = kpt_score * box_score

            keep = oks_nms(list(img_kpts), oks_thre)

            # An empty `keep` keeps every instance of the image.
            if len(keep) == 0:
                oks_nmsed_kpts.append(img_kpts)
            else:
                oks_nmsed_kpts.append([img_kpts[_keep] for _keep in keep])

        self._write_coco_keypoint_results(oks_nmsed_kpts)

    def accumulate(self):
        """Write the predictions and, unless disabled, run COCO evaluation."""
        self.get_final_results(self.results['all_preds'],
                               self.results['all_boxes'],
                               self.results['image_path'])
        if self.save_prediction_only:
            logger.info(f'The keypoint result is saved to {self.res_file} '
                        'and do not evaluate the mAP.')
            return
        coco_dt = self.coco.loadRes(self.res_file)
        coco_eval = COCOeval(self.coco, coco_dt, 'keypoints')
        coco_eval.params.useSegm = None
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()

        self.eval_results['keypoint'] = list(coco_eval.stats)

    def log(self):
        """Print the evaluation statistics as a markdown-style table."""
        if self.save_prediction_only:
            return
        stats_names = [
            'AP', 'Ap .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
            'AR .75', 'AR (M)', 'AR (L)'
        ]
        num_values = len(stats_names)
        print(' '.join(['| {}'.format(name) for name in stats_names]) + ' |')
        print('|---' * (num_values + 1) + '|')

        print(' '.join([
            '| {:.3f}'.format(value) for value in self.eval_results['keypoint']
        ]) + ' |')

    def get_results(self):
        """Return the dict of evaluation results gathered so far."""
        return self.eval_results
||||
|
||||
|
||||
class KeyPointTopDownMPIIEval(object):
    """Collect top-down keypoint predictions and evaluate PCKh on MPII.

    Args:
        anno_file (str): Annotation file path; the ground-truth file
            ``mpii_gt_val.mat`` is looked up in the same directory.
        num_samples (int): Accepted for interface parity; not used here.
        num_joints (int): Accepted for interface parity; not used here.
        output_eval (str): Directory that receives
            ``keypoints_results.json``.
        oks_thre (float): Accepted for interface parity; not used here.
        save_prediction_only (bool): When True, predictions are written to
            disk but no evaluation is run.
    """

    def __init__(self,
                 anno_file,
                 num_samples,
                 num_joints,
                 output_eval,
                 oks_thre=0.9,
                 save_prediction_only=False):
        super(KeyPointTopDownMPIIEval, self).__init__()
        self.ann_file = anno_file
        self.res_file = os.path.join(output_eval, "keypoints_results.json")
        self.save_prediction_only = save_prediction_only
        self.reset()

    def reset(self):
        """Drop all collected batches and evaluation results."""
        self.results = []
        self.eval_results = {}
        self.idx = 0

    def update(self, inputs, outputs):
        """Append one batch of predictions.

        Args:
            inputs (dict): Batch inputs; reads ``'image'`` (for the batch
                size), ``'center'``, ``'scale'`` and ``'score'`` (via
                ``.numpy()``) and ``'image_file'``.
            outputs (dict): Model outputs; ``outputs['keypoint'][0]`` must
                unpack into ``(kpts, _)``.
        """
        kpts, _ = outputs['keypoint'][0]

        num_images = inputs['image'].shape[0]
        scale = inputs['scale'].numpy()
        # columns: center x/y, scale x/y, area, score
        boxes = np.zeros((num_images, 6))
        boxes[:, 0:2] = inputs['center'].numpy()[:, 0:2]
        boxes[:, 2:4] = scale[:, 0:2]
        boxes[:, 4] = np.prod(scale * 200, 1)
        boxes[:, 5] = np.squeeze(inputs['score'].numpy())

        self.results.append({
            'preds': kpts[:, :, 0:3],
            'boxes': boxes,
            'image_path': inputs['image_file']
        })

    def accumulate(self):
        """Write the predictions and, unless disabled, compute PCKh."""
        self._mpii_keypoint_results_save()
        if self.save_prediction_only:
            logger.info(f'The keypoint result is saved to {self.res_file} '
                        'and do not evaluate the mAP.')
            return

        self.eval_results = self.evaluate(self.results)

    def _flatten_results(self):
        """Return one JSON-serializable record per collected sample."""
        flat = []
        for res in self.results:
            if len(res) == 0:
                continue
            # Iterate over the samples of the batch. The batch is a dict, so
            # len(res) would be its number of keys, not its sample count.
            flat.extend({
                'preds': res['preds'][k].tolist(),
                'boxes': res['boxes'][k].tolist(),
                'image_path': res['image_path'][k],
            } for k in range(len(res['preds'])))
        return flat

    def _mpii_keypoint_results_save(self):
        """Write every collected prediction to ``self.res_file`` as JSON."""
        results = self._flatten_results()
        # Create the output directory if it is missing so the write below
        # does not fail on a fresh run.
        out_dir = os.path.dirname(self.res_file)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        with open(self.res_file, 'w') as f:
            json.dump(results, f, sort_keys=True, indent=4)
        logger.info(f'The keypoint result is saved to {self.res_file}.')

    def log(self):
        """Print every evaluation result as ``name : value``."""
        if self.save_prediction_only:
            return
        for item, value in self.eval_results.items():
            print("{} : {}".format(item, value))

    def get_results(self):
        """Return the evaluation results gathered so far."""
        return self.eval_results

    def evaluate(self, outputs, savepath=None):
        """Evaluate PCKh for MPII dataset. refer to
        https://github.com/leoxiaobin/deep-high-resolution-net.pytorch
        Copyright (c) Microsoft, under the MIT License.

        Args:
            outputs(list(preds, boxes)):

                * preds (np.ndarray[N,K,3]): The first two dimensions are
                  coordinates, score is the third dimension of the array.
                * boxes (np.ndarray[N,6]): [center[0], center[1], scale[0]
                  , scale[1],area, score]
            savepath (str, optional): When given, the 1-based predictions
                are also saved to ``<savepath>/pred.mat``.

        Returns:
            dict: PCKh for each joint
        """
        preds = np.stack([
            output['preds'][i]
            for output in outputs for i in range(output['preds'].shape[0])
        ])

        # convert 0-based index to 1-based index,
        # and get the first two dimensions.
        preds = preds[..., :2] + 1.0

        if savepath is not None:
            pred_file = os.path.join(savepath, 'pred.mat')
            savemat(pred_file, mdict={'preds': preds})

        SC_BIAS = 0.6
        threshold = 0.5

        gt_file = os.path.join(
            os.path.dirname(self.ann_file), 'mpii_gt_val.mat')
        gt_dict = loadmat(gt_file)
        dataset_joints = gt_dict['dataset_joints']
        jnt_missing = gt_dict['jnt_missing']
        pos_gt_src = gt_dict['pos_gt_src']
        headboxes_src = gt_dict['headboxes_src']

        pos_pred_src = np.transpose(preds, [1, 2, 0])

        # Row index of every named joint used in the report below.
        joint = {
            name: np.where(dataset_joints == name)[1][0]
            for name in ('head', 'lsho', 'lelb', 'lwri', 'lhip', 'lkne',
                         'lank', 'rsho', 'relb', 'rwri', 'rkne', 'rank',
                         'rhip')
        }

        jnt_visible = 1 - jnt_missing
        uv_error = pos_pred_src - pos_gt_src
        uv_err = np.linalg.norm(uv_error, axis=1)
        headsizes = headboxes_src[1, :, :] - headboxes_src[0, :, :]
        headsizes = np.linalg.norm(headsizes, axis=0)
        headsizes *= SC_BIAS
        scale = headsizes * np.ones((len(uv_err), 1), dtype=np.float32)
        # Error normalized by head size; missing joints contribute zero.
        scaled_uv_err = uv_err / scale
        scaled_uv_err = scaled_uv_err * jnt_visible
        jnt_count = np.sum(jnt_visible, axis=1)
        less_than_threshold = (scaled_uv_err <= threshold) * jnt_visible
        PCKh = 100. * np.sum(less_than_threshold, axis=1) / jnt_count

        # PCKh for every threshold from 0 to 0.5 in steps of 0.01. The row
        # width follows the ground truth instead of a hard-coded 16.
        rng = np.arange(0, 0.5 + 0.01, 0.01)
        pckAll = np.zeros((len(rng), jnt_count.shape[0]), dtype=np.float32)

        for r, thr in enumerate(rng):
            below = (scaled_uv_err <= thr) * jnt_visible
            pckAll[r, :] = 100. * np.sum(below, axis=1) / jnt_count

        # Joint rows 6 and 7 are excluded from the averaged score.
        PCKh = np.ma.array(PCKh, mask=False)
        PCKh.mask[6:8] = True

        jnt_count = np.ma.array(jnt_count, mask=False)
        jnt_count.mask[6:8] = True
        jnt_ratio = jnt_count / np.sum(jnt_count).astype(np.float64)

        # NOTE(review): row 11 of pckAll corresponds to threshold 0.11, not
        # 0.10; kept as in the reference implementation -- confirm intent.
        name_value = [  #noqa
            ('Head', PCKh[joint['head']]),
            ('Shoulder', 0.5 * (PCKh[joint['lsho']] + PCKh[joint['rsho']])),
            ('Elbow', 0.5 * (PCKh[joint['lelb']] + PCKh[joint['relb']])),
            ('Wrist', 0.5 * (PCKh[joint['lwri']] + PCKh[joint['rwri']])),
            ('Hip', 0.5 * (PCKh[joint['lhip']] + PCKh[joint['rhip']])),
            ('Knee', 0.5 * (PCKh[joint['lkne']] + PCKh[joint['rkne']])),
            ('Ankle', 0.5 * (PCKh[joint['lank']] + PCKh[joint['rank']])),
            ('PCKh', np.sum(PCKh * jnt_ratio)),
            ('PCKh@0.1', np.sum(pckAll[11, :] * jnt_ratio))
        ]
        name_value = OrderedDict(name_value)

        return name_value

    def _sort_and_unique_bboxes(self, kpts, key='bbox_id'):
        """sort kpts and remove the repeated ones."""
        kpts = sorted(kpts, key=lambda x: x[key])
        num = len(kpts)
        # Walk backwards so deletions do not shift unvisited indices.
        for i in range(num - 1, 0, -1):
            if kpts[i][key] == kpts[i - 1][key]:
                del kpts[i]

        return kpts
@ -0,0 +1,444 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
from __future__ import unicode_literals |
||||
|
||||
import os |
||||
import sys |
||||
import numpy as np |
||||
import itertools |
||||
import paddle |
||||
from paddlers.models.ppdet.modeling.bbox_utils import poly2rbox, rbox2poly_np |
||||
|
||||
from paddlers.models.ppdet.utils.logger import setup_logger |
||||
logger = setup_logger(__name__) |
||||
|
||||
__all__ = [ |
||||
'draw_pr_curve', |
||||
'bbox_area', |
||||
'jaccard_overlap', |
||||
'prune_zero_padding', |
||||
'DetectionMAP', |
||||
'ap_per_class', |
||||
'compute_ap', |
||||
] |
||||
|
||||
|
||||
def draw_pr_curve(precision,
                  recall,
                  iou=0.5,
                  out_dir='pr_curve',
                  file_name='precision_recall_curve.jpg'):
    """Plot a precision/recall curve and save it as an image file.

    Args:
        precision: Precision values, plotted on the y axis.
        recall: Recall values, plotted on the x axis.
        iou: IoU value shown in the plot title.
        out_dir (str): Output directory; created when it does not exist.
        file_name (str): Name of the image file inside ``out_dir``.

    Raises:
        Exception: Whatever importing ``matplotlib.pyplot`` raised, after an
            installation hint has been logged.
    """
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    target = os.path.join(out_dir, file_name)

    # matplotlib is imported lazily so the module loads without it.
    try:
        import matplotlib.pyplot as plt
    except Exception:
        logger.error('Matplotlib not found, plaese install matplotlib.'
                     'for example: `pip install matplotlib`.')
        raise

    plt.cla()
    plt.figure('P-R Curve')
    plt.title('Precision/Recall Curve(IoU={})'.format(iou))
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.grid(True)
    plt.plot(recall, precision)
    plt.savefig(target)
||||
|
||||
|
||||
def bbox_area(bbox, is_bbox_normalized):
    """Return the area of an ``[xmin, ymin, xmax, ymax]`` box.

    When ``is_bbox_normalized`` is false, 1 is added to both the width and
    the height before multiplying; when it is true, nothing is added.
    """
    extra = 1. - float(is_bbox_normalized)
    xmin, ymin, xmax, ymax = bbox[0], bbox[1], bbox[2], bbox[3]
    return (xmax - xmin + extra) * (ymax - ymin + extra)
||||
|
||||
|
||||
def jaccard_overlap(pred, gt, is_bbox_normalized=False):
    """Return the intersection-over-union of two axis-aligned boxes.

    Args:
        pred: Predicted box as ``[xmin, ymin, xmax, ymax]``.
        gt: Ground-truth box as ``[xmin, ymin, xmax, ymax]``.
        is_bbox_normalized (bool): Forwarded to ``bbox_area`` for all three
            area computations.

    Returns:
        float: ``0.`` when the boxes do not overlap or only touch, otherwise
        intersection area divided by union area.
    """
    separated = (pred[0] >= gt[2] or pred[2] <= gt[0] or
                 pred[1] >= gt[3] or pred[3] <= gt[1])
    if separated:
        return 0.

    intersection = [
        max(pred[0], gt[0]),
        max(pred[1], gt[1]),
        min(pred[2], gt[2]),
        min(pred[3], gt[3]),
    ]
    inter_size = bbox_area(intersection, is_bbox_normalized)
    pred_size = bbox_area(pred, is_bbox_normalized)
    gt_size = bbox_area(gt, is_bbox_normalized)
    union_size = pred_size + gt_size - inter_size
    return float(inter_size) / union_size
||||
|
||||
|
||||
def calc_rbox_iou(pred, gt_rbox):
    """Return the IoU between a predicted polygon and a rotated gt box.

    Args:
        pred: Predicted polygon, reshaped to rows of 8 values
            (four x/y corner pairs).
        gt_rbox: Ground-truth rotated box, reshaped to rows of 5 values.

    Returns:
        The IoU of the enclosing axis-aligned rectangles when that is not
        positive; otherwise element ``[0][0]`` of the result of the
        ``rbox_iou`` custom op.

    Note:
        The process exits with status -1 when ``rbox_iou_ops`` cannot be
        imported.
    """
    # Cheap rejection first: compare the enclosing axis-aligned rectangles.
    pred_pts = np.array(pred, np.float32).reshape(-1, 8).reshape(-1, 2)
    gt_pts = rbox2poly_np(np.array(gt_rbox).reshape(-1, 5))[0].reshape(-1, 2)

    def _enclosing_rect(points):
        # [xmin, ymin, xmax, ymax] of a set of (x, y) points
        return [
            np.min(points[:, 0]), np.min(points[:, 1]), np.max(points[:, 0]),
            np.max(points[:, 1])
        ]

    iou = jaccard_overlap(
        _enclosing_rect(pred_pts), _enclosing_rect(gt_pts), False)
    if iou <= 0:
        return iou

    # Rectangles overlap: compute the exact rotated-box IoU.
    pred_poly = np.array(pred_pts.reshape(-1, 8), np.float32).reshape(-1, 8)
    pred_rbox = poly2rbox(pred_poly).reshape(-1, 5)
    try:
        from rbox_iou_ops import rbox_iou
    except Exception as e:
        print("import custom_ops error, try install rbox_iou_ops " \
              "following ppdet/ext_op/README.md", e)
        sys.stdout.flush()
        sys.exit(-1)

    gt_rbox = np.array(gt_rbox, np.float32).reshape(-1, 5)
    pd_gt_rbox = paddle.to_tensor(gt_rbox, dtype='float32')
    pd_pred_rbox = paddle.to_tensor(pred_rbox, dtype='float32')
    return rbox_iou(pd_gt_rbox, pd_pred_rbox).numpy()[0][0]
||||
|
||||
|
||||
def prune_zero_padding(gt_box, gt_label, difficult=None):
    """Cut ground-truth arrays at the first all-zero box.

    Args:
        gt_box: 2-D array of boxes, indexed as ``gt_box[i, j]``; the first
            four columns are inspected.
        gt_label: Labels aligned with ``gt_box``.
        difficult: Optional flags aligned with ``gt_box``.

    Returns:
        tuple: ``(gt_box, gt_label, difficult)`` truncated to the rows that
        precede the first box whose first four values are all zero.
        ``difficult`` stays ``None`` when it was not given.
    """
    kept = 0
    for row in range(len(gt_box)):
        if all(gt_box[row, col] == 0 for col in range(4)):
            break
        kept += 1

    pruned_difficult = None if difficult is None else difficult[:kept]
    return (gt_box[:kept], gt_label[:kept], pruned_difficult)
||||
|
||||
|
||||
class DetectionMAP(object):
    """
    Calculate detection mean average precision.
    Currently support two types: 11point and integral

    Args:
        class_num (int): The class number.
        overlap_thresh (float): The threshold of overlap
            ratio between prediction bounding box and
            ground truth bounding box for deciding
            true/false positive. Default 0.5.
        map_type (str): Calculation method of mean average
            precision, currently support '11point' and
            'integral'. Default '11point'.
        is_bbox_normalized (bool): Whether bounding boxes
            is normalized to range[0, 1]. Default False.
        evaluate_difficult (bool): Whether to evaluate
            difficult bounding boxes. Default False.
        catid2name (dict): Mapping between category id and category name.
            Its values are used, in order, as the names of class indices
            0, 1, ...; may be None, in which case the class index is used
            as the name.
        classwise (bool): Whether per-category AP and draw
            P-R Curve or not.
    """

    def __init__(self,
                 class_num,
                 overlap_thresh=0.5,
                 map_type='11point',
                 is_bbox_normalized=False,
                 evaluate_difficult=False,
                 catid2name=None,
                 classwise=False):
        self.class_num = class_num
        self.overlap_thresh = overlap_thresh
        assert map_type in ['11point', 'integral'], \
            "map_type currently only support '11point' "\
            "and 'integral'"
        self.map_type = map_type
        self.is_bbox_normalized = is_bbox_normalized
        self.evaluate_difficult = evaluate_difficult
        self.classwise = classwise
        # The documented default (None) must not crash on `.values()`.
        self.classes = list(catid2name.values()) if catid2name else []
        self.reset()

    def update(self, bbox, score, label, gt_box, gt_label, difficult=None):
        """
        Update metric statics from given prediction and ground
        truth infomations.

        Each prediction is matched to the same-label ground truth with the
        highest overlap. It counts as a true positive when that overlap
        exceeds ``overlap_thresh`` and the ground truth has not been matched
        before; matches to ignored difficult boxes are not recorded.
        """
        if difficult is None:
            difficult = np.zeros_like(gt_label)

        # record class gt count
        for gtl, diff in zip(gt_label, difficult):
            if self.evaluate_difficult or int(diff) == 0:
                self.class_gt_counts[int(np.array(gtl))] += 1

        # record class score positive
        visited = [False] * len(gt_label)
        for b, s, l in zip(bbox, score, label):
            pred = b.tolist() if isinstance(b, np.ndarray) else b
            max_idx = -1
            max_overlap = -1.0
            for i, gl in enumerate(gt_label):
                if int(gl) != int(l):
                    continue
                # 5-value ground truths are treated as rotated boxes.
                if len(gt_box[i]) == 5:
                    overlap = calc_rbox_iou(pred, gt_box[i])
                else:
                    overlap = jaccard_overlap(pred, gt_box[i],
                                              self.is_bbox_normalized)
                if overlap > max_overlap:
                    max_overlap = overlap
                    max_idx = i

            if max_overlap > self.overlap_thresh:
                if self.evaluate_difficult or \
                        int(np.array(difficult[max_idx])) == 0:
                    if not visited[max_idx]:
                        self.class_score_poss[int(l)].append([s, 1.0])
                        visited[max_idx] = True
                    else:
                        self.class_score_poss[int(l)].append([s, 0.0])
            else:
                self.class_score_poss[int(l)].append([s, 0.0])

    def reset(self):
        """
        Reset metric statics
        """
        self.class_score_poss = [[] for _ in range(self.class_num)]
        self.class_gt_counts = [0] * self.class_num
        self.mAP = 0.0
        # Initialised here so get_map() works before accumulate().
        self.eval_results = []

    def _class_name(self, class_idx):
        """Return the name of a class index, or the index as text."""
        if class_idx < len(self.classes):
            return self.classes[class_idx]
        return str(class_idx)

    @staticmethod
    def _eleven_point_ap(precision, recall):
        """Return the 11-point interpolated AP of a precision/recall curve."""
        max_precisions = [0.] * 11
        start_idx = len(precision) - 1
        for j in range(10, -1, -1):
            for i in range(start_idx, -1, -1):
                if recall[i] < float(j) / 10.:
                    start_idx = i
                    if j > 0:
                        max_precisions[j - 1] = max_precisions[j]
                        break
                else:
                    if max_precisions[j] < precision[i]:
                        max_precisions[j] = precision[i]
        return sum(max_precisions) / 11.

    @staticmethod
    def _integral_ap(precision, recall):
        """Return the AP obtained by summing precision over recall steps."""
        ap = 0.0
        prev_recall = 0.
        for prec, rec in zip(precision, recall):
            recall_gap = abs(rec - prev_recall)
            if recall_gap > 1e-6:
                ap += prec * recall_gap
                prev_recall = rec
        return ap

    def accumulate(self):
        """
        Accumulate metric results and calculate mAP
        """
        mAP = 0.
        valid_cnt = 0
        eval_results = []
        per_class = zip(self.class_score_poss, self.class_gt_counts)
        for class_idx, (score_pos, count) in enumerate(per_class):
            # Classes without ground truth do not take part in the mean.
            if count == 0:
                continue
            # Ground truth but no prediction: counts as AP 0 in the mean.
            if len(score_pos) == 0:
                valid_cnt += 1
                continue

            accum_tp_list, accum_fp_list = \
                self._get_tp_fp_accum(score_pos)
            precision = [
                float(ac_tp) / (ac_tp + ac_fp)
                for ac_tp, ac_fp in zip(accum_tp_list, accum_fp_list)
            ]
            recall = [float(ac_tp) / count for ac_tp in accum_tp_list]

            if self.map_type == '11point':
                one_class_ap = self._eleven_point_ap(precision, recall)
            elif self.map_type == 'integral':
                one_class_ap = self._integral_ap(precision, recall)
            else:
                logger.error("Unspported mAP type {}".format(self.map_type))
                sys.exit(1)
            mAP += one_class_ap
            valid_cnt += 1
            eval_results.append({
                # Named by class index: a counter of evaluated classes
                # drifts as soon as an earlier class is skipped.
                'class': self._class_name(class_idx),
                'ap': one_class_ap,
                'precision': precision,
                'recall': recall,
            })
        self.eval_results = eval_results
        self.mAP = mAP / float(valid_cnt) if valid_cnt > 0 else mAP

    def get_map(self):
        """
        Get mAP result

        When ``classwise`` is set, the per-category AP table is also logged
        and one P-R curve per category is written to ``voc_pr_curve``.
        """
        if self.mAP is None:
            logger.error("mAP is not calculated.")
        if self.classwise:
            # Compute per-category AP and PR curve
            try:
                from terminaltables import AsciiTable
            except Exception as e:
                logger.error(
                    'terminaltables not found, plaese install terminaltables. '
                    'for example: `pip install terminaltables`.')
                raise e
            results_per_category = []
            for eval_result in self.eval_results:
                results_per_category.append(
                    (str(eval_result['class']),
                     '{:0.3f}'.format(float(eval_result['ap']))))
                draw_pr_curve(
                    eval_result['precision'],
                    eval_result['recall'],
                    out_dir='voc_pr_curve',
                    file_name='{}_precision_recall_curve.jpg'.format(
                        eval_result['class']))

            # Lay the (category, AP) pairs out in at most three pairs per row.
            num_columns = min(6, len(results_per_category) * 2)
            results_flatten = list(itertools.chain(*results_per_category))
            headers = ['category', 'AP'] * (num_columns // 2)
            results_2d = itertools.zip_longest(*[
                results_flatten[i::num_columns] for i in range(num_columns)
            ])
            table_data = [headers]
            table_data.extend(results_2d)
            table = AsciiTable(table_data)
            logger.info('Per-category of VOC AP: \n{}'.format(table.table))
            logger.info(
                "per-category PR curve has output to voc_pr_curve folder.")
        return self.mAP

    def _get_tp_fp_accum(self, score_pos_list):
        """
        Calculate accumulating true/false positive results from
        [score, pos] records
        """
        sorted_list = sorted(score_pos_list, key=lambda s: s[0], reverse=True)
        accum_tp = 0
        accum_fp = 0
        accum_tp_list = []
        accum_fp_list = []
        for (score, pos) in sorted_list:
            accum_tp += int(pos)
            accum_tp_list.append(accum_tp)
            accum_fp += 1 - int(pos)
            accum_fp_list.append(accum_fp)
        return accum_tp_list, accum_fp_list
||||
|
||||
|
||||
def ap_per_class(tp, conf, pred_cls, target_cls):
    """
    Compute AP, recall and precision for every class that occurs.
    Method originally from https://github.com/rafaelpadilla/Object-Detection-Metrics.

    Args:
        tp (list): True positives.
        conf (list): Objectness value from 0-1.
        pred_cls (list): Predicted object classes.
        target_cls (list): Target object classes.

    Returns:
        tuple: ``(ap, classes, recall, precision)`` as numpy arrays, where
        ``classes`` holds the sorted unique class ids (int32) found in the
        predictions and the targets. A class that has predictions but no
        targets, or targets but no predictions, scores 0 for all three
        metrics.
    """
    tp = np.array(tp)
    conf = np.array(conf)
    pred_cls = np.array(pred_cls)
    target_cls = np.array(target_cls)

    # Sort by objectness, highest first
    order = np.argsort(-conf)
    tp, conf, pred_cls = tp[order], conf[order], pred_cls[order]

    # Find unique classes
    unique_classes = np.unique(np.concatenate((pred_cls, target_cls), 0))

    # Create Precision-Recall curve and compute AP for each class
    ap, p, r = [], [], []
    for c in unique_classes:
        of_class = pred_cls == c
        n_gt = np.count_nonzero(target_cls == c)  # ground truth objects
        n_p = np.count_nonzero(of_class)  # predicted objects

        if n_p == 0 and n_gt == 0:
            continue
        if n_p == 0 or n_gt == 0:
            ap.append(0)
            r.append(0)
            p.append(0)
            continue

        # Accumulate FPs and TPs
        class_tp = tp[of_class]
        fpc = np.cumsum(1 - class_tp)
        tpc = np.cumsum(class_tp)

        # Recall (the epsilon guards the division)
        recall_curve = tpc / (n_gt + 1e-16)
        r.append(tpc[-1] / (n_gt + 1e-16))

        # Precision
        precision_curve = tpc / (tpc + fpc)
        p.append(tpc[-1] / (tpc[-1] + fpc[-1]))

        # AP from recall-precision curve
        ap.append(compute_ap(recall_curve, precision_curve))

    return np.array(ap), unique_classes.astype('int32'), np.array(r), np.array(
        p)
||||
|
||||
|
||||
def compute_ap(recall, precision):
    """
    Compute the average precision from a recall and a precision curve.
    Code originally from https://github.com/rbgirshick/py-faster-rcnn.

    Args:
        recall (list): The recall curve.
        precision (list): The precision curve.

    Returns:
        The average precision as computed in py-faster-rcnn.
    """
    # Pad both curves with sentinel values at each end.
    rec = np.concatenate(([0.], recall, [1.]))
    prec = np.concatenate(([0.], precision, [0.]))

    # Precision envelope: every point becomes the maximum of itself and
    # everything to its right (running maximum taken from the end).
    prec = np.maximum.accumulate(prec[::-1])[::-1]

    # The area under the PR curve only changes where recall changes value.
    steps = np.where(rec[1:] != rec[:-1])[0]

    # Sum (delta recall) * precision over those steps.
    return np.sum((rec[steps + 1] - rec[steps]) * prec[steps + 1])
@ -0,0 +1,470 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
import os |
||||
import copy |
||||
import sys |
||||
import math |
||||
from collections import defaultdict |
||||
from motmetrics.math_util import quiet_divide |
||||
|
||||
import numpy as np |
||||
import pandas as pd |
||||
|
||||
import paddle |
||||
import paddle.nn.functional as F |
||||
from .metrics import Metric |
||||
import motmetrics as mm |
||||
import openpyxl |
||||
metrics = mm.metrics.motchallenge_metrics |
||||
mh = mm.metrics.create() |
||||
from paddlers.models.ppdet.utils.logger import setup_logger |
||||
logger = setup_logger(__name__) |
||||
|
||||
__all__ = ['MCMOTEvaluator', 'MCMOTMetric'] |
||||
|
||||
METRICS_LIST = [ |
||||
'num_frames', 'num_matches', 'num_switches', 'num_transfer', 'num_ascend', |
||||
'num_migrate', 'num_false_positives', 'num_misses', 'num_detections', |
||||
'num_objects', 'num_predictions', 'num_unique_objects', 'mostly_tracked', |
||||
'partially_tracked', 'mostly_lost', 'num_fragmentations', 'motp', 'mota', |
||||
'precision', 'recall', 'idfp', 'idfn', 'idtp', 'idp', 'idr', 'idf1' |
||||
] |
||||
|
||||
NAME_MAP = { |
||||
'num_frames': 'num_frames', |
||||
'num_matches': 'num_matches', |
||||
'num_switches': 'IDs', |
||||
'num_transfer': 'IDt', |
||||
'num_ascend': 'IDa', |
||||
'num_migrate': 'IDm', |
||||
'num_false_positives': 'FP', |
||||
'num_misses': 'FN', |
||||
'num_detections': 'num_detections', |
||||
'num_objects': 'num_objects', |
||||
'num_predictions': 'num_predictions', |
||||
'num_unique_objects': 'GT', |
||||
'mostly_tracked': 'MT', |
||||
'partially_tracked': 'partially_tracked', |
||||
'mostly_lost': 'ML', |
||||
'num_fragmentations': 'FM', |
||||
'motp': 'MOTP', |
||||
'mota': 'MOTA', |
||||
'precision': 'Prcn', |
||||
'recall': 'Rcll', |
||||
'idfp': 'idfp', |
||||
'idfn': 'idfn', |
||||
'idtp': 'idtp', |
||||
'idp': 'IDP', |
||||
'idr': 'IDR', |
||||
'idf1': 'IDF1' |
||||
} |
||||
|
||||
|
||||
def parse_accs_metrics(seq_acc, index_name, verbose=False):
    """
    Parse the evaluation indicators of multiple MOTAccumulator.

    Args:
        seq_acc (list): Accumulators passed on to
            ``MCMOTEvaluator.get_summary``.
        index_name (list): Row names, one per accumulator.
        verbose (bool): Whether to print the rendered summary.

    Returns:
        The summary table, with the 'OVERALL' MOTP replaced by the
        detection-weighted mean of the per-accumulator MOTP values.
    """
    summary = MCMOTEvaluator.get_summary(seq_acc, index_name, METRICS_LIST)

    # Weight only the per-accumulator rows. The 'OVERALL' row must not take
    # part in its own average: whenever its MOTP is finite it would be
    # counted a second time. NaN MOTP values are skipped by pandas' sum().
    per_acc = summary.loc[summary.index != 'OVERALL']
    weighted_motp = (per_acc['motp'] * per_acc['num_detections']).sum()
    summary.loc['OVERALL', 'motp'] = weighted_motp / \
        summary.loc['OVERALL', 'num_detections']

    if verbose:
        # The metrics host is only needed for its formatters.
        mh = mm.metrics.create()
        strsummary = mm.io.render_summary(
            summary, formatters=mh.formatters, namemap=NAME_MAP)
        print(strsummary)

    return summary
||||
|
||||
|
||||
def seqs_overall_metrics(summary_df, verbose=False):
    """
    Calculate overall metrics for multiple sequences.

    A row named 'overall_calc' is appended to a copy of ``summary_df``.
    Count columns are summed; ratio columns are recomputed through the
    matching ``MCMOTMetricOverall.<name>_overall`` function.

    Args:
        summary_df: Summary table holding one row per sequence.
        verbose (bool): Whether to print the rendered result.

    Returns:
        The copied table including the 'overall_calc' row.
    """
    summed_cols = [
        'num_frames', 'num_matches', 'num_switches', 'num_transfer',
        'num_ascend', 'num_migrate', 'num_false_positives', 'num_misses',
        'num_detections', 'num_objects', 'num_predictions',
        'num_unique_objects', 'mostly_tracked', 'partially_tracked',
        'mostly_lost', 'num_fragmentations', 'idfp', 'idfn', 'idtp'
    ]
    derived_cols = [
        'motp', 'mota', 'precision', 'recall', 'idp', 'idr', 'idf1'
    ]
    calc_df = summary_df.copy()

    # Plain counts add up across sequences.
    overall_dic = {name: calc_df[name].sum() for name in summed_cols}

    # Ratios are derived from the summed counts.
    for name in derived_cols:
        compute = getattr(MCMOTMetricOverall, name + '_overall')
        overall_dic[name] = compute(calc_df, overall_dic)

    overall_row = pd.DataFrame(overall_dic, index=['overall_calc'])
    calc_df = pd.concat([calc_df, overall_row])

    if verbose:
        host = mm.metrics.create()
        print(
            mm.io.render_summary(
                calc_df, formatters=host.formatters, namemap=NAME_MAP))

    return calc_df
||||
|
||||
|
||||
class MCMOTMetricOverall(object):
    """
    Namespace of functions that derive overall ratio metrics.

    Every function takes ``(summary_df, overall_dic)``: the per-sequence
    summary table and a dict of counts already summed over all sequences.
    Only ``motp_overall`` reads ``summary_df``; the others keep the
    parameter so all functions share one signature. They are declared as
    static methods because none of them takes ``self``.
    """

    @staticmethod
    def motp_overall(summary_df, overall_dic):
        """Detection-weighted mean of the per-row MOTP."""
        motp = quiet_divide(
            (summary_df['motp'] * summary_df['num_detections']).sum(),
            overall_dic['num_detections'])
        return motp

    @staticmethod
    def mota_overall(summary_df, overall_dic):
        """1 - (misses + switches + false positives) / objects."""
        errors = (overall_dic['num_misses'] + overall_dic['num_switches'] +
                  overall_dic['num_false_positives'])
        mota = 1. - quiet_divide(errors, overall_dic['num_objects'])
        return mota

    @staticmethod
    def precision_overall(summary_df, overall_dic):
        """detections / (false positives + detections)."""
        precision = quiet_divide(
            overall_dic['num_detections'],
            overall_dic['num_false_positives'] +
            overall_dic['num_detections'])
        return precision

    @staticmethod
    def recall_overall(summary_df, overall_dic):
        """detections / objects."""
        recall = quiet_divide(overall_dic['num_detections'],
                              overall_dic['num_objects'])
        return recall

    @staticmethod
    def idp_overall(summary_df, overall_dic):
        """idtp / (idtp + idfp)."""
        idp = quiet_divide(overall_dic['idtp'],
                           overall_dic['idtp'] + overall_dic['idfp'])
        return idp

    @staticmethod
    def idr_overall(summary_df, overall_dic):
        """idtp / (idtp + idfn)."""
        idr = quiet_divide(overall_dic['idtp'],
                           overall_dic['idtp'] + overall_dic['idfn'])
        return idr

    @staticmethod
    def idf1_overall(summary_df, overall_dic):
        """2 * idtp / (objects + predictions)."""
        idf1 = quiet_divide(
            2. * overall_dic['idtp'],
            overall_dic['num_objects'] + overall_dic['num_predictions'])
        return idf1
||||
|
||||
|
||||
def read_mcmot_results_union(filename, is_gt, is_ignore):
    """
    Read a comma-separated MCMOT result file with all classes merged.

    Columns are read as: 0 frame id, 1 track id, 2-5 box (stored as
    ``tlwh``), 6 score, 7 class id. Track ids are shifted class by class
    so that they are unique across classes.

    Args:
        filename (str): Path of the result file.
        is_gt (bool): If True, rows whose class id is 0 are dropped, the
            remaining class ids are reduced by 1 and every score is 1.
        is_ignore (bool): If True, an empty dict is returned.

    Returns:
        dict: ``{frame_id: [(tlwh, track_id, cls_id, score), ...]}``.
            Empty when the file is missing, empty or has fewer than
            8 columns.
    """
    results_dict = dict()
    if is_ignore or not os.path.isfile(filename):
        return results_dict

    # np.loadtxt collapses a one-row file to a 1-D array; atleast_2d keeps
    # the column indexing below valid in that case.
    all_result = np.atleast_2d(np.loadtxt(filename, delimiter=','))
    # Column 7 (class id) is read below, so 8 columns are required.
    if all_result.size == 0 or all_result.shape[1] < 8:
        return results_dict

    if is_gt:
        # only for test use
        all_result = all_result[all_result[:, 7] != 0]
        all_result[:, 7] -= 1

    if all_result.shape[0] == 0:
        return results_dict

    last_max_id = 0
    result_cls_list = []
    for cls in np.unique(all_result[:, 7]):
        # Boolean indexing copies, so the shift does not touch all_result.
        result_cls_split = all_result[all_result[:, 7] == cls]
        result_cls_split[:, 1] += last_max_id
        # make sure track id different between every category
        last_max_id = result_cls_split[:, 1].max() + 1
        result_cls_list.append(result_cls_split)

    for row in np.concatenate(result_cls_list):
        fid = int(row[0])
        if fid < 1:
            continue

        score = 1 if is_gt else float(row[6])
        tlwh = tuple(map(float, row[2:6]))
        target_id = int(row[1])
        cls = int(row[7])

        results_dict.setdefault(fid, list()).append(
            (tlwh, target_id, cls, score))

    return results_dict
||||
|
||||
|
||||
def read_mcmot_results(filename, is_gt, is_ignore):
    """
    Read a comma-separated MCMOT result file grouped by class.

    Fields are read as: 0 frame id, 1 track id, 2-5 box (stored as
    ``tlwh``), 6 score, 7 class id.

    Args:
        filename (str): Path of the result file.
        is_gt (bool): If True, every score is 1 and class ids are reduced
            by 1.
        is_ignore (bool): Not used by this reader; kept so that both
            readers share one signature.

    Returns:
        dict: ``{cls_id: {frame_id: [(tlwh, track_id, score), ...]}}``.
            Empty when the file does not exist.
    """
    results_dict = dict()
    if not os.path.isfile(filename):
        return results_dict

    with open(filename, 'r') as f:
        for line in f:
            linelist = line.strip().split(',')
            # Field 7 (class id) is read below, so 8 fields are required;
            # shorter lines are skipped.
            if len(linelist) < 8:
                continue
            fid = int(linelist[0])
            if fid < 1:
                continue
            cid = int(linelist[7])
            if is_gt:
                score = 1
                # only for test use
                cid -= 1
            else:
                score = float(linelist[6])

            tlwh = tuple(map(float, linelist[2:6]))
            target_id = int(linelist[1])
            cls_result_dict = results_dict.setdefault(cid, dict())
            cls_result_dict.setdefault(fid, list()).append(
                (tlwh, target_id, score))
    return results_dict
||||
|
||||
|
||||
def read_results(filename,
                 data_type,
                 is_gt=False,
                 is_ignore=False,
                 multi_class=False,
                 union=False):
    """
    Pick the reader for a result file and return what it parses.

    Args:
        filename (str): Path of the result file.
        data_type (str): Must be 'mcmot' or 'lab'.
        is_gt (bool): Forwarded to the reader.
        is_ignore (bool): Forwarded to the reader.
        multi_class (bool): Must be True for the supported data types.
        union (bool): Choose the reader that merges all categories
            instead of the per-category one.

    Raises:
        ValueError: If ``data_type`` is unknown or ``multi_class`` is
            false.
    """
    if data_type not in ['mcmot', 'lab']:
        raise ValueError('Unknown data type: {}'.format(data_type))
    if not multi_class:
        raise ValueError('multi_class: {}, MCMOT should have cls_id.'.
                         format(multi_class))

    # union: the results are evaluated by union all the categories, so
    # track IDs between different categories cannot be duplicate.
    # otherwise: the results are evaluated separately by category.
    reader = read_mcmot_results_union if union else read_mcmot_results
    return reader(filename, is_gt, is_ignore)
||||
|
||||
|
||||
def unzip_objs(objs):
    """
    Split ``(tlwh, id, score)`` tuples into three parallel sequences.

    Returns:
        tuple: ``(tlwhs, ids, scores)`` where ``tlwhs`` is a float array
            reshaped to (-1, 4). ``ids`` and ``scores`` are tuples, or
            empty lists when ``objs`` is empty.
    """
    columns = zip(*objs) if len(objs) > 0 else ([], [], [])
    boxes, ids, scores = columns
    boxes = np.asarray(boxes, dtype=float).reshape(-1, 4)
    return boxes, ids, scores
||||
|
||||
|
||||
def unzip_objs_cls(objs):
    """
    Split ``(tlwh, id, cls, score)`` tuples into four NumPy arrays.

    Returns:
        tuple: ``(tlwhs, ids, cls, scores)`` where ``tlwhs`` is a float
            array reshaped to (-1, 4) and the others are 1-D arrays.
    """
    columns = zip(*objs) if len(objs) > 0 else ([], [], [], [])
    boxes, ids, cls, scores = columns
    boxes = np.asarray(boxes, dtype=float).reshape(-1, 4)
    return boxes, np.array(ids), np.array(cls), np.array(scores)
||||
|
||||
|
||||
class MCMOTEvaluator(object):
    """
    Evaluate the tracking results of one sequence, class by class.

    Args:
        data_root (str): Directory the ground-truth path is derived from.
        seq_name (str): Sequence name; the ground truth is read from
            ``<data_root>/../../sequences/<seq_name>.txt``.
        data_type (str): Must be 'mcmot'.
        num_classes (int): Number of class ids (0 .. num_classes - 1) to
            evaluate.
    """

    def __init__(self, data_root, seq_name, data_type, num_classes):
        self.data_root = data_root
        self.seq_name = seq_name
        self.data_type = data_type
        self.num_classes = num_classes

        self.load_annotations()
        self.reset_accumulator()

        # One accumulator per class id, filled by eval_file().
        self.class_accs = []

    def load_annotations(self):
        """Set ``self.gt_filename`` to the ground-truth file path."""
        assert self.data_type == 'mcmot'
        self.gt_filename = os.path.join(self.data_root, '../', '../',
                                        'sequences',
                                        '{}.txt'.format(self.seq_name))

    def reset_accumulator(self):
        """Replace ``self.acc`` with a fresh ``MOTAccumulator``."""
        import motmetrics as mm
        mm.lap.default_solver = 'lap'
        self.acc = mm.MOTAccumulator(auto_id=True)

    def eval_frame_dict(self, trk_objs, gt_objs, rtn_events=False,
                        union=False):
        """
        Feed the objects of one frame into ``self.acc``.

        Args:
            trk_objs (list): Tracked objects of the frame.
            gt_objs (list): Ground-truth objects of the frame.
            rtn_events (bool): Whether to return the accumulator's
                ``mot_events`` when they are available.
            union (bool): If True, objects are
                ``(tlwh, id, cls, score)`` tuples and pairs of different
                classes get a NaN distance; otherwise objects are
                ``(tlwh, id, score)`` tuples.

        Returns:
            ``self.acc.mot_events`` or None.
        """
        import motmetrics as mm
        mm.lap.default_solver = 'lap'
        if union:
            trk_tlwhs, trk_ids, trk_cls = unzip_objs_cls(trk_objs)[:3]
            gt_tlwhs, gt_ids, gt_cls = unzip_objs_cls(gt_objs)[:3]

            # get distance matrix
            iou_distance = mm.distances.iou_matrix(
                gt_tlwhs, trk_tlwhs, max_iou=0.5)

            # Set the distance between objects of different categories to
            # nan. When the number of GT or Trk is 0, iou_distance
            # dimension is (0,0), so the mask is only applied when both
            # sides are non-empty.
            if len(gt_cls) != 0 and len(trk_cls) != 0:
                same_cls = gt_cls.reshape(-1, 1) == trk_cls.reshape(1, -1)
                iou_distance = np.where(same_cls, iou_distance, np.nan)
        else:
            trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2]
            gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2]

            # get distance matrix
            iou_distance = mm.distances.iou_matrix(
                gt_tlwhs, trk_tlwhs, max_iou=0.5)

        self.acc.update(gt_ids, trk_ids, iou_distance)

        if rtn_events and iou_distance.size > 0 and hasattr(self.acc,
                                                            'mot_events'):
            # only supported by https://github.com/longcw/py-motmetrics
            events = self.acc.mot_events
        else:
            events = None
        return events

    def eval_file(self, result_filename):
        """
        Evaluate a result file against the ground truth of each category.

        Args:
            result_filename (str): Path of the tracking result file.

        Returns:
            list: One accumulator per class id, in class-id order.
        """
        gt_frame_dict = read_results(
            self.gt_filename,
            self.data_type,
            is_gt=True,
            multi_class=True,
            union=False)
        result_frame_dict = read_results(
            result_filename,
            self.data_type,
            is_gt=False,
            multi_class=True,
            union=False)

        # Start from an empty list so that a second call does not return
        # the accumulators of the previous call as well.
        self.class_accs = []
        for cid in range(self.num_classes):
            self.reset_accumulator()
            cls_result_frame_dict = result_frame_dict.get(cid, {})
            cls_gt_frame_dict = gt_frame_dict.get(cid, {})

            # only labeled frames will be evaluated
            for frame_id in sorted(cls_gt_frame_dict):
                trk_objs = cls_result_frame_dict.get(frame_id, [])
                gt_objs = cls_gt_frame_dict.get(frame_id, [])
                self.eval_frame_dict(trk_objs, gt_objs, rtn_events=False)

            self.class_accs.append(self.acc)

        return self.class_accs

    @staticmethod
    def get_summary(accs,
                    names,
                    metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1',
                             'precision', 'recall')):
        """
        Compute a summary table, including an overall row.

        Args:
            accs (list): Accumulators to summarise.
            names (list): Row names, one per accumulator.
            metrics: Metric names; ``None`` selects
                ``mm.metrics.motchallenge_metrics``.
        """
        import motmetrics as mm
        mm.lap.default_solver = 'lap'

        names = copy.deepcopy(names)
        if metrics is None:
            metrics = mm.metrics.motchallenge_metrics
        metrics = copy.deepcopy(metrics)

        mh = mm.metrics.create()
        summary = mh.compute_many(
            accs, metrics=metrics, names=names, generate_overall=True)

        return summary

    @staticmethod
    def save_summary(summary, filename):
        """Write ``summary`` to an Excel file at ``filename``."""
        import pandas as pd
        # The context manager saves and closes the workbook on exit;
        # ExcelWriter.save() is no longer available in pandas 2.x.
        with pd.ExcelWriter(filename) as writer:
            summary.to_excel(writer)
||||
|
||||
|
||||
class MCMOTMetric(Metric):
    """
    Multi-class MOT metric collected over several sequences.

    Args:
        num_classes (int): Number of classes evaluated per sequence.
        save_summary (bool): Stored on the instance; not read by the
            methods of this class.
    """

    def __init__(self, num_classes, save_summary=False):
        self.num_classes = num_classes
        self.save_summary = save_summary
        self.MCMOTEvaluator = MCMOTEvaluator
        self.result_root = None
        self.reset()

    def reset(self):
        """Drop everything collected by ``update``."""
        self.accs = []
        self.seqs = []
        # Maps a summary row position (class index, then the per-sequence
        # overall row) to the one-row frames collected for it. It is
        # cleared here so that a later evaluation does not mix in rows of
        # an earlier one.
        self.seqs_overall = defaultdict(list)

    def update(self, data_root, seq, data_type, result_root, result_filename):
        """Evaluate one sequence and store its per-class summary rows."""
        evaluator = self.MCMOTEvaluator(data_root, seq, data_type,
                                        self.num_classes)
        seq_acc = evaluator.eval_file(result_filename)
        self.accs.append(seq_acc)
        self.seqs.append(seq)
        self.result_root = result_root

        cls_index_name = [
            '{}_{}'.format(seq, i) for i in range(self.num_classes)
        ]
        summary = parse_accs_metrics(seq_acc, cls_index_name)
        summary.rename(
            index={'OVERALL': '{}_OVERALL'.format(seq)}, inplace=True)
        # Keep every row as a one-row frame, grouped by row position.
        for row in range(len(summary)):
            self.seqs_overall[row].append(summary.iloc[row:row + 1])

    def accumulate(self):
        """Build one overall summary row per class across all sequences."""
        self.cls_summary_list = []
        for row in range(self.num_classes):
            seqs_cls_df = pd.concat(self.seqs_overall[row])
            seqs_cls_summary = seqs_overall_metrics(seqs_cls_df)
            # The last row is the 'overall_calc' row that was appended.
            cls_summary_overall = seqs_cls_summary.iloc[-1:].copy()
            cls_summary_overall.rename(
                index={'overall_calc': 'overall_calc_{}'.format(row)},
                inplace=True)
            self.cls_summary_list.append(cls_summary_overall)

    def log(self):
        """Print the per-sequence and the per-class overall tables."""
        # Row position ``num_classes`` holds the '<seq>_OVERALL' rows.
        seqs_overall_metrics(
            pd.concat(self.seqs_overall[self.num_classes]), verbose=True)
        seqs_overall_metrics(pd.concat(self.cls_summary_list), verbose=True)

    def get_results(self):
        return 1
@ -0,0 +1,434 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
import os |
||||
import sys |
||||
import json |
||||
import paddle |
||||
import numpy as np |
||||
import typing |
||||
|
||||
from .map_utils import prune_zero_padding, DetectionMAP |
||||
from .coco_utils import get_infer_results, cocoapi_eval |
||||
from .widerface_utils import face_eval_run |
||||
from paddlers.models.ppdet.data.source.category import get_categories |
||||
|
||||
from paddlers.models.ppdet.utils.logger import setup_logger |
||||
logger = setup_logger(__name__) |
||||
|
||||
__all__ = [ |
||||
'Metric', 'COCOMetric', 'VOCMetric', 'WiderFaceMetric', |
||||
'get_infer_results', 'RBoxMetric', 'SNIPERCOCOMetric' |
||||
] |
||||
|
||||
COCO_SIGMAS = np.array([ |
||||
.26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, |
||||
.87, .89, .89 |
||||
]) / 10.0 |
||||
CROWD_SIGMAS = np.array( |
||||
[.79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89, .79, |
||||
.79]) / 10.0 |
||||
|
||||
|
||||
class Metric(paddle.metric.Metric):
    """
    Base class of the metrics in this module.

    paddle.metric.Metric defines :meth:`update`, :meth:`accumulate` and
    :meth:`reset`; in ppdet, :meth:`log` and :meth:`get_results` are
    needed as well.
    """

    def name(self):
        return type(self).__name__

    def reset(self):
        """Does nothing in the base class."""

    def accumulate(self):
        """Does nothing in the base class."""

    def log(self):
        """Abstract method for logging metric results."""

    def get_results(self):
        """Abstract method for getting metric results."""
||||
|
||||
|
||||
class COCOMetric(Metric):
    """
    COCO-style metric for bbox, mask, segm and keypoint results.

    Args:
        anno_file (str): Annotation file; must be an existing file.
        **kwargs: Optional settings read with ``kwargs.get``:
            ``clsid2catid`` (default: taken from ``get_categories``),
            ``classwise`` (False), ``output_eval`` (None), ``bias`` (0),
            ``save_prediction_only`` (False), ``IouType`` ('bbox').
    """

    _RESULT_KEYS = ('bbox', 'mask', 'segm', 'keypoint')

    def __init__(self, anno_file, **kwargs):
        assert os.path.isfile(anno_file), \
            "anno_file {} not a file".format(anno_file)
        self.anno_file = anno_file
        self.clsid2catid = kwargs.get('clsid2catid', None)
        if self.clsid2catid is None:
            self.clsid2catid, _ = get_categories('COCO', anno_file)
        self.classwise = kwargs.get('classwise', False)
        self.output_eval = kwargs.get('output_eval', None)
        # TODO: bias should be unified
        self.bias = kwargs.get('bias', 0)
        self.save_prediction_only = kwargs.get('save_prediction_only', False)
        self.iou_type = kwargs.get('IouType', 'bbox')
        self.reset()

    def reset(self):
        # only bbox and mask evaluation support currently
        self.results = {'bbox': [], 'mask': [], 'segm': [], 'keypoint': []}
        self.eval_results = {}

    def update(self, inputs, outputs):
        """Convert one batch of outputs and append it to ``self.results``."""
        outs = {}
        # outputs Tensor -> numpy.ndarray
        for k, v in outputs.items():
            outs[k] = v.numpy() if isinstance(v, paddle.Tensor) else v

        # multi-scale inputs: all inputs have same im_id
        if isinstance(inputs, typing.Sequence):
            im_id = inputs[0]['im_id']
        else:
            im_id = inputs['im_id']
        outs['im_id'] = im_id.numpy() if isinstance(im_id,
                                                    paddle.Tensor) else im_id

        infer_results = get_infer_results(
            outs, self.clsid2catid, bias=self.bias)
        for key in self._RESULT_KEYS:
            if key in infer_results:
                self.results[key] += infer_results[key]

    def _dump_results(self, key):
        """Write ``self.results[key]`` to ``<key>.json``; return the path."""
        output = "{}.json".format(key)
        if self.output_eval:
            # Make sure the target directory exists before writing.
            os.makedirs(self.output_eval, exist_ok=True)
            output = os.path.join(self.output_eval, output)
        with open(output, 'w') as f:
            json.dump(self.results[key], f)
        logger.info('The {} result is saved to {}.'.format(key, output))
        return output

    def _accumulate_one(self, key, style, result_key, **eval_kwargs):
        """Dump one result kind and, unless disabled, evaluate it."""
        if len(self.results[key]) == 0:
            return
        output = self._dump_results(key)
        if self.save_prediction_only:
            logger.info('The {} result is saved to {} and do not '
                        'evaluate the mAP.'.format(key, output))
            return
        self.eval_results[result_key] = cocoapi_eval(
            output,
            style,
            anno_file=self.anno_file,
            classwise=self.classwise,
            **eval_kwargs)
        sys.stdout.flush()

    def accumulate(self):
        """Save every non-empty result list and run the COCO evaluation."""
        self._accumulate_one('bbox', 'bbox', 'bbox')
        self._accumulate_one('mask', 'segm', 'mask')
        # 'segm' statistics are stored under the 'mask' key as well.
        self._accumulate_one('segm', 'segm', 'mask')

        if self.iou_type == 'keypoints_crowd':
            style, use_area, sigmas = 'keypoints_crowd', False, CROWD_SIGMAS
        else:
            style, use_area, sigmas = 'keypoints', True, COCO_SIGMAS
        self._accumulate_one(
            'keypoint', style, 'keypoint', sigmas=sigmas, use_area=use_area)

    def log(self):
        pass

    def get_results(self):
        return self.eval_results
||||
|
||||
|
||||
class VOCMetric(Metric):
    """
    Pascal-VOC style mAP metric built on ``DetectionMAP``.

    Args:
        label_list (str): Label list file; must be an existing file.
        class_num (int): Passed to ``DetectionMAP``.
        overlap_thresh (float): Passed to ``DetectionMAP`` and shown in
            the log line.
        map_type (str): Passed to ``DetectionMAP`` and shown in the log
            line.
        is_bbox_normalized (bool): Passed to ``DetectionMAP``.
        evaluate_difficult (bool): Passed to ``DetectionMAP``; when
            False, the 'difficult' flags of the inputs are forwarded to
            the mAP update.
        classwise (bool): Passed to ``DetectionMAP``.
    """

    def __init__(self,
                 label_list,
                 class_num=20,
                 overlap_thresh=0.5,
                 map_type='11point',
                 is_bbox_normalized=False,
                 evaluate_difficult=False,
                 classwise=False):
        assert os.path.isfile(label_list), \
            "label_list {} not a file".format(label_list)
        self.clsid2catid, self.catid2name = get_categories('VOC', label_list)

        self.overlap_thresh = overlap_thresh
        self.map_type = map_type
        self.evaluate_difficult = evaluate_difficult
        self.detection_map = DetectionMAP(
            class_num=class_num,
            overlap_thresh=overlap_thresh,
            map_type=map_type,
            is_bbox_normalized=is_bbox_normalized,
            evaluate_difficult=evaluate_difficult,
            catid2name=self.catid2name,
            classwise=classwise)

        self.reset()

    def reset(self):
        self.detection_map.reset()

    def update(self, inputs, outputs):
        """Feed one batch of predictions and ground truth into the mAP."""
        # Columns of outputs['bbox'] are read as: label, score, box.
        bbox_np = outputs['bbox'].numpy()
        bboxes = bbox_np[:, 2:]
        scores = bbox_np[:, 1]
        labels = bbox_np[:, 0]
        bbox_lengths = outputs['bbox_num'].numpy()

        # The former ``or bboxes is None`` test was unreachable, because
        # ``bboxes`` is always a slice of ``bbox_np``.
        # NOTE(review): presumably this guards a placeholder output for
        # "no detections" - confirm the shape the model really emits.
        if bboxes.shape == (1, 1):
            return
        gt_boxes = inputs['gt_bbox']
        gt_labels = inputs['gt_class']
        difficults = inputs['difficult'] if not self.evaluate_difficult \
            else None

        if 'scale_factor' in inputs:
            scale_factor = inputs['scale_factor'].numpy()
        else:
            scale_factor = np.ones((gt_boxes.shape[0], 2)).astype('float32')

        # Predictions of all images are concatenated; bbox_lengths[i]
        # says how many of them belong to image i.
        bbox_idx = 0
        for i in range(len(gt_boxes)):
            gt_box = gt_boxes[i].numpy()
            h, w = scale_factor[i]
            gt_box = gt_box / np.array([w, h, w, h])
            gt_label = gt_labels[i].numpy()
            difficult = None if difficults is None \
                else difficults[i].numpy()
            bbox_num = bbox_lengths[i]
            bbox = bboxes[bbox_idx:bbox_idx + bbox_num]
            score = scores[bbox_idx:bbox_idx + bbox_num]
            label = labels[bbox_idx:bbox_idx + bbox_num]
            gt_box, gt_label, difficult = prune_zero_padding(gt_box, gt_label,
                                                             difficult)
            self.detection_map.update(bbox, score, label, gt_box, gt_label,
                                      difficult)
            bbox_idx += bbox_num

    def accumulate(self):
        logger.info("Accumulating evaluatation results...")
        self.detection_map.accumulate()

    def log(self):
        map_stat = 100. * self.detection_map.get_map()
        logger.info("mAP({:.2f}, {}) = {:.2f}%".format(
            self.overlap_thresh, self.map_type, map_stat))

    def get_results(self):
        return {'bbox': [self.detection_map.get_map()]}
||||
|
||||
|
||||
class WiderFaceMetric(Metric):
    """
    WIDER FACE metric; the evaluation is delegated to ``face_eval_run``.

    Args:
        image_dir (str): Passed to ``face_eval_run``.
        anno_file (str): Passed to ``face_eval_run``.
        multi_scale (bool): Passed to ``face_eval_run``.
    """

    def __init__(self, image_dir, anno_file, multi_scale=True):
        self.image_dir = image_dir
        self.anno_file = anno_file
        self.multi_scale = multi_scale
        categories = get_categories('widerface')
        self.clsid2catid, self.catid2name = categories

    def update(self, model):
        """Run the face evaluation for ``model``."""
        eval_options = dict(
            pred_dir='output/pred',
            eval_mode='widerface',
            multi_scale=self.multi_scale)
        face_eval_run(model, self.image_dir, self.anno_file, **eval_options)
||||
|
||||
|
||||
class RBoxMetric(Metric):
    """
    mAP metric that matches predictions with the annotations of a JSON
    annotation file.

    Args:
        anno_file (str): JSON annotation file with 'categories' and
            'annotations' entries; must be an existing file.
        **kwargs: Optional settings read with ``kwargs.get``:
            ``classwise`` (False), ``output_eval`` (None), ``bias`` (0),
            ``save_prediction_only`` (False), ``IouType`` ('bbox'),
            ``overlap_thresh`` (0.5), ``map_type`` ('11point'),
            ``evaluate_difficult`` (False).
    """

    def __init__(self, anno_file, **kwargs):
        assert os.path.isfile(anno_file), \
            "anno_file {} not a file".format(anno_file)
        self.anno_file = anno_file
        # Close the annotation file as soon as it has been parsed.
        with open(self.anno_file) as f:
            self.gt_anno = json.load(f)
        cats = self.gt_anno['categories']
        self.clsid2catid = {i: cat['id'] for i, cat in enumerate(cats)}
        self.catid2clsid = {cat['id']: i for i, cat in enumerate(cats)}
        self.catid2name = {cat['id']: cat['name'] for cat in cats}
        self.classwise = kwargs.get('classwise', False)
        self.output_eval = kwargs.get('output_eval', None)
        # TODO: bias should be unified
        self.bias = kwargs.get('bias', 0)
        self.save_prediction_only = kwargs.get('save_prediction_only', False)
        self.iou_type = kwargs.get('IouType', 'bbox')
        self.overlap_thresh = kwargs.get('overlap_thresh', 0.5)
        self.map_type = kwargs.get('map_type', '11point')
        self.evaluate_difficult = kwargs.get('evaluate_difficult', False)
        class_num = len(self.catid2name)
        self.detection_map = DetectionMAP(
            class_num=class_num,
            overlap_thresh=self.overlap_thresh,
            map_type=self.map_type,
            is_bbox_normalized=False,
            evaluate_difficult=self.evaluate_difficult,
            catid2name=self.catid2name,
            classwise=self.classwise)

        self.reset()

    def reset(self):
        self.result_bbox = []
        self.detection_map.reset()

    def update(self, inputs, outputs):
        """Store the predictions of one step and update the mAP."""
        outs = {}
        # outputs Tensor -> numpy.ndarray
        for k, v in outputs.items():
            outs[k] = v.numpy() if isinstance(v, paddle.Tensor) else v

        im_id = inputs['im_id']
        outs['im_id'] = im_id.numpy() if isinstance(im_id,
                                                    paddle.Tensor) else im_id

        infer_results = get_infer_results(
            outs, self.clsid2catid, bias=self.bias)
        cur_bbox = infer_results['bbox'] if 'bbox' in infer_results else []
        self.result_bbox += cur_bbox

        # Match only the predictions of this step with this image's
        # ground truth. Taking them from self.result_bbox would feed the
        # predictions of all earlier images in again on every call.
        bbox = [b['bbox'] for b in cur_bbox]
        score = [b['score'] for b in cur_bbox]
        label = [self.catid2clsid[b['category_id']] for b in cur_bbox]

        # NOTE(review): the image_id comparison presumably relies on a
        # single image per step - confirm against the evaluation loader.
        gt_box = [
            e['bbox'] for e in self.gt_anno['annotations']
            if e['image_id'] == outs['im_id']
        ]
        gt_label = [
            e['category_id'] for e in self.gt_anno['annotations']
            if e['image_id'] == outs['im_id']
        ]
        gt_label = [self.catid2clsid[e] for e in gt_label]
        self.detection_map.update(bbox, score, label, gt_box, gt_label)

    def accumulate(self):
        """Save the collected boxes and accumulate the mAP."""
        if len(self.result_bbox) > 0:
            output = "bbox.json"
            if self.output_eval:
                output = os.path.join(self.output_eval, output)
            with open(output, 'w') as f:
                json.dump(self.result_bbox, f)
                logger.info('The bbox result is saved to bbox.json.')

            if self.save_prediction_only:
                logger.info('The bbox result is saved to {} and do not '
                            'evaluate the mAP.'.format(output))
            else:
                logger.info("Accumulating evaluatation results...")
                self.detection_map.accumulate()

    def log(self):
        map_stat = 100. * self.detection_map.get_map()
        logger.info("mAP({:.2f}, {}) = {:.2f}%".format(
            self.overlap_thresh, self.map_type, map_stat))

    def get_results(self):
        return {'bbox': [self.detection_map.get_map()]}
||||
|
||||
|
||||
class SNIPERCOCOMetric(COCOMetric):
    """
    COCO metric that collects per-chip outputs and lets the dataset's
    ``anno_cropper`` aggregate them before the COCO evaluation runs.

    Args:
        anno_file (str): Forwarded to ``COCOMetric``.
        **kwargs: Forwarded to ``COCOMetric``; must contain ``dataset``.
    """

    def __init__(self, anno_file, **kwargs):
        super(SNIPERCOCOMetric, self).__init__(anno_file, **kwargs)
        self.dataset = kwargs["dataset"]
        self.chip_results = []

    def reset(self):
        # only bbox and mask evaluation support currently
        self.results = {
            key: []
            for key in ('bbox', 'mask', 'segm', 'keypoint')
        }
        self.eval_results = {}
        self.chip_results = []

    def update(self, inputs, outputs):
        """Store the outputs of one chip; nothing is evaluated yet."""
        # outputs Tensor -> numpy.ndarray
        outs = {
            key: (value.numpy()
                  if isinstance(value, paddle.Tensor) else value)
            for key, value in outputs.items()
        }

        im_id = inputs['im_id']
        if isinstance(im_id, paddle.Tensor):
            im_id = im_id.numpy()
        outs['im_id'] = im_id

        self.chip_results.append(outs)

    def accumulate(self):
        """Aggregate the chip detections, then run the parent logic."""
        cropper = self.dataset.anno_cropper
        for outs in cropper.aggregate_chips_detections(self.chip_results):
            infer_results = get_infer_results(
                outs, self.clsid2catid, bias=self.bias)
            if 'bbox' in infer_results:
                self.results['bbox'] += infer_results['bbox']

        super(SNIPERCOCOMetric, self).accumulate()
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,428 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
""" |
||||
This code is borrowed from https://github.com/xingyizhou/CenterTrack/blob/master/src/tools/eval_kitti_track/munkres.py |
||||
""" |
||||
|
||||
import sys |
||||
|
||||
__all__ = ['Munkres', 'make_cost_matrix'] |
||||
|
||||
|
||||
class Munkres:
    """
    Calculate the Munkres solution to the classical assignment problem.

    Typical use: ``Munkres().compute(cost_matrix)`` returns a list of
    ``(row, column)`` pairs describing a lowest-cost assignment.
    """

    def __init__(self):
        """Create a new instance"""
        self.C = None  # padded, square working copy of the cost matrix
        self.row_covered = []
        self.col_covered = []
        self.n = 0  # side length of the padded matrix
        self.Z0_r = 0  # row of the uncovered primed zero found in step 4
        self.Z0_c = 0  # column of that zero
        self.marked = None  # 0 = unmarked, 1 = starred, 2 = primed
        self.path = None  # scratch space for the alternating path of step 5

    @staticmethod
    def make_cost_matrix(profit_matrix, inversion_function):
        """
        **DEPRECATED**

        Please use the module function ``make_cost_matrix()``.

        Returns a new matrix in which every value of ``profit_matrix`` has
        been passed through ``inversion_function``.
        """
        # Implemented inline so that this helper does not depend on an
        # externally installed ``munkres`` package.
        return [[inversion_function(value) for value in row]
                for row in profit_matrix]

    def pad_matrix(self, matrix, pad_value=0):
        """
        Pad a possibly non-square matrix to make it square.

        :Parameters:
            matrix : list of lists
                matrix to pad

            pad_value : int
                value to use to pad the matrix

        :rtype: list of lists
        :return: a new, possibly padded, matrix
        """
        max_columns = 0
        total_rows = len(matrix)

        for row in matrix:
            max_columns = max(max_columns, len(row))

        total_rows = max(max_columns, total_rows)

        new_matrix = []
        for row in matrix:
            row_len = len(row)
            new_row = row[:]
            if total_rows > row_len:
                # Row too short. Pad it.
                new_row += [pad_value] * (total_rows - row_len)
            new_matrix += [new_row]

        while len(new_matrix) < total_rows:
            new_matrix += [[pad_value] * total_rows]

        return new_matrix

    def compute(self, cost_matrix):
        """
        Compute the indexes for the lowest-cost pairings between rows and
        columns in the database. Returns a list of (row, column) tuples
        that can be used to traverse the matrix.

        :Parameters:
            cost_matrix : list of lists
                The cost matrix. If this cost matrix is not square, it
                will be padded with zeros, via a call to ``pad_matrix()``.
                (This method does *not* modify the caller's matrix. It
                operates on a copy of the matrix.)

                **WARNING**: This code handles square and rectangular
                matrices. It does *not* handle irregular matrices.

        :rtype: list
        :return: A list of ``(row, column)`` tuples that describe the lowest
                 cost path through the matrix. An empty ``cost_matrix``
                 yields an empty list.
        """
        if len(cost_matrix) == 0:
            # Nothing to assign; also avoids indexing cost_matrix[0] below.
            return []

        self.C = self.pad_matrix(cost_matrix)
        self.n = len(self.C)
        self.original_length = len(cost_matrix)
        self.original_width = len(cost_matrix[0])
        self.row_covered = [False for i in range(self.n)]
        self.col_covered = [False for i in range(self.n)]
        self.Z0_r = 0
        self.Z0_c = 0
        self.path = self.__make_matrix(self.n * 2, 0)
        self.marked = self.__make_matrix(self.n, 0)

        steps = {
            1: self.__step1,
            2: self.__step2,
            3: self.__step3,
            4: self.__step4,
            5: self.__step5,
            6: self.__step6
        }

        # Each step returns the number of the next step; step 3 returns 7
        # (not in the table) once a complete assignment has been found.
        step = 1
        while step in steps:
            step = steps[step]()

        # Look for the starred columns, ignoring the padded area.
        results = []
        for i in range(self.original_length):
            for j in range(self.original_width):
                if self.marked[i][j] == 1:
                    results += [(i, j)]

        return results

    def __copy_matrix(self, matrix):
        """Return an exact copy of the supplied matrix"""
        # Imported locally: the module itself does not import ``copy``.
        import copy
        return copy.deepcopy(matrix)

    def __make_matrix(self, n, val):
        """Create an *n*x*n* matrix, populating it with the specific value."""
        matrix = []
        for i in range(n):
            matrix += [[val for j in range(n)]]
        return matrix

    def __step1(self):
        """
        For each row of the matrix, find the smallest element and
        subtract it from every element in its row. Go to Step 2.
        """
        n = self.n
        for i in range(n):
            minval = min(self.C[i])
            # Find the minimum value for this row and subtract that minimum
            # from every element in the row.
            for j in range(n):
                self.C[i][j] -= minval

        return 2

    def __step2(self):
        """
        Find a zero (Z) in the resulting matrix. If there is no starred
        zero in its row or column, star Z. Repeat for each element in the
        matrix. Go to Step 3.
        """
        n = self.n
        for i in range(n):
            for j in range(n):
                if (self.C[i][j] == 0) and \
                        (not self.col_covered[j]) and \
                        (not self.row_covered[i]):
                    self.marked[i][j] = 1
                    # The covers are used here only as temporary
                    # "already has a star" flags; they are reset below.
                    self.col_covered[j] = True
                    self.row_covered[i] = True

        self.__clear_covers()
        return 3

    def __step3(self):
        """
        Cover each column containing a starred zero. If K columns are
        covered, the starred zeros describe a complete set of unique
        assignments. In this case, Go to DONE, otherwise, Go to Step 4.
        """
        n = self.n
        count = 0
        for i in range(n):
            for j in range(n):
                if self.marked[i][j] == 1:
                    self.col_covered[j] = True
                    count += 1

        if count >= n:
            step = 7  # done
        else:
            step = 4

        return step

    def __step4(self):
        """
        Find a noncovered zero and prime it. If there is no starred zero
        in the row containing this primed zero, Go to Step 5. Otherwise,
        cover this row and uncover the column containing the starred
        zero. Continue in this manner until there are no uncovered zeros
        left. Save the smallest uncovered value and Go to Step 6.
        """
        step = 0
        done = False
        while not done:
            (row, col) = self.__find_a_zero()
            if row < 0:
                done = True
                step = 6
            else:
                self.marked[row][col] = 2
                star_col = self.__find_star_in_row(row)
                if star_col >= 0:
                    col = star_col
                    self.row_covered[row] = True
                    self.col_covered[col] = False
                else:
                    done = True
                    self.Z0_r = row
                    self.Z0_c = col
                    step = 5

        return step

    def __step5(self):
        """
        Construct a series of alternating primed and starred zeros as
        follows. Let Z0 represent the uncovered primed zero found in Step 4.
        Let Z1 denote the starred zero in the column of Z0 (if any).
        Let Z2 denote the primed zero in the row of Z1 (there will always
        be one). Continue until the series terminates at a primed zero
        that has no starred zero in its column. Unstar each starred zero
        of the series, star each primed zero of the series, erase all
        primes and uncover every line in the matrix. Return to Step 3
        """
        count = 0
        path = self.path
        path[count][0] = self.Z0_r
        path[count][1] = self.Z0_c
        done = False
        while not done:
            row = self.__find_star_in_col(path[count][1])
            if row >= 0:
                count += 1
                path[count][0] = row
                path[count][1] = path[count - 1][1]
            else:
                done = True

            if not done:
                col = self.__find_prime_in_row(path[count][0])
                count += 1
                path[count][0] = path[count - 1][0]
                path[count][1] = col

        self.__convert_path(path, count)
        self.__clear_covers()
        self.__erase_primes()
        return 3

    def __step6(self):
        """
        Add the value found in Step 4 to every element of each covered
        row, and subtract it from every element of each uncovered column.
        Return to Step 4 without altering any stars, primes, or covered
        lines.
        """
        minval = self.__find_smallest()
        for i in range(self.n):
            for j in range(self.n):
                if self.row_covered[i]:
                    self.C[i][j] += minval
                if not self.col_covered[j]:
                    self.C[i][j] -= minval
        return 4

    def __find_smallest(self):
        """Find the smallest uncovered value in the matrix."""
        minval = 2e9  # stand-in for sys.maxint
        for i in range(self.n):
            for j in range(self.n):
                if (not self.row_covered[i]) and (not self.col_covered[j]):
                    if minval > self.C[i][j]:
                        minval = self.C[i][j]
        return minval

    def __find_a_zero(self):
        """
        Find an uncovered element with value 0.

        Rows are scanned from the top. In the first row that contains an
        uncovered zero, the whole row is scanned and the *last* such
        column is reported. Returns ``(-1, -1)`` when there is none.
        """
        for i in range(self.n):
            if self.row_covered[i]:
                continue
            found_col = -1
            for j in range(self.n):
                if self.C[i][j] == 0 and not self.col_covered[j]:
                    found_col = j
            if found_col >= 0:
                return (i, found_col)

        return (-1, -1)

    def __find_star_in_row(self, row):
        """
        Find the first starred element in the specified row. Returns
        the column index, or -1 if no starred element was found.
        """
        col = -1
        for j in range(self.n):
            if self.marked[row][j] == 1:
                col = j
                break

        return col

    def __find_star_in_col(self, col):
        """
        Find the first starred element in the specified column. Returns
        the row index, or -1 if no starred element was found.
        """
        row = -1
        for i in range(self.n):
            if self.marked[i][col] == 1:
                row = i
                break

        return row

    def __find_prime_in_row(self, row):
        """
        Find the first primed element in the specified row. Returns
        the column index, or -1 if no primed element was found.
        """
        col = -1
        for j in range(self.n):
            if self.marked[row][j] == 2:
                col = j
                break

        return col

    def __convert_path(self, path, count):
        """Flip the marks along the path: stars are cleared, the rest starred."""
        for i in range(count + 1):
            if self.marked[path[i][0]][path[i][1]] == 1:
                self.marked[path[i][0]][path[i][1]] = 0
            else:
                self.marked[path[i][0]][path[i][1]] = 1

    def __clear_covers(self):
        """Clear all covered matrix cells"""
        for i in range(self.n):
            self.row_covered[i] = False
            self.col_covered[i] = False

    def __erase_primes(self):
        """Erase all prime markings"""
        for i in range(self.n):
            for j in range(self.n):
                if self.marked[i][j] == 2:
                    self.marked[i][j] = 0
||||
|
||||
|
||||
def make_cost_matrix(profit_matrix, inversion_function):
    """
    Build a cost matrix from a profit matrix.

    Every entry of ``profit_matrix`` is passed through
    ``inversion_function``, which must accept one numeric argument and
    return the number to be used as the corresponding cost.

    For example:

    .. python::

        cost_matrix = make_cost_matrix(matrix, lambda x: 1000 - x)

    :Parameters:
        profit_matrix : list of lists
            The matrix to convert from a profit to a cost matrix

        inversion_function : function
            The function to use to invert each entry in the profit matrix

    :rtype: list of lists
    :return: The converted matrix (a new list of new row lists)
    """
    return [[inversion_function(entry) for entry in profit_row]
            for profit_row in profit_matrix]
@ -0,0 +1,393 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
import os |
||||
import cv2 |
||||
import numpy as np |
||||
from collections import OrderedDict |
||||
|
||||
import paddle |
||||
|
||||
from paddlers.models.ppdet.utils.logger import setup_logger |
||||
logger = setup_logger(__name__) |
||||
|
||||
__all__ = ['face_eval_run', 'lmk2out'] |
||||
|
||||
|
||||
def face_eval_run(model,
                  image_dir,
                  gt_file,
                  pred_dir='output/pred',
                  eval_mode='widerface',
                  multi_scale=False):
    """Run face detection over every image listed in ``gt_file`` and save it.

    The ground-truth file is read as a sequence of records: the first
    token of a line is the image name, the first token of the next line
    is the number of annotation lines that follow, and those annotation
    lines are skipped.

    Args:
        model: detector passed through to ``detect_face`` and the
            multi-scale helpers.
        image_dir (str): directory the image names are joined onto.
        gt_file (str): path of the ground-truth listing.
        pred_dir (str): directory the predictions are written to.
        eval_mode (str): ``'widerface'`` writes one file per image via
            ``save_widerface_bboxes``; ``'fddb'`` appends ``'.jpg'`` to
            the image path and writes a single file via
            ``save_fddb_bboxes`` at the end.
        multi_scale (bool): when true, combine plain, flipped, multi-scale
            and pyramid detections through ``bbox_vote``.
    """
    # load ground truth files
    with open(gt_file, 'r') as f:
        gt_lines = f.readlines()

    imid2path = []
    cursor = 0
    line_count = len(gt_lines)
    while cursor < line_count:
        imid2path.append(gt_lines[cursor].strip('\n\t').split()[0])
        n_gt = int(gt_lines[cursor + 1].strip('\n\t').split()[0])
        # skip the name line, the count line and the n_gt annotation lines
        cursor += 2 + n_gt
    logger.info('The ground truth file load {} images'.format(len(imid2path)))

    is_fddb = eval_mode == 'fddb'
    dets_dist = OrderedDict()
    for iter_id, im_path in enumerate(imid2path):
        image_path = os.path.join(image_dir, im_path)
        if is_fddb:
            image_path += '.jpg'
        assert os.path.exists(image_path)
        image = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)

        if not multi_scale:
            dets = detect_face(model, image, 1)
        else:
            shrink, max_shrink = get_shrink(image.shape[0], image.shape[1])
            det0 = detect_face(model, image, shrink)
            det1 = flip_test(model, image, shrink)
            [det2, det3] = multi_scale_test(model, image, max_shrink)
            det4 = multi_scale_test_pyramid(model, image, max_shrink)
            stacked = np.row_stack((det0, det1, det2, det3, det4))
            dets = bbox_vote(stacked)

        if eval_mode == 'widerface':
            save_widerface_bboxes(image_path, dets, pred_dir)
        else:
            dets_dist[im_path] = dets

        if iter_id % 100 == 0:
            logger.info('Test iter {}'.format(iter_id))

    if is_fddb:
        save_fddb_bboxes(dets_dist, pred_dir)
    logger.info("Finish evaluation.")
||||
|
||||
|
||||
def detect_face(model, image, shrink):
    """Run ``model`` on ``image`` resized by the factor ``shrink``.

    Args:
        model: called as ``model(data)`` after ``model.eval()``; the result
            must provide ``['bbox'].numpy()``.
        image (np.ndarray): image whose first two axes are height and width.
        shrink (float): resize factor; ``1`` leaves the image untouched.

    Returns:
        np.ndarray: rows laid out as ``xmin, ymin, xmax, ymax, score``.
        A single all-zero row is returned when the model output holds
        exactly one element.
    """
    height, width = image.shape[0], image.shape[1]
    if shrink != 1:
        height, width = int(height * shrink), int(width * shrink)
        image = cv2.resize(image, (width, height))

    img = face_img_process(image)
    im_shape = np.asarray([[height, width]])
    scale_factor = np.asarray([[shrink, shrink]])
    data = {
        "image": paddle.to_tensor(img, dtype='float32'),
        "im_shape": paddle.to_tensor(im_shape, dtype='float32'),
        "scale_factor": paddle.to_tensor(scale_factor, dtype='float32'),
    }

    model.eval()
    detection = model(data)['bbox'].numpy()
    if np.prod(detection.shape) == 1:
        logger.info("No face detected")
        return np.array([[0, 0, 0, 0, 0]])

    # Column 1 is read as the score and columns 2..5 as the box corners;
    # reorder them into: xmin, ymin, xmax, ymax, score.
    return np.column_stack((detection[:, 2], detection[:, 3],
                            detection[:, 4], detection[:, 5],
                            detection[:, 1]))
||||
|
||||
|
||||
def flip_test(model, image, shrink):
    """Detect faces on the horizontally flipped image and map them back.

    Returns an array shaped like the ``detect_face`` output in which the
    x coordinates have been mirrored around the original image width; the
    y coordinates and the score are copied unchanged.
    """
    mirrored = cv2.flip(image, 1)
    det_f = detect_face(model, mirrored, shrink)
    img_width = image.shape[1]

    det_t = np.zeros(det_f.shape)
    # Mirroring swaps the roles of xmin and xmax.
    det_t[:, 0] = img_width - det_f[:, 2]
    det_t[:, 2] = img_width - det_f[:, 0]
    for column in (1, 3, 4):
        det_t[:, column] = det_f[:, column]
    return det_t
||||
|
||||
|
||||
def multi_scale_test(model, image, max_shrink):
    """Detect faces at one shrunken scale and at one or more enlarged scales.

    Returns:
        tuple: ``(det_s, det_b)`` where ``det_s`` holds the detections from
        the shrunken image whose longer side exceeds 30, and ``det_b`` the
        size-filtered detections from the larger scale(s).
    """
    # Shrink detecting is only used to detect big faces
    if max_shrink >= 0.75:
        st = 0.5
    else:
        st = 0.5 * max_shrink
    det_s = detect_face(model, image, st)
    longer_side = np.maximum(det_s[:, 2] - det_s[:, 0] + 1,
                             det_s[:, 3] - det_s[:, 1] + 1)
    det_s = det_s[np.where(longer_side > 30)[0], :]

    # Enlarge one times
    if max_shrink > 1:
        bt = min(2, max_shrink)
    else:
        bt = (st + max_shrink) / 2
    det_b = detect_face(model, image, bt)

    # Enlarge small image x times for small faces
    if max_shrink > 2:
        bt *= 2
        while bt < max_shrink:
            det_b = np.row_stack((det_b, detect_face(model, image, bt)))
            bt *= 2
        det_b = np.row_stack((det_b, detect_face(model, image, max_shrink)))

    box_w = det_b[:, 2] - det_b[:, 0] + 1
    box_h = det_b[:, 3] - det_b[:, 1] + 1
    if bt > 1:
        # Enlarged images are only used to detect small faces.
        keep = np.where(np.minimum(box_w, box_h) < 100)[0]
    else:
        # Shrinked images are only used to detect big faces.
        keep = np.where(np.maximum(box_w, box_h) > 30)[0]
    det_b = det_b[keep, :]
    return det_s, det_b
||||
|
||||
|
||||
def multi_scale_test_pyramid(model, image, max_shrink):
    """Detect faces over a fixed pyramid of scales and stack the results.

    The image is always processed at scale 0.25. Scales 0.75, 1.25, 1.5
    and 1.75 are added when they do not exceed ``max_shrink``. Detections
    from scales above 1 are kept only if their shorter side is below 100;
    all others only if their longer side exceeds 30.
    """
    # Use image pyramids to detect faces
    det_b = detect_face(model, image, 0.25)
    longer_side = np.maximum(det_b[:, 2] - det_b[:, 0] + 1,
                             det_b[:, 3] - det_b[:, 1] + 1)
    det_b = det_b[np.where(longer_side > 30)[0], :]

    for scale in (0.75, 1.25, 1.5, 1.75):
        if scale > max_shrink:
            continue
        det_temp = detect_face(model, image, scale)
        box_w = det_temp[:, 2] - det_temp[:, 0] + 1
        box_h = det_temp[:, 3] - det_temp[:, 1] + 1
        if scale > 1:
            # Enlarged images are only used to detect small faces.
            keep = np.where(np.minimum(box_w, box_h) < 100)[0]
        else:
            # Shrinked images are only used to detect big faces.
            keep = np.where(np.maximum(box_w, box_h) > 30)[0]
        det_b = np.row_stack((det_b, det_temp[keep, :]))
    return det_b
||||
|
||||
|
||||
def to_chw(image):
    """
    Move the last axis of a 3-D image to the front (HWC -> CHW).

    Args:
        image (np.array): an image with HWC layout. Arrays that are not
            3-dimensional are returned unchanged.
    """
    if len(image.shape) != 3:
        return image
    # HWC to CHW in a single axis permutation
    return np.transpose(image, (2, 0, 1))
||||
|
||||
|
||||
def face_img_process(image,
                     mean=[104., 117., 123.],
                     std=[127.502231, 127.502231, 127.502231]):
    """Convert an image into a normalized float32 batch of size one.

    The image is copied, passed through ``to_chw``, cast to float32, has
    ``mean`` subtracted and is divided by ``std`` along the first axis,
    and is finally wrapped in a new leading axis.

    Args:
        image: array-like image.
        mean (list): values subtracted along the first axis.
        std (list): values the result is divided by along the first axis.

    Returns:
        np.ndarray: the processed image with one extra leading axis.
    """
    mean_arr = np.array(mean)[:, np.newaxis, np.newaxis].astype('float32')
    std_arr = np.array(std)[:, np.newaxis, np.newaxis].astype('float32')

    img = to_chw(np.array(image)).astype('float32')
    img -= mean_arr
    img /= std_arr
    return np.array([img])
||||
|
||||
|
||||
def get_shrink(height, width):
    """
    Compute the resize factors used for (multi-scale) face detection.

    Args:
        height (int): image height.
        width (int): image width.

    Returns:
        tuple: ``(shrink, max_shrink)``. ``max_shrink`` is the largest
        factor derived from the two size bounds below, truncated to two
        decimals and reduced by a safety margin. ``shrink`` is
        ``max_shrink`` capped at 1.
    """
    # avoid out of memory
    max_shrink_v1 = (0x7fffffff / 577.0 / (height * width))**0.5
    max_shrink_v2 = ((678 * 1024 * 2.0 * 2.0) / (height * width))**0.5

    def get_round(x, loc):
        """Truncate ``x`` to ``loc`` decimals when it has three or more."""
        str_x = str(x)
        if 'e' in str_x or 'E' in str_x:
            # Exponent notation cannot be truncated by slicing the text.
            scale = 10**loc
            return int(x * scale) / float(scale)
        if '.' in str_x:
            str_before, str_after = str_x.split('.')
            if len(str_after) >= 3:
                return float(str_before + '.' + str_after[0:loc])
        # Fewer than three decimals (e.g. exactly 1.0 for a 1356x2048
        # image) or no decimal point: the value is returned unchanged
        # instead of falling through and returning None.
        return x

    max_shrink = get_round(min(max_shrink_v1, max_shrink_v2), 2) - 0.3
    if max_shrink >= 1.5 and max_shrink < 2:
        max_shrink = max_shrink - 0.1
    elif max_shrink >= 2 and max_shrink < 3:
        max_shrink = max_shrink - 0.2
    elif max_shrink >= 3 and max_shrink < 4:
        max_shrink = max_shrink - 0.3
    elif max_shrink >= 4 and max_shrink < 5:
        max_shrink = max_shrink - 0.4
    elif max_shrink >= 5:
        max_shrink = max_shrink - 0.5
    elif max_shrink <= 0.1:
        max_shrink = 0.1

    shrink = max_shrink if max_shrink < 1 else 1
    return shrink, max_shrink
||||
|
||||
|
||||
def bbox_vote(det):
    """Merge overlapping detections by score-weighted box voting.

    Detections are processed in descending score order. The top box and
    every remaining box whose IoU with it is at least 0.3 form a group.
    A group of two or more boxes is replaced by their score-weighted
    average box carrying the group's maximum score. A group of one box is
    discarded unless it is the very last box left. At most 750 results
    are kept, and only those whose score is at least 0.01.

    Args:
        det (np.ndarray): rows of ``xmin, ymin, xmax, ymax, score``.

    Returns:
        np.ndarray: the voted detections, shape ``(M, 5)``; ``(0, 5)``
        when nothing survives. The input array is not modified.
    """
    order = det[:, 4].ravel().argsort()[::-1]
    det = det[order, :]

    voted = []
    while det.shape[0] > 0:
        # IOU of the current top box against every remaining box
        area = (det[:, 2] - det[:, 0] + 1) * (det[:, 3] - det[:, 1] + 1)
        xx1 = np.maximum(det[0, 0], det[:, 0])
        yy1 = np.maximum(det[0, 1], det[:, 1])
        xx2 = np.minimum(det[0, 2], det[:, 2])
        yy2 = np.minimum(det[0, 3], det[:, 3])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        with np.errstate(divide='ignore', invalid='ignore'):
            o = inter / (area[0] + area[:] - inter)

        # nms
        merge_index = np.where(o >= 0.3)[0]
        if merge_index.shape[0] == 0:
            # A degenerate top box (0/0 IoU with itself) matches nothing,
            # not even itself. Consume it explicitly so that the loop
            # always makes progress.
            merge_index = np.array([0])
        det_accu = det[merge_index, :]
        det = np.delete(det, merge_index, 0)

        if merge_index.shape[0] <= 1:
            # An unsupported single box is kept only if it is the last one.
            if det.shape[0] == 0:
                voted.append(det_accu)
            continue

        det_accu[:, 0:4] = det_accu[:, 0:4] * np.tile(det_accu[:, -1:], (1, 4))
        max_score = np.max(det_accu[:, 4])
        det_accu_sum = np.zeros((1, 5))
        with np.errstate(divide='ignore', invalid='ignore'):
            det_accu_sum[:, 0:4] = np.sum(det_accu[:, 0:4],
                                          axis=0) / np.sum(det_accu[:, -1:])
        det_accu_sum[:, 4] = max_score
        voted.append(det_accu_sum)

    dets = np.vstack(voted) if voted else np.empty(shape=[0, 5])
    dets = dets[0:750, :]
    keep_index = np.where(dets[:, 4] >= 0.01)[0]
    return dets[keep_index, :]
||||
|
||||
|
||||
def save_widerface_bboxes(image_path, bboxes_scores, output_dir):
    """Write the detections of one image to ``<output_dir>/<class>/<name>.txt``.

    The class and file name are the last two ``'/'``-separated components
    of ``image_path``. The file holds the ``class/name`` line, the number
    of boxes, then one ``xmin ymin width height score`` line per box.

    Args:
        image_path (str): path of the image, using ``'/'`` separators.
        bboxes_scores (np.ndarray): rows of ``xmin, ymin, xmax, ymax, score``.
        output_dir (str): root directory of the prediction files.
    """
    path_parts = image_path.split('/')
    image_name = path_parts[-1]
    image_class = path_parts[-2]
    odir = os.path.join(output_dir, image_class)
    if not os.path.exists(odir):
        os.makedirs(odir)

    # NOTE: the last four characters are dropped, i.e. a three-letter
    # extension such as ".jpg" is assumed.
    ofname = os.path.join(odir, '%s.txt' % (image_name[:-4]))
    # The context manager closes the file even if a write raises.
    with open(ofname, 'w') as f:
        f.write('{:s}\n'.format(image_class + '/' + image_name))
        f.write('{:d}\n'.format(bboxes_scores.shape[0]))
        for box_score in bboxes_scores:
            xmin, ymin, xmax, ymax, score = box_score
            f.write('{:.1f} {:.1f} {:.1f} {:.1f} {:.3f}\n'.format(xmin, ymin, (
                xmax - xmin + 1), (ymax - ymin + 1), score))
    logger.info("The predicted result is saved as {}".format(ofname))
||||
|
||||
|
||||
def save_fddb_bboxes(bboxes_scores,
                     output_dir,
                     output_fname='pred_fddb_res.txt'):
    """Write all detections into a single prediction file.

    For every image the file holds the image path, the number of boxes,
    then one ``xmin ymin width height score`` line per box.

    Args:
        bboxes_scores (dict): maps an image path to an array with rows of
            ``xmin, ymin, xmax, ymax, score``.
        output_dir (str): directory of the prediction file; created if
            it does not exist.
        output_fname (str): name of the prediction file.

    Returns:
        str: path of the written prediction file.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    predict_file = os.path.join(output_dir, output_fname)
    # ``items()`` works on Python 2 and 3 (``iteritems()`` is Python 2
    # only); the context manager makes sure the file is flushed and closed.
    with open(predict_file, 'w') as f:
        for image_path, dets in bboxes_scores.items():
            f.write('{:s}\n'.format(image_path))
            f.write('{:d}\n'.format(dets.shape[0]))
            for box_score in dets:
                xmin, ymin, xmax, ymax, score = box_score
                width, height = xmax - xmin, ymax - ymin
                f.write('{:.1f} {:.1f} {:.1f} {:.1f} {:.3f}\n'
                        .format(xmin, ymin, width, height, score))
    logger.info("The predicted result is saved as {}".format(predict_file))
    return predict_file
||||
|
||||
|
||||
def lmk2out(results, is_bbox_normalized=False):
    """
    Decode the predicted face landmarks of every detection.

    Args:
        results: iterable of dicts. Each one is read for `bbox`, `im_id`,
            `face_index`, `prior_boxes` and `landmark`;
            if is_bbox_normalized=True, also need `im_shape`.
        is_bbox_normalized: whether or not landmark is normalized. When
            true, the decoded x / y values are multiplied by the image
            width / height.

    Returns:
        list: one dict per detection with the keys `image_id`, `landmark`
        (array of 10 values, x and y interleaved) and `score`.
    """
    xywh_res = []
    for t in results:
        bboxes = t['bbox'][0]
        # Test for None first: reading ``.shape`` on None would raise.
        if bboxes is None or bboxes.shape == (1, 1):
            continue
        lengths = t['bbox'][1][0]
        im_ids = np.array(t['im_id'][0]).flatten()
        face_index = t['face_index'][0]
        prior = np.reshape(t['prior_boxes'][0], (-1, 4))
        predictlmk = np.reshape(t['landmark'][0], (-1, 10))

        k = 0  # running index into bboxes across all images of the batch
        for a in range(len(lengths)):
            num = lengths[a]
            im_id = int(im_ids[a])
            if is_bbox_normalized:
                # The image size is only needed to undo the normalization.
                im_shape = t['im_shape'][0][a].tolist()
                image_h, image_w = int(im_shape[0]), int(im_shape[1])
                lmk_scale = np.array([image_w, image_h] * 5)
            for i in range(num):
                score = bboxes[k][1]
                # NOTE(review): face_index is indexed with the per-image
                # counter ``i`` while bboxes uses the running counter ``k``;
                # confirm this is intended for batches with several images.
                theindex = face_index[i][0]
                me_prior = prior[theindex, :]
                lmk_pred = predictlmk[theindex, :]
                prior_w = me_prior[2] - me_prior[0]
                prior_h = me_prior[3] - me_prior[1]
                prior_w_center = (me_prior[2] + me_prior[0]) / 2
                prior_h_center = (me_prior[3] + me_prior[1]) / 2
                lmk_decode = np.zeros((10))
                # even slots hold x offsets, odd slots hold y offsets
                for j in range(0, 10, 2):
                    lmk_decode[j] = lmk_pred[
                        j] * 0.1 * prior_w + prior_w_center
                for j in range(1, 10, 2):
                    lmk_decode[j] = lmk_pred[
                        j] * 0.1 * prior_h + prior_h_center
                if is_bbox_normalized:
                    lmk_decode = lmk_decode * lmk_scale
                xywh_res.append({
                    'image_id': im_id,
                    'landmark': lmk_decode,
                    'score': score,
                })
                k += 1
    return xywh_res
@ -0,0 +1,18 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from . import model_zoo |
||||
from .model_zoo import * |
||||
|
||||
__all__ = model_zoo.__all__ |
@ -0,0 +1,84 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
import os.path as osp |
||||
import pkg_resources |
||||
|
||||
try: |
||||
from collections.abc import Sequence |
||||
except: |
||||
from collections import Sequence |
||||
|
||||
from paddlers.models.ppdet.core.workspace import load_config, create |
||||
from paddlers.models.ppdet.utils.checkpoint import load_weight |
||||
from paddlers.models.ppdet.utils.download import get_config_path |
||||
|
||||
from paddlers.models.ppdet.utils.logger import setup_logger |
||||
logger = setup_logger(__name__) |
||||
|
||||
__all__ = [ |
||||
'list_model', 'get_config_file', 'get_weights_url', 'get_model', |
||||
'MODEL_ZOO_FILENAME' |
||||
] |
||||
|
||||
MODEL_ZOO_FILENAME = 'MODEL_ZOO' |
||||
|
||||
|
||||
def list_model(filters=()):
    """Log the model names of the model zoo that match every filter.

    The names are read, one per line, from the ``MODEL_ZOO_FILENAME``
    resource. A name is listed when it contains all filter values as
    substrings.

    Args:
        filters (str|Sequence[str]|None): substring(s) a model name must
            contain. A single value is treated as a one-element list;
            ``None`` or an empty sequence lists every model.

    Raises:
        ValueError: if filters were given and no model name matches.
    """
    # NOTE(review): the resource is looked up in the package
    # 'ppdet.model_zoo' although this file imports from
    # 'paddlers.models.ppdet' -- confirm that package name resolves here.
    model_zoo_file = pkg_resources.resource_filename('ppdet.model_zoo',
                                                     MODEL_ZOO_FILENAME)
    with open(model_zoo_file) as f:
        model_names = f.read().splitlines()

    if filters is None:
        filters = []
    elif isinstance(filters, str) or not isinstance(filters, Sequence):
        filters = [filters]

    # filter model_name
    model_names = [
        name for name in model_names
        if all(name.find(keyword) >= 0 for keyword in filters)
    ]
    if len(model_names) == 0 and len(filters) > 0:
        raise ValueError("no model found, please check filters seeting, "
                         "filters can be set as following kinds:\n"
                         "\tDataset: coco, voc ...\n"
                         "\tArchitecture: yolo, rcnn, ssd ...\n"
                         "\tBackbone: resnet, vgg, darknet ...\n")

    model_str = "Available Models:\n" + "".join(
        "\t{}\n".format(model_name) for model_name in model_names)
    logger.info(model_str)
||||
|
||||
|
||||
# models and configs are saved on bcebos under the dygraph directory |
||||
def get_config_file(model_name):
    """Resolve the config path of ``model_name`` through ``get_config_path``.

    The lookup URL has the form ``ppdet://configs/<model_name>.yml``.
    """
    config_url = "ppdet://configs/{}.yml".format(model_name)
    return get_config_path(config_url)
||||
|
||||
|
||||
def get_weights_url(model_name):
    """Return the ``ppdet://models/<name>.pdparams`` URL of a model.

    Only the last path component of ``model_name`` is used as ``<name>``.
    """
    weights_name = osp.basename(model_name)
    return "ppdet://models/{}.pdparams".format(weights_name)
||||
|
||||
|
||||
def get_model(model_name, pretrained=True):
    """Create the architecture configured for ``model_name``.

    Args:
        model_name (str): name used to locate the config file and, when
            ``pretrained`` is true, the weights URL.
        pretrained (bool): whether to load the weights returned by
            ``get_weights_url`` into the created model.

    Returns:
        The object built by ``create(cfg.architecture)``.
    """
    cfg = load_config(get_config_file(model_name))
    model = create(cfg.architecture)
    if not pretrained:
        return model

    load_weight(model, get_weights_url(model_name))
    return model
@ -0,0 +1,45 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
import warnings |
||||
warnings.filterwarnings( |
||||
action='ignore', category=DeprecationWarning, module='ops') |
||||
|
||||
from . import ops |
||||
from . import backbones |
||||
from . import necks |
||||
from . import proposal_generator |
||||
from . import heads |
||||
from . import losses |
||||
from . import architectures |
||||
from . import post_process |
||||
from . import layers |
||||
from . import reid |
||||
from . import mot |
||||
from . import transformers |
||||
from . import assigners |
||||
|
||||
from .ops import * |
||||
from .backbones import * |
||||
from .necks import * |
||||
from .proposal_generator import * |
||||
from .heads import * |
||||
from .losses import * |
||||
from .architectures import * |
||||
from .post_process import * |
||||
from .layers import * |
||||
from .reid import * |
||||
from .mot import * |
||||
from .transformers import * |
||||
from .assigners import * |
@ -0,0 +1,51 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
from . import meta_arch |
||||
from . import faster_rcnn |
||||
from . import mask_rcnn |
||||
from . import yolo |
||||
from . import cascade_rcnn |
||||
from . import ssd |
||||
from . import fcos |
||||
from . import solov2 |
||||
from . import ttfnet |
||||
from . import s2anet |
||||
from . import keypoint_hrhrnet |
||||
from . import keypoint_hrnet |
||||
from . import jde |
||||
from . import deepsort |
||||
from . import fairmot |
||||
from . import centernet |
||||
from . import gfl |
||||
from . import picodet |
||||
from . import detr |
||||
from . import sparse_rcnn |
||||
from . import tood |
||||
|
||||
from .meta_arch import * |
||||
from .faster_rcnn import * |
||||
from .mask_rcnn import * |
||||
from .yolo import * |
||||
from .cascade_rcnn import * |
||||
from .ssd import * |
||||
from .fcos import * |
||||
from .solov2 import * |
||||
from .ttfnet import * |
||||
from .s2anet import * |
||||
from .keypoint_hrhrnet import * |
||||
from .keypoint_hrnet import * |
||||
from .jde import * |
||||
from .deepsort import * |
||||
from .fairmot import * |
||||
from .centernet import * |
||||
from .blazeface import * |
||||
from .gfl import * |
||||
from .picodet import * |
||||
from .detr import * |
||||
from .sparse_rcnn import * |
||||
from .tood import * |
@ -0,0 +1,91 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
from paddlers.models.ppdet.core.workspace import register, create |
||||
from .meta_arch import BaseArch |
||||
|
||||
__all__ = ['BlazeFace'] |
||||
|
||||
|
||||
@register
class BlazeFace(BaseArch):
    """
    BlazeFace architecture ("BlazeFace: Sub-millisecond Neural Face Detection
    on Mobile GPUs", see https://arxiv.org/abs/1907.05047).

    Args:
        backbone (nn.Layer): backbone instance
        blaze_head (nn.Layer): `blazeHead` instance
        neck (nn.Layer): neck instance
        post_process (object): `BBoxPostProcess` instance
    """

    __category__ = 'architecture'
    __inject__ = ['post_process']

    def __init__(self, backbone, blaze_head, neck, post_process):
        super(BlazeFace, self).__init__()
        # Components are stored in pipeline order: backbone, neck, head,
        # post-process.
        self.backbone = backbone
        self.neck = neck
        self.blaze_head = blaze_head
        self.post_process = post_process

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        """Create backbone, neck and head from ``cfg``.

        The neck is created with the backbone's ``out_shape`` as its
        ``input_shape``; the head with the neck's ``out_shape``.

        Returns:
            dict: created modules keyed by ``backbone``, ``neck`` and
            ``blaze_head``.
        """
        backbone = create(cfg['backbone'])
        neck = create(cfg['neck'], input_shape=backbone.out_shape)
        blaze_head = create(cfg['blaze_head'], input_shape=neck.out_shape)
        return {'backbone': backbone, 'neck': neck, 'blaze_head': blaze_head}

    def _forward(self):
        """Run backbone, neck and head on ``self.inputs``.

        Returns:
            In training mode, the head's output for the image and ground-truth
            boxes/classes. Otherwise a ``(bbox, bbox_num)`` tuple produced by
            the post-process.
        """
        inputs = self.inputs
        neck_feats = self.neck(self.backbone(inputs))

        if not self.training:
            preds, anchors = self.blaze_head(neck_feats, inputs['image'])
            bbox, bbox_num = self.post_process(
                preds, anchors, inputs['im_shape'], inputs['scale_factor'])
            return bbox, bbox_num

        return self.blaze_head(neck_feats, inputs['image'],
                               inputs['gt_bbox'], inputs['gt_class'])

    def get_loss(self, ):
        """Return the forward result wrapped in a dict under the key ``loss``."""
        head_loss = self._forward()
        return {"loss": head_loss}

    def get_pred(self):
        """Return the predictions as a dict with keys ``bbox`` and ``bbox_num``."""
        boxes, box_counts = self._forward()
        return {"bbox": boxes, "bbox_num": box_counts}
@ -0,0 +1,144 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
import paddle |
||||
from paddlers.models.ppdet.core.workspace import register, create |
||||
from .meta_arch import BaseArch |
||||
|
||||
__all__ = ['CascadeRCNN'] |
||||
|
||||
|
||||
@register
class CascadeRCNN(BaseArch):
    """
    Cascade R-CNN network with an optional mask branch,
    see https://arxiv.org/abs/1712.00726

    Args:
        backbone (object): backbone instance
        rpn_head (object): `RPNHead` instance
        bbox_head (object): `BBoxHead` instance
        bbox_post_process (object): `BBoxPostProcess` instance
        neck (object): 'FPN' instance; not applied when it is None
        mask_head (object): `MaskHead` instance; the mask branch is enabled
            only when this is not None
        mask_post_process (object): `MaskPostProcess` instance
    """
    __category__ = 'architecture'
    __inject__ = [
        'bbox_post_process',
        'mask_post_process',
    ]

    def __init__(self,
                 backbone,
                 rpn_head,
                 bbox_head,
                 bbox_post_process,
                 neck=None,
                 mask_head=None,
                 mask_post_process=None):
        super(CascadeRCNN, self).__init__()
        self.backbone = backbone
        self.rpn_head = rpn_head
        self.bbox_head = bbox_head
        self.bbox_post_process = bbox_post_process
        self.neck = neck
        self.mask_head = mask_head
        self.mask_post_process = mask_post_process
        # The mask branch is active exactly when a mask head was supplied.
        self.with_mask = mask_head is not None

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        """Create backbone, optional neck, RPN head, bbox head and optional
        mask head from ``cfg``.

        Returns:
            dict: created modules keyed by ``backbone``, ``neck``,
            ``rpn_head``, ``bbox_head`` and ``mask_head``. A falsy
            ``cfg['neck']`` / ``cfg['mask_head']`` is passed through as is.
        """
        backbone = create(cfg['backbone'])

        neck_cfg = cfg['neck']
        if neck_cfg:
            neck = create(neck_cfg, input_shape=backbone.out_shape)
        else:
            neck = neck_cfg

        # Heads consume the neck's shape when there is one (and it is truthy),
        # otherwise the backbone's.
        if neck:
            out_shape = neck.out_shape or backbone.out_shape
        else:
            out_shape = backbone.out_shape
        rpn_head = create(cfg['rpn_head'], input_shape=out_shape)
        bbox_head = create(cfg['bbox_head'], input_shape=out_shape)

        # Without a neck, the mask head is sized from the bbox head's inner
        # head instead.
        if not (neck and out_shape):
            out_shape = bbox_head.get_head().out_shape
        mask_cfg = cfg['mask_head']
        if mask_cfg:
            mask_head = create(mask_cfg, input_shape=out_shape)
        else:
            mask_head = mask_cfg

        return {
            'backbone': backbone,
            'neck': neck,
            "rpn_head": rpn_head,
            "bbox_head": bbox_head,
            "mask_head": mask_head,
        }

    def _forward(self):
        """Run the detector on ``self.inputs``.

        Returns:
            tuple: in training mode ``(rpn_loss, bbox_loss, mask_loss)``, where
            ``mask_loss`` is an empty dict when there is no mask branch;
            otherwise ``(bbox_pred, bbox_num, mask_pred)``, where ``mask_pred``
            is None when there is no mask branch.
        """
        inputs = self.inputs
        feats = self.backbone(inputs)
        if self.neck is not None:
            feats = self.neck(feats)

        if not self.training:
            rois, rois_num, _ = self.rpn_head(feats, inputs)
            preds, _ = self.bbox_head(feats, rois, rois_num, inputs)
            refined_rois = self.bbox_head.get_refined_rois()

            im_shape = inputs['im_shape']
            scale_factor = inputs['scale_factor']

            bbox, bbox_num = self.bbox_post_process(
                preds, (refined_rois, rois_num), im_shape, scale_factor)
            # rescale the prediction back to origin image
            bbox_pred = self.bbox_post_process.get_pred(
                bbox, bbox_num, im_shape, scale_factor)
            if self.with_mask:
                mask_out = self.mask_head(feats, bbox, bbox_num, inputs)
                origin_shape = self.bbox_post_process.get_origin_shape()
                mask_pred = self.mask_post_process(
                    mask_out[:, 0, :, :], bbox_pred, bbox_num, origin_shape)
                return bbox_pred, bbox_num, mask_pred
            return bbox_pred, bbox_num, None

        rois, rois_num, rpn_loss = self.rpn_head(feats, inputs)
        bbox_loss, bbox_feat = self.bbox_head(feats, rois, rois_num, inputs)
        # The mask head is given the RoIs and targets assigned by the bbox
        # head rather than the raw RPN proposals.
        rois, rois_num = self.bbox_head.get_assigned_rois()
        bbox_targets = self.bbox_head.get_assigned_targets()
        if not self.with_mask:
            return rpn_loss, bbox_loss, {}
        mask_loss = self.mask_head(feats, rois, rois_num, inputs,
                                   bbox_targets, bbox_feat)
        return rpn_loss, bbox_loss, mask_loss

    def get_loss(self, ):
        """Merge the RPN, bbox and (if enabled) mask losses into one dict and
        add their sum under the key ``loss``."""
        rpn_loss, bbox_loss, mask_loss = self._forward()
        merged = {}
        for part in (rpn_loss, bbox_loss):
            merged.update(part)
        if self.with_mask:
            merged.update(mask_loss)
        merged['loss'] = paddle.add_n(list(merged.values()))
        return merged

    def get_pred(self):
        """Return a dict with ``bbox`` and ``bbox_num``, plus ``mask`` when
        the mask branch is enabled."""
        bbox_pred, bbox_num, mask_pred = self._forward()
        result = {'bbox': bbox_pred, 'bbox_num': bbox_num}
        if self.with_mask:
            result['mask'] = mask_pred
        return result
@ -0,0 +1,108 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
from paddlers.models.ppdet.core.workspace import register, create |
||||
from .meta_arch import BaseArch |
||||
|
||||
__all__ = ['CenterNet'] |
||||
|
||||
|
||||
@register
class CenterNet(BaseArch):
    """
    CenterNet network, see http://arxiv.org/abs/1904.07850

    Args:
        backbone (object): backbone instance
        neck (object): FPN instance, default use 'CenterNetDLAFPN'
        head (object): 'CenterNetHead' instance
        post_process (object): 'CenterNetPostProcess' instance
        for_mot (bool): whether return other features used in tracking model
    """
    __category__ = 'architecture'
    __inject__ = ['post_process']
    __shared__ = ['for_mot']

    def __init__(self,
                 backbone,
                 neck='CenterNetDLAFPN',
                 head='CenterNetHead',
                 post_process='CenterNetPostProcess',
                 for_mot=False):
        super(CenterNet, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.head = head
        self.post_process = post_process
        self.for_mot = for_mot

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        """Create backbone, optional neck and head from ``cfg``.

        Returns:
            dict: created modules keyed by ``backbone``, ``neck`` and
            ``head``. A falsy ``cfg['neck']`` is passed through as is.
        """
        backbone = create(cfg['backbone'])

        neck_cfg = cfg['neck']
        if neck_cfg:
            neck = create(neck_cfg, input_shape=backbone.out_shape)
        else:
            neck = neck_cfg

        # The head consumes the neck's shape when there is one (and it is
        # truthy), otherwise the backbone's.
        if neck:
            out_shape = neck.out_shape or backbone.out_shape
        else:
            out_shape = backbone.out_shape
        head = create(cfg['head'], input_shape=out_shape)

        return {'backbone': backbone, 'neck': neck, "head": head}

    def _forward(self):
        """Run backbone, optional neck and head on ``self.inputs``.

        Returns:
            The head's output mapping. With ``for_mot`` the neck feature is
            added under ``neck_feat``; otherwise, in training mode, the
            ``det_loss`` entry is renamed to ``loss``.
        """
        inputs = self.inputs
        feat = self.backbone(inputs)
        if self.neck is not None:
            feat = self.neck(feat)

        head_out = self.head(feat, inputs)
        if self.for_mot:
            head_out['neck_feat'] = feat
        elif self.training:
            head_out['loss'] = head_out.pop('det_loss')
        return head_out

    def get_pred(self):
        """Post-process the head output into a prediction dict.

        Returns:
            dict: with ``for_mot``, the keys ``bbox``, ``bbox_inds``,
            ``topk_clses`` and ``neck_feat``; otherwise ``bbox`` and
            ``bbox_num``.
        """
        head_out = self._forward()
        # Both modes call the post-process identically; they differ only in
        # how its three outputs are named.
        post_out = self.post_process(
            head_out['heatmap'],
            head_out['size'],
            head_out['offset'],
            im_shape=self.inputs['im_shape'],
            scale_factor=self.inputs['scale_factor'])

        if self.for_mot:
            bbox, bbox_inds, topk_clses = post_out
            return {
                "bbox": bbox,
                "bbox_inds": bbox_inds,
                "topk_clses": topk_clses,
                "neck_feat": head_out['neck_feat'],
            }

        bbox, bbox_num, _ = post_out
        return {"bbox": bbox, "bbox_num": bbox_num}

    def get_loss(self):
        """Return the forward output unchanged."""
        return self._forward()
@ -0,0 +1,69 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
import paddle |
||||
from paddlers.models.ppdet.core.workspace import register, create |
||||
from .meta_arch import BaseArch |
||||
from paddlers.models.ppdet.modeling.mot.utils import Detection, get_crops, scale_coords, clip_box |
||||
|
||||
__all__ = ['DeepSORT'] |
||||
|
||||
|
||||
@register
class DeepSORT(BaseArch):
    """
    DeepSORT network, see https://arxiv.org/abs/1703.07402

    Args:
        detector (object): detector model instance
        reid (object): reid model instance
        tracker (object): tracker instance
    """
    __category__ = 'architecture'

    def __init__(self,
                 detector='YOLOv3',
                 reid='PCBPyramid',
                 tracker='DeepSORTTracker'):
        super(DeepSORT, self).__init__()
        self.detector = detector
        self.reid = reid
        self.tracker = tracker

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        """Create detector, reid model and tracker from ``cfg``.

        The detector is set to None when ``cfg['detector']`` equals the string
        ``'None'``.

        Returns:
            dict: created objects keyed by ``detector``, ``reid`` and
            ``tracker``.
        """
        detector_cfg = cfg['detector']
        detector = create(detector_cfg) if detector_cfg != 'None' else None
        reid = create(cfg['reid'])
        tracker = create(cfg['tracker'])

        return {"detector": detector, "reid": reid, "tracker": tracker}

    def _forward(self):
        """Run the reid model on ``self.inputs['crops']`` and return its output."""
        return self.reid(self.inputs['crops'])

    def get_pred(self):
        """Return the reid features computed by ``_forward``."""
        features = self._forward()
        return features
@ -0,0 +1,93 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
import paddle |
||||
from .meta_arch import BaseArch |
||||
from paddlers.models.ppdet.core.workspace import register, create |
||||
|
||||
__all__ = ['DETR'] |
||||
|
||||
|
||||
@register
class DETR(BaseArch):
    """
    DETR architecture: a backbone, a transformer and a detection head,
    followed by a post-process at inference time.

    Args:
        backbone (object): backbone instance
        transformer (object): transformer instance
        detr_head (object): head instance
        post_process (object): post-process instance, 'DETRBBoxPostProcess'
            by default
    """
    __category__ = 'architecture'
    __inject__ = ['post_process']

    def __init__(self,
                 backbone,
                 transformer,
                 detr_head,
                 post_process='DETRBBoxPostProcess'):
        super(DETR, self).__init__()
        self.backbone = backbone
        self.transformer = transformer
        self.detr_head = detr_head
        self.post_process = post_process

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        """Create backbone, transformer and head from ``cfg``.

        The transformer is created with the backbone's ``out_shape``; the head
        additionally receives the transformer's ``hidden_dim`` and ``nhead``.

        Returns:
            dict: created modules keyed by ``backbone``, ``transformer`` and
            ``detr_head``.
        """
        backbone = create(cfg['backbone'])
        transformer = create(cfg['transformer'],
                             input_shape=backbone.out_shape)
        detr_head = create(
            cfg['detr_head'],
            hidden_dim=transformer.hidden_dim,
            nhead=transformer.nhead,
            input_shape=backbone.out_shape)

        return {
            'backbone': backbone,
            'transformer': transformer,
            "detr_head": detr_head,
        }

    def _forward(self):
        """Run backbone, transformer and head on ``self.inputs``.

        Returns:
            In training mode, the head's output for the transformer output,
            backbone features and inputs. Otherwise a ``(bbox, bbox_num)``
            tuple produced by the post-process.
        """
        inputs = self.inputs
        body_feats = self.backbone(inputs)
        out_transformer = self.transformer(body_feats, inputs['pad_mask'])

        if not self.training:
            preds = self.detr_head(out_transformer, body_feats)
            bbox, bbox_num = self.post_process(preds, inputs['im_shape'],
                                               inputs['scale_factor'])
            return bbox, bbox_num

        return self.detr_head(out_transformer, body_feats, inputs)

    def get_loss(self, ):
        """Return the head's losses with their sum added under ``loss``.

        Entries whose key contains ``'log'`` are left out of the sum.
        """
        losses = self._forward()
        summed_terms = []
        for name, value in losses.items():
            if 'log' in name:
                continue
            summed_terms.append(value)
        losses['loss'] = paddle.add_n(summed_terms)
        return losses

    def get_pred(self):
        """Return the predictions as a dict with keys ``bbox`` and ``bbox_num``."""
        boxes, box_counts = self._forward()
        return {"bbox": boxes, "bbox_num": box_counts}
@ -0,0 +1,100 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
import paddle |
||||
from paddlers.models.ppdet.core.workspace import register, create |
||||
from .meta_arch import BaseArch |
||||
|
||||
__all__ = ['FairMOT'] |
||||
|
||||
|
||||
@register
class FairMOT(BaseArch):
    """
    FairMOT network, see http://arxiv.org/abs/2004.01888

    Args:
        detector (object): 'CenterNet' instance
        reid (object): 'FairMOTEmbeddingHead' instance
        tracker (object): 'JDETracker' instance
        loss (object): 'FairMOTLoss' instance
    """

    __category__ = 'architecture'
    __inject__ = ['loss']

    def __init__(self,
                 detector='CenterNet',
                 reid='FairMOTEmbeddingHead',
                 tracker='JDETracker',
                 loss='FairMOTLoss'):
        super(FairMOT, self).__init__()
        self.detector = detector
        self.reid = reid
        self.tracker = tracker
        self.loss = loss

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        """Create detector, reid head, loss and tracker from ``cfg``.

        The reid head is created with the detector neck's ``out_shape`` when
        the detector has a neck (and that shape is truthy), otherwise with the
        detector backbone's ``out_shape``.

        Returns:
            dict: created objects keyed by ``detector``, ``reid``, ``loss``
            and ``tracker``.
        """
        detector = create(cfg['detector'])
        if detector.neck:
            reid_in_shape = (detector.neck.out_shape or
                             detector.backbone.out_shape)
        else:
            reid_in_shape = detector.backbone.out_shape

        reid = create(cfg['reid'], input_shape=reid_in_shape)
        loss = create(cfg['loss'])
        tracker = create(cfg['tracker'])

        return {
            'detector': detector,
            'reid': reid,
            'loss': loss,
            'tracker': tracker,
        }

    def _forward(self):
        """Run the detector and the reid head on ``self.inputs``.

        Returns:
            In training mode, the combined loss mapping, extended with every
            detector output whose key contains ``'loss'`` and with
            ``reid_loss``. Otherwise a ``(pred_dets, pred_embs)`` tuple.
        """
        # det_outs keys:
        # train: neck_feat, det_loss, heatmap_loss, size_loss, offset_loss (optional: iou_loss)
        # eval/infer: neck_feat, bbox, bbox_inds
        det_outs = self.detector(self.inputs)
        neck_feat = det_outs['neck_feat']

        if not self.training:
            pred_dets, pred_embs = self.reid(
                neck_feat, self.inputs, det_outs['bbox'],
                det_outs['bbox_inds'], det_outs['topk_clses'])
            return pred_dets, pred_embs

        reid_loss = self.reid(neck_feat, self.inputs)
        total = self.loss(det_outs['det_loss'], reid_loss)
        # Carry the individual detector losses along in the result.
        total.update(
            {name: value
             for name, value in det_outs.items() if 'loss' in name})
        total['reid_loss'] = reid_loss
        return total

    def get_pred(self):
        """Return the forward output unchanged."""
        return self._forward()

    def get_loss(self):
        """Return the forward output unchanged."""
        return self._forward()
@ -0,0 +1,106 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
import paddle |
||||
from paddlers.models.ppdet.core.workspace import register, create |
||||
from .meta_arch import BaseArch |
||||
|
||||
__all__ = ['FasterRCNN'] |
||||
|
||||
|
||||
@register
class FasterRCNN(BaseArch):
    """
    Faster R-CNN network, see https://arxiv.org/abs/1506.01497

    Args:
        backbone (object): backbone instance
        rpn_head (object): `RPNHead` instance
        bbox_head (object): `BBoxHead` instance
        bbox_post_process (object): `BBoxPostProcess` instance
        neck (object): 'FPN' instance; not applied when it is None
    """
    __category__ = 'architecture'
    __inject__ = ['bbox_post_process']

    def __init__(self,
                 backbone,
                 rpn_head,
                 bbox_head,
                 bbox_post_process,
                 neck=None):
        super(FasterRCNN, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.rpn_head = rpn_head
        self.bbox_head = bbox_head
        self.bbox_post_process = bbox_post_process

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        """Create backbone, optional neck, RPN head and bbox head from ``cfg``.

        Returns:
            dict: created modules keyed by ``backbone``, ``neck``,
            ``rpn_head`` and ``bbox_head``. A falsy ``cfg['neck']`` is passed
            through as is.
        """
        backbone = create(cfg['backbone'])

        neck_cfg = cfg['neck']
        if neck_cfg:
            neck = create(neck_cfg, input_shape=backbone.out_shape)
        else:
            neck = neck_cfg

        # Heads consume the neck's shape when there is one (and it is truthy),
        # otherwise the backbone's.
        if neck:
            out_shape = neck.out_shape or backbone.out_shape
        else:
            out_shape = backbone.out_shape
        rpn_head = create(cfg['rpn_head'], input_shape=out_shape)
        bbox_head = create(cfg['bbox_head'], input_shape=out_shape)

        return {
            'backbone': backbone,
            'neck': neck,
            "rpn_head": rpn_head,
            "bbox_head": bbox_head,
        }

    def _forward(self):
        """Run the detector on ``self.inputs``.

        Returns:
            tuple: ``(rpn_loss, bbox_loss)`` in training mode, otherwise
            ``(bbox_pred, bbox_num)``.
        """
        inputs = self.inputs
        feats = self.backbone(inputs)
        if self.neck is not None:
            feats = self.neck(feats)

        if not self.training:
            rois, rois_num, _ = self.rpn_head(feats, inputs)
            preds, _ = self.bbox_head(feats, rois, rois_num, None)

            im_shape = inputs['im_shape']
            scale_factor = inputs['scale_factor']
            bbox, bbox_num = self.bbox_post_process(
                preds, (rois, rois_num), im_shape, scale_factor)

            # rescale the prediction back to origin image
            bbox_pred = self.bbox_post_process.get_pred(
                bbox, bbox_num, im_shape, scale_factor)
            return bbox_pred, bbox_num

        rois, rois_num, rpn_loss = self.rpn_head(feats, inputs)
        bbox_loss, _ = self.bbox_head(feats, rois, rois_num, inputs)
        return rpn_loss, bbox_loss

    def get_loss(self, ):
        """Merge the RPN and bbox losses into one dict and add their sum under
        the key ``loss``."""
        merged = {}
        for part in self._forward():
            merged.update(part)
        merged['loss'] = paddle.add_n(list(merged.values()))
        return merged

    def get_pred(self):
        """Return the predictions as a dict with keys ``bbox`` and ``bbox_num``."""
        boxes, box_counts = self._forward()
        return {'bbox': boxes, 'bbox_num': box_counts}
@ -0,0 +1,105 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
import paddle |
||||
from paddlers.models.ppdet.core.workspace import register, create |
||||
from .meta_arch import BaseArch |
||||
|
||||
__all__ = ['FCOS'] |
||||
|
||||
|
||||
@register
class FCOS(BaseArch):
    """
    FCOS network, see https://arxiv.org/abs/1904.01355

    Args:
        backbone (object): backbone instance
        neck (object): 'FPN' instance
        fcos_head (object): 'FCOSHead' instance
        fcos_post_process (object): 'FCOSPostProcess' instance
    """

    __category__ = 'architecture'
    __inject__ = ['fcos_post_process']

    def __init__(self,
                 backbone,
                 neck,
                 fcos_head='FCOSHead',
                 fcos_post_process='FCOSPostProcess'):
        super(FCOS, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.fcos_head = fcos_head
        self.fcos_post_process = fcos_post_process

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        """Create backbone, neck and head from ``cfg``.

        The neck is created with the backbone's ``out_shape`` as its
        ``input_shape``; the head with the neck's ``out_shape``.

        Returns:
            dict: created modules keyed by ``backbone``, ``neck`` and
            ``fcos_head``.
        """
        backbone = create(cfg['backbone'])
        neck = create(cfg['neck'], input_shape=backbone.out_shape)
        fcos_head = create(cfg['fcos_head'], input_shape=neck.out_shape)

        return {'backbone': backbone, 'neck': neck, "fcos_head": fcos_head}

    def _forward(self):
        """Run backbone, neck and head on ``self.inputs``.

        Returns:
            The raw head outputs in training mode; otherwise the result of the
            post-process applied to them with ``scale_factor``.
        """
        fpn_feats = self.neck(self.backbone(self.inputs))
        head_outs = self.fcos_head(fpn_feats, self.training)
        if self.training:
            return head_outs

        scale_factor = self.inputs['scale_factor']
        bboxes = self.fcos_post_process(head_outs, scale_factor)
        return bboxes

    def get_loss(self, ):
        """Compute the head losses and add their sum under the key ``loss``.

        Per-level targets are read from ``self.inputs`` under the keys
        ``labels<i>``, ``reg_target<i>`` and ``centerness<i>``, one index per
        entry of the head's ``fpn_stride``; missing keys are skipped.
        """
        inputs = self.inputs
        levels = range(len(self.fcos_head.fpn_stride))

        # labels, reg_target, centerness
        label_keys = ['labels{}'.format(i) for i in levels]
        box_keys = ['reg_target{}'.format(i) for i in levels]
        ctn_keys = ['centerness{}'.format(i) for i in levels]
        tag_labels = [inputs[key] for key in label_keys if key in inputs]
        tag_bboxes = [inputs[key] for key in box_keys if key in inputs]
        tag_centerness = [inputs[key] for key in ctn_keys if key in inputs]

        head_outs = self._forward()
        losses = dict(
            self.fcos_head.get_loss(head_outs, tag_labels, tag_bboxes,
                                    tag_centerness))
        losses['loss'] = paddle.add_n(list(losses.values()))
        return losses

    def get_pred(self):
        """Return the predictions as a dict with keys ``bbox`` and ``bbox_num``."""
        boxes, box_counts = self._forward()
        return {'bbox': boxes, 'bbox_num': box_counts}
@ -0,0 +1,87 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
import paddle |
||||
from paddlers.models.ppdet.core.workspace import register, create |
||||
from .meta_arch import BaseArch |
||||
|
||||
__all__ = ['GFL'] |
||||
|
||||
|
||||
@register
class GFL(BaseArch):
    """
    Generalized Focal Loss network, see https://arxiv.org/abs/2006.04388

    Args:
        backbone (object): backbone instance
        neck (object): 'FPN' instance
        head (object): 'GFLHead' instance
    """

    __category__ = 'architecture'

    def __init__(self, backbone, neck, head='GFLHead'):
        super(GFL, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.head = head

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        """Create backbone, neck and head from ``cfg``.

        The neck is created with the backbone's ``out_shape`` as its
        ``input_shape``; the head with the neck's ``out_shape``.

        Returns:
            dict: created modules keyed by ``backbone``, ``neck`` and ``head``.
        """
        backbone = create(cfg['backbone'])
        neck = create(cfg['neck'], input_shape=backbone.out_shape)
        head = create(cfg['head'], input_shape=neck.out_shape)

        return {'backbone': backbone, 'neck': neck, "head": head}

    def _forward(self):
        """Run backbone, neck and head on ``self.inputs``.

        Returns:
            The raw head outputs in training mode; otherwise the
            ``(bboxes, bbox_num)`` tuple from the head's ``post_process``.
        """
        fpn_feats = self.neck(self.backbone(self.inputs))
        head_outs = self.head(fpn_feats)
        if self.training:
            return head_outs

        im_shape = self.inputs['im_shape']
        scale_factor = self.inputs['scale_factor']
        bboxes, bbox_num = self.head.post_process(head_outs, im_shape,
                                                  scale_factor)
        return bboxes, bbox_num

    def get_loss(self, ):
        """Compute the head losses and add their sum under the key ``loss``."""
        head_outs = self._forward()
        losses = dict(self.head.get_loss(head_outs, self.inputs))
        losses['loss'] = paddle.add_n(list(losses.values()))
        return losses

    def get_pred(self):
        """Return the predictions as a dict with keys ``bbox`` and ``bbox_num``."""
        boxes, box_counts = self._forward()
        return {'bbox': boxes, 'bbox_num': box_counts}
@ -0,0 +1,111 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
from paddlers.models.ppdet.core.workspace import register, create |
||||
from .meta_arch import BaseArch |
||||
|
||||
__all__ = ['JDE'] |
||||
|
||||
|
||||
@register
class JDE(BaseArch):
    """
    JDE network, see https://arxiv.org/abs/1909.12605v1

    Args:
        detector (object): detector model instance
        reid (object): reid model instance
        tracker (object): tracker instance
        metric (str): 'MOTDet' for training and detection evaluation, or
            'MOT' for multi object tracking evaluation. In evaluation mode
            ``_forward`` raises ``ValueError`` for any other value
            (including 'ReID').
    """

    # NOTE: the docstring must be the first statement of the class body,
    # otherwise it is a plain string expression and `JDE.__doc__` is None.
    __category__ = 'architecture'
    __shared__ = ['metric']

    def __init__(self,
                 detector='YOLOv3',
                 reid='JDEEmbeddingHead',
                 tracker='JDETracker',
                 metric='MOT'):
        super(JDE, self).__init__()
        self.detector = detector
        self.reid = reid
        self.tracker = tracker
        self.metric = metric

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        """Create detector, reid head and tracker from ``cfg``.

        The reid head is created with the detector neck's ``out_shape`` as
        its ``input_shape``.
        """
        detector = create(cfg['detector'])
        kwargs = {'input_shape': detector.neck.out_shape}

        reid = create(cfg['reid'], **kwargs)

        tracker = create(cfg['tracker'])

        return {
            "detector": detector,
            "reid": reid,
            "tracker": tracker,
        }

    def _forward(self):
        """Run the detector and, depending on mode, the reid head.

        Returns:
            * training: whatever ``self.reid`` returns when given the
              embedding features and the detector's ``loss_confs`` /
              ``loss_boxes``.
            * evaluation with ``metric == 'MOTDet'``: a dict with the
              detector's ``'bbox'`` and ``'bbox_num'``.
            * evaluation with ``metric == 'MOT'``: ``(pred_dets, pred_embs)``
              as returned by ``self.reid``.

        Raises:
            ValueError: in evaluation mode when ``metric`` is neither
                'MOTDet' nor 'MOT'.
        """
        det_outs = self.detector(self.inputs)

        if self.training:
            emb_feats = det_outs['emb_feats']
            loss_confs = det_outs['det_losses']['loss_confs']
            loss_boxes = det_outs['det_losses']['loss_boxes']
            jde_losses = self.reid(
                emb_feats,
                self.inputs,
                loss_confs=loss_confs,
                loss_boxes=loss_boxes)
            return jde_losses
        else:
            if self.metric == 'MOTDet':
                det_results = {
                    'bbox': det_outs['bbox'],
                    'bbox_num': det_outs['bbox_num'],
                }
                return det_results

            elif self.metric == 'MOT':
                emb_feats = det_outs['emb_feats']
                bboxes = det_outs['bbox']
                boxes_idx = det_outs['boxes_idx']
                nms_keep_idx = det_outs['nms_keep_idx']

                pred_dets, pred_embs = self.reid(
                    emb_feats,
                    self.inputs,
                    bboxes=bboxes,
                    boxes_idx=boxes_idx,
                    nms_keep_idx=nms_keep_idx)
                return pred_dets, pred_embs

            else:
                raise ValueError(
                    "Unknown metric {} for multi object tracking.".format(
                        self.metric))

    def get_loss(self):
        """Return the result of ``_forward`` (the training branch)."""
        return self._forward()

    def get_pred(self):
        """Return the result of ``_forward`` (the evaluation branch)."""
        return self._forward()
@ -0,0 +1,287 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
from scipy.optimize import linear_sum_assignment |
||||
from collections import abc, defaultdict |
||||
import numpy as np |
||||
import paddle |
||||
|
||||
from paddlers.models.ppdet.core.workspace import register, create, serializable |
||||
from .meta_arch import BaseArch |
||||
from .. import layers as L |
||||
from ..keypoint_utils import transpred |
||||
|
||||
__all__ = ['HigherHRNet'] |
||||
|
||||
|
||||
@register
class HigherHRNet(BaseArch):
    __category__ = 'architecture'

    def __init__(self,
                 backbone='HRNet',
                 hrhrnet_head='HrHRNetHead',
                 post_process='HrHRNetPostProcess',
                 eval_flip=True,
                 flip_perm=None,
                 max_num_people=30):
        """
        HigherHRNet network, see https://arxiv.org/abs/1908.10357;
        HigherHRNet+swahr, see https://arxiv.org/abs/2012.15175

        Args:
            backbone (nn.Layer): backbone instance
            hrhrnet_head (nn.Layer): keypoint_head instance
            post_process (object): post-process instance, called in
                `_forward` with the outputs of `get_topk` plus the image
                height and width
            eval_flip (bool): whether to add a horizontally flipped copy of
                the image during evaluation and merge both predictions
            flip_perm (list): passed to `paddle.to_tensor`; used as the
                index of `paddle.gather` on axis 1 of the flipped outputs
            max_num_people (int): `k` of the top-k selection in `get_topk`
        """
        super(HigherHRNet, self).__init__()
        self.backbone = backbone
        self.hrhrnet_head = hrhrnet_head
        self.post_process = post_process
        self.flip = eval_flip
        self.flip_perm = paddle.to_tensor(flip_perm)
        self.deploy = False
        self.interpolate = L.Upsample(2, mode='bilinear')
        self.pool = L.MaxPool(5, 1, 2)
        self.max_num_people = max_num_people

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        """Create backbone, head and post-process objects from ``cfg``."""
        backbone = create(cfg['backbone'])
        # The head is created with the backbone's output shape.
        hrhrnet_head = create(
            cfg['hrhrnet_head'], input_shape=backbone.out_shape)
        post_process = create(cfg['post_process'])
        return dict(
            backbone=backbone,
            hrhrnet_head=hrhrnet_head,
            post_process=post_process)

    def _forward(self):
        """Training: return the head output; deploy: the `get_topk` list;
        evaluation: ``[[kpts, scores]]`` from the post-process object."""
        # Flip augmentation: append the flipped image along the first axis.
        if self.flip and not self.training and not self.deploy:
            image = self.inputs['image']
            self.inputs['image'] = paddle.concat(
                (image, paddle.flip(image, [3])))
        body_feats = self.backbone(self.inputs)

        if self.training:
            return self.hrhrnet_head(body_feats, self.inputs)

        outputs = self.hrhrnet_head(body_feats)
        if self.flip and not self.deploy:
            # Split every output into its (original, flipped) halves.
            halves = [paddle.split(o, 2) for o in outputs]
            # Undo the flip: reorder along axis 1, then flip back.
            unflipped = [
                paddle.flip(paddle.gather(pair[1], self.flip_perm, 1), [3])
                for pair in halves
            ]
            plain = [pair[0] for pair in halves]
            merged_heatmap = (plain[0] + unflipped[0]) / 2.
            outputs = [merged_heatmap, plain[1], unflipped[1]]
        outputs = self.get_topk(outputs)

        if self.deploy:
            return outputs

        im_shape = self.inputs['im_shape']
        h = im_shape[0, 0].numpy().item()
        w = im_shape[0, 1].numpy().item()
        kpts, scores = self.post_process(*outputs, h, w)
        return [[kpts, scores]]

    def get_loss(self):
        """Return the result of ``_forward`` (the training branch)."""
        return self._forward()

    def get_pred(self):
        """Return ``{'keypoint': <result of _forward>}``."""
        return {'keypoint': self._forward()}

    def get_topk(self, outputs):
        """Upsample the outputs and pick the top responses per joint.

        Returns:
            list: ``[heatmap, tagmap, heat_k, inds_k]`` where ``heat_k`` and
            ``inds_k`` come from a top-k over the local maxima of the
            heatmap (positions equal to their max-pooled value).
        """
        # resize to image size
        resized = [self.interpolate(x) for x in outputs]
        if len(resized) == 3:
            # Two tag maps: stack them on a new trailing axis.
            tagmap = paddle.concat(
                (resized[1].unsqueeze(4), resized[2].unsqueeze(4)), axis=4)
        else:
            tagmap = resized[1].unsqueeze(4)

        heatmap = resized[0]
        num_joints = self.hrhrnet_head.num_joints
        pooled = self.pool(heatmap)
        # Keep only positions that equal their pooled neighbourhood maximum.
        peaks = heatmap * (heatmap == pooled)
        # The first reshape dimension is fixed to 1.
        peaks = peaks.reshape([1, num_joints, -1])
        heat_k, inds_k = peaks.topk(self.max_num_people, axis=2)

        return [heatmap, tagmap, heat_k, inds_k]
||||
|
||||
|
||||
@register
@serializable
class HrHRNetPostProcess(object):
    '''
    HrHRNet postprocess contains:
        1) get topk keypoints in the output heatmap
        2) sample the tagmap's value corresponding to each of the topk coordinate
        3) match different joints to combine to some people with Hungarian algorithm
        4) adjust the coordinate by +-0.25 to decrease error std
        5) salvage missing joints by checking positivity of heatmap - tagdiff_norm
    Args:
        max_num_people (int): max number of people supported in postprocess
        heat_thresh (float): topk values that are not above this threshold are ignored
        tag_thresh (float): a joint is merged into an existing cluster only when
            its tag distance to that cluster is below this threshold

    Call arguments:
        heatmap, tagmap, heat_k, inds_k: the four outputs of the model;
            heat_k / inds_k are the topk values and flat indices of the heatmap
        original_height, original_width (float): the original image size
    '''

    def __init__(self, max_num_people=30, heat_thresh=0.1, tag_thresh=1.):
        self.max_num_people = max_num_people
        self.heat_thresh = heat_thresh
        self.tag_thresh = tag_thresh

    def lerp(self, j, y, x, heatmap):
        """Return the (y, x) offsets to add to integer peak coordinates.

        For each point the offset is 0.5 plus 0.25 when the next
        neighbour (down / right) is larger than the previous one
        (up / left), and 0.5 minus 0.25 otherwise. Neighbour indices are
        clipped to the heatmap bounds.
        """
        H, W = heatmap.shape[-2:]
        left = np.clip(x - 1, 0, W - 1)
        right = np.clip(x + 1, 0, W - 1)
        up = np.clip(y - 1, 0, H - 1)
        down = np.clip(y + 1, 0, H - 1)
        offset_y = np.where(heatmap[j, down, x] > heatmap[j, up, x], 0.25,
                            -0.25)
        offset_x = np.where(heatmap[j, y, right] > heatmap[j, y, left], 0.25,
                            -0.25)
        return offset_y + 0.5, offset_x + 0.5

    def __call__(self, heatmap, tagmap, heat_k, inds_k, original_height,
                 original_width):
        """Group top-k joint candidates into poses.

        Returns:
            tuple: ``(pose_kpts, mean_score)``. ``pose_kpts`` has shape
            (num_poses, J, 3) holding the coordinates produced by
            ``transpred`` and a score. When no joint has a positive score,
            ``pose_kpts`` is returned for both tuple elements.
        """

        N, J, H, W = heatmap.shape
        assert N == 1, "only support batch size 1"
        # Drop the batch axis and move everything to numpy.
        heatmap = heatmap[0].cpu().detach().numpy()
        tagmap = tagmap[0].cpu().detach().numpy()
        heats = heat_k[0].cpu().detach().numpy()
        inds_np = inds_k[0].cpu().detach().numpy()
        # Decode flat indices into (row, col) using the heatmap width.
        y = inds_np // W
        x = inds_np % W
        # Sample the tag vector at every top-k location, per joint.
        tags = tagmap[np.arange(J)[None, :].repeat(self.max_num_people), y.
                      flatten(), x.flatten()].reshape(J, -1, tagmap.shape[-1])
        coords = np.stack((y, x), axis=2)
        # threshold
        mask = heats > self.heat_thresh
        # cluster
        # Each cluster is one pose candidate, keyed by the first component
        # of the tag that created it.
        cluster = defaultdict(lambda: {
            'coords': np.zeros((J, 2), dtype=np.float32),
            'scores': np.zeros(J, dtype=np.float32),
            'tags': []
        })
        for jid, m in enumerate(mask):
            num_valid = m.sum()
            if num_valid == 0:
                continue
            valid_inds = np.where(m)[0]
            valid_tags = tags[jid, m, :]
            if len(cluster) == 0:  # initialize
                for i in valid_inds:
                    tag = tags[jid, i]
                    key = tag[0]
                    cluster[key]['tags'].append(tag)
                    cluster[key]['scores'][jid] = heats[jid, i]
                    cluster[key]['coords'][jid] = coords[jid, i]
                continue
            candidates = list(cluster.keys())[:self.max_num_people]
            centroids = [
                np.mean(
                    cluster[k]['tags'], axis=0) for k in candidates
            ]
            num_clusters = len(centroids)
            # shape is (num_valid, num_clusters, tag_dim)
            dist = valid_tags[:, None, :] - np.array(centroids)[None, ...]
            l2_dist = np.linalg.norm(dist, ord=2, axis=2)
            # modulate dist with heat value, see `use_detection_val`
            cost = np.round(l2_dist) * 100 - heats[jid, m, None]
            # pad the cost matrix, otherwise new pose are ignored
            if num_valid > num_clusters:
                cost = np.pad(cost, ((0, 0), (0, num_valid - num_clusters)),
                              'constant',
                              constant_values=((0, 0), (0, 1e-10)))
            rows, cols = linear_sum_assignment(cost)
            # NOTE: `y` and `x` are reused here as (row, column) of the
            # assignment, shadowing the coordinate arrays defined above.
            for y, x in zip(rows, cols):
                tag = tags[jid, y]
                if y < num_valid and x < num_clusters and \
                   l2_dist[y, x] < self.tag_thresh:
                    key = candidates[x]  # merge to cluster
                else:
                    key = tag[0]  # initialize new cluster
                cluster[key]['tags'].append(tag)
                cluster[key]['scores'][jid] = heats[jid, y]
                cluster[key]['coords'][jid] = coords[jid, y]

        # shape is [k, J, 2] and [k, J]
        pose_tags = np.array([cluster[k]['tags'] for k in cluster])
        pose_coords = np.array([cluster[k]['coords'] for k in cluster])
        pose_scores = np.array([cluster[k]['scores'] for k in cluster])
        valid = pose_scores > 0

        pose_kpts = np.zeros((pose_scores.shape[0], J, 3), dtype=np.float32)
        if valid.sum() == 0:
            # NOTE(review): the same array is returned for both elements
            # here, unlike the (pose_kpts, mean_score) pair returned below.
            return pose_kpts, pose_kpts

        # refine coords
        valid_coords = pose_coords[valid].astype(np.int32)
        y = valid_coords[..., 0].flatten()
        x = valid_coords[..., 1].flatten()
        _, j = np.nonzero(valid)
        offsets = self.lerp(j, y, x, heatmap)
        pose_coords[valid, 0] += offsets[0]
        pose_coords[valid, 1] += offsets[1]

        # mean score before salvage
        mean_score = pose_scores.mean(axis=1)
        pose_kpts[valid, 2] = pose_scores[valid]

        # salvage missing joints
        if True:
            # NOTE: the loop variable `coords` shadows the earlier array and
            # is not used inside the loop body.
            for pid, coords in enumerate(pose_coords):
                tag_mean = np.array(pose_tags[pid]).mean(axis=0)
                norm = np.sum((tagmap - tag_mean)**2, axis=3)**0.5
                score = heatmap - np.round(norm)  # (J, H, W)
                flat_score = score.reshape(J, -1)
                max_inds = np.argmax(flat_score, axis=1)
                max_scores = np.max(flat_score, axis=1)
                # Joints with no score yet whose best location scores > 0.
                salvage_joints = (pose_scores[pid] == 0) & (max_scores > 0)
                if salvage_joints.sum() == 0:
                    continue
                y = max_inds[salvage_joints] // W
                x = max_inds[salvage_joints] % W
                offsets = self.lerp(salvage_joints.nonzero()[0], y, x, heatmap)
                y = y.astype(np.float32) + offsets[0]
                x = x.astype(np.float32) + offsets[1]
                pose_coords[pid][salvage_joints, 0] = y
                pose_coords[pid][salvage_joints, 1] = x
                pose_kpts[pid][salvage_joints, 2] = max_scores[salvage_joints]
        # Coordinates are stored as (y, x); reverse to (x, y) for transpred.
        pose_kpts[..., :2] = transpred(pose_coords[..., :2][..., ::-1],
                                       original_height, original_width,
                                       min(H, W))
        return pose_kpts, mean_score
@ -0,0 +1,267 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
import paddle |
||||
import numpy as np |
||||
import math |
||||
import cv2 |
||||
from paddlers.models.ppdet.core.workspace import register, create |
||||
from .meta_arch import BaseArch |
||||
from ..keypoint_utils import transform_preds |
||||
from .. import layers as L |
||||
|
||||
__all__ = ['TopDownHRNet'] |
||||
|
||||
|
||||
@register
class TopDownHRNet(BaseArch):
    __category__ = 'architecture'
    __inject__ = ['loss']

    def __init__(self,
                 width,
                 num_joints,
                 backbone='HRNet',
                 loss='KeyPointMSELoss',
                 post_process='HRNetPostProcess',
                 flip_perm=None,
                 flip=True,
                 shift_heatmap=True,
                 use_dark=True):
        """
        HRNet network, see https://arxiv.org/abs/1902.09212

        Args:
            width (int): input channel count of the final 1x1 convolution
            num_joints (int): output channel count of the final convolution
            backbone (nn.Layer): backbone instance
            loss (object): called as `loss(outputs, inputs)` when training
            post_process (object): not read by this constructor; an
                `HRNetPostProcess(use_dark)` is always created instead
            flip_perm (list): The left-right joints exchange order list
            flip (bool): whether to also predict on the flipped image during
                evaluation and average both heatmaps
            shift_heatmap (bool): whether to shift the flipped heatmap by
                one column before averaging
            use_dark(bool): Whether to use DARK in post processing
        """
        super(TopDownHRNet, self).__init__()
        self.backbone = backbone
        self.post_process = HRNetPostProcess(use_dark)
        self.loss = loss
        self.flip_perm = flip_perm
        self.flip = flip
        self.final_conv = L.Conv2d(width, num_joints, 1, 1, 0, bias=True)
        self.shift_heatmap = shift_heatmap
        self.deploy = False

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        """Create the backbone from ``cfg``."""
        return dict(backbone=create(cfg['backbone']))

    def _forward(self):
        """Training: return the loss; deploy: ``(heatmaps, max_idx)``;
        evaluation: the result of the post-process object."""
        feats = self.backbone(self.inputs)
        hrnet_outputs = self.final_conv(feats[0])

        if self.training:
            return self.loss(hrnet_outputs, self.inputs)

        if self.deploy:
            # Argmax over the flattened spatial dimensions of each joint.
            outshape = hrnet_outputs.shape
            flat = hrnet_outputs.reshape(
                (outshape[0], outshape[1], outshape[2] * outshape[3]))
            return hrnet_outputs, paddle.argmax(flat, axis=-1)

        if self.flip:
            # Second pass on the horizontally flipped image.
            self.inputs['image'] = self.inputs['image'].flip([3])
            flipped_feats = self.backbone(self.inputs)
            flipped_np = self.flip_back(
                self.final_conv(flipped_feats[0]).numpy(), self.flip_perm)
            output_flipped = paddle.to_tensor(flipped_np.copy())
            if self.shift_heatmap:
                output_flipped[:, :, :, 1:] = output_flipped.clone(
                )[:, :, :, 0:-1]
            hrnet_outputs = (hrnet_outputs + output_flipped) * 0.5

        inputs = self.inputs
        if 'im_shape' in inputs:
            imshape = inputs['im_shape'].numpy()[:, ::-1]
        else:
            imshape = None
        # `center` and `scale` fall back to values derived from `imshape`.
        if 'center' in inputs:
            center = inputs['center'].numpy()
        else:
            center = np.round(imshape / 2.)
        if 'scale' in inputs:
            scale = inputs['scale'].numpy()
        else:
            scale = imshape / 200.
        return self.post_process(hrnet_outputs, center, scale)

    def get_loss(self):
        """Return the result of ``_forward`` (the training branch)."""
        return self._forward()

    def get_pred(self):
        """Return ``{'keypoint': <result of _forward>}``."""
        return {'keypoint': self._forward()}

    def flip_back(self, output_flipped, matched_parts):
        """Reverse the last axis and swap the channel pairs in
        ``matched_parts``; returns the reversed view of the input array."""
        assert output_flipped.ndim == 4,\
                'output_flipped should be [batch_size, num_joints, height, width]'

        mirrored = output_flipped[:, :, :, ::-1]

        for pair in matched_parts:
            first, second = pair[0], pair[1]
            saved = mirrored[:, first, :, :].copy()
            mirrored[:, first, :, :] = mirrored[:, second, :, :]
            mirrored[:, second, :, :] = saved

        return mirrored
||||
|
||||
|
||||
class HRNetPostProcess(object):
    """Turn predicted keypoint heatmaps into keypoint coordinates and scores.

    Args:
        use_dark (bool): when True, ``get_final_preds`` refines the argmax
            location with ``dark_postprocess``; otherwise it shifts the
            location by a quarter pixel toward the higher neighbour.
    """

    def __init__(self, use_dark=True):
        self.use_dark = use_dark

    def get_max_preds(self, heatmaps):
        '''get predictions from score maps

        Args:
            heatmaps: numpy.ndarray([batch_size, num_joints, height, width])

        Returns:
            preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords
                as (x, y); set to (0, 0) where the maximum is not positive
            maxvals: numpy.ndarray([batch_size, num_joints, 1]), the maximum confidence of the keypoints
        '''
        assert isinstance(heatmaps,
                          np.ndarray), 'heatmaps should be numpy.ndarray'
        assert heatmaps.ndim == 4, 'batch_images should be 4-ndim'

        batch_size = heatmaps.shape[0]
        num_joints = heatmaps.shape[1]
        width = heatmaps.shape[3]
        heatmaps_reshaped = heatmaps.reshape((batch_size, num_joints, -1))
        idx = np.argmax(heatmaps_reshaped, 2)
        maxvals = np.amax(heatmaps_reshaped, 2)

        maxvals = maxvals.reshape((batch_size, num_joints, 1))
        idx = idx.reshape((batch_size, num_joints, 1))

        preds = np.tile(idx, (1, 1, 2)).astype(np.float32)

        # Decode the flat index: column = idx % width, row = idx // width.
        preds[:, :, 0] = (preds[:, :, 0]) % width
        preds[:, :, 1] = np.floor((preds[:, :, 1]) / width)

        # Zero out coordinates of joints whose best response is not positive.
        pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2))
        pred_mask = pred_mask.astype(np.float32)

        preds *= pred_mask

        return preds, maxvals

    def gaussian_blur(self, heatmap, kernel):
        """Blur every joint heatmap in place and restore its original peak.

        Args:
            heatmap: numpy.ndarray([batch_size, num_joints, height, width]),
                modified in place
            kernel (int): size of the square Gaussian kernel

        Returns:
            The same ``heatmap`` array.
        """
        border = (kernel - 1) // 2
        batch_size = heatmap.shape[0]
        num_joints = heatmap.shape[1]
        height = heatmap.shape[2]
        width = heatmap.shape[3]
        # Explicit end indices: `border:-border` would be an empty slice
        # when border == 0 (kernel == 1).
        rows = slice(border, border + height)
        cols = slice(border, border + width)
        for i in range(batch_size):
            for j in range(num_joints):
                origin_max = np.max(heatmap[i, j])
                dr = np.zeros((height + 2 * border, width + 2 * border))
                dr[rows, cols] = heatmap[i, j].copy()
                dr = cv2.GaussianBlur(dr, (kernel, kernel), 0)
                heatmap[i, j] = dr[rows, cols].copy()
                blurred_max = np.max(heatmap[i, j])
                # Skip the rescale when the blurred maximum is zero so the
                # map is not filled with NaN/inf.
                if blurred_max != 0:
                    heatmap[i, j] *= origin_max / blurred_max
        return heatmap

    def dark_parse(self, hm, coord):
        """Refine one coordinate with a second-order Taylor step.

        Args:
            hm: 2-D array indexed as ``hm[y][x]``
            coord: array ``[x, y]``; updated in place when a step is taken

        Returns:
            ``coord``. It is left unchanged when the point is within two
            pixels of the border or the Hessian determinant is zero.
        """
        heatmap_height = hm.shape[0]
        heatmap_width = hm.shape[1]
        px = int(coord[0])
        py = int(coord[1])
        if 1 < px < heatmap_width - 2 and 1 < py < heatmap_height - 2:
            # Finite-difference first and second derivatives at (px, py).
            dx = 0.5 * (hm[py][px + 1] - hm[py][px - 1])
            dy = 0.5 * (hm[py + 1][px] - hm[py - 1][px])
            dxx = 0.25 * (hm[py][px + 2] - 2 * hm[py][px] + hm[py][px - 2])
            dxy = 0.25 * (hm[py + 1][px + 1] - hm[py - 1][px + 1] -
                          hm[py + 1][px - 1] + hm[py - 1][px - 1])
            dyy = 0.25 * (hm[py + 2][px] - 2 * hm[py][px] + hm[py - 2][px])
            if dxx * dyy - dxy**2 != 0:
                # Plain ndarrays instead of np.matrix.
                derivative = np.array([dx, dy])
                hessian = np.array([[dxx, dxy], [dxy, dyy]])
                offset = -np.linalg.inv(hessian).dot(derivative)
                coord += offset
        return coord

    def dark_postprocess(self, hm, coords, kernelsize):
        '''DARK postprocessing, Zhang et al. Distribution-Aware Coordinate
        Representation for Human Pose Estimation (CVPR 2020).

        Blurs ``hm`` (in place), takes the log of the result clamped to at
        least 1e-10, and refines every entry of ``coords`` with
        ``dark_parse``.
        '''

        hm = self.gaussian_blur(hm, kernelsize)
        hm = np.maximum(hm, 1e-10)
        hm = np.log(hm)
        for n in range(coords.shape[0]):
            for p in range(coords.shape[1]):
                coords[n, p] = self.dark_parse(hm[n][p], coords[n][p])
        return coords

    def get_final_preds(self, heatmaps, center, scale, kernelsize=3):
        """the highest heatvalue location with a quarter offset in the
        direction from the highest response to the second highest response.

        Args:
            heatmaps (numpy.ndarray): The predicted heatmaps
            center (numpy.ndarray): The boxes center
            scale (numpy.ndarray): The scale factor
            kernelsize (int): Gaussian kernel size used when ``use_dark``
                is True

        Returns:
            preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords
            maxvals: numpy.ndarray([batch_size, num_joints, 1]), the maximum confidence of the keypoints
        """
        coords, maxvals = self.get_max_preds(heatmaps)

        heatmap_height = heatmaps.shape[2]
        heatmap_width = heatmaps.shape[3]

        if self.use_dark:
            coords = self.dark_postprocess(heatmaps, coords, kernelsize)
        else:
            for n in range(coords.shape[0]):
                for p in range(coords.shape[1]):
                    hm = heatmaps[n][p]
                    px = int(math.floor(coords[n][p][0] + 0.5))
                    py = int(math.floor(coords[n][p][1] + 0.5))
                    if 1 < px < heatmap_width - 1 and 1 < py < heatmap_height - 1:
                        diff = np.array([
                            hm[py][px + 1] - hm[py][px - 1],
                            hm[py + 1][px] - hm[py - 1][px]
                        ])
                        coords[n][p] += np.sign(diff) * .25
        preds = coords.copy()

        # Transform back
        for i in range(coords.shape[0]):
            preds[i] = transform_preds(coords[i], center[i], scale[i],
                                       [heatmap_width, heatmap_height])

        return preds, maxvals

    def __call__(self, output, center, scale):
        """Decode ``output`` and return ``[[keypoints, mean_scores]]``.

        ``keypoints`` concatenates the coordinates and the per-joint maximum
        on the last axis; ``mean_scores`` is that maximum averaged over
        axis 1.
        """
        preds, maxvals = self.get_final_preds(output.numpy(), center, scale)
        outputs = [[
            np.concatenate(
                (preds, maxvals), axis=-1), np.mean(
                    maxvals, axis=1)
        ]]
        return outputs
@ -0,0 +1,135 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
import paddle |
||||
from paddlers.models.ppdet.core.workspace import register, create |
||||
from .meta_arch import BaseArch |
||||
|
||||
__all__ = ['MaskRCNN'] |
||||
|
||||
|
||||
@register
class MaskRCNN(BaseArch):
    """
    Mask R-CNN network, see https://arxiv.org/abs/1703.06870

    Args:
        backbone (object): backbone instance
        rpn_head (object): `RPNHead` instance
        bbox_head (object): `BBoxHead` instance
        mask_head (object): `MaskHead` instance
        bbox_post_process (object): `BBoxPostProcess` instance
        mask_post_process (object): `MaskPostProcess` instance
        neck (object): 'FPN' instance
    """

    __category__ = 'architecture'
    __inject__ = [
        'bbox_post_process',
        'mask_post_process',
    ]

    def __init__(self,
                 backbone,
                 rpn_head,
                 bbox_head,
                 mask_head,
                 bbox_post_process,
                 mask_post_process,
                 neck=None):
        super(MaskRCNN, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.rpn_head = rpn_head
        self.bbox_head = bbox_head
        self.mask_head = mask_head

        self.bbox_post_process = bbox_post_process
        self.mask_post_process = mask_post_process

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        """Create backbone, optional neck and the three heads from ``cfg``.

        The RPN and bbox heads take the neck's ``out_shape`` when a neck is
        configured, otherwise the backbone's. The mask head takes that same
        shape when a neck exists, otherwise the ``out_shape`` of
        ``bbox_head.get_head()``.
        """
        backbone = create(cfg['backbone'])
        kwargs = {'input_shape': backbone.out_shape}
        # A falsy cfg['neck'] means no neck; `neck` then holds that value.
        neck = cfg['neck'] and create(cfg['neck'], **kwargs)

        out_shape = neck and neck.out_shape or backbone.out_shape
        kwargs = {'input_shape': out_shape}
        rpn_head = create(cfg['rpn_head'], **kwargs)
        bbox_head = create(cfg['bbox_head'], **kwargs)

        out_shape = neck and out_shape or bbox_head.get_head().out_shape
        kwargs = {'input_shape': out_shape}
        mask_head = create(cfg['mask_head'], **kwargs)
        return {
            'backbone': backbone,
            'neck': neck,
            "rpn_head": rpn_head,
            "bbox_head": bbox_head,
            "mask_head": mask_head,
        }

    def _forward(self):
        """Run the network.

        Returns:
            * training: ``(rpn_loss, bbox_loss, mask_loss)``
            * evaluation: ``(bbox_pred, bbox_num, mask_pred)``
        """
        body_feats = self.backbone(self.inputs)
        if self.neck is not None:
            body_feats = self.neck(body_feats)

        if self.training:
            rois, rois_num, rpn_loss = self.rpn_head(body_feats, self.inputs)
            bbox_loss, bbox_feat = self.bbox_head(body_feats, rois, rois_num,
                                                  self.inputs)
            rois, rois_num = self.bbox_head.get_assigned_rois()
            bbox_targets = self.bbox_head.get_assigned_targets()
            # Mask Head needs bbox_feat in Mask RCNN
            mask_loss = self.mask_head(body_feats, rois, rois_num, self.inputs,
                                       bbox_targets, bbox_feat)
            return rpn_loss, bbox_loss, mask_loss
        else:
            rois, rois_num, _ = self.rpn_head(body_feats, self.inputs)
            preds, feat_func = self.bbox_head(body_feats, rois, rois_num, None)

            im_shape = self.inputs['im_shape']
            scale_factor = self.inputs['scale_factor']

            bbox, bbox_num = self.bbox_post_process(preds, (rois, rois_num),
                                                    im_shape, scale_factor)
            mask_out = self.mask_head(
                body_feats, bbox, bbox_num, self.inputs, feat_func=feat_func)

            # rescale the prediction back to origin image
            bbox_pred = self.bbox_post_process.get_pred(bbox, bbox_num,
                                                        im_shape, scale_factor)
            origin_shape = self.bbox_post_process.get_origin_shape()
            mask_pred = self.mask_post_process(mask_out[:, 0, :, :], bbox_pred,
                                               bbox_num, origin_shape)
            return bbox_pred, bbox_num, mask_pred

    def get_loss(self, ):
        """Merge the RPN, bbox and mask losses and add their sum as ``'loss'``."""
        # Unpack in the order `_forward` returns them; the previous
        # `bbox_loss, mask_loss, rpn_loss` order mislabelled all three.
        rpn_loss, bbox_loss, mask_loss = self._forward()
        loss = {}
        loss.update(rpn_loss)
        loss.update(bbox_loss)
        loss.update(mask_loss)
        total_loss = paddle.add_n(list(loss.values()))
        loss.update({'loss': total_loss})
        return loss

    def get_pred(self):
        """Return the predictions under keys ``'bbox'``, ``'bbox_num'``, ``'mask'``."""
        bbox_pred, bbox_num, mask_pred = self._forward()
        output = {'bbox': bbox_pred, 'bbox_num': bbox_num, 'mask': mask_pred}
        return output
@ -0,0 +1,141 @@ |
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
import numpy as np |
||||
import paddle |
||||
import paddle.nn as nn |
||||
import typing |
||||
|
||||
from paddlers.models.ppdet.core.workspace import register |
||||
from paddlers.models.ppdet.modeling.post_process import nms |
||||
|
||||
__all__ = ['BaseArch'] |
||||
|
||||
|
||||
@register |
||||
class BaseArch(nn.Layer): |
||||
def __init__(self, data_format='NCHW'):
    """Initialise the state shared by all architectures.

    Args:
        data_format (str): stored as ``self.data_format``.
    """
    super(BaseArch, self).__init__()
    # Fused input normalization is off by default.
    self.fuse_norm = False
    # Holds the inputs of the current call.
    self.inputs = {}
    self.data_format = data_format
||||
|
||||
def load_meanstd(self, cfg_transform):
    """Load the normalization constants into ``scale``/``mean``/``std``.

    Defaults are scale 1, mean [0.485, 0.456, 0.406] and std
    [0.229, 0.224, 0.225]. The first item of ``cfg_transform`` that
    contains the key ``'NormalizeImage'`` overrides mean and std, and sets
    the scale to 1/255 unless its ``'is_scale'`` entry is falsy.

    Args:
        cfg_transform (iterable): transform configs; each item is tested
            with ``'NormalizeImage' in item``.
    """
    self.scale = 1.
    self.mean = paddle.to_tensor([0.485, 0.456, 0.406]).reshape(
        (1, 3, 1, 1))
    self.std = paddle.to_tensor([0.229, 0.224, 0.225]).reshape(
        (1, 3, 1, 1))
    for item in cfg_transform:
        if 'NormalizeImage' in item:
            self.mean = paddle.to_tensor(item['NormalizeImage'][
                'mean']).reshape((1, 3, 1, 1))
            self.std = paddle.to_tensor(item['NormalizeImage'][
                'std']).reshape((1, 3, 1, 1))
            if item['NormalizeImage'].get('is_scale', True):
                self.scale = 1. / 255.
            break
    if self.data_format == 'NHWC':
        # Pass the shape as one tuple, like the reshape calls above.
        self.mean = self.mean.reshape((1, 1, 1, 3))
        self.std = self.std.reshape((1, 1, 1, 3))
||||
|
||||
def forward(self, inputs): |
||||
if self.data_format == 'NHWC': |
||||
image = inputs['image'] |
||||
inputs['image'] = paddle.transpose(image, [0, 2, 3, 1]) |
||||
|
||||
if self.fuse_norm: |
||||
image = inputs['image'] |
||||
self.inputs['image'] = (image * self.scale - self.mean) / self.std |
||||
self.inputs['im_shape'] = inputs['im_shape'] |
||||
self.inputs['scale_factor'] = inputs['scale_factor'] |
||||
else: |
||||
self.inputs = inputs |
||||
|
||||
self.model_arch() |
||||
|
||||
if self.training: |
||||
out = self.get_loss() |
||||
else: |
||||
inputs_list = [] |
||||
# multi-scale input |
||||
if not isinstance(inputs, typing.Sequence): |
||||
inputs_list.append(inputs) |
||||
else: |
||||
inputs_list.extend(inputs) |
||||
|
||||
outs = [] |
||||
for inp in inputs_list: |
||||
self.inputs = inp |
||||
outs.append(self.get_pred()) |
||||
|
||||
# multi-scale test |
||||
if len(outs) > 1: |
||||
out = self.merge_multi_scale_predictions(outs) |
||||
else: |
||||
out = outs[0] |
||||
return out |
||||
|
||||
def merge_multi_scale_predictions(self, outs): |
||||
# default values for architectures not included in following list |
||||
num_classes = 80 |
||||
nms_threshold = 0.5 |
||||
keep_top_k = 100 |
||||
|
||||
if self.__class__.__name__ in ('CascadeRCNN', 'FasterRCNN', 'MaskRCNN' |
||||
): |
||||
num_classes = self.bbox_head.num_classes |
||||
keep_top_k = self.bbox_post_process.nms.keep_top_k |
||||
nms_threshold = self.bbox_post_process.nms.nms_threshold |
||||
else: |
||||
raise Exception( |
||||
"Multi scale test only supports CascadeRCNN, FasterRCNN and MaskRCNN for now" |
||||
) |
||||
|
||||
final_boxes = [] |
||||
all_scale_outs = paddle.concat([o['bbox'] for o in outs]).numpy() |
||||
for c in range(num_classes): |
||||
idxs = all_scale_outs[:, 0] == c |
||||
if np.count_nonzero(idxs) == 0: |
||||
continue |
||||
r = nms(all_scale_outs[idxs, 1:], nms_threshold) |
||||
final_boxes.append( |
||||
np.concatenate([np.full((r.shape[0], 1), c), r], 1)) |
||||
out = np.concatenate(final_boxes) |
||||
out = np.concatenate(sorted( |
||||
out, key=lambda e: e[1])[-keep_top_k:]).reshape((-1, 6)) |
||||
out = { |
||||
'bbox': paddle.to_tensor(out), |
||||
'bbox_num': paddle.to_tensor(np.array([out.shape[0], ])) |
||||
} |
||||
|
||||
return out |
||||
|
||||
def build_inputs(self, data, input_def): |
||||
inputs = {} |
||||
for i, k in enumerate(input_def): |
||||
inputs[k] = data[i] |
||||
return inputs |
||||
|
||||
def model_arch(self, ): |
||||
pass |
||||
|
||||
def get_loss(self, ): |
||||
raise NotImplementedError("Should implement get_loss method!") |
||||
|
||||
def get_pred(self, ): |
||||
raise NotImplementedError("Should implement get_pred method!") |
||||
|
||||
@classmethod |
||||
def convert_sync_batchnorm(cls, layer): |
||||
layer_output = layer |
||||
if getattr(layer, 'norm_type', None) == 'sync_bn': |
||||
layer_output = nn.SyncBatchNorm.convert_sync_batchnorm(layer) |
||||
else: |
||||
for name, sublayer in layer.named_children(): |
||||
layer_output.add_sublayer(name, |
||||
cls.convert_sync_batchnorm(sublayer)) |
||||
|
||||
del layer |
||||
return layer_output |
@ -0,0 +1,91 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
import paddle |
||||
from paddlers.models.ppdet.core.workspace import register, create |
||||
from .meta_arch import BaseArch |
||||
|
||||
__all__ = ['PicoDet'] |
||||
|
||||
|
||||
@register
class PicoDet(BaseArch):
    """
    Generalized Focal Loss network, see https://arxiv.org/abs/2006.04388

    Args:
        backbone (object): backbone instance
        neck (object): 'FPN' instance
        head (object): 'PicoHead' instance
    """

    __category__ = 'architecture'

    def __init__(self, backbone, neck, head='PicoHead'):
        super(PicoDet, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.head = head
        # When True, _forward/get_pred return the raw head outputs.
        self.deploy = False

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        """Create backbone, neck and head, chaining each ``out_shape`` forward."""
        backbone = create(cfg['backbone'])
        neck = create(cfg['neck'], input_shape=backbone.out_shape)
        head = create(cfg['head'], input_shape=neck.out_shape)
        return {'backbone': backbone, 'neck': neck, "head": head}

    def _forward(self):
        """Run backbone -> neck -> head.

        Returns:
            tuple: ``(head_outs, None)`` when training or deploying,
                otherwise the ``(bboxes, bbox_num)`` pair produced by
                ``head.post_process``.
        """
        backbone_out = self.backbone(self.inputs)
        neck_out = self.neck(backbone_out)
        head_outs = self.head(neck_out, self.deploy)

        if self.training or self.deploy:
            return head_outs, None

        bboxes, bbox_num = self.head.post_process(
            head_outs, self.inputs['im_shape'], self.inputs['scale_factor'])
        return bboxes, bbox_num

    def get_loss(self, ):
        """Return the head losses plus their ``paddle.add_n`` sum under 'loss'."""
        head_outs, _ = self._forward()
        losses = dict(self.head.get_loss(head_outs, self.inputs))
        losses['loss'] = paddle.add_n(list(losses.values()))
        return losses

    def get_pred(self):
        """Return ``{'picodet': ...}`` when deploying, else bbox/bbox_num."""
        if self.deploy:
            return {'picodet': self._forward()[0]}

        bbox_pred, bbox_num = self._forward()
        return {'bbox': bbox_pred, 'bbox_num': bbox_num}
@ -0,0 +1,102 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
import paddle |
||||
from paddlers.models.ppdet.core.workspace import register, create |
||||
from .meta_arch import BaseArch |
||||
|
||||
__all__ = ['S2ANet'] |
||||
|
||||
|
||||
@register
class S2ANet(BaseArch):
    __category__ = 'architecture'
    __inject__ = [
        's2anet_head',
        's2anet_bbox_post_process',
    ]

    def __init__(self, backbone, neck, s2anet_head, s2anet_bbox_post_process):
        """
        S2ANet, see https://arxiv.org/pdf/2008.09397.pdf

        Args:
            backbone (object): backbone instance
            neck (object): `FPN` instance; `_forward` skips it when None
            s2anet_head (object): `S2ANetHead` instance
            s2anet_bbox_post_process (object): `S2ANetBBoxPostProcess` instance
        """
        super(S2ANet, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.s2anet_head = s2anet_head
        self.s2anet_bbox_post_process = s2anet_bbox_post_process

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        """Create the sub-modules; the neck is only created for a truthy config."""
        backbone = create(cfg['backbone'])

        # A falsy neck config is passed through unchanged instead of being
        # created.
        neck_cfg = cfg['neck']
        if neck_cfg:
            neck = create(cfg['neck'], input_shape=backbone.out_shape)
        else:
            neck = neck_cfg

        # Prefer the neck's output shape; fall back to the backbone's when
        # there is no neck or its out_shape is falsy.
        neck_shape = neck.out_shape if neck else None
        out_shape = neck_shape if neck_shape else backbone.out_shape

        head_kwargs = {'input_shape': out_shape}
        s2anet_head = create(cfg['s2anet_head'], **head_kwargs)
        s2anet_bbox_post_process = create(cfg['s2anet_bbox_post_process'],
                                          **head_kwargs)

        return {
            'backbone': backbone,
            'neck': neck,
            "s2anet_head": s2anet_head,
            "s2anet_bbox_post_process": s2anet_bbox_post_process,
        }

    def _forward(self):
        """Run the network.

        Returns:
            dict: in training, the head losses with their ``paddle.add_n``
                sum added under 'loss'; otherwise 'bbox' and 'bbox_num'.
        """
        feats = self.backbone(self.inputs)
        if self.neck is not None:
            feats = self.neck(feats)
        # The head keeps its own state; its return value is not used here.
        self.s2anet_head(feats)

        if self.training:
            loss = self.s2anet_head.get_loss(self.inputs)
            loss.update({'loss': paddle.add_n(list(loss.values()))})
            return loss

        im_shape = self.inputs['im_shape']
        scale_factor = self.inputs['scale_factor']
        post_process = self.s2anet_bbox_post_process
        pred_scores, pred_bboxes = self.s2anet_head.get_prediction(
            post_process.nms_pre)

        # post_process
        pred_bboxes, bbox_num = post_process(pred_scores, pred_bboxes)
        # rescale the prediction back to origin image
        pred_bboxes = post_process.get_pred(pred_bboxes, bbox_num, im_shape,
                                            scale_factor)

        return {'bbox': pred_bboxes, 'bbox_num': bbox_num}

    def get_loss(self, ):
        """Return the loss dict computed by ``_forward``."""
        return self._forward()

    def get_pred(self):
        """Return the prediction dict computed by ``_forward``."""
        return self._forward()
@ -0,0 +1,110 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
import paddle |
||||
|
||||
from paddlers.models.ppdet.core.workspace import register, create |
||||
from .meta_arch import BaseArch |
||||
|
||||
__all__ = ['SOLOv2'] |
||||
|
||||
|
||||
@register
class SOLOv2(BaseArch):
    """
    SOLOv2 network, see https://arxiv.org/abs/2003.10152

    Args:
        backbone (object): a backbone instance
        solov2_head (object): a `SOLOv2Head` instance
        mask_head (object): a `SOLOv2MaskHead` instance
        neck (object): neck of network, such as feature pyramid network instance
    """

    __category__ = 'architecture'

    def __init__(self, backbone, solov2_head, mask_head, neck=None):
        super(SOLOv2, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.solov2_head = solov2_head
        self.mask_head = mask_head

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        """Create backbone, neck and both heads; heads share the neck's out_shape."""
        backbone = create(cfg['backbone'])
        neck = create(cfg['neck'], input_shape=backbone.out_shape)

        head_kwargs = {'input_shape': neck.out_shape}
        solov2_head = create(cfg['solov2_head'], **head_kwargs)
        mask_head = create(cfg['mask_head'], **head_kwargs)

        return {
            'backbone': backbone,
            'neck': neck,
            'solov2_head': solov2_head,
            'mask_head': mask_head,
        }

    def model_arch(self):
        """Compute and cache the predictions read by get_loss/get_pred."""
        feats = self.neck(self.backbone(self.inputs))
        self.seg_pred = self.mask_head(feats)
        self.cate_pred_list, self.kernel_pred_list = self.solov2_head(feats)

    def get_loss(self, ):
        """Return the SOLOv2 head losses plus their sum under 'loss'."""
        # get gt_ins_labels, gt_cate_labels, etc.
        gt_ins_labels, gt_cate_labels, gt_grid_orders = [], [], []
        fg_num = self.inputs['fg_num']

        # One optional input per entry of seg_num_grids and per target kind;
        # inputs that are missing are silently skipped.
        targets = (
            ('ins_label{}', gt_ins_labels),
            ('cate_label{}', gt_cate_labels),
            ('grid_order{}', gt_grid_orders), )
        for i in range(len(self.solov2_head.seg_num_grids)):
            for template, bucket in targets:
                key = template.format(i)
                if key in self.inputs:
                    bucket.append(self.inputs[key])

        losses = dict(
            self.solov2_head.get_loss(
                self.cate_pred_list, self.kernel_pred_list, self.seg_pred,
                gt_ins_labels, gt_cate_labels, gt_grid_orders, fg_num))
        losses['loss'] = paddle.add_n(list(losses.values()))
        return losses

    def get_pred(self):
        """Return segm / bbox_num / cate_label / cate_score from the head."""
        prediction = self.solov2_head.get_prediction(
            self.cate_pred_list, self.kernel_pred_list, self.seg_pred,
            self.inputs['im_shape'], self.inputs['scale_factor'])
        seg_masks, cate_labels, cate_scores, bbox_num = prediction
        return {
            "segm": seg_masks,
            "bbox_num": bbox_num,
            'cate_label': cate_labels,
            'cate_score': cate_scores
        }
@ -0,0 +1,99 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
from paddlers.models.ppdet.core.workspace import register, create |
||||
from .meta_arch import BaseArch |
||||
|
||||
__all__ = ["SparseRCNN"] |
||||
|
||||
|
||||
@register
class SparseRCNN(BaseArch):
    """Sparse R-CNN architecture: backbone -> neck -> head (+ post-process).

    Args:
        backbone (object): backbone instance
        neck (object): neck instance
        head (object): head instance, called with the neck features and
            ``inputs["img_whwh"]``
        postprocess (object): post-process instance, used in evaluation only
    """

    __category__ = 'architecture'
    __inject__ = ["postprocess"]

    def __init__(self,
                 backbone,
                 neck,
                 head="SparsercnnHead",
                 postprocess="SparsePostProcess"):
        super(SparseRCNN, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.head = head
        self.postprocess = postprocess

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        """Create backbone, neck and head; the head receives ``roi_input_shape``."""
        backbone = create(cfg['backbone'])
        neck = create(cfg['neck'], input_shape=backbone.out_shape)
        head = create(cfg['head'], roi_input_shape=neck.out_shape)
        return {'backbone': backbone, 'neck': neck, "head": head}

    def _forward(self):
        """Return the raw head outputs (training) or post-processed boxes."""
        feats = self.neck(self.backbone(self.inputs))
        head_outs = self.head(feats, self.inputs["img_whwh"])

        if self.training:
            return head_outs

        return self.postprocess(
            head_outs["pred_logits"], head_outs["pred_boxes"],
            self.inputs["scale_factor_wh"], self.inputs["img_whwh"])

    def get_loss(self):
        """Build per-image targets and return the head's loss dict.

        The returned dict has 'loss' set to the sum of every head entry
        except 'acc'; 'acc' itself is carried over unchanged.
        """
        batch_gt_class = self.inputs["gt_class"]
        batch_gt_box = self.inputs["gt_bbox"]
        batch_whwh = self.inputs["img_whwh"]

        targets = []
        for idx in range(len(batch_gt_class)):
            boxes = batch_gt_box[idx]
            img_whwh = batch_whwh[idx]
            targets.append({
                "boxes": boxes,
                "labels": batch_gt_class[idx].squeeze(-1),
                "img_whwh": img_whwh,
                # img_whwh repeated once per box of this image
                "img_whwh_tgt":
                img_whwh.unsqueeze(0).tile([int(boxes.shape[0]), 1])
            })

        loss_dict = self.head.get_loss(self._forward(), targets)
        # 'acc' is taken out so that it does not contribute to the total.
        acc = loss_dict.pop("acc")
        total_loss = sum(loss_dict.values())
        loss_dict.update({"loss": total_loss, "acc": acc})
        return loss_dict

    def get_pred(self):
        """Return the post-processed boxes as 'bbox' / 'bbox_num'."""
        bbox_pred, bbox_num = self._forward()
        return {'bbox': bbox_pred, 'bbox_num': bbox_num}
@ -0,0 +1,93 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
from paddlers.models.ppdet.core.workspace import register, create |
||||
from .meta_arch import BaseArch |
||||
|
||||
__all__ = ['SSD'] |
||||
|
||||
|
||||
@register
class SSD(BaseArch):
    """
    Single Shot MultiBox Detector, see https://arxiv.org/abs/1512.02325

    Args:
        backbone (nn.Layer): backbone instance
        ssd_head (nn.Layer): `SSDHead` instance
        post_process (object): `BBoxPostProcess` instance
        r34_backbone (bool): when True, the backbone must be a ResNet of
            depth 34 and two conv strides in ``res_layers[2].blocks[0]``
            are overwritten with [1, 1]
    """

    __category__ = 'architecture'
    __inject__ = ['post_process']

    def __init__(self, backbone, ssd_head, post_process, r34_backbone=False):
        super(SSD, self).__init__()
        self.backbone = backbone
        self.ssd_head = ssd_head
        self.post_process = post_process
        self.r34_backbone = r34_backbone

        if not self.r34_backbone:
            return

        from paddlers.models.ppdet.modeling.backbones.resnet import ResNet
        is_resnet34 = isinstance(self.backbone,
                                 ResNet) and self.backbone.depth == 34
        assert is_resnet34, \
            "If you set r34_backbone=True, please use ResNet-34 as backbone."
        # Patch the strides of the first block of res_layers[2] in place.
        first_block = self.backbone.res_layers[2].blocks[0]
        first_block.branch2a.conv._stride = [1, 1]
        first_block.short.conv._stride = [1, 1]

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        """Create the backbone and an SSD head sized from its ``out_shape``."""
        # backbone
        backbone = create(cfg['backbone'])
        # head
        ssd_head = create(cfg['ssd_head'], input_shape=backbone.out_shape)
        return {'backbone': backbone, "ssd_head": ssd_head}

    def _forward(self):
        """Return the head's training output, or ``(bbox, bbox_num)`` in eval."""
        # Backbone
        feats = self.backbone(self.inputs)

        # SSD Head
        if self.training:
            return self.ssd_head(feats, self.inputs['image'],
                                 self.inputs['gt_bbox'],
                                 self.inputs['gt_class'])

        preds, anchors = self.ssd_head(feats, self.inputs['image'])
        bbox, bbox_num = self.post_process(preds, anchors,
                                           self.inputs['im_shape'],
                                           self.inputs['scale_factor'])
        return bbox, bbox_num

    def get_loss(self, ):
        """Wrap the training output of ``_forward`` under the key 'loss'."""
        return {"loss": self._forward()}

    def get_pred(self):
        """Return the detections as 'bbox' / 'bbox_num'."""
        bbox_pred, bbox_num = self._forward()
        return {"bbox": bbox_pred, "bbox_num": bbox_num}
@ -0,0 +1,78 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
from paddlers.models.ppdet.core.workspace import register, create |
||||
from .meta_arch import BaseArch |
||||
|
||||
__all__ = ['TOOD'] |
||||
|
||||
|
||||
@register
class TOOD(BaseArch):
    """
    TOOD: Task-aligned One-stage Object Detection, see https://arxiv.org/abs/2108.07755

    Args:
        backbone (nn.Layer): backbone instance
        neck (nn.Layer): 'FPN' instance
        head (nn.Layer): 'TOODHead' instance
    """

    __category__ = 'architecture'

    def __init__(self, backbone, neck, head):
        super(TOOD, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.head = head

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        """Create backbone, neck and head, chaining each ``out_shape`` forward."""
        backbone = create(cfg['backbone'])
        neck = create(cfg['neck'], input_shape=backbone.out_shape)
        head = create(cfg['head'], input_shape=neck.out_shape)
        return {'backbone': backbone, 'neck': neck, "head": head}

    def _forward(self):
        """Return the head loss (training) or ``(bboxes, bbox_num)`` (eval)."""
        feats = self.neck(self.backbone(self.inputs))
        head_outs = self.head(feats)

        if self.training:
            return self.head.get_loss(head_outs, self.inputs)

        bboxes, bbox_num = self.head.post_process(
            head_outs, self.inputs['im_shape'], self.inputs['scale_factor'])
        return bboxes, bbox_num

    def get_loss(self):
        """Return whatever ``head.get_loss`` produced in ``_forward``."""
        return self._forward()

    def get_pred(self):
        """Return the detections as 'bbox' / 'bbox_num'."""
        bbox_pred, bbox_num = self._forward()
        return {'bbox': bbox_pred, 'bbox_num': bbox_num}
@ -0,0 +1,98 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
import paddle |
||||
from paddlers.models.ppdet.core.workspace import register, create |
||||
from .meta_arch import BaseArch |
||||
|
||||
__all__ = ['TTFNet'] |
||||
|
||||
|
||||
@register
class TTFNet(BaseArch):
    """
    TTFNet network, see https://arxiv.org/abs/1909.00700

    Args:
        backbone (object): backbone instance
        neck (object): 'TTFFPN' instance
        ttf_head (object): 'TTFHead' instance
        post_process (object): 'BBoxPostProcess' instance
    """

    __category__ = 'architecture'
    __inject__ = ['post_process']

    def __init__(self,
                 backbone='DarkNet',
                 neck='TTFFPN',
                 ttf_head='TTFHead',
                 post_process='BBoxPostProcess'):
        super(TTFNet, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.ttf_head = ttf_head
        self.post_process = post_process

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        """Create backbone, neck and head, chaining each ``out_shape`` forward."""
        backbone = create(cfg['backbone'])
        neck = create(cfg['neck'], input_shape=backbone.out_shape)
        ttf_head = create(cfg['ttf_head'], input_shape=neck.out_shape)
        return {'backbone': backbone, 'neck': neck, "ttf_head": ttf_head}

    def _forward(self):
        """Return the head outputs ``(hm, wh)`` (training) or ``(bbox, bbox_num)``."""
        feats = self.neck(self.backbone(self.inputs))
        hm, wh = self.ttf_head(feats)

        if self.training:
            return hm, wh

        bbox, bbox_num = self.post_process(hm, wh, self.inputs['im_shape'],
                                           self.inputs['scale_factor'])
        return bbox, bbox_num

    def get_loss(self, ):
        """Return the TTF head losses plus their ``paddle.add_n`` sum under 'loss'."""
        # Targets are read before the forward pass runs.
        heatmap = self.inputs['ttf_heatmap']
        box_target = self.inputs['ttf_box_target']
        reg_weight = self.inputs['ttf_reg_weight']

        hm, wh = self._forward()
        losses = dict(
            self.ttf_head.get_loss(hm, wh, heatmap, box_target, reg_weight))
        losses['loss'] = paddle.add_n(list(losses.values()))
        return losses

    def get_pred(self):
        """Return the detections as 'bbox' / 'bbox_num'."""
        bbox_pred, bbox_num = self._forward()
        return {"bbox": bbox_pred, "bbox_num": bbox_num}
@ -0,0 +1,124 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
from paddlers.models.ppdet.core.workspace import register, create |
||||
from .meta_arch import BaseArch |
||||
from ..post_process import JDEBBoxPostProcess |
||||
|
||||
__all__ = ['YOLOv3'] |
||||
|
||||
|
||||
@register
class YOLOv3(BaseArch):
    __category__ = 'architecture'
    __shared__ = ['data_format']
    __inject__ = ['post_process']

    def __init__(self,
                 backbone='DarkNet',
                 neck='YOLOv3FPN',
                 yolo_head='YOLOv3Head',
                 post_process='BBoxPostProcess',
                 data_format='NCHW',
                 for_mot=False):
        """
        YOLOv3 network, see https://arxiv.org/abs/1804.02767

        Args:
            backbone (nn.Layer): backbone instance
            neck (nn.Layer): neck instance
            yolo_head (nn.Layer): anchor_head instance
            post_process (object): `BBoxPostProcess` instance
            data_format (str): data format, NCHW or NHWC
            for_mot (bool): whether return other features for multi-object tracking
                models, default False in pure object detection models.
        """
        super(YOLOv3, self).__init__(data_format=data_format)
        self.backbone = backbone
        self.neck = neck
        self.yolo_head = yolo_head
        self.post_process = post_process
        self.for_mot = for_mot
        # A JDEBBoxPostProcess returns four values instead of two, see
        # _forward.
        self.return_idx = isinstance(post_process, JDEBBoxPostProcess)

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        """Create backbone, neck and head, chaining each ``out_shape`` forward."""
        # backbone
        backbone = create(cfg['backbone'])
        # fpn
        neck = create(cfg['neck'], input_shape=backbone.out_shape)
        # head
        yolo_head = create(cfg['yolo_head'], input_shape=neck.out_shape)
        return {'backbone': backbone, 'neck': neck, "yolo_head": yolo_head}

    def _forward(self):
        """Run the network.

        Returns:
            In training: the head losses, or a dict with 'det_losses' and
            'emb_feats' when ``for_mot`` is set. In evaluation: a dict with
            'bbox' and 'bbox_num', extended by 'boxes_idx', 'nms_keep_idx'
            and 'emb_feats' when ``for_mot`` is set.
        """
        body_feats = self.backbone(self.inputs)
        neck_feats = self.neck(body_feats, self.for_mot)

        # A dict result carries the embedding features next to the YOLO
        # features; emb_feats is only bound on this path.
        if isinstance(neck_feats, dict):
            assert self.for_mot == True
            emb_feats = neck_feats['emb_feats']
            neck_feats = neck_feats['yolo_feats']

        if self.training:
            yolo_losses = self.yolo_head(neck_feats, self.inputs)
            if self.for_mot:
                return {'det_losses': yolo_losses, 'emb_feats': emb_feats}
            return yolo_losses

        yolo_head_outs = self.yolo_head(neck_feats)
        anchors = self.yolo_head.mask_anchors

        if self.for_mot:
            boxes_idx, bbox, bbox_num, nms_keep_idx = self.post_process(
                yolo_head_outs, anchors)
            return {
                'bbox': bbox,
                'bbox_num': bbox_num,
                'boxes_idx': boxes_idx,
                'nms_keep_idx': nms_keep_idx,
                'emb_feats': emb_feats,
            }

        if self.return_idx:
            _, bbox, bbox_num, _ = self.post_process(yolo_head_outs, anchors)
        else:
            bbox, bbox_num = self.post_process(yolo_head_outs, anchors,
                                               self.inputs['im_shape'],
                                               self.inputs['scale_factor'])
        return {'bbox': bbox, 'bbox_num': bbox_num}

    def get_loss(self):
        """Return the training result of ``_forward``."""
        return self._forward()

    def get_pred(self):
        """Return the evaluation result of ``_forward``."""
        return self._forward()
@ -0,0 +1,23 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from . import utils |
||||
from . import task_aligned_assigner |
||||
from . import atss_assigner |
||||
from . import simota_assigner |
||||
|
||||
from .utils import * |
||||
from .task_aligned_assigner import * |
||||
from .atss_assigner import * |
||||
from .simota_assigner import * |
@ -0,0 +1,211 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
import numpy as np |
||||
import paddle |
||||
import paddle.nn as nn |
||||
import paddle.nn.functional as F |
||||
|
||||
from paddlers.models.ppdet.core.workspace import register |
||||
from ..ops import iou_similarity |
||||
from ..bbox_utils import bbox_center |
||||
from .utils import (pad_gt, check_points_inside_bboxes, compute_max_iou_anchor, |
||||
compute_max_iou_gt) |
||||
|
||||
|
||||
@register
class ATSSAssigner(nn.Layer):
    """Bridging the Gap Between Anchor-based and Anchor-free Detection
     via Adaptive Training Sample Selection

    Args:
        topk (int): number of closest anchors selected per gt on each
            pyramid level. Default 9.
        num_classes (int): depth of the one-hot `assigned_scores`. Default 80.
        force_gt_matching (bool): if True, run the extra step that assigns
            each gt its max-IoU anchor. Default False.
        eps (float): stored on the instance; not read by this class.
    """
    __shared__ = ['num_classes']

    def __init__(self,
                 topk=9,
                 num_classes=80,
                 force_gt_matching=False,
                 eps=1e-9):
        super(ATSSAssigner, self).__init__()
        self.topk = topk
        self.num_classes = num_classes
        self.force_gt_matching = force_gt_matching
        self.eps = eps

    def _gather_topk_pyramid(self, gt2anchor_distances, num_anchors_list,
                             pad_gt_mask):
        """Select, per pyramid level, the `topk` anchors closest to each gt.

        Args:
            gt2anchor_distances (Tensor): gt-to-anchor center distances,
                split along the last axis by `num_anchors_list`.
            num_anchors_list (List[int]): number of anchors in each level.
            pad_gt_mask (Tensor): mask of real (non-padded) gts; it is tiled
                `topk` times along the last axis and cast to bool.
        Returns:
            is_in_topk (Tensor): per-level selection masks concatenated along
                the last axis, cast to `gt2anchor_distances.dtype`.
            topk_idxs (Tensor): selected anchor indices, offset by each
                level's start so they index the concatenated anchors.
        """
        pad_gt_mask = pad_gt_mask.tile([1, 1, self.topk]).astype(paddle.bool)
        gt2anchor_distances_list = paddle.split(
            gt2anchor_distances, num_anchors_list, axis=-1)
        # Start offset of every level inside the concatenated anchor axis.
        num_anchors_index = np.cumsum(num_anchors_list).tolist()
        num_anchors_index = [0, ] + num_anchors_index[:-1]
        is_in_topk_list = []
        topk_idxs_list = []
        for distances, anchors_index in zip(gt2anchor_distances_list,
                                            num_anchors_index):
            num_anchors = distances.shape[-1]
            # largest=False: the smallest distances are the closest anchors.
            _, topk_idxs = paddle.topk(
                distances, self.topk, axis=-1, largest=False)
            topk_idxs_list.append(topk_idxs + anchors_index)
            # Padded gts are pointed at index 0; the resulting count at that
            # index exceeds 1 and is zeroed right below.
            topk_idxs = paddle.where(pad_gt_mask, topk_idxs,
                                     paddle.zeros_like(topk_idxs))
            is_in_topk = F.one_hot(topk_idxs, num_anchors).sum(axis=-2)
            is_in_topk = paddle.where(is_in_topk > 1,
                                      paddle.zeros_like(is_in_topk),
                                      is_in_topk)
            is_in_topk_list.append(
                is_in_topk.astype(gt2anchor_distances.dtype))
        is_in_topk_list = paddle.concat(is_in_topk_list, axis=-1)
        topk_idxs_list = paddle.concat(topk_idxs_list, axis=-1)
        return is_in_topk_list, topk_idxs_list

    @paddle.no_grad()
    def forward(self,
                anchor_bboxes,
                num_anchors_list,
                gt_labels,
                gt_bboxes,
                bg_index,
                gt_scores=None):
        r"""This code is based on
            https://github.com/fcjian/TOOD/blob/master/mmdet/core/bbox/assigners/atss_assigner.py

        The assignment is done in following steps
        1. compute iou between all bbox (bbox of all pyramid levels) and gt
        2. compute center distance between all bbox and gt
        3. on each pyramid level, for each gt, select k bbox whose center
           are closest to the gt center, so we total select k*l bbox as
           candidates for each gt
        4. get corresponding iou for the these candidates, and compute the
           mean and std, set mean + std as the iou threshold
        5. select these candidates whose iou are greater than the
           threshold as positive
        6. limit the positive sample's center in gt
        7. if an anchor box is assigned to multiple gts, the one with the
           highest iou will be selected.
        8. (only if `force_gt_matching`) make sure every gt_bbox matches
           an anchor
        Args:
            anchor_bboxes (Tensor, float32): pre-defined anchors, shape(L, 4),
                    "xmin, ymin, xmax, ymax" format
            num_anchors_list (List): num of anchors in each level
            gt_labels (Tensor|List[Tensor], int64): Label of gt_bboxes, shape(B, n, 1)
            gt_bboxes (Tensor|List[Tensor], float32): Ground truth bboxes, shape(B, n, 4)
            bg_index (int): background index
            gt_scores (Tensor|List[Tensor]|None, float32) Score of gt_bboxes,
                    shape(B, n, 1), if None, then it will initialize with one_hot label
        Returns:
            assigned_labels (Tensor): (B, L), same dtype as the padded gt_labels
            assigned_bboxes (Tensor): (B, L, 4)
            assigned_scores (Tensor): (B, L, C)
        """
        gt_labels, gt_bboxes, pad_gt_scores, pad_gt_mask = pad_gt(
            gt_labels, gt_bboxes, gt_scores)
        assert gt_labels.ndim == gt_bboxes.ndim and \
               gt_bboxes.ndim == 3

        num_anchors, _ = anchor_bboxes.shape
        batch_size, num_max_boxes, _ = gt_bboxes.shape

        # negative batch
        if num_max_boxes == 0:
            # Use the label dtype so this branch returns the same dtype as the
            # regular path (paddle.full defaults to float32 otherwise).
            assigned_labels = paddle.full(
                [batch_size, num_anchors], bg_index, dtype=gt_labels.dtype)
            assigned_bboxes = paddle.zeros([batch_size, num_anchors, 4])
            assigned_scores = paddle.zeros(
                [batch_size, num_anchors, self.num_classes])
            return assigned_labels, assigned_bboxes, assigned_scores

        # 1. compute iou between gt and anchor bbox, [B, n, L]
        ious = iou_similarity(gt_bboxes.reshape([-1, 4]), anchor_bboxes)
        ious = ious.reshape([batch_size, -1, num_anchors])

        # 2. compute center distance between all anchors and gt, [B, n, L]
        gt_centers = bbox_center(gt_bboxes.reshape([-1, 4])).unsqueeze(1)
        anchor_centers = bbox_center(anchor_bboxes)
        gt2anchor_distances = (gt_centers - anchor_centers.unsqueeze(0)) \
            .norm(2, axis=-1).reshape([batch_size, -1, num_anchors])

        # 3. on each pyramid level, selecting topk closest candidates
        # based on the center distance, [B, n, L]
        is_in_topk, topk_idxs = self._gather_topk_pyramid(
            gt2anchor_distances, num_anchors_list, pad_gt_mask)

        # 4. get corresponding iou for the these candidates, and compute the
        # mean and std, 5. set mean + std as the iou threshold
        iou_candidates = ious * is_in_topk
        iou_threshold = paddle.index_sample(
            iou_candidates.flatten(stop_axis=-2),
            topk_idxs.flatten(stop_axis=-2))
        iou_threshold = iou_threshold.reshape([batch_size, num_max_boxes, -1])
        iou_threshold = iou_threshold.mean(axis=-1, keepdim=True) + \
                        iou_threshold.std(axis=-1, keepdim=True)
        is_in_topk = paddle.where(
            iou_candidates > iou_threshold.tile([1, 1, num_anchors]),
            is_in_topk, paddle.zeros_like(is_in_topk))

        # 6. check the positive sample's center in gt, [B, n, L]
        is_in_gts = check_points_inside_bboxes(anchor_centers, gt_bboxes)

        # select positive sample, [B, n, L]
        mask_positive = is_in_topk * is_in_gts * pad_gt_mask

        # 7. if an anchor box is assigned to multiple gts,
        # the one with the highest iou will be selected.
        mask_positive_sum = mask_positive.sum(axis=-2)
        if mask_positive_sum.max() > 1:
            mask_multiple_gts = (mask_positive_sum.unsqueeze(1) > 1).tile(
                [1, num_max_boxes, 1])
            is_max_iou = compute_max_iou_anchor(ious)
            mask_positive = paddle.where(mask_multiple_gts, is_max_iou,
                                         mask_positive)
            mask_positive_sum = mask_positive.sum(axis=-2)
        # 8. make sure every gt_bbox matches the anchor
        if self.force_gt_matching:
            is_max_iou = compute_max_iou_gt(ious) * pad_gt_mask
            mask_max_iou = (is_max_iou.sum(-2, keepdim=True) == 1).tile(
                [1, num_max_boxes, 1])
            mask_positive = paddle.where(mask_max_iou, is_max_iou,
                                         mask_positive)
            mask_positive_sum = mask_positive.sum(axis=-2)
        assigned_gt_index = mask_positive.argmax(axis=-2)
        # A batch where no anchor was selected (max == 0) is valid: every
        # anchor is mapped to `bg_index` below. Only more than one gt per
        # anchor violates the invariant.
        assert mask_positive_sum.max() <= 1, \
            ("one anchor can be assigned to at most one gt. "
             "Received: %f" % mask_positive_sum.max().item())

        # assigned target
        batch_ind = paddle.arange(
            end=batch_size, dtype=gt_labels.dtype).unsqueeze(-1)
        # Turn per-image gt indices into indices of the flattened [B * n] gts.
        assigned_gt_index = assigned_gt_index + batch_ind * num_max_boxes
        assigned_labels = paddle.gather(
            gt_labels.flatten(), assigned_gt_index.flatten(), axis=0)
        assigned_labels = assigned_labels.reshape([batch_size, num_anchors])
        assigned_labels = paddle.where(
            mask_positive_sum > 0, assigned_labels,
            paddle.full_like(assigned_labels, bg_index))

        assigned_bboxes = paddle.gather(
            gt_bboxes.reshape([-1, 4]), assigned_gt_index.flatten(), axis=0)
        assigned_bboxes = assigned_bboxes.reshape([batch_size, num_anchors, 4])

        # NOTE(review): background anchors carry `bg_index` here; confirm that
        # F.one_hot with depth `num_classes` accepts that value.
        assigned_scores = F.one_hot(assigned_labels, self.num_classes)
        if gt_scores is not None:
            gather_scores = paddle.gather(
                pad_gt_scores.flatten(), assigned_gt_index.flatten(), axis=0)
            gather_scores = gather_scores.reshape([batch_size, num_anchors])
            gather_scores = paddle.where(mask_positive_sum > 0, gather_scores,
                                         paddle.zeros_like(gather_scores))
            assigned_scores *= gather_scores.unsqueeze(-1)

        return assigned_labels, assigned_bboxes, assigned_scores
@ -0,0 +1,262 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
# The code is based on: |
||||
# https://github.com/open-mmlab/mmdetection/blob/master/mmdet/core/bbox/assigners/sim_ota_assigner.py |
||||
|
||||
import paddle |
||||
import numpy as np |
||||
import paddle.nn.functional as F |
||||
|
||||
from paddlers.models.ppdet.modeling.losses.varifocal_loss import varifocal_loss |
||||
from paddlers.models.ppdet.modeling.bbox_utils import batch_bbox_overlaps |
||||
from paddlers.models.ppdet.core.workspace import register |
||||
|
||||
|
||||
@register
class SimOTAAssigner(object):
    """Computes matching between predictions and ground truth.
    Args:
        center_radius (int | float, optional): Ground truth center size
            to judge whether a prior is in center. Default 2.5.
        candidate_topk (int, optional): The candidate top-k which used to
            get top-k ious to calculate dynamic-k. Default 10.
        iou_weight (int | float, optional): The scale factor for regression
            iou cost. Default 3.0.
        cls_weight (int | float, optional): The scale factor for classification
            cost. Default 1.0.
        num_classes (int): The num_classes of dataset.
        use_vfl (bool): Whether to use varifocal_loss when calculating the cost matrix.
    """
    __shared__ = ['num_classes']

    def __init__(self,
                 center_radius=2.5,
                 candidate_topk=10,
                 iou_weight=3.0,
                 cls_weight=1.0,
                 num_classes=80,
                 use_vfl=True):
        self.center_radius = center_radius
        self.candidate_topk = candidate_topk
        self.iou_weight = iou_weight
        self.cls_weight = cls_weight
        self.num_classes = num_classes
        self.use_vfl = use_vfl

    def get_in_gt_and_in_center_info(self, flatten_center_and_stride,
                                     gt_bboxes):
        """Find priors lying inside a gt box and/or inside a gt center region.

        Args:
            flatten_center_and_stride (Tensor): columns 0..3 are read as
                center x, center y, stride x, stride y of each prior.
            gt_bboxes (Tensor): columns 0..3 are read as x1, y1, x2, y2.
        Returns:
            is_in_gts_or_centers_all (Tensor, bool): per prior, whether it is
                inside any gt box or any gt center region.
            is_in_gts_or_centers_all_inds (Tensor): indices of those priors.
            is_in_gts_and_centers (Tensor, bool): for the selected priors,
                whether each is inside both the box and the center region
                of every gt, shape [num_fg, num_gt].
        """
        num_gt = gt_bboxes.shape[0]

        flatten_x = flatten_center_and_stride[:, 0].unsqueeze(1).tile(
            [1, num_gt])
        flatten_y = flatten_center_and_stride[:, 1].unsqueeze(1).tile(
            [1, num_gt])
        flatten_stride_x = flatten_center_and_stride[:, 2].unsqueeze(1).tile(
            [1, num_gt])
        flatten_stride_y = flatten_center_and_stride[:, 3].unsqueeze(1).tile(
            [1, num_gt])

        # is prior centers in gt bboxes, shape: [n_center, n_gt]
        l_ = flatten_x - gt_bboxes[:, 0]
        t_ = flatten_y - gt_bboxes[:, 1]
        r_ = gt_bboxes[:, 2] - flatten_x
        b_ = gt_bboxes[:, 3] - flatten_y

        deltas = paddle.stack([l_, t_, r_, b_], axis=1)
        is_in_gts = deltas.min(axis=1) > 0
        is_in_gts_all = is_in_gts.sum(axis=1) > 0

        # is prior centers in gt centers
        gt_center_xs = (gt_bboxes[:, 0] + gt_bboxes[:, 2]) / 2.0
        gt_center_ys = (gt_bboxes[:, 1] + gt_bboxes[:, 3]) / 2.0
        ct_bound_l = gt_center_xs - self.center_radius * flatten_stride_x
        ct_bound_t = gt_center_ys - self.center_radius * flatten_stride_y
        ct_bound_r = gt_center_xs + self.center_radius * flatten_stride_x
        ct_bound_b = gt_center_ys + self.center_radius * flatten_stride_y

        cl_ = flatten_x - ct_bound_l
        ct_ = flatten_y - ct_bound_t
        cr_ = ct_bound_r - flatten_x
        cb_ = ct_bound_b - flatten_y

        ct_deltas = paddle.stack([cl_, ct_, cr_, cb_], axis=1)
        is_in_cts = ct_deltas.min(axis=1) > 0
        is_in_cts_all = is_in_cts.sum(axis=1) > 0

        # in any of gts or gt centers, shape: [n_center]
        is_in_gts_or_centers_all = paddle.logical_or(is_in_gts_all,
                                                     is_in_cts_all)

        is_in_gts_or_centers_all_inds = paddle.nonzero(
            is_in_gts_or_centers_all).squeeze(1)

        # both in gts and gt centers, shape: [num_fg, num_gt]
        # (the masks are cast to int for the gather and back to bool after)
        is_in_gts_and_centers = paddle.logical_and(
            paddle.gather(
                is_in_gts.cast('int'), is_in_gts_or_centers_all_inds,
                axis=0).cast('bool'),
            paddle.gather(
                is_in_cts.cast('int'), is_in_gts_or_centers_all_inds,
                axis=0).cast('bool'))
        return is_in_gts_or_centers_all, is_in_gts_or_centers_all_inds, is_in_gts_and_centers

    def dynamic_k_matching(self, cost_matrix, pairwise_ious, num_gt):
        """Pick, for every gt, its dynamic-k lowest-cost priors.

        Args:
            cost_matrix (Tensor): matching cost, indexed [prior, gt].
            pairwise_ious (Tensor): IoU of every prior with every gt,
                indexed [prior, gt].
            num_gt (int): number of gts (columns) to iterate over.
        Returns:
            match_gt_inds_to_fg (ndarray): gt index matched to each
                foreground prior.
            match_fg_mask_inmatrix (ndarray, bool): per prior, whether it was
                matched to any gt.
        """
        match_matrix = np.zeros_like(cost_matrix.numpy())
        # select candidate topk ious for dynamic-k calculation; k cannot
        # exceed the number of candidate priors or paddle.topk fails.
        candidate_topk = min(self.candidate_topk, pairwise_ious.shape[0])
        topk_ious, _ = paddle.topk(pairwise_ious, candidate_topk, axis=0)
        # calculate dynamic k for each gt
        dynamic_ks = paddle.clip(topk_ious.sum(0).cast('int'), min=1)
        for gt_idx in range(num_gt):
            _, pos_idx = paddle.topk(
                cost_matrix[:, gt_idx], k=dynamic_ks[gt_idx], largest=False)
            match_matrix[:, gt_idx][pos_idx.numpy()] = 1.0

        del topk_ious, dynamic_ks, pos_idx

        # priors matched to more than one gt keep only their min-cost gt
        extra_match_gts_mask = match_matrix.sum(1) > 1
        if extra_match_gts_mask.sum() > 0:
            cost_matrix = cost_matrix.numpy()
            cost_argmin = np.argmin(
                cost_matrix[extra_match_gts_mask, :], axis=1)
            match_matrix[extra_match_gts_mask, :] *= 0.0
            match_matrix[extra_match_gts_mask, cost_argmin] = 1.0
        # get foreground mask
        match_fg_mask_inmatrix = match_matrix.sum(1) > 0
        match_gt_inds_to_fg = match_matrix[match_fg_mask_inmatrix, :].argmax(1)

        return match_gt_inds_to_fg, match_fg_mask_inmatrix

    def get_sample(self, assign_gt_inds, gt_bboxes):
        """Split priors into positives/negatives and fetch the matched gt boxes.

        Args:
            assign_gt_inds (ndarray): per prior, 0 for unassigned or
                (gt index + 1) for an assigned prior.
            gt_bboxes (ndarray): ground-truth boxes.
        Returns:
            pos_inds (ndarray): indices of priors with a gt assigned.
            neg_inds (ndarray): indices of priors without a gt.
            pos_gt_bboxes (ndarray): gt box of every positive prior.
            pos_assigned_gt_inds (ndarray): 0-based gt index of every
                positive prior.
        """
        pos_inds = np.unique(np.nonzero(assign_gt_inds > 0)[0])
        neg_inds = np.unique(np.nonzero(assign_gt_inds == 0)[0])
        pos_assigned_gt_inds = assign_gt_inds[pos_inds] - 1

        if gt_bboxes.size == 0:
            # hack for index error case
            assert pos_assigned_gt_inds.size == 0
            pos_gt_bboxes = np.empty_like(gt_bboxes).reshape(-1, 4)
        else:
            if len(gt_bboxes.shape) < 2:
                # reshape returns the new view; ndarray.resize works in place,
                # returns None and does not accept -1.
                gt_bboxes = gt_bboxes.reshape(-1, 4)
            pos_gt_bboxes = gt_bboxes[pos_assigned_gt_inds, :]
        return pos_inds, neg_inds, pos_gt_bboxes, pos_assigned_gt_inds

    def __call__(self,
                 flatten_cls_pred_scores,
                 flatten_center_and_stride,
                 flatten_bboxes,
                 gt_bboxes,
                 gt_labels,
                 eps=1e-7):
        """Assign gt to priors using SimOTA.

        Args:
            flatten_cls_pred_scores (Tensor): predicted class scores of all
                priors; the last axis is the class axis.
            flatten_center_and_stride (Tensor): per prior center x/y and
                stride x/y (columns 0..3).
            flatten_bboxes (Tensor): predicted boxes of all priors.
            gt_bboxes (Tensor): ground-truth boxes, one row per gt.
            gt_labels (Tensor): ground-truth labels; squeezed on the last
                axis before use.
            eps (float): added to the IoU before the log in the non-VFL cost.
        Returns:
            pos_num (int): number of positive priors, at least 1 (0 on the
                early return when there is no gt or no prior).
            label (ndarray, int64): class per prior, `num_classes` for
                priors without a gt.
            label_weight (ndarray, float32): per-prior label weight.
            bbox_target (ndarray): matched gt box for positive priors,
                zeros elsewhere.
        """
        num_gt = gt_bboxes.shape[0]
        num_bboxes = flatten_bboxes.shape[0]

        if num_gt == 0 or num_bboxes == 0:
            # No ground truth or boxes
            label = np.ones([num_bboxes], dtype=np.int64) * self.num_classes
            label_weight = np.ones([num_bboxes], dtype=np.float32)
            bbox_target = np.zeros_like(flatten_center_and_stride)
            return 0, label, label_weight, bbox_target

        (is_in_gts_or_centers_all, is_in_gts_or_centers_all_inds,
         is_in_boxes_and_center) = self.get_in_gt_and_in_center_info(
             flatten_center_and_stride, gt_bboxes)

        # bboxes and scores to calculate matrix
        valid_flatten_bboxes = flatten_bboxes[is_in_gts_or_centers_all_inds]
        valid_cls_pred_scores = flatten_cls_pred_scores[
            is_in_gts_or_centers_all_inds]
        num_valid_bboxes = valid_flatten_bboxes.shape[0]

        pairwise_ious = batch_bbox_overlaps(valid_flatten_bboxes,
                                            gt_bboxes)  # [num_points,num_gts]
        if self.use_vfl:
            gt_vfl_labels = gt_labels.squeeze(-1).unsqueeze(0).tile(
                [num_valid_bboxes, 1]).reshape([-1])
            valid_pred_scores = valid_cls_pred_scores.unsqueeze(1).tile(
                [1, num_gt, 1]).reshape([-1, self.num_classes])
            # VFL target: the IoU at the gt class, zero for all other classes.
            vfl_score = np.zeros(valid_pred_scores.shape)
            vfl_score[np.arange(0, vfl_score.shape[0]), gt_vfl_labels.numpy(
            )] = pairwise_ious.reshape([-1])
            vfl_score = paddle.to_tensor(vfl_score)
            losses_vfl = varifocal_loss(
                valid_pred_scores, vfl_score,
                use_sigmoid=False).reshape([num_valid_bboxes, num_gt])
            losses_giou = batch_bbox_overlaps(
                valid_flatten_bboxes, gt_bboxes, mode='giou')
            # The large constant penalises priors that are not inside both
            # the gt box and its center region.
            cost_matrix = (
                losses_vfl * self.cls_weight + losses_giou * self.iou_weight +
                paddle.logical_not(is_in_boxes_and_center).cast('float32') *
                100000000)
        else:
            iou_cost = -paddle.log(pairwise_ious + eps)
            gt_onehot_label = (F.one_hot(
                gt_labels.squeeze(-1).cast(paddle.int64),
                flatten_cls_pred_scores.shape[-1]).cast('float32').unsqueeze(0)
                               .tile([num_valid_bboxes, 1, 1]))

            valid_pred_scores = valid_cls_pred_scores.unsqueeze(1).tile(
                [1, num_gt, 1])
            cls_cost = F.binary_cross_entropy(
                valid_pred_scores, gt_onehot_label, reduction='none').sum(-1)

            cost_matrix = (
                cls_cost * self.cls_weight + iou_cost * self.iou_weight +
                paddle.logical_not(is_in_boxes_and_center).cast('float32') *
                100000000)

        match_gt_inds_to_fg, match_fg_mask_inmatrix = \
            self.dynamic_k_matching(
                cost_matrix, pairwise_ious, num_gt)

        # sample and assign results
        assigned_gt_inds = np.zeros([num_bboxes], dtype=np.int64)
        match_fg_mask_inall = np.zeros_like(assigned_gt_inds)
        match_fg_mask_inall[is_in_gts_or_centers_all.numpy(
        )] = match_fg_mask_inmatrix

        # `np.bool` was removed in NumPy 1.24; the builtin `bool` is the
        # equivalent dtype. Assigned indices are stored 1-based (0 = no gt).
        assigned_gt_inds[match_fg_mask_inall.astype(
            bool)] = match_gt_inds_to_fg + 1

        pos_inds, neg_inds, pos_gt_bboxes, pos_assigned_gt_inds \
            = self.get_sample(assigned_gt_inds, gt_bboxes.numpy())

        bbox_target = np.zeros_like(flatten_bboxes)
        bbox_weight = np.zeros_like(flatten_bboxes)
        label = np.ones([num_bboxes], dtype=np.int64) * self.num_classes
        label_weight = np.zeros([num_bboxes], dtype=np.float32)

        if len(pos_inds) > 0:
            gt_labels = gt_labels.numpy()
            pos_bbox_targets = pos_gt_bboxes
            bbox_target[pos_inds, :] = pos_bbox_targets
            bbox_weight[pos_inds, :] = 1.0
            if not np.any(gt_labels):
                label[pos_inds] = 0
            else:
                label[pos_inds] = gt_labels.squeeze(-1)[pos_assigned_gt_inds]

            label_weight[pos_inds] = 1.0
        if len(neg_inds) > 0:
            label_weight[neg_inds] = 1.0

        pos_num = max(pos_inds.size, 1)

        return pos_num, label, label_weight, bbox_target
@ -0,0 +1,158 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
import paddle |
||||
import paddle.nn as nn |
||||
import paddle.nn.functional as F |
||||
|
||||
from paddlers.models.ppdet.core.workspace import register |
||||
from ..bbox_utils import iou_similarity |
||||
from .utils import (pad_gt, gather_topk_anchors, check_points_inside_bboxes, |
||||
compute_max_iou_anchor) |
||||
|
||||
|
||||
@register
class TaskAlignedAssigner(nn.Layer):
    """TOOD: Task-aligned One-stage Object Detection

    Args:
        topk (int): number of candidates selected per gt. Default 13.
        alpha (float): exponent applied to the class score in the alignment
            metric. Default 1.0.
        beta (float): exponent applied to the IoU in the alignment metric.
            Default 6.0.
        eps (float): added to the per-gt max metric before dividing by it.
            Default 1e-9.
    """

    def __init__(self, topk=13, alpha=1.0, beta=6.0, eps=1e-9):
        super(TaskAlignedAssigner, self).__init__()
        self.topk = topk
        self.alpha = alpha
        self.beta = beta
        self.eps = eps

    @paddle.no_grad()
    def forward(self,
                pred_scores,
                pred_bboxes,
                anchor_points,
                gt_labels,
                gt_bboxes,
                bg_index,
                gt_scores=None):
        r"""This code is based on
            https://github.com/fcjian/TOOD/blob/master/mmdet/core/bbox/assigners/task_aligned_assigner.py

        The assignment is done in following steps
        1. compute alignment metric between all bbox (bbox of all pyramid levels) and gt
        2. select top-k bbox as candidates for each gt
        3. limit the positive sample's center in gt (because the anchor-free detector
           only can predict positive distance)
        4. if an anchor box is assigned to multiple gts, the one with the
           highest iou will be selected.
        Args:
            pred_scores (Tensor, float32): predicted class probability, shape(B, L, C)
            pred_bboxes (Tensor, float32): predicted bounding boxes, shape(B, L, 4)
            anchor_points (Tensor, float32): pre-defined anchors, shape(L, 2), "cxcy" format
            gt_labels (Tensor|List[Tensor], int64): Label of gt_bboxes, shape(B, n, 1)
            gt_bboxes (Tensor|List[Tensor], float32): Ground truth bboxes, shape(B, n, 4)
            bg_index (int): background index
            gt_scores (Tensor|List[Tensor]|None, float32) Score of gt_bboxes,
                    shape(B, n, 1), if None, then it will initialize with one_hot label
        Returns:
            assigned_labels (Tensor): (B, L), same dtype as the padded gt_labels
            assigned_bboxes (Tensor): (B, L, 4)
            assigned_scores (Tensor): (B, L, C)
        """
        assert pred_scores.ndim == pred_bboxes.ndim

        gt_labels, gt_bboxes, pad_gt_scores, pad_gt_mask = pad_gt(
            gt_labels, gt_bboxes, gt_scores)
        assert gt_labels.ndim == gt_bboxes.ndim and \
               gt_bboxes.ndim == 3

        batch_size, num_anchors, num_classes = pred_scores.shape
        _, num_max_boxes, _ = gt_bboxes.shape

        # negative batch
        if num_max_boxes == 0:
            # Use the label dtype so this branch returns the same dtype as the
            # regular path (paddle.full defaults to float32 otherwise).
            assigned_labels = paddle.full(
                [batch_size, num_anchors], bg_index, dtype=gt_labels.dtype)
            assigned_bboxes = paddle.zeros([batch_size, num_anchors, 4])
            assigned_scores = paddle.zeros(
                [batch_size, num_anchors, num_classes])
            return assigned_labels, assigned_bboxes, assigned_scores

        # compute iou between gt and pred bbox, [B, n, L]
        ious = iou_similarity(gt_bboxes, pred_bboxes)
        # gather pred bboxes class score
        pred_scores = pred_scores.transpose([0, 2, 1])
        batch_ind = paddle.arange(
            end=batch_size, dtype=gt_labels.dtype).unsqueeze(-1)
        # (batch index, class index) pairs used to pick, for every gt, the
        # predicted score of its class at every anchor.
        gt_labels_ind = paddle.stack(
            [batch_ind.tile([1, num_max_boxes]), gt_labels.squeeze(-1)],
            axis=-1)
        bbox_cls_scores = paddle.gather_nd(pred_scores, gt_labels_ind)
        # compute alignment metrics, [B, n, L]
        alignment_metrics = bbox_cls_scores.pow(self.alpha) * ious.pow(
            self.beta)

        # check the positive sample's center in gt, [B, n, L]
        is_in_gts = check_points_inside_bboxes(anchor_points, gt_bboxes)

        # select topk largest alignment metrics pred bbox as candidates
        # for each gt, [B, n, L]
        is_in_topk = gather_topk_anchors(
            alignment_metrics * is_in_gts,
            self.topk,
            topk_mask=pad_gt_mask.tile([1, 1, self.topk]).astype(paddle.bool))

        # select positive sample, [B, n, L]
        mask_positive = is_in_topk * is_in_gts * pad_gt_mask

        # if an anchor box is assigned to multiple gts,
        # the one with the highest iou will be selected, [B, n, L]
        mask_positive_sum = mask_positive.sum(axis=-2)
        if mask_positive_sum.max() > 1:
            mask_multiple_gts = (mask_positive_sum.unsqueeze(1) > 1).tile(
                [1, num_max_boxes, 1])
            is_max_iou = compute_max_iou_anchor(ious)
            mask_positive = paddle.where(mask_multiple_gts, is_max_iou,
                                         mask_positive)
            mask_positive_sum = mask_positive.sum(axis=-2)
        assigned_gt_index = mask_positive.argmax(axis=-2)
        # A batch where no anchor was selected (max == 0) is valid: every
        # anchor is mapped to `bg_index` below. Only more than one gt per
        # anchor violates the invariant.
        assert mask_positive_sum.max() <= 1, \
            ("one anchor can be assigned to at most one gt. "
             "Received: %f" % mask_positive_sum.max().item())

        # assigned target
        # Turn per-image gt indices into indices of the flattened [B * n] gts.
        assigned_gt_index = assigned_gt_index + batch_ind * num_max_boxes
        assigned_labels = paddle.gather(
            gt_labels.flatten(), assigned_gt_index.flatten(), axis=0)
        assigned_labels = assigned_labels.reshape([batch_size, num_anchors])
        assigned_labels = paddle.where(
            mask_positive_sum > 0, assigned_labels,
            paddle.full_like(assigned_labels, bg_index))

        assigned_bboxes = paddle.gather(
            gt_bboxes.reshape([-1, 4]), assigned_gt_index.flatten(), axis=0)
        assigned_bboxes = assigned_bboxes.reshape([batch_size, num_anchors, 4])

        # NOTE(review): background anchors carry `bg_index` here; confirm that
        # F.one_hot with depth `num_classes` accepts that value.
        assigned_scores = F.one_hot(assigned_labels, num_classes)
        # rescale alignment metrics: normalise by each gt's max metric, then
        # scale by each gt's max IoU over its positive anchors
        alignment_metrics *= mask_positive
        max_metrics_per_instance = alignment_metrics.max(axis=-1, keepdim=True)
        max_ious_per_instance = (ious * mask_positive).max(axis=-1,
                                                           keepdim=True)
        alignment_metrics = alignment_metrics / (
            max_metrics_per_instance + self.eps) * max_ious_per_instance
        alignment_metrics = alignment_metrics.max(-2).unsqueeze(-1)
        assigned_scores = assigned_scores * alignment_metrics

        return assigned_labels, assigned_bboxes, assigned_scores
@ -0,0 +1,195 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
import paddle |
||||
import paddle.nn.functional as F |
||||
|
||||
# Names exported by `from .utils import *`.
__all__ = [
    'pad_gt',
    'gather_topk_anchors',
    'check_points_inside_bboxes',
    'compute_max_iou_anchor',
    'compute_max_iou_gt',
    'generate_anchors_for_grid_cell',
]
||||
|
||||
|
||||
def pad_gt(gt_labels, gt_bboxes, gt_scores=None):
    r""" Pad 0 in gt_labels and gt_bboxes.

    Tensor inputs are returned as given, together with a mask marking boxes
    whose coordinates sum to a positive value. List inputs are copied into
    zero tensors sized by the largest per-image box count.

    Args:
        gt_labels (Tensor|List[Tensor], int64): Label of gt_bboxes,
            shape is [B, n, 1] or [[n_1, 1], [n_2, 1], ...], here n = max(n_i)
        gt_bboxes (Tensor|List[Tensor], float32): Ground truth bboxes,
            shape is [B, n, 4] or [[n_1, 4], [n_2, 4], ...], here n = max(n_i)
        gt_scores (Tensor|List[Tensor]|None, float32): Score of gt_bboxes,
            shape is [B, n, 1] or [[n_1, 1], [n_2, 1], ...], here n = max(n_i)
    Returns:
        pad_gt_labels (Tensor, int64): shape[B, n, 1]
        pad_gt_bboxes (Tensor, float32): shape[B, n, 4]
        pad_gt_scores (Tensor, float32): shape[B, n, 1]; a copy of
            pad_gt_mask when `gt_scores` is None
        pad_gt_mask (Tensor, float32): shape[B, n, 1], 1 means bbox, 0 means no bbox
    Raises:
        ValueError: if the two inputs are neither both Tensors nor both lists.
    """
    both_tensors = isinstance(gt_labels, paddle.Tensor) and isinstance(
        gt_bboxes, paddle.Tensor)
    both_lists = isinstance(gt_labels, list) and isinstance(gt_bboxes, list)

    if both_tensors:
        assert gt_labels.ndim == gt_bboxes.ndim and \
               gt_bboxes.ndim == 3
        # A row counts as a real box when its coordinates sum to > 0.
        box_sums = gt_bboxes.sum(axis=-1, keepdim=True)
        pad_gt_mask = (box_sums > 0).astype(gt_bboxes.dtype)
        if gt_scores is None:
            gt_scores = pad_gt_mask.clone()
        assert gt_labels.ndim == gt_scores.ndim

        return gt_labels, gt_bboxes, gt_scores, pad_gt_mask

    if not both_lists:
        raise ValueError('The input `gt_labels` or `gt_bboxes` is invalid! ')

    assert len(gt_labels) == len(gt_bboxes), \
        'The number of `gt_labels` and `gt_bboxes` is not equal. '
    num_max_boxes = max(len(boxes) for boxes in gt_bboxes)
    batch_size = len(gt_bboxes)
    label_dtype = gt_labels[0].dtype
    box_dtype = gt_bboxes[0].dtype
    # pad label and bbox
    pad_gt_labels = paddle.zeros(
        [batch_size, num_max_boxes, 1], dtype=label_dtype)
    pad_gt_bboxes = paddle.zeros(
        [batch_size, num_max_boxes, 4], dtype=box_dtype)
    pad_gt_scores = paddle.zeros(
        [batch_size, num_max_boxes, 1], dtype=box_dtype)
    pad_gt_mask = paddle.zeros(
        [batch_size, num_max_boxes, 1], dtype=box_dtype)
    for i, (label, bbox) in enumerate(zip(gt_labels, gt_bboxes)):
        # Images without labels or boxes keep their all-zero rows.
        if len(label) == 0 or len(bbox) == 0:
            continue
        num_boxes = len(bbox)
        pad_gt_labels[i, :len(label)] = label
        pad_gt_bboxes[i, :num_boxes] = bbox
        pad_gt_mask[i, :num_boxes] = 1.
        if gt_scores is not None:
            pad_gt_scores[i, :len(gt_scores[i])] = gt_scores[i]
    if gt_scores is None:
        pad_gt_scores = pad_gt_mask.clone()
    return pad_gt_labels, pad_gt_bboxes, pad_gt_scores, pad_gt_mask
||||
|
||||
|
||||
def gather_topk_anchors(metrics, topk, largest=True, topk_mask=None, eps=1e-9):
    r"""
    Build a mask of the `topk` anchors picked for every gt along the last axis.

    Args:
        metrics (Tensor, float32): shape[B, n, L], n: num_gts, L: num_anchors
        topk (int): The number of top elements to look for along the axis.
        largest (bool) : largest is a flag, if set to true,
            algorithm will sort by descending order, otherwise sort by
            ascending order. Default: True
        topk_mask (Tensor, bool|None): shape[B, n, topk], ignore bbox mask,
            Default: None, in which case a gt is kept when its best
            selected metric exceeds `eps`
        eps (float): Default: 1e-9
    Returns:
        is_in_topk (Tensor, float32): shape[B, n, L], value=1. means selected
    """
    anchor_count = metrics.shape[-1]
    top_values, top_indices = paddle.topk(
        metrics, topk, axis=-1, largest=largest)
    if topk_mask is None:
        has_signal = top_values.max(axis=-1, keepdim=True) > eps
        topk_mask = has_signal.tile([1, 1, topk])
    # Masked-out rows are pointed at index 0; the resulting count at that
    # index exceeds 1 and is zeroed below.
    zero_indices = paddle.zeros_like(top_indices)
    top_indices = paddle.where(topk_mask, top_indices, zero_indices)
    hit_counts = F.one_hot(top_indices, anchor_count).sum(axis=-2)
    hit_counts = paddle.where(hit_counts > 1,
                              paddle.zeros_like(hit_counts), hit_counts)
    return hit_counts.astype(metrics.dtype)
||||
|
||||
|
||||
def check_points_inside_bboxes(points, bboxes, eps=1e-9):
    r"""
    Test which anchor points lie strictly inside which boxes.

    A point counts as inside when its distance to each of the four box
    edges is greater than `eps`.

    Args:
        points (Tensor, float32): shape[L, 2], "xy" format, L: num_anchors
        bboxes (Tensor, float32): shape[B, n, 4], "xmin, ymin, xmax, ymax" format
        eps (float): Default: 1e-9
    Returns:
        is_in_bboxes (Tensor, float32): shape[B, n, L], value=1. means selected
    """
    points = points.unsqueeze([0, 1])
    px, py = points.chunk(2, axis=-1)
    box_x0, box_y0, box_x1, box_y1 = bboxes.unsqueeze(2).chunk(4, axis=-1)
    # Signed distances from the point to the left/top/right/bottom edges.
    edge_dists = paddle.concat(
        [px - box_x0, py - box_y0, box_x1 - px, box_y1 - py], axis=-1)
    inside = edge_dists.min(axis=-1) > eps
    return inside.astype(bboxes.dtype)
||||
|
||||
|
||||
def compute_max_iou_anchor(ious):
    r"""
    For each anchor, find the GT with the largest IOU.

    Args:
        ious (Tensor, float32): shape[B, n, L], n: num_gts, L: num_anchors
    Returns:
        is_max_iou (Tensor, float32): shape[B, n, L], value=1. means selected
    """
    gt_count = ious.shape[-2]
    best_gt_per_anchor = ious.argmax(axis=-2)
    # one_hot appends the GT axis last; swap it back in front of the anchors.
    one_hot_gt = F.one_hot(best_gt_per_anchor, gt_count)
    selected = one_hot_gt.transpose([0, 2, 1])
    return selected.astype(ious.dtype)
||||
|
||||
|
||||
def compute_max_iou_gt(ious):
    r"""
    For each GT, find the anchor with the largest IOU.

    Args:
        ious (Tensor, float32): shape[B, n, L], n: num_gts, L: num_anchors
    Returns:
        is_max_iou (Tensor, float32): shape[B, n, L], value=1. means selected
    """
    anchor_count = ious.shape[-1]
    best_anchor_per_gt = ious.argmax(axis=-1)
    selected = F.one_hot(best_anchor_per_gt, anchor_count)
    return selected.astype(ious.dtype)
||||
|
||||
|
||||
def generate_anchors_for_grid_cell(feats,
                                   fpn_strides,
                                   grid_cell_size=5.0,
                                   grid_cell_offset=0.5):
    r"""
    Like ATSS, generate anchors based on grid size.

    For every feature level one square anchor is generated per grid cell,
    centred at ``(index + grid_cell_offset) * stride`` with side length
    ``grid_cell_size * stride``.

    Args:
        feats (List[Tensor]): shape[s, (b, c, h, w)]
        fpn_strides (tuple|list): shape[s], stride for each scale feature
        grid_cell_size (float): anchor size
        grid_cell_offset (float): The range is between 0 and 1.
    Returns:
        anchors (List[Tensor]): shape[s, (l, 4)]
        num_anchors_list (List[int]): shape[s]
        stride_tensor_list (List[Tensor]): shape[s, (l, 1)]
    """
    assert len(feats) == len(fpn_strides)
    anchors = []
    num_anchors_list = []
    stride_tensor_list = []
    for feat, stride in zip(feats, fpn_strides):
        _, _, h, w = feat.shape
        cell_half_size = grid_cell_size * stride * 0.5
        shift_x = (paddle.arange(end=w) + grid_cell_offset) * stride
        shift_y = (paddle.arange(end=h) + grid_cell_offset) * stride
        shift_y, shift_x = paddle.meshgrid(shift_y, shift_x)
        anchor = paddle.stack(
            [
                shift_x - cell_half_size, shift_y - cell_half_size,
                shift_x + cell_half_size, shift_y + cell_half_size
            ],
            axis=-1).astype(feat.dtype)
        anchors.append(anchor.reshape([-1, 4]))
        num_anchors_list.append(len(anchors[-1]))
        # Use the feature dtype for the stride tensor as well, so it matches
        # the anchors above instead of falling back to paddle.full's default
        # dtype when the features are not float32.
        stride_tensor_list.append(
            paddle.full(
                [num_anchors_list[-1], 1], stride, dtype=feat.dtype))
    return anchors, num_anchors_list, stride_tensor_list
@ -0,0 +1,49 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from . import vgg |
||||
from . import resnet |
||||
from . import darknet |
||||
from . import mobilenet_v1 |
||||
from . import mobilenet_v3 |
||||
from . import hrnet |
||||
from . import lite_hrnet |
||||
from . import blazenet |
||||
from . import ghostnet |
||||
from . import senet |
||||
from . import res2net |
||||
from . import dla |
||||
from . import shufflenet_v2 |
||||
from . import swin_transformer |
||||
from . import lcnet |
||||
from . import hardnet |
||||
from . import esnet |
||||
|
||||
from .vgg import * |
||||
from .resnet import * |
||||
from .darknet import * |
||||
from .mobilenet_v1 import * |
||||
from .mobilenet_v3 import * |
||||
from .hrnet import * |
||||
from .lite_hrnet import * |
||||
from .blazenet import * |
||||
from .ghostnet import * |
||||
from .senet import * |
||||
from .res2net import * |
||||
from .dla import * |
||||
from .shufflenet_v2 import * |
||||
from .swin_transformer import * |
||||
from .lcnet import * |
||||
from .hardnet import * |
||||
from .esnet import * |
@ -0,0 +1,320 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
import paddle |
||||
import paddle.nn as nn |
||||
import paddle.nn.functional as F |
||||
from paddle import ParamAttr |
||||
from paddle.nn.initializer import KaimingNormal |
||||
from paddlers.models.ppdet.core.workspace import register, serializable |
||||
from ..shape_spec import ShapeSpec |
||||
|
||||
__all__ = ['BlazeNet'] |
||||
|
||||
|
||||
def hard_swish(x):
    """Apply the hard-swish activation, ``x * relu6(x + 3) / 6``."""
    gate = F.relu6(x + 3)
    return x * gate / 6.
||||
|
||||
|
||||
class ConvBNLayer(nn.Layer):
    """Bias-free Conv2D followed by BatchNorm2D and an optional activation.

    Args:
        in_channels (int): number of input channels.
        out_channels (int): number of output channels.
        kernel_size (int): convolution kernel size.
        stride (int): convolution stride.
        padding (int): convolution padding.
        num_groups (int): number of convolution groups, default 1.
        act (str|None): one of 'relu', 'relu6', 'leaky', 'hard_swish';
            any other value applies no activation. Default 'relu'.
        conv_lr (float): learning-rate factor of the conv weight, default 0.1.
        conv_decay (float): accepted but not used by this layer.
        norm_decay (float): accepted but not used by this layer.
        norm_type (str): 'bn' or 'sync_bn'; both create a BatchNorm2D.
        name (str|None): accepted but not used by this layer.

    Raises:
        ValueError: if `norm_type` is neither 'bn' nor 'sync_bn'.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 padding,
                 num_groups=1,
                 act='relu',
                 conv_lr=0.1,
                 conv_decay=0.,
                 norm_decay=0.,
                 norm_type='bn',
                 name=None):
        super(ConvBNLayer, self).__init__()
        # Fail at construction time: previously an unsupported norm_type left
        # `_batch_norm` undefined and only surfaced as an AttributeError on
        # the first forward pass.
        if norm_type not in ['bn', 'sync_bn']:
            raise ValueError(
                "norm_type must be 'bn' or 'sync_bn', but got {!r}".format(
                    norm_type))
        self.act = act
        self._conv = nn.Conv2D(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            weight_attr=ParamAttr(
                learning_rate=conv_lr, initializer=KaimingNormal()),
            bias_attr=False)
        self._batch_norm = nn.BatchNorm2D(out_channels)

    def forward(self, x):
        x = self._conv(x)
        x = self._batch_norm(x)
        if self.act == "relu":
            x = F.relu(x)
        elif self.act == "relu6":
            x = F.relu6(x)
        elif self.act == 'leaky':
            x = F.leaky_relu(x)
        elif self.act == 'hard_swish':
            x = hard_swish(x)
        return x
||||
|
||||
|
||||
class BlazeBlock(nn.Layer):
    """Single or double Blaze block with a residual connection.

    The main path is depthwise conv(s) followed by a 1x1 pointwise conv.
    When `double_channels` is given, a second depthwise + pointwise stage
    is appended. The output is ``relu(shortcut(x) + main(x))``; the
    shortcut is the identity for stride 1 and max-pool + 1x1 conv otherwise.

    Args:
        in_channels (int): number of input channels.
        out_channels1 (int): channels of the first depthwise stage.
        out_channels2 (int): channels after the first pointwise conv.
        double_channels (int|None): output channels of the second stage;
            None builds a single block. Default None.
        stride (int): 1 or 2. Default 1.
        use_5x5kernel (bool): use one 5x5 depthwise conv per stage instead of
            two stacked 3x3 depthwise convs. Default True.
        act (str|None): activation of the first pointwise conv; it is only
            applied in double blocks (single blocks use no activation there).
        name (str|None): prefix for the registered sublayer names. None is
            treated as an empty prefix.
    """

    def __init__(self,
                 in_channels,
                 out_channels1,
                 out_channels2,
                 double_channels=None,
                 stride=1,
                 use_5x5kernel=True,
                 act='relu',
                 name=None):
        super(BlazeBlock, self).__init__()
        assert stride in [1, 2]
        # The default None used to raise TypeError on `name + "1_dw"`.
        name = '' if name is None else name
        self.use_pool = not stride == 1
        self.use_double_block = double_channels is not None
        self.conv_dw = self._make_depthwise(
            name + "1", in_channels, out_channels1, stride, use_5x5kernel)
        self.act = act if self.use_double_block else None
        self.conv_pw = ConvBNLayer(
            in_channels=out_channels1,
            out_channels=out_channels2,
            kernel_size=1,
            stride=1,
            padding=0,
            act=self.act,
            name=name + "1_sep")
        if self.use_double_block:
            self.conv_dw2 = self._make_depthwise(
                name + "2", out_channels2, out_channels2, 1, use_5x5kernel)
            self.conv_pw2 = ConvBNLayer(
                in_channels=out_channels2,
                out_channels=double_channels,
                kernel_size=1,
                stride=1,
                padding=0,
                name=name + "2_sep")
        # shortcut
        if self.use_pool:
            shortcut_channel = double_channels or out_channels2
            self._shortcut = []
            self._shortcut.append(
                self.add_sublayer(
                    name + '_shortcut_pool',
                    nn.MaxPool2D(
                        kernel_size=stride, stride=stride, ceil_mode=True)))
            self._shortcut.append(
                self.add_sublayer(
                    name + '_shortcut_conv',
                    ConvBNLayer(
                        in_channels=in_channels,
                        out_channels=shortcut_channel,
                        kernel_size=1,
                        stride=1,
                        padding=0,
                        name="shortcut" + name)))

    def _make_depthwise(self, prefix, in_channels, out_channels, stride,
                        use_5x5kernel):
        """Register and return the depthwise conv(s) of one stage, in order."""
        if use_5x5kernel:
            # (sublayer name, input channels, kernel, stride, padding)
            specs = [(prefix + "_dw", in_channels, 5, stride, 2)]
        else:
            # Two stacked 3x3 convs; only the second one carries the stride.
            specs = [
                (prefix + "_dw_1", in_channels, 3, 1, 1),
                (prefix + "_dw_2", out_channels, 3, stride, 1),
            ]
        layers = []
        for layer_name, ch_in, kernel, conv_stride, pad in specs:
            layers.append(
                self.add_sublayer(
                    layer_name,
                    ConvBNLayer(
                        in_channels=ch_in,
                        out_channels=out_channels,
                        kernel_size=kernel,
                        stride=conv_stride,
                        padding=pad,
                        num_groups=out_channels,
                        name=layer_name)))
        return layers

    def forward(self, x):
        y = x
        for conv_dw_block in self.conv_dw:
            y = conv_dw_block(y)
        y = self.conv_pw(y)
        if self.use_double_block:
            for conv_dw2_block in self.conv_dw2:
                y = conv_dw2_block(y)
            y = self.conv_pw2(y)
        if self.use_pool:
            for shortcut in self._shortcut:
                x = shortcut(x)
        return F.relu(paddle.add(x, y))
||||
|
||||
|
||||
@register
@serializable
class BlazeNet(nn.Layer):
    """
    BlazeFace, see https://arxiv.org/abs/1907.05047

    Args:
        blaze_filters (list): number of filter for each blaze block. Each
            entry is ``[out_channels1, out_channels2]`` or
            ``[out_channels1, out_channels2, stride]``.
        double_blaze_filters (list): number of filter for each double_blaze
            block. Each entry is
            ``[out_channels1, out_channels2, double_channels]`` or
            ``[out_channels1, out_channels2, double_channels, stride]``.
        use_5x5kernel (bool): whether or not filter size is 5x5 in depth-wise conv.
        act (str|None): activation passed to every BlazeBlock.
    """

    # NOTE: the list defaults below are kept as literals so the signature is
    # unchanged; they are only read in __init__, never mutated.
    def __init__(self,
                 blaze_filters=[[24, 24], [24, 24], [24, 48, 2], [48, 48],
                                [48, 48]],
                 double_blaze_filters=[[48, 24, 96, 2], [96, 24, 96],
                                       [96, 24, 96], [96, 24, 96, 2],
                                       [96, 24, 96], [96, 24, 96]],
                 use_5x5kernel=True,
                 act=None):
        super(BlazeNet, self).__init__()
        conv1_num_filters = blaze_filters[0][0]
        self.conv1 = ConvBNLayer(
            in_channels=3,
            out_channels=conv1_num_filters,
            kernel_size=3,
            stride=2,
            padding=1,
            name="conv1")
        in_channels = conv1_num_filters
        self.blaze_block = []
        self._out_channels = []
        for k, v in enumerate(blaze_filters):
            assert len(v) in [2, 3], \
                "blaze_filters[{}] has {} items, expected 2 or 3".format(
                    k, len(v))
            # A third item, when present, is the stride; otherwise stride 1.
            stride = v[2] if len(v) == 3 else 1
            block_name = 'blaze_{}'.format(k)
            self.blaze_block.append(
                self.add_sublayer(
                    block_name,
                    BlazeBlock(
                        in_channels,
                        v[0],
                        v[1],
                        stride=stride,
                        use_5x5kernel=use_5x5kernel,
                        act=act,
                        name=block_name)))
            in_channels = v[1]

        for k, v in enumerate(double_blaze_filters):
            assert len(v) in [3, 4], \
                "double_blaze_filters[{}] has {} items, expected 3 or 4".format(
                    k, len(v))
            # A fourth item, when present, is the stride; otherwise stride 1.
            stride = v[3] if len(v) == 4 else 1
            block_name = 'double_blaze_{}'.format(k)
            self.blaze_block.append(
                self.add_sublayer(
                    block_name,
                    BlazeBlock(
                        in_channels,
                        v[0],
                        v[1],
                        double_channels=v[2],
                        stride=stride,
                        use_5x5kernel=use_5x5kernel,
                        act=act,
                        name=block_name)))
            in_channels = v[2]
            # Only double-blaze outputs are recorded as candidate outputs.
            self._out_channels.append(in_channels)

    def forward(self, inputs):
        outs = []
        y = self.conv1(inputs['image'])
        for block in self.blaze_block:
            y = block(y)
            outs.append(y)
        # Return the 4th-from-last and the last block outputs.
        return [outs[-4], outs[-1]]

    @property
    def out_shape(self):
        return [
            ShapeSpec(channels=c)
            for c in [self._out_channels[-4], self._out_channels[-1]]
        ]
@ -0,0 +1,340 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
import paddle |
||||
import paddle.nn as nn |
||||
import paddle.nn.functional as F |
||||
|
||||
from paddlers.models.ppdet.core.workspace import register, serializable |
||||
from paddlers.models.ppdet.modeling.ops import batch_norm, mish |
||||
from ..shape_spec import ShapeSpec |
||||
|
||||
__all__ = ['DarkNet', 'ConvBNLayer'] |
||||
|
||||
|
||||
class ConvBNLayer(nn.Layer):
    def __init__(self,
                 ch_in,
                 ch_out,
                 filter_size=3,
                 stride=1,
                 groups=1,
                 padding=0,
                 norm_type='bn',
                 norm_decay=0.,
                 act="leaky",
                 freeze_norm=False,
                 data_format='NCHW',
                 name=''):
        """
        Bias-free convolution followed by a norm layer and an activation.

        Args:
            ch_in (int): input channel
            ch_out (int): output channel
            filter_size (int): filter size, default 3
            stride (int): stride, default 1
            groups (int): number of groups of conv layer, default 1
            padding (int): padding size, default 0
            norm_type (str): batch norm type, default bn
            norm_decay (float): decay for weight and bias of batch norm layer, default 0.
            act (str): 'leaky' applies leaky_relu with slope 0.1, 'mish'
                applies mish, anything else applies no activation
            freeze_norm (bool): whether to freeze norm, default False
            data_format (str): data format, NCHW or NHWC
            name (str): accepted but not used by this layer
        """
        super(ConvBNLayer, self).__init__()

        conv_kwargs = dict(
            in_channels=ch_in,
            out_channels=ch_out,
            kernel_size=filter_size,
            stride=stride,
            padding=padding,
            groups=groups,
            data_format=data_format,
            bias_attr=False)
        self.conv = nn.Conv2D(**conv_kwargs)

        norm_kwargs = dict(
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            data_format=data_format)
        self.batch_norm = batch_norm(ch_out, **norm_kwargs)
        self.act = act

    def forward(self, inputs):
        feat = self.batch_norm(self.conv(inputs))
        if self.act == 'leaky':
            return F.leaky_relu(feat, 0.1)
        if self.act == 'mish':
            return mish(feat)
        return feat
||||
|
||||
|
||||
class DownSample(nn.Layer):
    def __init__(self,
                 ch_in,
                 ch_out,
                 filter_size=3,
                 stride=2,
                 padding=1,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=False,
                 data_format='NCHW'):
        """
        Downsampling layer: a single ConvBNLayer, strided by default.

        Args:
            ch_in (int): input channel
            ch_out (int): output channel
            filter_size (int): filter size, default 3
            stride (int): stride, default 2
            padding (int): padding size, default 1
            norm_type (str): batch norm type, default bn
            norm_decay (float): decay for weight and bias of batch norm layer, default 0.
            freeze_norm (bool): whether to freeze norm, default False
            data_format (str): data format, NCHW or NHWC
        """
        super(DownSample, self).__init__()

        norm_cfg = dict(
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            data_format=data_format)
        self.conv_bn_layer = ConvBNLayer(
            ch_in=ch_in,
            ch_out=ch_out,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            **norm_cfg)
        self.ch_out = ch_out

    def forward(self, inputs):
        return self.conv_bn_layer(inputs)
||||
|
||||
|
||||
class BasicBlock(nn.Layer):
    def __init__(self,
                 ch_in,
                 ch_out,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=False,
                 data_format='NCHW'):
        """
        BasicBlock layer of DarkNet: a 1x1 conv to `ch_out` channels, a 3x3
        conv to `ch_out * 2` channels, and the block input added on top.

        Args:
            ch_in (int): input channel
            ch_out (int): output channel
            norm_type (str): batch norm type, default bn
            norm_decay (float): decay for weight and bias of batch norm layer, default 0.
            freeze_norm (bool): whether to freeze norm, default False
            data_format (str): data format, NCHW or NHWC
        """
        super(BasicBlock, self).__init__()

        norm_cfg = dict(
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            data_format=data_format)
        self.conv1 = ConvBNLayer(
            ch_in=ch_in,
            ch_out=ch_out,
            filter_size=1,
            stride=1,
            padding=0,
            **norm_cfg)
        self.conv2 = ConvBNLayer(
            ch_in=ch_out,
            ch_out=ch_out * 2,
            filter_size=3,
            stride=1,
            padding=1,
            **norm_cfg)

    def forward(self, inputs):
        branch = self.conv2(self.conv1(inputs))
        return paddle.add(x=inputs, y=branch)
||||
|
||||
|
||||
class Blocks(nn.Layer):
    def __init__(self,
                 ch_in,
                 ch_out,
                 count,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=False,
                 name=None,
                 data_format='NCHW'):
        """
        Blocks layer, which consists of `count` chained BasicBlock layers.

        Args:
            ch_in (int): input channel
            ch_out (int): output channel
            count (int): number of BasicBlock layer
            norm_type (str): batch norm type, default bn
            norm_decay (float): decay for weight and bias of batch norm layer, default 0.
            freeze_norm (bool): whether to freeze norm, default False
            name (str): layer name, used as prefix of the extra sublayers
            data_format (str): data format, NCHW or NHWC
        """
        super(Blocks, self).__init__()

        norm_cfg = dict(
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            data_format=data_format)
        self.basicblock0 = BasicBlock(ch_in, ch_out, **norm_cfg)

        # Blocks 1..count-1 take the doubled channel count as their input.
        self.res_out_list = []
        for idx in range(1, count):
            follow_up = BasicBlock(ch_out * 2, ch_out, **norm_cfg)
            registered = self.add_sublayer('{}.{}'.format(name, idx),
                                           follow_up)
            self.res_out_list.append(registered)
        self.ch_out = ch_out

    def forward(self, inputs):
        feat = self.basicblock0(inputs)
        for block in self.res_out_list:
            feat = block(feat)
        return feat
||||
|
||||
|
||||
# Maps network depth to the number of BasicBlocks in each stage.
DarkNet_cfg = {
    53: [1, 2, 8, 8, 4],
}
||||
|
||||
|
||||
@register
@serializable
class DarkNet(nn.Layer):
    __shared__ = ['norm_type', 'data_format']

    def __init__(self,
                 depth=53,
                 freeze_at=-1,
                 return_idx=[2, 3, 4],
                 num_stages=5,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=False,
                 data_format='NCHW'):
        """
        Darknet, see https://pjreddie.com/darknet/yolo/

        Args:
            depth (int): depth of network, a key of DarkNet_cfg
            freeze_at (int): index of the stage whose output has its
                gradient stopped
            return_idx (list): index of stages whose feature maps are returned
            num_stages (int): number of stages taken from the depth config
            norm_type (str): batch norm type, default bn
            norm_decay (float): decay for weight and bias of batch norm layer, default 0.
            freeze_norm (bool): whether to freeze norm, default False
            data_format (str): data format, NCHW or NHWC
        """
        super(DarkNet, self).__init__()
        self.depth = depth
        self.freeze_at = freeze_at
        self.return_idx = return_idx
        self.num_stages = num_stages
        self.stages = DarkNet_cfg[self.depth][0:num_stages]

        norm_cfg = dict(
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            data_format=data_format)

        # Stem: 3 -> 32 channels, then a downsample to 64 channels.
        self.conv0 = ConvBNLayer(
            ch_in=3,
            ch_out=32,
            filter_size=3,
            stride=1,
            padding=1,
            **norm_cfg)
        self.downsample0 = DownSample(ch_in=32, ch_out=32 * 2, **norm_cfg)

        self._out_channels = []
        self.darknet_conv_block_list = []
        self.downsample_list = []

        stage_in_channels = [64, 128, 256, 512, 1024]
        for idx, block_count in enumerate(self.stages):
            stage_name = 'stage.{}'.format(idx)
            stage_block = self.add_sublayer(
                stage_name,
                Blocks(
                    int(stage_in_channels[idx]),
                    32 * (2**idx),
                    block_count,
                    name=stage_name,
                    **norm_cfg))
            self.darknet_conv_block_list.append(stage_block)
            if idx in return_idx:
                self._out_channels.append(64 * (2**idx))

        # Downsample layers sit between consecutive stages and are registered
        # after all stage blocks.
        for idx in range(num_stages - 1):
            transition = self.add_sublayer(
                'stage.{}.downsample'.format(idx),
                DownSample(
                    ch_in=32 * (2**(idx + 1)),
                    ch_out=32 * (2**(idx + 2)),
                    **norm_cfg))
            self.downsample_list.append(transition)

    def forward(self, inputs):
        feat = self.downsample0(self.conv0(inputs['image']))

        collected = []
        last_stage = self.num_stages - 1
        for idx, stage_block in enumerate(self.darknet_conv_block_list):
            feat = stage_block(feat)
            if idx == self.freeze_at:
                feat.stop_gradient = True
            if idx in self.return_idx:
                collected.append(feat)
            if idx < last_stage:
                feat = self.downsample_list[idx](feat)
        return collected

    @property
    def out_shape(self):
        return [ShapeSpec(channels=ch) for ch in self._out_channels]
@ -0,0 +1,244 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
import paddle |
||||
import paddle.nn as nn |
||||
import paddle.nn.functional as F |
||||
from paddlers.models.ppdet.core.workspace import register, serializable |
||||
from paddlers.models.ppdet.modeling.layers import ConvNormLayer |
||||
from ..shape_spec import ShapeSpec |
||||
|
||||
# Maps DLA depth to (levels, channels): the per-level block/tree depths and
# the per-level channel counts.
DLA_cfg = {
    34: (
        [1, 1, 1, 2, 2, 1],
        [16, 32, 64, 128, 256, 512],
    ),
}
||||
|
||||
|
||||
class BasicBlock(nn.Layer):
    """Two 3x3 ConvNormLayers with a residual addition and ReLU.

    Args:
        ch_in (int): input channel
        ch_out (int): output channel
        stride (int): stride of the first conv, default 1
    """

    def __init__(self, ch_in, ch_out, stride=1):
        super(BasicBlock, self).__init__()
        shared = dict(filter_size=3, bias_on=False, norm_decay=None)
        self.conv1 = ConvNormLayer(ch_in, ch_out, stride=stride, **shared)
        self.conv2 = ConvNormLayer(ch_out, ch_out, stride=1, **shared)

    def forward(self, inputs, residual=None):
        # Without an explicit residual the block input is the skip branch.
        skip = inputs if residual is None else residual

        hidden = F.relu(self.conv1(inputs))
        hidden = self.conv2(hidden)

        merged = paddle.add(x=hidden, y=skip)
        return F.relu(merged)
||||
|
||||
|
||||
class Root(nn.Layer):
    """Fuse a list of feature maps: concat on axis 1, 1x1 conv, ReLU.

    Args:
        ch_in (int): channels of the concatenated input
        ch_out (int): output channel
        kernel_size (int): accepted but not used; the conv is always 1x1
        residual (bool): whether to add the first input to the conv output
    """

    def __init__(self, ch_in, ch_out, kernel_size, residual):
        super(Root, self).__init__()
        self.conv = ConvNormLayer(
            ch_in,
            ch_out,
            filter_size=1,
            stride=1,
            bias_on=False,
            norm_decay=None)
        self.residual = residual

    def forward(self, inputs):
        fused = self.conv(paddle.concat(inputs, axis=1))
        if self.residual:
            fused = paddle.add(x=fused, y=inputs[0])
        return F.relu(fused)
||||
|
||||
|
||||
class Tree(nn.Layer):
    """Recursive aggregation tree of DLA.

    At level 1 the two children are plain `block`s whose outputs are fused by
    a Root; at higher levels the children are Trees of `level - 1`.

    Args:
        level (int): tree depth
        block (class): block class built as ``block(ch_in, ch_out, stride)``
        ch_in (int): input channel
        ch_out (int): output channel
        stride (int): stride of the first child, default 1
        level_root (bool): whether the (downsampled) input is also fed to
            the root
        root_dim (int): input channels of the root; 0 means ``2 * ch_out``
        root_kernel_size (int): forwarded to Root
        root_residual (bool): forwarded to Root
    """

    def __init__(self,
                 level,
                 block,
                 ch_in,
                 ch_out,
                 stride=1,
                 level_root=False,
                 root_dim=0,
                 root_kernel_size=1,
                 root_residual=False):
        super(Tree, self).__init__()
        if root_dim == 0:
            root_dim = 2 * ch_out
        if level_root:
            root_dim += ch_in

        is_leaf_level = level == 1
        if is_leaf_level:
            self.tree1 = block(ch_in, ch_out, stride)
            self.tree2 = block(ch_out, ch_out, 1)
            self.root = Root(root_dim, ch_out, root_kernel_size, root_residual)
        else:
            subtree_cfg = dict(
                root_kernel_size=root_kernel_size,
                root_residual=root_residual)
            self.tree1 = Tree(
                level - 1, block, ch_in, ch_out, stride, root_dim=0,
                **subtree_cfg)
            self.tree2 = Tree(
                level - 1,
                block,
                ch_out,
                ch_out,
                1,
                root_dim=root_dim + ch_out,
                **subtree_cfg)

        self.level_root = level_root
        self.root_dim = root_dim
        self.level = level

        # Optional adapters that bring the input to the output resolution and
        # channel count.
        self.downsample = None
        self.project = None
        if stride > 1:
            self.downsample = nn.MaxPool2D(stride, stride=stride)
        if ch_in != ch_out:
            self.project = ConvNormLayer(
                ch_in,
                ch_out,
                filter_size=1,
                stride=1,
                bias_on=False,
                norm_decay=None)

    def forward(self, x, residual=None, children=None):
        if children is None:
            children = []
        bottom = self.downsample(x) if self.downsample else x
        # The incoming `residual` argument is always replaced here.
        residual = self.project(bottom) if self.project else bottom
        if self.level_root:
            children.append(bottom)

        first = self.tree1(x, residual)
        if self.level == 1:
            second = self.tree2(first)
            return self.root([second, first] + children)

        children.append(first)
        return self.tree2(first, children=children)
||||
|
||||
|
||||
@register
@serializable
class DLA(nn.Layer):
    """
    DLA, see https://arxiv.org/pdf/1707.06484.pdf

    Args:
        depth (int): DLA depth, should be 34.
        residual_root (bool): whether to use a residual layer in the root block

    """

    def __init__(self, depth=34, residual_root=False):
        super(DLA, self).__init__()
        levels, channels = DLA_cfg[depth]
        if depth == 34:
            block = BasicBlock
        self.channels = channels

        stem_conv = ConvNormLayer(
            3,
            channels[0],
            filter_size=7,
            stride=1,
            bias_on=False,
            norm_decay=None)
        self.base_layer = nn.Sequential(stem_conv, nn.ReLU())

        # Levels 0 and 1 are plain conv stacks.
        self.level0 = self._make_conv_level(channels[0], channels[0],
                                            levels[0])
        self.level1 = self._make_conv_level(
            channels[0], channels[1], levels[1], stride=2)

        # Levels 2..5 are stride-2 aggregation trees; all but level 2 also
        # feed their input to the root.
        for idx in range(2, 6):
            tree = Tree(
                levels[idx],
                block,
                channels[idx - 1],
                channels[idx],
                2,
                level_root=idx > 2,
                root_residual=residual_root)
            setattr(self, 'level{}'.format(idx), tree)

    def _make_conv_level(self, ch_in, ch_out, conv_num, stride=1):
        """Stack `conv_num` 3x3 conv + ReLU pairs; only the first is strided."""
        layers = []
        for idx in range(conv_num):
            conv = ConvNormLayer(
                ch_in,
                ch_out,
                filter_size=3,
                stride=stride if idx == 0 else 1,
                bias_on=False,
                norm_decay=None)
            layers.append(conv)
            layers.append(nn.ReLU())
            ch_in = ch_out
        return nn.Sequential(*layers)

    @property
    def out_shape(self):
        return [ShapeSpec(channels=self.channels[idx]) for idx in range(6)]

    def forward(self, inputs):
        feats = self.base_layer(inputs['image'])
        level_outputs = []
        for idx in range(6):
            level = getattr(self, 'level{}'.format(idx))
            feats = level(feats)
            level_outputs.append(feats)

        return level_outputs
@ -0,0 +1,290 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
import paddle |
||||
import paddle.nn as nn |
||||
import paddle.nn.functional as F |
||||
from paddle import ParamAttr |
||||
from paddle.nn import Conv2D, MaxPool2D, AdaptiveAvgPool2D |
||||
from paddle.nn.initializer import KaimingNormal |
||||
from paddle.regularizer import L2Decay |
||||
|
||||
from paddlers.models.ppdet.core.workspace import register, serializable |
||||
from numbers import Integral |
||||
from ..shape_spec import ShapeSpec |
||||
from paddlers.models.ppdet.modeling.ops import channel_shuffle |
||||
from paddlers.models.ppdet.modeling.backbones.shufflenet_v2 import ConvBNLayer |
||||
|
||||
__all__ = ['ESNet'] |
||||
|
||||
|
||||
def make_divisible(v, divisor=16, min_value=None):
    """Round ``v`` to a nearby multiple of ``divisor``.

    Args:
        v: Value to round.
        divisor: The result is a multiple of this number (unless
            ``min_value`` wins and is not itself a multiple).
        min_value: Lower bound for the result; defaults to ``divisor``.

    Returns:
        The rounded value. When the rounded value would be below 90% of
        ``v``, one extra ``divisor`` is added.
    """
    lower_bound = divisor if min_value is None else min_value
    nearest_multiple = int(v + divisor / 2) // divisor * divisor
    rounded = max(lower_bound, nearest_multiple)
    if rounded < 0.9 * v:
        return rounded + divisor
    return rounded
||||
|
||||
|
||||
class SEModule(nn.Layer):
    """Channel gate: global average pool, two 1x1 convs, hard-sigmoid scaling.

    Args:
        channel: Number of input (and output) channels.
        reduction: The intermediate 1x1 conv has ``channel // reduction``
            channels.
    """

    def __init__(self, channel, reduction=4):
        super(SEModule, self).__init__()
        squeezed = channel // reduction
        self.avg_pool = AdaptiveAvgPool2D(1)
        self.conv1 = Conv2D(
            in_channels=channel,
            out_channels=squeezed,
            kernel_size=1,
            stride=1,
            padding=0,
            weight_attr=ParamAttr(),
            bias_attr=ParamAttr())
        self.conv2 = Conv2D(
            in_channels=squeezed,
            out_channels=channel,
            kernel_size=1,
            stride=1,
            padding=0,
            weight_attr=ParamAttr(),
            bias_attr=ParamAttr())

    def forward(self, inputs):
        """Multiply ``inputs`` by the gate computed from its pooled features."""
        gate = self.avg_pool(inputs)
        gate = F.relu(self.conv1(gate))
        gate = F.hardsigmoid(self.conv2(gate))
        return paddle.multiply(x=inputs, y=gate)
||||
|
||||
|
||||
class InvertedResidual(nn.Layer):
    """ES block that transforms one half of the channels and shuffles.

    The input is split in two along axis 1. The first half is passed
    through untouched; the second half goes through a 1x1 conv, a
    depthwise 3x3 conv, an ``SEModule`` over the concatenation of both
    conv outputs, and a final 1x1 conv. The two halves are concatenated
    and channel-shuffled with 2 groups.
    """

    def __init__(self,
                 in_channels,
                 mid_channels,
                 out_channels,
                 stride,
                 act="relu"):
        super(InvertedResidual, self).__init__()
        half_in = in_channels // 2
        half_mid = mid_channels // 2
        half_out = out_channels // 2

        self._conv_pw = ConvBNLayer(
            in_channels=half_in,
            out_channels=half_mid,
            kernel_size=1,
            stride=1,
            padding=0,
            groups=1,
            act=act)
        self._conv_dw = ConvBNLayer(
            in_channels=half_mid,
            out_channels=half_mid,
            kernel_size=3,
            stride=stride,
            padding=1,
            groups=half_mid,
            act=None)
        # The SE gate sees the pointwise and depthwise outputs concatenated.
        self._se = SEModule(mid_channels)

        self._conv_linear = ConvBNLayer(
            in_channels=mid_channels,
            out_channels=half_out,
            kernel_size=1,
            stride=1,
            padding=0,
            groups=1,
            act=act)

    def forward(self, inputs):
        """Apply the block to ``inputs`` and return the shuffled result."""
        half = inputs.shape[1] // 2
        shortcut, branch = paddle.split(
            inputs, num_or_sections=[half, half], axis=1)
        pw_out = self._conv_pw(branch)
        dw_out = self._conv_dw(pw_out)
        fused = paddle.concat([pw_out, dw_out], axis=1)
        fused = self._se(fused)
        fused = self._conv_linear(fused)
        merged = paddle.concat([shortcut, fused], axis=1)
        return channel_shuffle(merged, 2)
||||
|
||||
|
||||
class InvertedResidualDS(nn.Layer):
    """ES block with two parallel branches over the full input.

    Branch 1 is a depthwise 3x3 conv followed by a 1x1 conv. Branch 2 is a
    1x1 conv, a depthwise 3x3 conv, an ``SEModule`` and a 1x1 conv. Each
    branch produces ``out_channels // 2`` channels; their concatenation is
    refined by a depthwise 3x3 conv and a 1x1 conv, both using
    ``hard_swish``.
    """

    def __init__(self,
                 in_channels,
                 mid_channels,
                 out_channels,
                 stride,
                 act="relu"):
        super(InvertedResidualDS, self).__init__()
        half_mid = mid_channels // 2
        half_out = out_channels // 2

        # Branch 1: depthwise conv, then 1x1 projection.
        self._conv_dw_1 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=in_channels,
            kernel_size=3,
            stride=stride,
            padding=1,
            groups=in_channels,
            act=None)
        self._conv_linear_1 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=half_out,
            kernel_size=1,
            stride=1,
            padding=0,
            groups=1,
            act=act)

        # Branch 2: 1x1 conv, depthwise conv, SE gate, 1x1 projection.
        self._conv_pw_2 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=half_mid,
            kernel_size=1,
            stride=1,
            padding=0,
            groups=1,
            act=act)
        self._conv_dw_2 = ConvBNLayer(
            in_channels=half_mid,
            out_channels=half_mid,
            kernel_size=3,
            stride=stride,
            padding=1,
            groups=half_mid,
            act=None)
        self._se = SEModule(half_mid)
        self._conv_linear_2 = ConvBNLayer(
            in_channels=half_mid,
            out_channels=half_out,
            kernel_size=1,
            stride=1,
            padding=0,
            groups=1,
            act=act)

        # Refinement applied to the concatenated branches.
        self._conv_dw_mv1 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            groups=out_channels,
            act="hard_swish")
        self._conv_pw_mv1 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            groups=1,
            act="hard_swish")

    def forward(self, inputs):
        """Run both branches on ``inputs``, concatenate and refine."""
        branch1 = self._conv_linear_1(self._conv_dw_1(inputs))

        branch2 = self._conv_pw_2(inputs)
        branch2 = self._conv_dw_2(branch2)
        branch2 = self._se(branch2)
        branch2 = self._conv_linear_2(branch2)

        merged = paddle.concat([branch1, branch2], axis=1)
        merged = self._conv_dw_mv1(merged)
        return self._conv_pw_mv1(merged)
||||
|
||||
|
||||
@register
@serializable
class ESNet(nn.Layer):
    """ESNet backbone: a stem followed by three stages of ES blocks.

    Args:
        scale: Multiplier applied to the 128/256/512 stage widths before
            they are rounded with ``make_divisible``.
        act: Activation name passed to the stem and to every block.
        feature_maps: Feature indices to return. The stem counts as index
            1, so the first block is index 2. An integer is wrapped into a
            one-element list.
        channel_ratio: Per-block multiplier of the stage width used to
            derive each block's ``mid_channels``; consumed one entry per
            block in construction order.
    """

    def __init__(self,
                 scale=1.0,
                 act="hard_swish",
                 feature_maps=[4, 11, 14],
                 channel_ratio=[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]):
        super(ESNet, self).__init__()
        self.scale = scale
        if isinstance(feature_maps, Integral):
            feature_maps = [feature_maps]
        self.feature_maps = feature_maps
        stage_repeats = [3, 7, 3]

        stage_out_channels = [
            -1, 24, make_divisible(128 * scale), make_divisible(256 * scale),
            make_divisible(512 * scale), 1024
        ]

        self._out_channels = []
        self._feature_idx = 0

        # 1. Stem: strided 3x3 conv followed by a strided max pool.
        self._conv1 = ConvBNLayer(
            in_channels=3,
            out_channels=stage_out_channels[1],
            kernel_size=3,
            stride=2,
            padding=1,
            act=act)
        self._max_pool = MaxPool2D(kernel_size=3, stride=2, padding=1)
        self._feature_idx += 1

        # 2. Stages: the first block of each stage changes the width and
        #    uses stride 2; the rest keep the width with stride 1.
        self._block_list = []
        arch_idx = 0
        for stage_id, num_repeat in enumerate(stage_repeats):
            stage_in = stage_out_channels[stage_id + 1]
            stage_out = stage_out_channels[stage_id + 2]
            for i in range(num_repeat):
                mid_c = make_divisible(
                    int(stage_out * channel_ratio[arch_idx]), divisor=8)
                if i == 0:
                    layer = InvertedResidualDS(
                        in_channels=stage_in,
                        mid_channels=mid_c,
                        out_channels=stage_out,
                        stride=2,
                        act=act)
                else:
                    layer = InvertedResidual(
                        in_channels=stage_out,
                        mid_channels=mid_c,
                        out_channels=stage_out,
                        stride=1,
                        act=act)
                block = self.add_sublayer(
                    name='{}_{}'.format(stage_id + 2, i + 1), sublayer=layer)
                self._block_list.append(block)
                arch_idx += 1
                self._feature_idx += 1
                self._update_out_channels(stage_out, self._feature_idx,
                                          self.feature_maps)

    def _update_out_channels(self, channel, feature_idx, feature_maps):
        """Record ``channel`` when ``feature_idx`` is a requested feature."""
        if feature_idx in feature_maps:
            self._out_channels.append(channel)

    def forward(self, inputs):
        """Return the outputs of the blocks selected by ``feature_maps``.

        Reads the tensor under ``inputs['image']``.
        """
        y = self._max_pool(self._conv1(inputs['image']))
        outs = []
        # Block numbering starts at 2, matching the indices used in __init__.
        for feat_idx, block in enumerate(self._block_list, start=2):
            y = block(y)
            if feat_idx in self.feature_maps:
                outs.append(y)

        return outs

    @property
    def out_shape(self):
        """One ``ShapeSpec`` per recorded output channel count."""
        return [ShapeSpec(channels=c) for c in self._out_channels]
@ -0,0 +1,470 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
import math |
||||
import paddle |
||||
from paddle import ParamAttr |
||||
import paddle.nn as nn |
||||
import paddle.nn.functional as F |
||||
from paddle.nn import AdaptiveAvgPool2D, Linear |
||||
from paddle.nn.initializer import Uniform |
||||
|
||||
from paddlers.models.ppdet.core.workspace import register, serializable |
||||
from numbers import Integral |
||||
from ..shape_spec import ShapeSpec |
||||
from .mobilenet_v3 import make_divisible, ConvBNLayer |
||||
|
||||
__all__ = ['GhostNet'] |
||||
|
||||
|
||||
class ExtraBlockDW(nn.Layer):
    """Extra block: 1x1 conv, grouped 3x3 conv, 1x1 conv (all ``relu6``).

    Args:
        in_c: Input channels of the first 1x1 conv.
        ch_1: Output channels of the first 1x1 conv; also the group count
            of the 3x3 conv.
        ch_2: Output channels of the 3x3 conv and of the final 1x1 conv.
        stride: Stride of the 3x3 conv.
        lr_mult: Forwarded to every ``ConvBNLayer``.
        conv_decay: Forwarded to every ``ConvBNLayer``.
        norm_type: Forwarded to every ``ConvBNLayer``.
        norm_decay: Forwarded to every ``ConvBNLayer``.
        freeze_norm: Forwarded to every ``ConvBNLayer``.
        name: Prefix for the names passed to the sublayers. ``None`` is
            treated as an empty prefix.
    """

    def __init__(self,
                 in_c,
                 ch_1,
                 ch_2,
                 stride,
                 lr_mult,
                 conv_decay=0.,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=False,
                 name=None):
        super(ExtraBlockDW, self).__init__()
        # The default ``name=None`` used to raise TypeError in the string
        # concatenations below; fall back to an empty prefix instead.
        prefix = "" if name is None else name
        # Settings shared by the three conv layers.
        shared = dict(
            act='relu6',
            lr_mult=lr_mult,
            conv_decay=conv_decay,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm)

        self.pointwise_conv = ConvBNLayer(
            in_c=in_c,
            out_c=ch_1,
            filter_size=1,
            stride=1,
            padding=0,
            name=prefix + "_extra1",
            **shared)
        self.depthwise_conv = ConvBNLayer(
            in_c=ch_1,
            out_c=ch_2,
            filter_size=3,
            stride=stride,
            padding=1,
            num_groups=int(ch_1),
            name=prefix + "_extra2_dw",
            **shared)
        self.normal_conv = ConvBNLayer(
            in_c=ch_2,
            out_c=ch_2,
            filter_size=1,
            stride=1,
            padding=0,
            name=prefix + "_extra2_sep",
            **shared)

    def forward(self, inputs):
        """Apply the three conv layers in sequence."""
        x = self.pointwise_conv(inputs)
        x = self.depthwise_conv(x)
        return self.normal_conv(x)
||||
|
||||
|
||||
class SEBlock(nn.Layer):
    """Channel gate built from two ``Linear`` layers on pooled features.

    Args:
        num_channels: Number of input channels.
        lr_mult: Learning-rate multiplier set on all weights and biases.
        reduction_ratio: The hidden layer has
            ``num_channels // reduction_ratio`` units.
        name: Accepted but not used inside this class.
    """

    def __init__(self, num_channels, lr_mult, reduction_ratio=4, name=None):
        super(SEBlock, self).__init__()
        self.pool2d_gap = AdaptiveAvgPool2D(1)
        self._num_channels = num_channels
        med_ch = num_channels // reduction_ratio

        # Weights are drawn uniformly from +-1/sqrt(fan_in).
        squeeze_bound = 1.0 / math.sqrt(num_channels * 1.0)
        self.squeeze = Linear(
            num_channels,
            med_ch,
            weight_attr=ParamAttr(
                learning_rate=lr_mult,
                initializer=Uniform(-squeeze_bound, squeeze_bound)),
            bias_attr=ParamAttr(learning_rate=lr_mult))

        excite_bound = 1.0 / math.sqrt(med_ch * 1.0)
        self.excitation = Linear(
            med_ch,
            num_channels,
            weight_attr=ParamAttr(
                learning_rate=lr_mult,
                initializer=Uniform(-excite_bound, excite_bound)),
            bias_attr=ParamAttr(learning_rate=lr_mult))

    def forward(self, inputs):
        """Scale ``inputs`` by a per-channel gate clipped to [0, 1]."""
        pooled = paddle.squeeze(self.pool2d_gap(inputs), axis=[2, 3])
        hidden = F.relu(self.squeeze(pooled))
        gate = self.excitation(hidden)
        gate = paddle.clip(x=gate, min=0, max=1)
        gate = paddle.unsqueeze(gate, axis=[2, 3])
        return paddle.multiply(inputs, gate)
||||
|
||||
|
||||
class GhostModule(nn.Layer):
    """Ghost module: a primary conv plus a grouped "cheap" conv, concatenated.

    The primary conv produces ``ceil(output_channels / ratio)`` channels.
    The cheap operation is a conv grouped per primary channel that
    produces ``(ratio - 1)`` times as many. ``forward`` concatenates both
    along axis 1.

    Args:
        in_channels: Input channels of the primary conv.
        output_channels: Target channel count used to size both convs.
        kernel_size: Kernel size of the primary conv.
        ratio: Ratio between target channels and primary channels.
        dw_size: Kernel size of the cheap operation.
        stride: Stride of the primary conv.
        relu: When true both convs use ``"relu"``; otherwise no activation.
        lr_mult: Forwarded to both ``ConvBNLayer`` instances.
        conv_decay: Forwarded to both ``ConvBNLayer`` instances.
        norm_type: Forwarded to both ``ConvBNLayer`` instances.
        norm_decay: Forwarded to both ``ConvBNLayer`` instances.
        freeze_norm: Forwarded to both ``ConvBNLayer`` instances.
        name: Prefix for the names passed to the sublayers. ``None`` is
            treated as an empty prefix.
    """

    def __init__(self,
                 in_channels,
                 output_channels,
                 kernel_size=1,
                 ratio=2,
                 dw_size=3,
                 stride=1,
                 relu=True,
                 lr_mult=1.,
                 conv_decay=0.,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=False,
                 name=None):
        super(GhostModule, self).__init__()
        # The default ``name=None`` used to raise TypeError in the string
        # concatenations below; fall back to an empty prefix instead.
        prefix = "" if name is None else name
        init_channels = int(math.ceil(output_channels / ratio))
        new_channels = int(init_channels * (ratio - 1))
        activation = "relu" if relu else None
        # Settings shared by both conv layers.
        shared = dict(
            act=activation,
            lr_mult=lr_mult,
            conv_decay=conv_decay,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm)

        self.primary_conv = ConvBNLayer(
            in_c=in_channels,
            out_c=init_channels,
            filter_size=kernel_size,
            stride=stride,
            padding=int((kernel_size - 1) // 2),
            num_groups=1,
            name=prefix + "_primary_conv",
            **shared)
        self.cheap_operation = ConvBNLayer(
            in_c=init_channels,
            out_c=new_channels,
            filter_size=dw_size,
            stride=1,
            padding=int((dw_size - 1) // 2),
            num_groups=init_channels,
            name=prefix + "_cheap_operation",
            **shared)

    def forward(self, inputs):
        """Return the primary and cheap features concatenated on axis 1."""
        primary = self.primary_conv(inputs)
        cheap = self.cheap_operation(primary)
        return paddle.concat([primary, cheap], axis=1)
||||
|
||||
|
||||
class GhostBottleneck(nn.Layer):
    """Ghost bottleneck: two ``GhostModule``s with a shortcut connection.

    Between the two ghost modules an optional strided depthwise conv
    (only when ``stride == 2``) and an optional ``SEBlock`` (when
    ``use_se`` is truthy) are applied. The shortcut is the identity when
    ``stride == 1`` and the channel counts match; otherwise it is a
    depthwise conv followed by a 1x1 conv.

    Args:
        in_channels: Input channels.
        hidden_dim: Target channels of the first ghost module.
        output_channels: Target channels of the second ghost module.
        kernel_size: Kernel size of the depthwise convs.
        stride: Stride of the depthwise convs.
        use_se: Whether to insert an ``SEBlock``.
        lr_mult: Forwarded to the sublayers.
        conv_decay: Forwarded to the conv sublayers.
        norm_type: Forwarded to the conv sublayers.
        norm_decay: Forwarded to the conv sublayers.
        freeze_norm: Forwarded to the conv sublayers.
        return_list: When true ``forward`` returns
            ``[first_ghost_output, block_output]`` instead of only the
            block output.
        name: Prefix for the names passed to the sublayers. ``None`` is
            treated as an empty prefix.
    """

    def __init__(self,
                 in_channels,
                 hidden_dim,
                 output_channels,
                 kernel_size,
                 stride,
                 use_se,
                 lr_mult,
                 conv_decay=0.,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=False,
                 return_list=False,
                 name=None):
        super(GhostBottleneck, self).__init__()
        # The default ``name=None`` used to raise TypeError in the string
        # concatenations below; fall back to an empty prefix instead.
        prefix = "" if name is None else name
        self._stride = stride
        self._use_se = use_se
        self._num_channels = in_channels
        self._output_channels = output_channels
        self.return_list = return_list

        # Settings shared by every conv-based sublayer.
        shared = dict(
            lr_mult=lr_mult,
            conv_decay=conv_decay,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm)
        dw_padding = int((kernel_size - 1) // 2)

        self.ghost_module_1 = GhostModule(
            in_channels=in_channels,
            output_channels=hidden_dim,
            kernel_size=1,
            stride=1,
            relu=True,
            name=prefix + "_ghost_module_1",
            **shared)
        if stride == 2:
            self.depthwise_conv = ConvBNLayer(
                in_c=hidden_dim,
                out_c=hidden_dim,
                filter_size=kernel_size,
                stride=stride,
                padding=dw_padding,
                num_groups=hidden_dim,
                act=None,
                # looks strange due to an old typo, will be fixed later.
                name=prefix + "_depthwise_depthwise",
                **shared)
        if use_se:
            self.se_block = SEBlock(hidden_dim, lr_mult, name=prefix + "_se")
        self.ghost_module_2 = GhostModule(
            in_channels=hidden_dim,
            output_channels=output_channels,
            kernel_size=1,
            relu=False,
            name=prefix + "_ghost_module_2",
            **shared)
        if stride != 1 or in_channels != output_channels:
            self.shortcut_depthwise = ConvBNLayer(
                in_c=in_channels,
                out_c=in_channels,
                filter_size=kernel_size,
                stride=stride,
                padding=dw_padding,
                num_groups=in_channels,
                act=None,
                # looks strange due to an old typo, will be fixed later.
                name=prefix + "_shortcut_depthwise_depthwise",
                **shared)
            self.shortcut_conv = ConvBNLayer(
                in_c=in_channels,
                out_c=output_channels,
                filter_size=1,
                stride=1,
                padding=0,
                num_groups=1,
                act=None,
                name=prefix + "_shortcut_conv",
                **shared)

    def forward(self, inputs):
        """Apply the bottleneck; see ``return_list`` for the return shape."""
        y = self.ghost_module_1(inputs)
        x = y
        if self._stride == 2:
            x = self.depthwise_conv(x)
        if self._use_se:
            x = self.se_block(x)
        x = self.ghost_module_2(x)

        if self._stride == 1 and self._num_channels == self._output_channels:
            shortcut = inputs
        else:
            shortcut = self.shortcut_conv(self.shortcut_depthwise(inputs))
        x = paddle.add(x=x, y=shortcut)

        if self.return_list:
            return [y, x]
        return x
||||
|
||||
|
||||
@register
@serializable
class GhostNet(nn.Layer):
    """GhostNet backbone with optional extra blocks.

    Args:
        scale: Width multiplier applied to every channel count before it
            is rounded with ``make_divisible(..., 4)``.
        feature_maps: Feature indices to return. Bottleneck ``i``
            (0-based) has feature index ``i + 2``; extra blocks continue
            the numbering. An integer is wrapped into a one-element list.
        with_extra_blocks: Whether to append the extra conv blocks. When
            true, selected bottlenecks return the output of their first
            ghost module instead of the block output.
        extra_block_filters: ``[ch_1, ch_2]`` pairs, one per
            ``ExtraBlockDW``.
        lr_mult_list: Learning-rate multipliers; bottleneck ``i`` uses
            entry ``min(i // 3, len(lr_mult_list) - 1)``.
        conv_decay: Forwarded to the conv sublayers.
        norm_type: Forwarded to the conv sublayers.
        norm_decay: Forwarded to the conv sublayers.
        freeze_norm: Forwarded to the conv sublayers.

    Raises:
        ValueError: If ``norm_type == 'sync_bn'`` and ``freeze_norm`` is
            true.
    """

    __shared__ = ['norm_type']

    def __init__(self,
                 scale=1.3,
                 feature_maps=[6, 12, 15],
                 with_extra_blocks=False,
                 extra_block_filters=[[256, 512], [128, 256], [128, 256],
                                      [64, 128]],
                 lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0],
                 conv_decay=0.,
                 norm_type='bn',
                 norm_decay=0.0,
                 freeze_norm=False):
        super(GhostNet, self).__init__()
        if isinstance(feature_maps, Integral):
            feature_maps = [feature_maps]
        if norm_type == 'sync_bn' and freeze_norm:
            raise ValueError(
                "The norm_type should not be sync_bn when freeze_norm is True")
        self.feature_maps = feature_maps
        self.with_extra_blocks = with_extra_blocks
        self.extra_block_filters = extra_block_filters

        inplanes = 16
        self.cfgs = [
            # k, t, c, SE, s
            [3, 16, 16, 0, 1],
            [3, 48, 24, 0, 2],
            [3, 72, 24, 0, 1],
            [5, 72, 40, 1, 2],
            [5, 120, 40, 1, 1],
            [3, 240, 80, 0, 2],
            [3, 200, 80, 0, 1],
            [3, 184, 80, 0, 1],
            [3, 184, 80, 0, 1],
            [3, 480, 112, 1, 1],
            [3, 672, 112, 1, 1],
            [5, 672, 160, 1, 2],  # SSDLite output
            [5, 960, 160, 0, 1],
            [5, 960, 160, 1, 1],
            [5, 960, 160, 0, 1],
            [5, 960, 160, 1, 1]
        ]
        self.scale = scale

        # Normalisation settings shared by every conv-based sublayer.
        norm_cfg = dict(
            conv_decay=conv_decay,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm)
        last_lr_slot = len(lr_mult_list) - 1

        conv1_out_ch = int(make_divisible(inplanes * self.scale, 4))
        self.conv1 = ConvBNLayer(
            in_c=3,
            out_c=conv1_out_ch,
            filter_size=3,
            stride=2,
            padding=1,
            num_groups=1,
            act="relu",
            lr_mult=1.,
            name="conv1",
            **norm_cfg)

        # build inverted residual blocks
        self._out_channels = []
        self.ghost_bottleneck_list = []
        inplanes = conv1_out_ch
        for block_idx, (k, exp_size, c, use_se, s) in enumerate(self.cfgs):
            lr_mult = lr_mult_list[min(block_idx // 3, last_lr_slot)]
            feature_idx = block_idx + 2

            # for SSD/SSDLite, first head input is after ResidualUnit expand_conv
            return_list = self.with_extra_blocks and feature_idx in self.feature_maps

            hidden_c = int(make_divisible(exp_size * self.scale, 4))
            out_c = int(make_divisible(c * self.scale, 4))
            block_name = "_ghostbottleneck_" + str(block_idx)

            ghost_bottleneck = self.add_sublayer(
                block_name,
                sublayer=GhostBottleneck(
                    in_channels=inplanes,
                    hidden_dim=hidden_c,
                    output_channels=out_c,
                    kernel_size=k,
                    stride=s,
                    use_se=use_se,
                    lr_mult=lr_mult,
                    return_list=return_list,
                    name=block_name,
                    **norm_cfg))
            self.ghost_bottleneck_list.append(ghost_bottleneck)
            inplanes = out_c
            # When the block returns its expanded feature, that is the
            # tensor exposed as output, so record its channel count.
            self._update_out_channels(hidden_c if return_list else out_c,
                                      feature_idx, feature_maps)

        # Number of bottlenecks built so far; extra blocks continue from it.
        idx = len(self.cfgs)

        if self.with_extra_blocks:
            self.extra_block_list = []
            extra_out_c = int(make_divisible(self.scale * self.cfgs[-1][1], 4))
            lr_mult = lr_mult_list[min(idx // 3, last_lr_slot)]

            layer_name = "conv" + str(idx + 2)
            conv_extra = self.add_sublayer(
                layer_name,
                sublayer=ConvBNLayer(
                    in_c=inplanes,
                    out_c=extra_out_c,
                    filter_size=1,
                    stride=1,
                    padding=0,
                    num_groups=1,
                    act="relu6",
                    lr_mult=lr_mult,
                    name=layer_name,
                    **norm_cfg))
            self.extra_block_list.append(conv_extra)
            idx += 1
            self._update_out_channels(extra_out_c, idx + 1, feature_maps)

            # Each extra block takes the previous block's output channels.
            prev_out_c = extra_out_c
            for block_filter in self.extra_block_filters:
                layer_name = "conv" + str(idx + 2)
                conv_extra = self.add_sublayer(
                    layer_name,
                    sublayer=ExtraBlockDW(
                        prev_out_c,
                        block_filter[0],
                        block_filter[1],
                        stride=2,
                        lr_mult=lr_mult,
                        name=layer_name,
                        **norm_cfg))
                self.extra_block_list.append(conv_extra)
                idx += 1
                self._update_out_channels(block_filter[1], idx + 1,
                                          feature_maps)
                prev_out_c = block_filter[1]

    def _update_out_channels(self, channel, feature_idx, feature_maps):
        """Record ``channel`` when ``feature_idx`` is a requested feature."""
        if feature_idx in feature_maps:
            self._out_channels.append(channel)

    def forward(self, inputs):
        """Return the features selected by ``feature_maps``.

        Reads the tensor under ``inputs['image']``.
        """
        x = self.conv1(inputs['image'])
        outs = []
        for feat_idx, bottleneck in enumerate(
                self.ghost_bottleneck_list, start=2):
            x = bottleneck(x)
            if feat_idx in self.feature_maps:
                if isinstance(x, list):
                    # [first ghost module output, block output]
                    outs.append(x[0])
                    x = x[1]
                else:
                    outs.append(x)

        if not self.with_extra_blocks:
            return outs

        first_extra_idx = len(self.ghost_bottleneck_list) + 2
        for feat_idx, block in enumerate(
                self.extra_block_list, start=first_extra_idx):
            x = block(x)
            if feat_idx in self.feature_maps:
                outs.append(x)
        return outs

    @property
    def out_shape(self):
        """One ``ShapeSpec`` per recorded output channel count."""
        return [ShapeSpec(channels=c) for c in self._out_channels]
@ -0,0 +1,224 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
import paddle |
||||
import paddle.nn as nn |
||||
from paddlers.models.ppdet.core.workspace import register |
||||
from ..shape_spec import ShapeSpec |
||||
|
||||
__all__ = ['HarDNet'] |
||||
|
||||
|
||||
def ConvLayer(in_channels,
              out_channels,
              kernel_size=3,
              stride=1,
              bias_attr=False):
    """Return a ``Sequential`` of Conv2D, BatchNorm2D and ReLU6.

    The sublayers are registered as ``'conv'``, ``'norm'`` and ``'relu'``.
    The convolution is padded with ``kernel_size // 2``.
    """
    conv = nn.Conv2D(
        in_channels,
        out_channels,
        kernel_size=kernel_size,
        stride=stride,
        padding=kernel_size // 2,
        groups=1,
        bias_attr=bias_attr)
    norm = nn.BatchNorm2D(out_channels)
    relu = nn.ReLU6()
    return nn.Sequential(('conv', conv), ('norm', norm), ('relu', relu))
||||
|
||||
|
||||
def DWConvLayer(in_channels,
                out_channels,
                kernel_size=3,
                stride=1,
                bias_attr=False):
    """Return a ``Sequential`` of a grouped Conv2D and BatchNorm2D.

    The sublayers are registered as ``'dwconv'`` and ``'norm'``. The
    convolution uses ``groups=out_channels``.

    The padding is ``kernel_size // 2``, matching ``ConvLayer``. It was
    previously fixed at 1, which is the same value for the default
    ``kernel_size=3`` but changed the spatial size for any other kernel.
    """
    dwconv = nn.Conv2D(
        in_channels,
        out_channels,
        kernel_size=kernel_size,
        stride=stride,
        padding=kernel_size // 2,
        groups=out_channels,
        bias_attr=bias_attr)
    norm = nn.BatchNorm2D(out_channels)
    return nn.Sequential(('dwconv', dwconv), ('norm', norm))
||||
|
||||
|
||||
def CombConvLayer(in_channels, out_channels, kernel_size=1, stride=1):
    """Return a ``ConvLayer`` followed by a ``DWConvLayer``.

    ``kernel_size`` applies to the first layer (``'layer1'``) and
    ``stride`` to the second (``'layer2'``).
    """
    pointwise = ConvLayer(in_channels, out_channels, kernel_size=kernel_size)
    depthwise = DWConvLayer(out_channels, out_channels, stride=stride)
    return nn.Sequential(('layer1', pointwise), ('layer2', depthwise))
||||
|
||||
|
||||
class HarDBlock(nn.Layer):
    """Block of conv layers whose inputs follow a power-of-two link pattern.

    Layer ``k`` (1-based) takes as input the concatenation of the outputs
    of layers ``k - 2**i`` for every ``i`` such that ``2**i`` divides
    ``k``; index 0 refers to the block input.

    Args:
        in_channels: Channels of the block input.
        growth_rate: Base output width of a layer.
        grmul: Width multiplier applied once per additional link.
        n_layers: Number of conv layers in the block.
        keepBase: Whether the block input is kept in the output.
        residual_out: Accepted but not used inside this class.
        dwconv: Use ``CombConvLayer`` instead of ``ConvLayer``.
    """

    def __init__(self,
                 in_channels,
                 growth_rate,
                 grmul,
                 n_layers,
                 keepBase=False,
                 residual_out=False,
                 dwconv=False):
        super().__init__()
        self.keepBase = keepBase
        self.links = []
        self.out_channels = 0

        make_layer = CombConvLayer if dwconv else ConvLayer
        last_idx = n_layers - 1
        convs = []
        for idx in range(n_layers):
            outch, inch, link = self.get_link(idx + 1, in_channels,
                                              growth_rate, grmul)
            self.links.append(link)
            convs.append(make_layer(inch, outch))

            # Count the layers that ``forward`` keeps in its output.
            if idx % 2 == 0 or idx == last_idx:
                self.out_channels += outch
        self.layers = nn.LayerList(convs)

    def get_out_ch(self):
        """Return the number of channels produced by ``forward``."""
        return self.out_channels

    def get_link(self, layer, base_ch, growth_rate, grmul):
        """Compute the widths and input links of layer number ``layer``.

        Returns:
            A tuple ``(out_channels, in_channels, link)`` where ``link``
            lists the indices of the layers feeding this one. Layer 0 is
            the block input and yields ``(base_ch, 0, [])``.
        """
        if layer == 0:
            return base_ch, 0, []

        out_channels = growth_rate
        link = []
        for exponent in range(10):
            step = 2**exponent
            if layer % step != 0:
                continue
            link.append(layer - step)
            if exponent > 0:
                out_channels *= grmul

        # Round to an even integer.
        out_channels = int(int(out_channels + 1) / 2) * 2

        in_channels = 0
        for src in link:
            in_channels += self.get_link(src, base_ch, growth_rate, grmul)[0]

        return out_channels, in_channels, link

    def forward(self, x):
        """Run the block and concatenate the kept layer outputs on axis 1."""
        feats = [x]

        for conv, link in zip(self.layers, self.links):
            sources = [feats[src] for src in link]
            if len(sources) > 1:
                merged = paddle.concat(sources, 1)
            else:
                merged = sources[0]
            feats.append(conv(merged))

        last_idx = len(feats) - 1
        kept = [
            feat for idx, feat in enumerate(feats)
            if (idx == 0 and self.keepBase) or idx == last_idx or
            idx % 2 == 1
        ]
        return paddle.concat(kept, 1)
||||
|
||||
|
||||
@register
class HarDNet(nn.Layer):
    """HarDNet backbone assembled as a flat ``LayerList`` in ``self.base``.

    Args:
        depth_wise: When true, the second stem conv uses a 1x1 kernel, the
            stem down-samples with ``DWConvLayer`` instead of average
            pooling, and blocks are built with ``dwconv=True``.
        return_idx: Indices into ``self.base`` whose outputs are returned
            by ``forward``.
        arch: Architecture variant; 68 and 85 are configured.

    Raises:
        AssertionError: If ``arch`` is not a configured variant. Arch 39
            used to pass the initial assertion but had no configuration
            and failed later with an unrelated ``UnboundLocalError``.
    """

    def __init__(self, depth_wise=False, return_idx=[1, 3, 8, 13], arch=85):
        super(HarDNet, self).__init__()
        if arch == 85:
            first_ch = [48, 96]
            second_kernel = 3
            ch_list = [192, 256, 320, 480, 720]
            grmul = 1.7
            gr = [24, 24, 28, 36, 48]
            n_layers = [8, 16, 16, 16, 16]
        elif arch == 68:
            first_ch = [32, 64]
            second_kernel = 3
            ch_list = [128, 256, 320, 640]
            grmul = 1.7
            gr = [14, 16, 20, 40]
            n_layers = [8, 16, 16, 16]
        else:
            # Raised explicitly (rather than via ``assert``) so the check
            # survives ``python -O`` while keeping the exception type.
            raise AssertionError("HarDNet-{} not support.".format(arch))

        self.return_idx = return_idx

        avg_pool = True
        if depth_wise:
            second_kernel = 1
            avg_pool = False

        blks = len(n_layers)
        self.base = nn.LayerList([])
        # Output channel count of every entry in ``self.base``, by index.
        layer_channels = []

        # First Layer: Standard Conv3x3, Stride=2
        self.base.append(
            ConvLayer(
                in_channels=3,
                out_channels=first_ch[0],
                kernel_size=3,
                stride=2,
                bias_attr=False))
        layer_channels.append(first_ch[0])

        # Second Layer
        self.base.append(
            ConvLayer(
                first_ch[0], first_ch[1], kernel_size=second_kernel))
        layer_channels.append(first_ch[1])

        # Avgpooling or DWConv3x3 downsampling
        if avg_pool:
            self.base.append(nn.AvgPool2D(kernel_size=3, stride=2, padding=1))
        else:
            self.base.append(DWConvLayer(first_ch[1], first_ch[1], stride=2))
        layer_channels.append(first_ch[1])

        # Build all HarDNet blocks
        ch = first_ch[1]
        for i in range(blks):
            blk = HarDBlock(ch, gr[i], grmul, n_layers[i], dwconv=depth_wise)
            ch = blk.out_channels
            self.base.append(blk)
            layer_channels.append(ch)

            if i != blks - 1:
                self.base.append(ConvLayer(ch, ch_list[i], kernel_size=1))
                ch = ch_list[i]
                layer_channels.append(ch)
            if i == 0:
                self.base.append(
                    nn.AvgPool2D(
                        kernel_size=2, stride=2, ceil_mode=True))
                layer_channels.append(ch)
            elif i != blks - 1 and i != 1 and i != 3:
                self.base.append(nn.AvgPool2D(kernel_size=2, stride=2))
                layer_channels.append(ch)

        # Derive the output channels from the layers actually built, in the
        # order ``forward`` emits them. For arch=85 with the default
        # ``return_idx`` this yields [96, 214, 458, 784], the values that
        # used to be hard-coded for every configuration.
        self._out_channels = [
            channels for idx, channels in enumerate(layer_channels)
            if idx in return_idx
        ]

    def forward(self, inputs):
        """Return the outputs of the ``self.base`` entries in ``return_idx``.

        Reads the tensor under ``inputs['image']``.
        """
        x = inputs['image']
        outs = []
        for i, layer in enumerate(self.base):
            x = layer(x)
            if i in self.return_idx:
                outs.append(x)
        return outs

    @property
    def out_shape(self):
        """One ``ShapeSpec`` per tensor returned by ``forward``."""
        return [ShapeSpec(channels=c) for c in self._out_channels]
@ -0,0 +1,727 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
import paddle |
||||
import paddle.nn as nn |
||||
import paddle.nn.functional as F |
||||
from paddle.nn import AdaptiveAvgPool2D, Linear |
||||
from paddle.regularizer import L2Decay |
||||
from paddle import ParamAttr |
||||
from paddle.nn.initializer import Normal, Uniform |
||||
from numbers import Integral |
||||
import math |
||||
|
||||
from paddlers.models.ppdet.core.workspace import register |
||||
from ..shape_spec import ShapeSpec |
||||
|
||||
__all__ = ['HRNet'] |
||||
|
||||
|
||||
class ConvNormLayer(nn.Layer):
    """Bias-free Conv2D followed by a normalisation layer and optional ReLU.

    Args:
        ch_in: Input channels.
        ch_out: Output channels.
        filter_size: Kernel size; padding is ``(filter_size - 1) // 2``.
        stride: Convolution stride.
        norm_type: ``'bn'`` or ``'sync_bn'`` (both build ``BatchNorm2D``)
            or ``'gn'`` (``GroupNorm``).
        norm_groups: Number of groups for ``GroupNorm``.
        use_dcn: Accepted but not used inside this class.
        norm_decay: L2 decay coefficient for the norm weight and bias.
        freeze_norm: When true the norm parameters get learning rate 0,
            ``stop_gradient = True``, and batch norm uses global stats.
        act: Only ``'relu'`` has an effect; any other value applies no
            activation.
        name: Accepted but not used inside this class.
    """

    def __init__(self,
                 ch_in,
                 ch_out,
                 filter_size,
                 stride=1,
                 norm_type='bn',
                 norm_groups=32,
                 use_dcn=False,
                 norm_decay=0.,
                 freeze_norm=False,
                 act=None,
                 name=None):
        super(ConvNormLayer, self).__init__()
        assert norm_type in ['bn', 'sync_bn', 'gn']

        self.act = act
        conv_init = Normal(mean=0., std=0.01)
        self.conv = nn.Conv2D(
            in_channels=ch_in,
            out_channels=ch_out,
            kernel_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=1,
            weight_attr=ParamAttr(initializer=conv_init),
            bias_attr=False)

        norm_lr = 0. if freeze_norm else 1.
        norm_weight_attr = ParamAttr(
            learning_rate=norm_lr, regularizer=L2Decay(norm_decay))
        norm_bias_attr = ParamAttr(
            learning_rate=norm_lr, regularizer=L2Decay(norm_decay))
        global_stats = True if freeze_norm else None

        if norm_type in ['bn', 'sync_bn']:
            self.norm = nn.BatchNorm2D(
                ch_out,
                weight_attr=norm_weight_attr,
                bias_attr=norm_bias_attr,
                use_global_stats=global_stats)
        elif norm_type == 'gn':
            self.norm = nn.GroupNorm(
                num_groups=norm_groups,
                num_channels=ch_out,
                weight_attr=norm_weight_attr,
                bias_attr=norm_bias_attr)

        norm_params = self.norm.parameters()
        if freeze_norm:
            for param in norm_params:
                param.stop_gradient = True

    def forward(self, inputs):
        """Apply conv, norm and (when ``act == 'relu'``) ReLU."""
        out = self.norm(self.conv(inputs))
        if self.act == 'relu':
            return F.relu(out)
        return out
||||
|
||||
|
||||
class Layer1(nn.Layer):
    """Four ``BottleneckBlock`` sublayers applied one after another.

    The first block takes ``num_channels`` inputs and is built with
    ``downsample=True``; the remaining three take 256 inputs. All use
    ``num_filters=64`` and stride 1.

    Args:
        num_channels: Input channels of the first block.
        has_se: Forwarded to every block.
        norm_decay: Forwarded to every block.
        freeze_norm: Forwarded to every block.
        name: Used in the sublayer keys and as prefix of the names passed
            to the blocks. ``None`` is treated as an empty prefix for the
            block names.
    """

    def __init__(self,
                 num_channels,
                 has_se=False,
                 norm_decay=0.,
                 freeze_norm=True,
                 name=None):
        super(Layer1, self).__init__()
        # The default ``name=None`` used to raise TypeError in
        # ``name + '_'`` below; fall back to an empty prefix instead.
        prefix = "" if name is None else name

        self.bottleneck_block_list = []

        for i in range(4):
            is_first = i == 0
            bottleneck_block = self.add_sublayer(
                "block_{}_{}".format(name, i + 1),
                BottleneckBlock(
                    num_channels=num_channels if is_first else 256,
                    num_filters=64,
                    has_se=has_se,
                    stride=1,
                    downsample=is_first,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    name=prefix + '_' + str(i + 1)))
            self.bottleneck_block_list.append(bottleneck_block)

    def forward(self, input):
        """Feed ``input`` through the four blocks in order."""
        conv = input
        for block_func in self.bottleneck_block_list:
            conv = block_func(conv)
        return conv
||||
|
||||
|
||||
class TransitionLayer(nn.Layer):
    """Adapt a list of branch tensors to the branch layout of the next stage.

    For each output branch ``i``:
      * if an input branch ``i`` exists and the channel counts match, the
        input is passed through unchanged (no layer is created);
      * if it exists but the channel counts differ, a 3x3 stride-1
        conv+norm+ReLU is created;
      * if there is no input branch ``i``, a 3x3 stride-2 conv+norm+ReLU is
        created that reads the LAST input branch.

    Args:
        in_channels (list[int]): channels of each incoming branch.
        out_channels (list[int]): channels of each outgoing branch.
        norm_decay (float): forwarded to ``ConvNormLayer``.
        freeze_norm (bool): forwarded to ``ConvNormLayer``.
        name (str|None): prefix used to build the sub-layer names.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 norm_decay=0.,
                 freeze_norm=True,
                 name=None):
        super(TransitionLayer, self).__init__()

        num_in = len(in_channels)
        self.conv_bn_func_list = []
        for i in range(len(out_channels)):
            if i >= num_in:
                # New, lower-resolution branch derived from the last input.
                ch_in, stride = in_channels[-1], 2
            elif in_channels[i] != out_channels[i]:
                ch_in, stride = in_channels[i], 1
            else:
                # Identity: forward() passes the input straight through.
                self.conv_bn_func_list.append(None)
                continue

            # str.format keeps the exact names produced for string prefixes
            # while making the default name=None usable (the original
            # `name + '_layer_'` raised TypeError).
            layer_name = "{}_layer_{}".format(name, i + 1)
            residual = self.add_sublayer(
                "transition_" + layer_name,
                ConvNormLayer(
                    ch_in=ch_in,
                    ch_out=out_channels[i],
                    filter_size=3,
                    stride=stride,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    act='relu',
                    name=layer_name))
            self.conv_bn_func_list.append(residual)

    def forward(self, input):
        """Return one tensor per output branch.

        Args:
            input (list): tensors of the incoming branches.
        """
        outs = []
        for idx, conv_bn_func in enumerate(self.conv_bn_func_list):
            if conv_bn_func is None:
                outs.append(input[idx])
                continue
            # Branches beyond the incoming ones are fed from the last input.
            source = input[idx] if idx < len(input) else input[-1]
            outs.append(conv_bn_func(source))
        return outs
||||
|
||||
|
||||
class Branches(nn.Layer):
    """Independent stacks of ``BasicBlock`` layers, one stack per branch.

    Args:
        block_num (int): number of ``BasicBlock`` layers in every branch.
        in_channels (list[int]): input channels of each branch (read for the
            first block of the branch only).
        out_channels (list[int]): output channels of each branch; its length
            defines the number of branches.
        has_se (bool): forwarded to every ``BasicBlock``.
        norm_decay (float): forwarded to every ``BasicBlock``.
        freeze_norm (bool): forwarded to every ``BasicBlock``.
        name (str|None): prefix used to build the sub-layer names.
    """

    def __init__(self,
                 block_num,
                 in_channels,
                 out_channels,
                 has_se=False,
                 norm_decay=0.,
                 freeze_norm=True,
                 name=None):
        super(Branches, self).__init__()

        self.basic_block_list = []
        for i, branch_out in enumerate(out_channels):
            branch_blocks = []
            self.basic_block_list.append(branch_blocks)
            for j in range(block_num):
                # Built with str.format so that the default name=None does
                # not raise TypeError; string prefixes give the same names
                # as before.
                block_name = "{}_branch_layer_{}_{}".format(name, i + 1,
                                                            j + 1)
                block = self.add_sublayer(
                    "bb_" + block_name,
                    BasicBlock(
                        num_channels=in_channels[i] if j == 0 else branch_out,
                        num_filters=branch_out,
                        has_se=has_se,
                        norm_decay=norm_decay,
                        freeze_norm=freeze_norm,
                        name=block_name))
                branch_blocks.append(block)

    def forward(self, inputs):
        """Run every input tensor through the block stack of its branch.

        Args:
            inputs (list): one tensor per branch, in branch order.
        """
        outs = []
        for idx, feat in enumerate(inputs):
            for block in self.basic_block_list[idx]:
                feat = block(feat)
            outs.append(feat)
        return outs
||||
|
||||
|
||||
class BottleneckBlock(nn.Layer):
    """Residual bottleneck: 1x1 -> 3x3 -> 1x1 convolutions with x4 expansion.

    The output has ``num_filters * 4`` channels. When ``downsample`` is True
    the shortcut is a 1x1 conv+norm projecting the input to that width;
    otherwise the input itself is added.

    Args:
        num_channels (int): input channels.
        num_filters (int): width of the two inner convolutions.
        has_se (bool): apply an ``SELayer`` to the main path before the add.
        stride (int): stride of the 3x3 convolution.
        downsample (bool): use a projection shortcut.
        norm_decay (float): forwarded to every ``ConvNormLayer``.
        freeze_norm (bool): forwarded to every ``ConvNormLayer``.
        name (str|None): prefix for the names handed to the sub-layers.
    """

    def __init__(self,
                 num_channels,
                 num_filters,
                 has_se,
                 stride=1,
                 downsample=False,
                 norm_decay=0.,
                 freeze_norm=True,
                 name=None):
        super(BottleneckBlock, self).__init__()

        self.has_se = has_se
        self.downsample = downsample
        expanded = num_filters * 4

        # Names are built with str.format rather than `name + "..."`, so the
        # default name=None no longer raises TypeError; string prefixes
        # yield exactly the same names as before.
        self.conv1 = ConvNormLayer(
            ch_in=num_channels,
            ch_out=num_filters,
            filter_size=1,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            act="relu",
            name="{}_conv1".format(name))
        self.conv2 = ConvNormLayer(
            ch_in=num_filters,
            ch_out=num_filters,
            filter_size=3,
            stride=stride,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            act="relu",
            name="{}_conv2".format(name))
        self.conv3 = ConvNormLayer(
            ch_in=num_filters,
            ch_out=expanded,
            filter_size=1,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            act=None,
            name="{}_conv3".format(name))

        if self.downsample:
            self.conv_down = ConvNormLayer(
                ch_in=num_channels,
                ch_out=expanded,
                filter_size=1,
                norm_decay=norm_decay,
                freeze_norm=freeze_norm,
                act=None,
                name="{}_downsample".format(name))

        if self.has_se:
            self.se = SELayer(
                num_channels=expanded,
                num_filters=expanded,
                reduction_ratio=16,
                name="fc{}".format(name))

    def forward(self, input):
        """Return ``relu(shortcut(input) + main_path(input))``."""
        main = self.conv3(self.conv2(self.conv1(input)))

        shortcut = self.conv_down(input) if self.downsample else input

        if self.has_se:
            main = self.se(main)

        return F.relu(paddle.add(x=shortcut, y=main))
||||
|
||||
|
||||
class BasicBlock(nn.Layer):
    """Residual block made of two 3x3 conv+norm layers (no channel expansion).

    The main path outputs ``num_filters`` channels. When ``downsample`` is
    True the shortcut is a 1x1 conv+norm that projects the input to the same
    channel count and stride as the main path; otherwise the input itself is
    added.

    Args:
        num_channels (int): input channels.
        num_filters (int): output channels of both convolutions.
        stride (int): stride of the first convolution.
        has_se (bool): apply an ``SELayer`` to the main path before the add.
        downsample (bool): use a projection shortcut.
        norm_decay (float): forwarded to every ``ConvNormLayer``.
        freeze_norm (bool): forwarded to every ``ConvNormLayer``.
        name (str|None): prefix for the names handed to the sub-layers.
    """

    def __init__(self,
                 num_channels,
                 num_filters,
                 stride=1,
                 has_se=False,
                 downsample=False,
                 norm_decay=0.,
                 freeze_norm=True,
                 name=None):
        super(BasicBlock, self).__init__()

        self.has_se = has_se
        self.downsample = downsample

        # str.format instead of `name + "..."`: same names for string
        # prefixes, and the default name=None no longer raises TypeError.
        self.conv1 = ConvNormLayer(
            ch_in=num_channels,
            ch_out=num_filters,
            filter_size=3,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            stride=stride,
            act="relu",
            name="{}_conv1".format(name))
        self.conv2 = ConvNormLayer(
            ch_in=num_filters,
            ch_out=num_filters,
            filter_size=3,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            stride=1,
            act=None,
            name="{}_conv2".format(name))

        if self.downsample:
            # The shortcut must match the main path, which ends with
            # `num_filters` channels and is strided by `stride` in conv1.
            # The previous `num_filters * 4` / stride-1 projection (a
            # left-over from the bottleneck block) could never be added to
            # conv2's output.
            self.conv_down = ConvNormLayer(
                ch_in=num_channels,
                ch_out=num_filters,
                filter_size=1,
                stride=stride,
                norm_decay=norm_decay,
                freeze_norm=freeze_norm,
                act=None,
                name="{}_downsample".format(name))

        if self.has_se:
            self.se = SELayer(
                num_channels=num_filters,
                num_filters=num_filters,
                reduction_ratio=16,
                name="fc{}".format(name))

    def forward(self, input):
        """Return ``relu(shortcut(input) + main_path(input))``."""
        main = self.conv2(self.conv1(input))

        shortcut = self.conv_down(input) if self.downsample else input

        if self.has_se:
            main = self.se(main)

        return F.relu(paddle.add(x=shortcut, y=main))
||||
|
||||
|
||||
class SELayer(nn.Layer):
    """Squeeze-and-excitation gate built from global pooling and two Linears.

    Args:
        num_channels (int): channels of the input feature map.
        num_filters (int): output size of the excitation Linear; the result
            is multiplied with the input, so it is presumably expected to
            equal ``num_channels`` (both callers in this file pass the same
            value).
        reduction_ratio (int|float): the hidden width is
            ``int(num_channels / reduction_ratio)``.
        name (str|None): accepted for interface compatibility; not read here.
    """

    def __init__(self, num_channels, num_filters, reduction_ratio, name=None):
        super(SELayer, self).__init__()

        self.pool2d_gap = AdaptiveAvgPool2D(1)
        self._num_channels = num_channels

        hidden = int(num_channels / reduction_ratio)

        # Each Linear is initialised uniformly in +-1/sqrt(fan_in).
        squeeze_bound = 1.0 / math.sqrt(num_channels * 1.0)
        squeeze_attr = ParamAttr(
            initializer=Uniform(-squeeze_bound, squeeze_bound))
        self.squeeze = Linear(num_channels, hidden, weight_attr=squeeze_attr)

        excite_bound = 1.0 / math.sqrt(hidden * 1.0)
        excite_attr = ParamAttr(
            initializer=Uniform(-excite_bound, excite_bound))
        self.excitation = Linear(hidden, num_filters, weight_attr=excite_attr)

    def forward(self, input):
        """Scale ``input`` by a sigmoid gate computed from its pooled values."""
        pooled = self.pool2d_gap(input)
        # Drop the two pooled axes for the Linear layers, restore them after.
        pooled = paddle.squeeze(pooled, axis=[2, 3])
        hidden = F.relu(self.squeeze(pooled))
        gate = F.sigmoid(self.excitation(hidden))
        gate = paddle.unsqueeze(gate, axis=[2, 3])
        return input * gate
||||
|
||||
|
||||
class Stage(nn.Layer):
    """Chain of ``num_modules`` ``HighResolutionModule`` layers.

    Every module is built with multi-scale output, except the last one when
    ``multi_scale_output`` is False.

    Args:
        num_channels (list[int]): forwarded to every module.
        num_modules (int): number of modules in the chain.
        num_filters (list[int]): forwarded to every module.
        has_se (bool): forwarded to every module.
        norm_decay (float): forwarded to every module.
        freeze_norm (bool): forwarded to every module.
        multi_scale_output (bool): whether the LAST module keeps all of its
            output branches.
        name (str|None): prefix used to build the sub-layer names.
    """

    def __init__(self,
                 num_channels,
                 num_modules,
                 num_filters,
                 has_se=False,
                 norm_decay=0.,
                 freeze_norm=True,
                 multi_scale_output=True,
                 name=None):
        super(Stage, self).__init__()

        self._num_modules = num_modules
        self.stage_func_list = []
        for i in range(num_modules):
            is_last = i == num_modules - 1
            # str.format keeps the names produced for string prefixes and
            # lets the default name=None work (the original `name + '_'`
            # raised TypeError).
            module_name = "{}_{}".format(name, i + 1)
            stage_func = self.add_sublayer(
                "stage_" + module_name,
                HighResolutionModule(
                    num_channels=num_channels,
                    num_filters=num_filters,
                    has_se=has_se,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    # Only the final module may be reduced to one output.
                    multi_scale_output=(not is_last) or
                    bool(multi_scale_output),
                    name=module_name))
            self.stage_func_list.append(stage_func)

    def forward(self, input):
        """Apply the modules in order; each consumes the previous output."""
        out = input
        for idx in range(self._num_modules):
            out = self.stage_func_list[idx](out)
        return out
||||
|
||||
|
||||
class HighResolutionModule(nn.Layer):
    """One HRNet module: per-branch blocks followed by cross-branch fusion.

    Args:
        num_channels (list[int]): input channels of each branch.
        num_filters (list[int]): output channels of each branch; also used as
            both the input and output widths of the fusion layer.
        has_se (bool): forwarded to ``Branches``.
        multi_scale_output (bool): forwarded to ``FuseLayers``.
        norm_decay (float): forwarded to both sub-layers.
        freeze_norm (bool): forwarded to both sub-layers.
        name (str|None): forwarded to both sub-layers.
    """

    def __init__(self,
                 num_channels,
                 num_filters,
                 has_se=False,
                 multi_scale_output=True,
                 norm_decay=0.,
                 freeze_norm=True,
                 name=None):
        super(HighResolutionModule, self).__init__()

        # Options shared by the two sub-layers.
        shared = dict(
            norm_decay=norm_decay, freeze_norm=freeze_norm, name=name)

        # Every branch runs four basic blocks.
        self.branches_func = Branches(
            block_num=4,
            in_channels=num_channels,
            out_channels=num_filters,
            has_se=has_se,
            **shared)

        self.fuse_func = FuseLayers(
            in_channels=num_filters,
            out_channels=num_filters,
            multi_scale_output=multi_scale_output,
            **shared)

    def forward(self, input):
        """Run the branch stacks, then fuse their outputs."""
        return self.fuse_func(self.branches_func(input))
||||
|
||||
|
||||
class FuseLayers(nn.Layer):
    """Fuse every input branch into every output branch.

    For output branch ``i`` and input branch ``j``:
      * ``j > i``: a 1x1 conv+norm maps branch ``j`` to ``out_channels[i]``;
        ``forward`` then upsamples it with ``F.interpolate`` by
        ``2 ** (j - i)``;
      * ``j < i``: a chain of ``i - j`` stride-2 3x3 conv+norm layers; all
        but the last keep ``out_channels[j]`` channels with ReLU, the last
        one emits ``out_channels[i]`` channels without activation;
      * ``j == i``: the input is used as is.
    All contributions are summed and passed through ReLU.

    ``residual_func_list`` is a flat list; ``forward`` walks it with a
    running index, so construction and use must iterate in the same order.

    Args:
        in_channels (list[int]): channels of each input branch.
        out_channels (list[int]): channels of each output branch.
        multi_scale_output (bool): if False only output branch 0 is built.
        norm_decay (float): forwarded to every ``ConvNormLayer``.
        freeze_norm (bool): forwarded to every ``ConvNormLayer``.
        name (str|None): prefix used to build the sub-layer names.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 multi_scale_output=True,
                 norm_decay=0.,
                 freeze_norm=True,
                 name=None):
        super(FuseLayers, self).__init__()

        self._actual_ch = len(in_channels) if multi_scale_output else 1
        self._in_channels = in_channels

        self.residual_func_list = []
        for i in range(self._actual_ch):
            for j in range(len(in_channels)):
                if j > i:
                    # str.format (not `name + ...`) so that the default
                    # name=None does not raise TypeError; names for string
                    # prefixes are unchanged.
                    layer_name = "{}_layer_{}_{}".format(name, i + 1, j + 1)
                    residual_func = self.add_sublayer(
                        "residual_" + layer_name,
                        ConvNormLayer(
                            ch_in=in_channels[j],
                            ch_out=out_channels[i],
                            filter_size=1,
                            stride=1,
                            act=None,
                            norm_decay=norm_decay,
                            freeze_norm=freeze_norm,
                            name=layer_name))
                    self.residual_func_list.append(residual_func)
                elif j < i:
                    pre_num_filters = in_channels[j]
                    steps = i - j
                    for k in range(steps):
                        is_last = k == steps - 1
                        # Only the final step changes the channel count and
                        # it carries no activation (ReLU follows the sum).
                        ch_out = out_channels[i] if is_last else out_channels[
                            j]
                        layer_name = "{}_layer_{}_{}_{}".format(
                            name, i + 1, j + 1, k + 1)
                        residual_func = self.add_sublayer(
                            "residual_" + layer_name,
                            ConvNormLayer(
                                ch_in=pre_num_filters,
                                ch_out=ch_out,
                                filter_size=3,
                                stride=2,
                                norm_decay=norm_decay,
                                freeze_norm=freeze_norm,
                                act=None if is_last else "relu",
                                name=layer_name))
                        pre_num_filters = ch_out
                        self.residual_func_list.append(residual_func)

    def forward(self, input):
        """Return the list of fused output branches.

        Args:
            input (list): one tensor per input branch.
        """
        outs = []
        # Cursor into the flat layer list; advances in construction order.
        residual_func_idx = 0
        for i in range(self._actual_ch):
            residual = input[i]
            for j in range(len(self._in_channels)):
                if j > i:
                    y = self.residual_func_list[residual_func_idx](input[j])
                    residual_func_idx += 1
                    y = F.interpolate(y, scale_factor=2**(j - i))
                    residual = paddle.add(x=residual, y=y)
                elif j < i:
                    y = input[j]
                    for _ in range(i - j):
                        y = self.residual_func_list[residual_func_idx](y)
                        residual_func_idx += 1
                    residual = paddle.add(x=residual, y=y)
            outs.append(F.relu(residual))

        return outs
||||
|
||||
|
||||
@register
class HRNet(nn.Layer):
    """
    HRNet, see https://arxiv.org/abs/1908.07919

    Args:
        width (int): the width of HRNet; must be one of the keys of
            ``self.channels`` (18, 30, 32, 40, 44, 48, 60, 64)
        has_se (bool): whether to add SE block for each stage
        freeze_at (int): index of the last-stage output whose tensor gets
            ``stop_gradient = True`` in ``forward``
        freeze_norm (bool): whether to freeze norm in HRNet
        norm_decay (float): weight decay for normalization layer weights
        return_idx (List|int): indices of the last-stage outputs to return
        upsample (bool): whether to upsample and concat the backbone feats
    """

    def __init__(self,
                 width=18,
                 has_se=False,
                 freeze_at=0,
                 freeze_norm=True,
                 norm_decay=0.,
                 return_idx=[0, 1, 2, 3],
                 upsample=False):
        super(HRNet, self).__init__()

        self.width = width
        self.has_se = has_se
        if isinstance(return_idx, Integral):
            return_idx = [return_idx]

        assert len(return_idx) > 0, "need one or more return index"
        self.freeze_at = freeze_at
        # Private copy: never alias the shared default-argument list.
        self.return_idx = list(return_idx)
        self.upsample = upsample

        self.channels = {
            18: [[18, 36], [18, 36, 72], [18, 36, 72, 144]],
            30: [[30, 60], [30, 60, 120], [30, 60, 120, 240]],
            32: [[32, 64], [32, 64, 128], [32, 64, 128, 256]],
            40: [[40, 80], [40, 80, 160], [40, 80, 160, 320]],
            44: [[44, 88], [44, 88, 176], [44, 88, 176, 352]],
            48: [[48, 96], [48, 96, 192], [48, 96, 192, 384]],
            60: [[60, 120], [60, 120, 240], [60, 120, 240, 480]],
            64: [[64, 128], [64, 128, 256], [64, 128, 256, 512]]
        }

        channels_2, channels_3, channels_4 = self.channels[width]
        num_modules_2, num_modules_3, num_modules_4 = 1, 4, 3
        self._out_channels = [sum(channels_4)] if self.upsample else channels_4
        self._out_strides = [4] if self.upsample else [4, 8, 16, 32]

        # The last stage may drop to a single output only when nothing but
        # branch 0 is consumed. Previously the test was just
        # `len(return_idx) > 1`, so a single non-zero index made forward()
        # return an empty list, and `upsample=True` with a single index
        # raised IndexError on st4[1].
        need_all_branches = (len(self.return_idx) > 1 or
                             any(idx != 0 for idx in self.return_idx) or
                             bool(upsample))

        # Stem: two stride-2 3x3 convolutions.
        self.conv_layer1_1 = ConvNormLayer(
            ch_in=3,
            ch_out=64,
            filter_size=3,
            stride=2,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            act='relu',
            name="layer1_1")

        self.conv_layer1_2 = ConvNormLayer(
            ch_in=64,
            ch_out=64,
            filter_size=3,
            stride=2,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            act='relu',
            name="layer1_2")

        self.la1 = Layer1(
            num_channels=64,
            has_se=has_se,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name="layer2")

        self.tr1 = TransitionLayer(
            in_channels=[256],
            out_channels=channels_2,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name="tr1")

        self.st2 = Stage(
            num_channels=channels_2,
            num_modules=num_modules_2,
            num_filters=channels_2,
            has_se=self.has_se,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name="st2")

        self.tr2 = TransitionLayer(
            in_channels=channels_2,
            out_channels=channels_3,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name="tr2")

        self.st3 = Stage(
            num_channels=channels_3,
            num_modules=num_modules_3,
            num_filters=channels_3,
            has_se=self.has_se,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name="st3")

        self.tr3 = TransitionLayer(
            in_channels=channels_3,
            out_channels=channels_4,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name="tr3")
        self.st4 = Stage(
            num_channels=channels_4,
            num_modules=num_modules_4,
            num_filters=channels_4,
            has_se=self.has_se,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            multi_scale_output=need_all_branches,
            name="st4")

    def forward(self, inputs):
        """Run the backbone on ``inputs['image']``.

        Returns:
            With ``upsample``: a single tensor, the four last-stage outputs
            resized to the spatial size of the first one and concatenated on
            axis 1. Otherwise: the list of last-stage outputs whose index is
            in ``return_idx``.
        """
        x = inputs['image']
        conv1 = self.conv_layer1_1(x)
        conv2 = self.conv_layer1_2(conv1)

        la1 = self.la1(conv2)
        tr1 = self.tr1([la1])
        st2 = self.st2(tr1)
        tr2 = self.tr2(st2)

        st3 = self.st3(tr2)
        tr3 = self.tr3(st3)

        st4 = self.st4(tr3)

        if self.upsample:
            # Upsampling
            x0_h, x0_w = st4[0].shape[2:4]
            x1 = F.upsample(st4[1], size=(x0_h, x0_w), mode='bilinear')
            x2 = F.upsample(st4[2], size=(x0_h, x0_w), mode='bilinear')
            x3 = F.upsample(st4[3], size=(x0_h, x0_w), mode='bilinear')
            x = paddle.concat([st4[0], x1, x2, x3], 1)
            return x

        res = []
        for i, layer in enumerate(st4):
            if i == self.freeze_at:
                layer.stop_gradient = True
            if i in self.return_idx:
                res.append(layer)

        return res

    @property
    def out_shape(self):
        """``ShapeSpec`` (channels, stride) of every tensor ``forward`` emits."""
        # With upsample there is exactly one (concatenated) output. Use a
        # local index list rather than overwriting self.return_idx: reading
        # a property must not change the layer's configuration.
        indices = [0] if self.upsample else self.return_idx
        return [
            ShapeSpec(
                channels=self._out_channels[i], stride=self._out_strides[i])
            for i in indices
        ]
@ -0,0 +1,259 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
import paddle |
||||
import paddle.nn as nn |
||||
from paddle import ParamAttr |
||||
from paddle.nn import AdaptiveAvgPool2D, Conv2D |
||||
from paddle.regularizer import L2Decay |
||||
from paddle.nn.initializer import KaimingNormal |
||||
|
||||
from paddlers.models.ppdet.core.workspace import register, serializable |
||||
from numbers import Integral |
||||
from ..shape_spec import ShapeSpec |
||||
|
||||
__all__ = ['LCNet'] |
||||
|
||||
# Per-stage block specifications consumed by LCNet.
# Each row is [k, in_c, out_c, s, use_se]:
#   k      - depthwise kernel size
#   in_c   - input channels (before width scaling)
#   out_c  - output channels (before width scaling)
#   s      - stride of the depthwise convolution
#   use_se - whether the block contains an SE module
NET_CONFIG = {
    "blocks2": [
        [3, 16, 32, 1, False],
    ],
    "blocks3": [
        [3, 32, 64, 2, False],
        [3, 64, 64, 1, False],
    ],
    "blocks4": [
        [3, 64, 128, 2, False],
        [3, 128, 128, 1, False],
    ],
    # One strided 3x3 block followed by five identical 5x5 blocks.
    "blocks5": [[3, 128, 256, 2, False]] +
    [[5, 256, 256, 1, False] for _ in range(5)],
    "blocks6": [
        [5, 256, 512, 2, True],
        [5, 512, 512, 1, True],
    ],
}
||||
|
||||
|
||||
def make_divisible(v, divisor=8, min_value=None):
    """Round ``v`` to a nearby multiple of ``divisor``.

    ``v`` is rounded to the nearest multiple (half rounds up), the result is
    clamped from below by ``min_value``, and one extra ``divisor`` is added
    if that result would be less than 90% of ``v``.

    Args:
        v (int|float): value to round.
        divisor (int): the multiple to round to.
        min_value (int|None): lower bound of the result; ``None`` means
            ``divisor``.

    Returns:
        The adjusted value.
    """
    lower_bound = divisor if min_value is None else min_value
    nearest_multiple = int(v + divisor / 2) // divisor * divisor
    result = max(lower_bound, nearest_multiple)
    # Never let rounding shrink the value by more than 10%.
    if result < 0.9 * v:
        return result + divisor
    return result
||||
|
||||
|
||||
class ConvBNLayer(nn.Layer):
    """Bias-free Conv2D -> BatchNorm2D -> Hardswish.

    Args:
        num_channels (int): input channels.
        filter_size (int): kernel size; padding is ``(filter_size - 1) // 2``.
        num_filters (int): output channels.
        stride (int): convolution stride.
        num_groups (int): convolution groups.
    """

    def __init__(self,
                 num_channels,
                 filter_size,
                 num_filters,
                 stride,
                 num_groups=1):
        super().__init__()

        same_padding = (filter_size - 1) // 2
        self.conv = Conv2D(
            in_channels=num_channels,
            out_channels=num_filters,
            kernel_size=filter_size,
            stride=stride,
            padding=same_padding,
            groups=num_groups,
            weight_attr=ParamAttr(initializer=KaimingNormal()),
            bias_attr=False)

        # Batch-norm scale and bias carry an L2 coefficient of 0.0.
        bn_scale_attr = ParamAttr(regularizer=L2Decay(0.0))
        bn_shift_attr = ParamAttr(regularizer=L2Decay(0.0))
        self.bn = nn.BatchNorm2D(
            num_filters, weight_attr=bn_scale_attr, bias_attr=bn_shift_attr)
        self.hardswish = nn.Hardswish()

    def forward(self, x):
        """Apply convolution, batch norm and hardswish in that order."""
        return self.hardswish(self.bn(self.conv(x)))
||||
|
||||
|
||||
class DepthwiseSeparable(nn.Layer):
    """Depthwise conv, optional SE module, then 1x1 pointwise conv.

    Args:
        num_channels (int): input channels (also the depthwise group count).
        num_filters (int): output channels of the pointwise convolution.
        stride (int): stride of the depthwise convolution.
        dw_size (int): kernel size of the depthwise convolution.
        use_se (bool): insert an ``SEModule`` between the two convolutions.
    """

    def __init__(self,
                 num_channels,
                 num_filters,
                 stride,
                 dw_size=3,
                 use_se=False):
        super().__init__()
        self.use_se = use_se

        # groups == channels makes this a depthwise convolution.
        self.dw_conv = ConvBNLayer(
            num_channels=num_channels,
            num_filters=num_channels,
            filter_size=dw_size,
            stride=stride,
            num_groups=num_channels)

        if use_se:
            self.se = SEModule(num_channels)

        self.pw_conv = ConvBNLayer(
            num_channels=num_channels,
            filter_size=1,
            num_filters=num_filters,
            stride=1)

    def forward(self, x):
        """Run depthwise -> (SE) -> pointwise."""
        feat = self.dw_conv(x)
        if self.use_se:
            feat = self.se(feat)
        return self.pw_conv(feat)
||||
|
||||
|
||||
class SEModule(nn.Layer):
    """Squeeze-and-excitation gate built from two 1x1 convolutions.

    Args:
        channel (int): channels of the input (and of the gate).
        reduction (int): the hidden width is ``channel // reduction``.
    """

    def __init__(self, channel, reduction=4):
        super().__init__()
        hidden = channel // reduction

        self.avg_pool = AdaptiveAvgPool2D(1)
        self.conv1 = Conv2D(
            in_channels=channel,
            out_channels=hidden,
            kernel_size=1,
            stride=1,
            padding=0)
        self.relu = nn.ReLU()
        self.conv2 = Conv2D(
            in_channels=hidden,
            out_channels=channel,
            kernel_size=1,
            stride=1,
            padding=0)
        self.hardsigmoid = nn.Hardsigmoid()

    def forward(self, x):
        """Multiply ``x`` by a hard-sigmoid gate derived from its pooled values."""
        gate = self.avg_pool(x)
        gate = self.relu(self.conv1(gate))
        gate = self.hardsigmoid(self.conv2(gate))
        return paddle.multiply(x=x, y=gate)
||||
|
||||
|
||||
@register
@serializable
class LCNet(nn.Layer):
    """LCNet backbone assembled from the block rows in ``NET_CONFIG``.

    Args:
        scale (float): width multiplier; every channel count is passed
            through ``make_divisible(c * scale)``.
        feature_maps (list[int]): ids of the stage outputs to return. Id 2
            is the output of ``blocks3`` and id 5 the output of ``blocks6``
            (the i-th collected output has id ``i + 2``).
    """

    def __init__(self, scale=1.0, feature_maps=[3, 4, 5]):
        super().__init__()
        self.scale = scale
        self.feature_maps = feature_maps

        def build_stage(key):
            # One DepthwiseSeparable per configuration row of the stage.
            return nn.Sequential(*[
                DepthwiseSeparable(
                    num_channels=make_divisible(in_c * scale),
                    num_filters=make_divisible(out_c * scale),
                    dw_size=k,
                    stride=s,
                    use_se=se) for k, in_c, out_c, s, se in NET_CONFIG[key]
            ])

        def stage_width(key):
            # Scaled output channels of the last block of the stage.
            return make_divisible(NET_CONFIG[key][-1][2] * scale)

        self.conv1 = ConvBNLayer(
            num_channels=3,
            filter_size=3,
            num_filters=make_divisible(16 * scale),
            stride=2)

        # Stages are created in network order, one attribute per stage.
        self.blocks2 = build_stage("blocks2")
        self.blocks3 = build_stage("blocks3")
        self.blocks4 = build_stage("blocks4")
        self.blocks5 = build_stage("blocks5")
        self.blocks6 = build_stage("blocks6")

        # blocks2 is never exposed, so it contributes no output width.
        out_channels = [
            stage_width(key)
            for key in ("blocks3", "blocks4", "blocks5", "blocks6")
        ]
        self._out_channels = [
            ch for idx, ch in enumerate(out_channels)
            if idx + 2 in feature_maps
        ]

    def forward(self, inputs):
        """Return the selected stage outputs for ``inputs['image']``."""
        x = self.blocks2(self.conv1(inputs['image']))

        collected = []
        for stage in (self.blocks3, self.blocks4, self.blocks5, self.blocks6):
            x = stage(x)
            collected.append(x)

        return [
            feat for i, feat in enumerate(collected)
            if i + 2 in self.feature_maps
        ]

    @property
    def out_shape(self):
        """``ShapeSpec`` (channels only) of every returned feature map."""
        return [ShapeSpec(channels=c) for c in self._out_channels]
@ -0,0 +1,886 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
""" |
||||
This code is based on |
||||
https://github.com/HRNet/Lite-HRNet/blob/hrnet/models/backbones/litehrnet.py |
||||
""" |
||||
|
||||
import paddle |
||||
import paddle.nn as nn |
||||
import paddle.nn.functional as F |
||||
|
||||
from numbers import Integral |
||||
from paddle import ParamAttr |
||||
from paddle.regularizer import L2Decay |
||||
from paddle.nn.initializer import Normal, Constant |
||||
from paddlers.models.ppdet.core.workspace import register |
||||
from paddlers.models.ppdet.modeling.shape_spec import ShapeSpec |
||||
from paddlers.models.ppdet.modeling.ops import channel_shuffle |
||||
from .. import layers as L |
||||
|
||||
__all__ = ['LiteHRNet'] |
||||
|
||||
|
||||
class ConvNormLayer(nn.Layer):
    """2D convolution with an optional norm layer and optional activation.

    When ``norm_type`` is None no norm layer is built and the convolution
    gets a bias; otherwise the convolution is bias-free and is followed by
    the requested norm.

    Args:
        ch_in (int): input channels.
        ch_out (int): output channels.
        filter_size (int): kernel size; padding is ``(filter_size - 1) // 2``.
        stride (int): convolution stride.
        groups (int): convolution groups.
        norm_type (str|None): None, 'bn', 'sync_bn' (both build
            ``nn.BatchNorm2D``) or 'gn' (builds ``nn.GroupNorm``).
        norm_groups (int): number of groups, only read for 'gn'.
        norm_decay (float): L2 decay coefficient for the norm scale and bias.
        freeze_norm (bool): if True the norm parameters get learning rate 0
            and ``stop_gradient = True``, and batch norm is asked to use
            global statistics. Ignored when ``norm_type`` is None.
        act (str|None): 'relu' or 'sigmoid'; any other value means no
            activation.
    """

    def __init__(self,
                 ch_in,
                 ch_out,
                 filter_size,
                 stride=1,
                 groups=1,
                 norm_type=None,
                 norm_groups=32,
                 norm_decay=0.,
                 freeze_norm=False,
                 act=None):
        super(ConvNormLayer, self).__init__()
        self.act = act
        lr = 0. if freeze_norm else 1.

        if norm_type is None:
            # Without a norm layer the convolution keeps its own bias.
            use_conv_bias = True
            self.norm = None
        else:
            assert norm_type in ['bn', 'sync_bn', 'gn'], \
                "norm_type should be one of ['bn', 'sync_bn', 'gn'], but got {}".format(norm_type)
            scale_attr = ParamAttr(
                initializer=Constant(1.0),
                learning_rate=lr,
                regularizer=L2Decay(norm_decay), )
            shift_attr = ParamAttr(
                learning_rate=lr, regularizer=L2Decay(norm_decay))
            use_global_stats = True if freeze_norm else None
            if norm_type == 'gn':
                self.norm = nn.GroupNorm(
                    num_groups=norm_groups,
                    num_channels=ch_out,
                    weight_attr=scale_attr,
                    bias_attr=shift_attr)
            elif norm_type in ['bn', 'sync_bn']:
                self.norm = nn.BatchNorm2D(
                    ch_out,
                    weight_attr=scale_attr,
                    bias_attr=shift_attr,
                    use_global_stats=use_global_stats, )
            frozen_candidates = self.norm.parameters()
            if freeze_norm:
                for p in frozen_candidates:
                    p.stop_gradient = True
            use_conv_bias = False

        # Built after the norm layer, as in the original creation order.
        self.conv = nn.Conv2D(
            in_channels=ch_in,
            out_channels=ch_out,
            kernel_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(initializer=Normal(
                mean=0., std=0.001)),
            bias_attr=use_conv_bias)

    def forward(self, inputs):
        """Run conv, the norm layer if present, then the activation if any."""
        out = self.conv(inputs)
        if self.norm is not None:
            out = self.norm(out)

        if self.act == 'relu':
            return F.relu(out)
        if self.act == 'sigmoid':
            return F.sigmoid(out)
        return out
||||
|
||||
|
||||
class DepthWiseSeparableConvNormLayer(nn.Layer):
    """Depthwise ``ConvNormLayer`` followed by a 1x1 pointwise ``ConvNormLayer``.

    Args:
        ch_in (int): input channels (also the depthwise group count).
        ch_out (int): output channels of the pointwise convolution.
        filter_size (int): kernel size of the depthwise convolution.
        stride (int): stride of the depthwise convolution.
        dw_norm_type (str|None): norm type of the depthwise layer.
        pw_norm_type (str|None): norm type of the pointwise layer.
        norm_decay (float): forwarded to both layers.
        freeze_norm (bool): forwarded to both layers.
        dw_act (str|None): activation of the depthwise layer.
        pw_act (str|None): activation of the pointwise layer.
    """

    def __init__(self,
                 ch_in,
                 ch_out,
                 filter_size,
                 stride=1,
                 dw_norm_type=None,
                 pw_norm_type=None,
                 norm_decay=0.,
                 freeze_norm=False,
                 dw_act=None,
                 pw_act=None):
        super(DepthWiseSeparableConvNormLayer, self).__init__()

        # Options common to the two convolutions.
        norm_opts = dict(norm_decay=norm_decay, freeze_norm=freeze_norm)

        # groups == ch_in: every channel is filtered on its own.
        self.depthwise_conv = ConvNormLayer(
            ch_in=ch_in,
            ch_out=ch_in,
            filter_size=filter_size,
            stride=stride,
            groups=ch_in,
            norm_type=dw_norm_type,
            act=dw_act,
            **norm_opts)
        self.pointwise_conv = ConvNormLayer(
            ch_in=ch_in,
            ch_out=ch_out,
            filter_size=1,
            stride=1,
            norm_type=pw_norm_type,
            act=pw_act,
            **norm_opts)

    def forward(self, x):
        """Apply the depthwise then the pointwise convolution."""
        return self.pointwise_conv(self.depthwise_conv(x))
||||
|
||||
|
||||
class CrossResolutionWeightingModule(nn.Layer):
    """Compute per-branch gates from all branches jointly and apply them.

    Args:
        channels (list[int]): channels of each input branch; also the split
            sizes used to hand the joint gate back to the branches.
        ratio (int): the hidden width is ``sum(channels) // ratio``.
        norm_type (str|None): norm type of both 1x1 convolutions.
        freeze_norm (bool): forwarded to both convolutions.
        norm_decay (float): forwarded to both convolutions.
    """

    def __init__(self,
                 channels,
                 ratio=16,
                 norm_type='bn',
                 freeze_norm=False,
                 norm_decay=0.):
        super(CrossResolutionWeightingModule, self).__init__()
        self.channels = channels
        total_channel = sum(channels)
        hidden_channel = total_channel // ratio

        self.conv1 = ConvNormLayer(
            ch_in=total_channel,
            ch_out=hidden_channel,
            filter_size=1,
            stride=1,
            norm_type=norm_type,
            act='relu',
            freeze_norm=freeze_norm,
            norm_decay=norm_decay)
        self.conv2 = ConvNormLayer(
            ch_in=hidden_channel,
            ch_out=total_channel,
            filter_size=1,
            stride=1,
            norm_type=norm_type,
            act='sigmoid',
            freeze_norm=freeze_norm,
            norm_decay=norm_decay)

    def forward(self, x):
        """Return the branches of ``x`` multiplied by their gates.

        Args:
            x (list): one tensor per branch; every branch but the last is
                pooled to the spatial size of the last one.
        """
        smallest_hw = x[-1].shape[-2:]
        pooled = [F.adaptive_avg_pool2d(feat, smallest_hw)
                  for feat in x[:-1]] + [x[-1]]
        gates = self.conv2(self.conv1(paddle.concat(pooled, 1)))
        gates = paddle.split(gates, self.channels, 1)
        # Each gate is resized back to its branch resolution before use.
        weighted = []
        for feat, gate in zip(x, gates):
            resized = F.interpolate(gate, feat.shape[-2:], mode='nearest')
            weighted.append(feat * resized)
        return weighted
||||
|
||||
|
||||
class SpatialWeightingModule(nn.Layer):
    """Gate a feature map with weights computed from its global average.

    Both 1x1 ``ConvNormLayer`` layers are built without ``norm_type``.

    Args:
        in_channel (int): channels of the input (and of the gate).
        ratio (int): the hidden width is ``in_channel // ratio``.
        freeze_norm (bool): forwarded to both convolutions.
        norm_decay (float): forwarded to both convolutions.
    """

    def __init__(self, in_channel, ratio=16, freeze_norm=False, norm_decay=0.):
        super(SpatialWeightingModule, self).__init__()
        hidden_channel = in_channel // ratio

        self.global_avgpooling = nn.AdaptiveAvgPool2D(1)
        self.conv1 = ConvNormLayer(
            ch_in=in_channel,
            ch_out=hidden_channel,
            filter_size=1,
            stride=1,
            act='relu',
            freeze_norm=freeze_norm,
            norm_decay=norm_decay)
        self.conv2 = ConvNormLayer(
            ch_in=hidden_channel,
            ch_out=in_channel,
            filter_size=1,
            stride=1,
            act='sigmoid',
            freeze_norm=freeze_norm,
            norm_decay=norm_decay)

    def forward(self, x):
        """Return ``x`` multiplied by its sigmoid gate."""
        gate = self.conv2(self.conv1(self.global_avgpooling(x)))
        return x * gate
||||
|
||||
|
||||
class ConditionalChannelWeightingBlock(nn.Layer):
    """Shuffle-style block operating on a list of multi-resolution features.

    Each input is split in two along the channel axis. The first half is
    passed through untouched; the second half goes through cross-resolution
    weighting, a 3x3 depthwise ``ConvNormLayer`` and spatial weighting. Both
    halves are then concatenated and channel-shuffled with two groups.

    Args:
        in_channels (list[int]): Channel count of each branch.
        stride (int): Stride of the depthwise convolutions; must be 1 or 2.
        reduce_ratio (int): Reduction ratio of the cross-resolution weighting.
        norm_type (str): Normalization type forwarded to the sub-layers.
        freeze_norm (bool): Forwarded to the sub-layers.
        norm_decay (float): Forwarded to the sub-layers.
    """

    def __init__(self,
                 in_channels,
                 stride,
                 reduce_ratio,
                 norm_type='bn',
                 freeze_norm=False,
                 norm_decay=0.):
        super().__init__()
        assert stride in [1, 2]
        # Only half of every branch's channels is processed.
        half_widths = [width // 2 for width in in_channels]

        self.cross_resolution_weighting = CrossResolutionWeightingModule(
            half_widths,
            ratio=reduce_ratio,
            norm_type=norm_type,
            freeze_norm=freeze_norm,
            norm_decay=norm_decay)

        depthwise = []
        for width in half_widths:
            depthwise.append(
                ConvNormLayer(
                    width,
                    width,
                    filter_size=3,
                    stride=stride,
                    groups=width,
                    norm_type=norm_type,
                    freeze_norm=freeze_norm,
                    norm_decay=norm_decay))
        self.depthwise_convs = nn.LayerList(depthwise)

        spatial = []
        for width in half_widths:
            spatial.append(
                SpatialWeightingModule(
                    width,
                    ratio=4,
                    freeze_norm=freeze_norm,
                    norm_decay=norm_decay))
        self.spatial_weighting = nn.LayerList(spatial)

    def forward(self, x):
        """Process a list of branch tensors and return a list of equal length."""
        halves = [feat.chunk(2, axis=1) for feat in x]
        untouched = [pair[0] for pair in halves]
        processed = [pair[1] for pair in halves]

        processed = self.cross_resolution_weighting(processed)
        processed = [
            conv(feat) for feat, conv in zip(processed, self.depthwise_convs)
        ]
        processed = [
            gate(feat) for feat, gate in zip(processed, self.spatial_weighting)
        ]

        merged = []
        for kept, worked in zip(untouched, processed):
            joined = paddle.concat([kept, worked], axis=1)
            merged.append(channel_shuffle(joined, groups=2))
        return merged
||||
|
||||
|
||||
class ShuffleUnit(nn.Layer):
    """Two-branch shuffle unit built from ``ConvNormLayer`` blocks.

    With ``stride == 1`` the input is split in two along the channel axis and
    only the second half is transformed by ``branch2``. With ``stride > 1``
    the whole input is fed to both ``branch1`` and ``branch2``. The two
    results are concatenated and channel-shuffled with two groups.

    Args:
        in_channel (int): Input channel count.
        out_channel (int): Output channel count; each branch yields half.
        stride (int): Stride of the depthwise convolutions.
        norm_type (str): Normalization type forwarded to ``ConvNormLayer``.
        freeze_norm (bool): Forwarded to ``ConvNormLayer``.
        norm_decay (float): Forwarded to ``ConvNormLayer``.
    """

    def __init__(self,
                 in_channel,
                 out_channel,
                 stride,
                 norm_type='bn',
                 freeze_norm=False,
                 norm_decay=0.):
        super().__init__()
        branch_channel = out_channel // 2
        self.stride = stride
        if self.stride == 1:
            assert in_channel == branch_channel * 2, \
                "when stride=1, in_channel {} should equal to branch_channel*2 {}".format(in_channel, branch_channel * 2)

        norm_cfg = dict(
            norm_type=norm_type,
            freeze_norm=freeze_norm,
            norm_decay=norm_decay)

        if stride > 1:
            # Down-sampling path for the shortcut: depthwise 3x3, then 1x1.
            self.branch1 = nn.Sequential(
                ConvNormLayer(
                    ch_in=in_channel,
                    ch_out=in_channel,
                    filter_size=3,
                    stride=self.stride,
                    groups=in_channel,
                    **norm_cfg),
                ConvNormLayer(
                    ch_in=in_channel,
                    ch_out=branch_channel,
                    filter_size=1,
                    stride=1,
                    act='relu',
                    **norm_cfg), )

        # With stride 1 branch2 only sees half of the input channels.
        branch2_in = branch_channel if stride == 1 else in_channel
        self.branch2 = nn.Sequential(
            ConvNormLayer(
                ch_in=branch2_in,
                ch_out=branch_channel,
                filter_size=1,
                stride=1,
                act='relu',
                **norm_cfg),
            ConvNormLayer(
                ch_in=branch_channel,
                ch_out=branch_channel,
                filter_size=3,
                stride=self.stride,
                groups=branch_channel,
                **norm_cfg),
            ConvNormLayer(
                ch_in=branch_channel,
                ch_out=branch_channel,
                filter_size=1,
                stride=1,
                act='relu',
                **norm_cfg), )

    def forward(self, x):
        """Apply the unit to ``x`` and return the shuffled concatenation."""
        if self.stride > 1:
            shortcut = self.branch1(x)
            residual = self.branch2(x)
        else:
            shortcut, residual = x.chunk(2, axis=1)
            residual = self.branch2(residual)
        joined = paddle.concat([shortcut, residual], axis=1)
        return channel_shuffle(joined, groups=2)
||||
|
||||
|
||||
class IterativeHead(nn.Layer):
    """Fuse branch features iteratively, starting from the last branch.

    The branch list is processed in reverse order. Each branch is summed with
    the bilinearly resized result of the previous step and projected by a
    ``DepthWiseSeparableConvNormLayer``. Every projection maps to the channel
    count of the next branch in reversed order, except the final one, which
    keeps its own width.

    Args:
        in_channels (list[int]): Channel count of each input branch.
        norm_type (str): Normalization type for both depthwise and pointwise
            parts of every projection.
        freeze_norm (bool): Forwarded to the projections.
        norm_decay (float): Forwarded to the projections.
    """

    def __init__(self,
                 in_channels,
                 norm_type='bn',
                 freeze_norm=False,
                 norm_decay=0.):
        super().__init__()
        num_branches = len(in_channels)
        self.in_channels = in_channels[::-1]

        projects = []
        for idx in range(num_branches):
            is_last = idx == num_branches - 1
            # The final projection has no successor and keeps its width.
            target = self.in_channels[idx] if is_last \
                else self.in_channels[idx + 1]
            projects.append(
                DepthWiseSeparableConvNormLayer(
                    ch_in=self.in_channels[idx],
                    ch_out=target,
                    filter_size=3,
                    stride=1,
                    dw_act=None,
                    pw_act='relu',
                    dw_norm_type=norm_type,
                    pw_norm_type=norm_type,
                    freeze_norm=freeze_norm,
                    norm_decay=norm_decay))
        self.projects = nn.LayerList(projects)

    def forward(self, x):
        """Return the fused features in the same branch order as ``x``."""
        fused = []
        carried = None
        for idx, feat in enumerate(x[::-1]):
            if carried is not None:
                carried = F.interpolate(
                    carried,
                    size=feat.shape[-2:],
                    mode='bilinear',
                    align_corners=True)
                feat = feat + carried
            feat = self.projects[idx](feat)
            fused.append(feat)
            carried = feat

        return fused[::-1]
||||
|
||||
|
||||
class Stem(nn.Layer):
    """Input stem: a stride-2 3x3 convolution followed by a two-branch unit.

    After ``conv1`` the tensor is split in two along the channel axis. The
    first half goes through ``branch1`` (stride-2 depthwise 3x3, then 1x1).
    The second half goes through ``expand_conv`` (1x1), ``depthwise_conv``
    (stride-2 depthwise 3x3) and ``linear_conv`` (1x1). Both results are
    concatenated and channel-shuffled with two groups.

    Args:
        in_channel (int): Channel count of the input image tensor.
        stem_channel (int): Output channels of ``conv1``.
        out_channel (int): Channel count used to size the two branches.
        expand_ratio (float): Multiplier for the hidden width of the second
            branch (``round(stem_channel * expand_ratio)``).
        norm_type (str): Normalization type forwarded to ``ConvNormLayer``.
        freeze_norm (bool): Forwarded to ``ConvNormLayer``.
        norm_decay (float): Forwarded to ``ConvNormLayer``.
    """

    def __init__(self,
                 in_channel,
                 stem_channel,
                 out_channel,
                 expand_ratio,
                 norm_type='bn',
                 freeze_norm=False,
                 norm_decay=0.):
        super().__init__()
        norm_cfg = dict(
            norm_type=norm_type,
            freeze_norm=freeze_norm,
            norm_decay=norm_decay)

        self.conv1 = ConvNormLayer(
            in_channel,
            stem_channel,
            filter_size=3,
            stride=2,
            act='relu',
            **norm_cfg)

        mid_channel = int(round(stem_channel * expand_ratio))
        branch_channel = stem_channel // 2
        same_width = stem_channel == out_channel
        # Output widths of the two branches depend on whether the stem keeps
        # its channel count.
        inc_channel = out_channel - (branch_channel
                                     if same_width else stem_channel)
        linear_out = branch_channel if same_width else stem_channel

        self.branch1 = nn.Sequential(
            ConvNormLayer(
                ch_in=branch_channel,
                ch_out=branch_channel,
                filter_size=3,
                stride=2,
                groups=branch_channel,
                **norm_cfg),
            ConvNormLayer(
                ch_in=branch_channel,
                ch_out=inc_channel,
                filter_size=1,
                stride=1,
                act='relu',
                **norm_cfg), )
        self.expand_conv = ConvNormLayer(
            ch_in=branch_channel,
            ch_out=mid_channel,
            filter_size=1,
            stride=1,
            act='relu',
            **norm_cfg)
        self.depthwise_conv = ConvNormLayer(
            ch_in=mid_channel,
            ch_out=mid_channel,
            filter_size=3,
            stride=2,
            groups=mid_channel,
            **norm_cfg)
        self.linear_conv = ConvNormLayer(
            ch_in=mid_channel,
            ch_out=linear_out,
            filter_size=1,
            stride=1,
            act='relu',
            **norm_cfg)

    def forward(self, x):
        """Run the stem on ``x`` and return the shuffled feature map."""
        stem = self.conv1(x)
        first, second = stem.chunk(2, axis=1)
        first = self.branch1(first)
        second = self.linear_conv(
            self.depthwise_conv(self.expand_conv(second)))
        joined = paddle.concat([first, second], axis=1)
        return channel_shuffle(joined, groups=2)
||||
|
||||
|
||||
class LiteHRNetModule(nn.Layer):
    """One multi-branch module: per-branch blocks plus optional fusion.

    Args:
        num_branches (int): Number of resolution branches; must equal
            ``len(in_channels)``.
        num_blocks (int): Number of blocks stacked in the module.
        in_channels (list[int]): Channel count of each branch.
        reduce_ratio (int): Reduction ratio for the 'LITE' weighting blocks.
        module_type (str): 'LITE' (``ConditionalChannelWeightingBlock``) or
            'NAIVE' (``ShuffleUnit`` per branch).
        multiscale_output (bool): When False only one fused output is built.
        with_fuse (bool): Whether to build and apply the fuse layers.
        norm_type (str): Normalization type forwarded to the blocks. The fuse
            layers always use ``nn.BatchNorm2D`` regardless of this value.
        freeze_norm (bool): Forwarded to the blocks.
        norm_decay (float): Forwarded to the blocks.
    """

    def __init__(self,
                 num_branches,
                 num_blocks,
                 in_channels,
                 reduce_ratio,
                 module_type,
                 multiscale_output=False,
                 with_fuse=True,
                 norm_type='bn',
                 freeze_norm=False,
                 norm_decay=0.):
        super(LiteHRNetModule, self).__init__()
        assert num_branches == len(in_channels),\
            "num_branches {} should equal to num_in_channels {}".format(num_branches, len(in_channels))
        assert module_type in [
            'LITE', 'NAIVE'
        ], "module_type should be one of ['LITE', 'NAIVE']"
        self.num_branches = num_branches
        self.in_channels = in_channels
        self.multiscale_output = multiscale_output
        self.with_fuse = with_fuse
        # Honour the caller's choice; this used to be hard-coded to 'bn',
        # which silently discarded the ``norm_type`` argument.
        self.norm_type = norm_type
        self.module_type = module_type

        if self.module_type == 'LITE':
            self.layers = self._make_weighting_blocks(
                num_blocks,
                reduce_ratio,
                freeze_norm=freeze_norm,
                norm_decay=norm_decay)
        elif self.module_type == 'NAIVE':
            self.layers = self._make_naive_branches(
                num_branches,
                num_blocks,
                freeze_norm=freeze_norm,
                norm_decay=norm_decay)

        if self.with_fuse:
            self.fuse_layers = self._make_fuse_layers(
                freeze_norm=freeze_norm, norm_decay=norm_decay)
            self.relu = nn.ReLU()

    def _make_weighting_blocks(self,
                               num_blocks,
                               reduce_ratio,
                               stride=1,
                               freeze_norm=False,
                               norm_decay=0.):
        """Stack ``num_blocks`` conditional channel weighting blocks."""
        layers = []
        for i in range(num_blocks):
            layers.append(
                ConditionalChannelWeightingBlock(
                    self.in_channels,
                    stride=stride,
                    reduce_ratio=reduce_ratio,
                    norm_type=self.norm_type,
                    freeze_norm=freeze_norm,
                    norm_decay=norm_decay))
        return nn.Sequential(*layers)

    def _make_naive_branches(self,
                             num_branches,
                             num_blocks,
                             freeze_norm=False,
                             norm_decay=0.):
        """Build one sequence of ``num_blocks`` shuffle units per branch."""
        branches = []
        for branch_idx in range(num_branches):
            layers = []
            for i in range(num_blocks):
                layers.append(
                    ShuffleUnit(
                        self.in_channels[branch_idx],
                        self.in_channels[branch_idx],
                        stride=1,
                        norm_type=self.norm_type,
                        freeze_norm=freeze_norm,
                        norm_decay=norm_decay))
            branches.append(nn.Sequential(*layers))
        return nn.LayerList(branches)

    def _make_fuse_layers(self, freeze_norm=False, norm_decay=0.):
        """Build the branch-to-branch fuse layers.

        Entry ``[i][j]`` converts branch ``j`` to the shape of branch ``i``:
        a 1x1 convolution plus nearest upsampling when ``j > i``, ``None``
        when ``j == i``, and a chain of stride-2 depthwise-separable
        convolutions when ``j < i``. Returns ``None`` for a single branch.
        ``freeze_norm`` and ``norm_decay`` are accepted but not used here.
        """
        if self.num_branches == 1:
            return None
        fuse_layers = []
        num_out_branches = self.num_branches if self.multiscale_output else 1
        for i in range(num_out_branches):
            fuse_layer = []
            for j in range(self.num_branches):
                if j > i:
                    fuse_layer.append(
                        nn.Sequential(
                            L.Conv2d(
                                self.in_channels[j],
                                self.in_channels[i],
                                kernel_size=1,
                                stride=1,
                                padding=0,
                                bias=False, ),
                            nn.BatchNorm2D(self.in_channels[i]),
                            nn.Upsample(
                                scale_factor=2**(j - i), mode='nearest')))
                elif j == i:
                    fuse_layer.append(None)
                else:
                    conv_downsamples = []
                    for k in range(i - j):
                        if k == i - j - 1:
                            # Last step: switch to the target branch width,
                            # no activation.
                            conv_downsamples.append(
                                nn.Sequential(
                                    L.Conv2d(
                                        self.in_channels[j],
                                        self.in_channels[j],
                                        kernel_size=3,
                                        stride=2,
                                        padding=1,
                                        groups=self.in_channels[j],
                                        bias=False, ),
                                    nn.BatchNorm2D(self.in_channels[j]),
                                    L.Conv2d(
                                        self.in_channels[j],
                                        self.in_channels[i],
                                        kernel_size=1,
                                        stride=1,
                                        padding=0,
                                        bias=False, ),
                                    nn.BatchNorm2D(self.in_channels[i])))
                        else:
                            # Intermediate step: keep the source width and
                            # finish with a ReLU.
                            conv_downsamples.append(
                                nn.Sequential(
                                    L.Conv2d(
                                        self.in_channels[j],
                                        self.in_channels[j],
                                        kernel_size=3,
                                        stride=2,
                                        padding=1,
                                        groups=self.in_channels[j],
                                        bias=False, ),
                                    nn.BatchNorm2D(self.in_channels[j]),
                                    L.Conv2d(
                                        self.in_channels[j],
                                        self.in_channels[j],
                                        kernel_size=1,
                                        stride=1,
                                        padding=0,
                                        bias=False, ),
                                    nn.BatchNorm2D(self.in_channels[j]),
                                    nn.ReLU()))

                    fuse_layer.append(nn.Sequential(*conv_downsamples))
            fuse_layers.append(nn.LayerList(fuse_layer))

        return nn.LayerList(fuse_layers)

    def forward(self, x):
        """Run the per-branch blocks and, if enabled, fuse the branches.

        Args:
            x (list[Tensor]): One feature map per branch. In 'NAIVE' mode the
                list is updated in place with the branch outputs.

        Returns:
            list[Tensor]: Fused outputs (one per fuse layer), or the branch
            outputs when fusion is disabled.
        """
        if self.num_branches == 1:
            return [self.layers[0](x[0])]
        if self.module_type == 'LITE':
            out = self.layers(x)
        elif self.module_type == 'NAIVE':
            for i in range(self.num_branches):
                x[i] = self.layers[i](x[i])
            out = x
        if self.with_fuse:
            out_fuse = []
            for i in range(len(self.fuse_layers)):
                y = out[0] if i == 0 else self.fuse_layers[i][0](out[0])
                for j in range(self.num_branches):
                    if j == 0:
                        # NOTE(review): the accumulator is added to itself, so
                        # the branch-0 term is doubled. This looks deliberate
                        # (changing it would alter results of trained
                        # weights) -- confirm before touching.
                        y += y
                    elif i == j:
                        y += out[j]
                    else:
                        y += self.fuse_layers[i][j](out[j])
                if i == 0:
                    # Later outputs are computed from the fused branch 0.
                    out[i] = y
                out_fuse.append(self.relu(y))
            out = out_fuse
        elif not self.multiscale_output:
            out = [out[0]]
        return out
||||
|
||||
|
||||
@register
class LiteHRNet(nn.Layer):
    """
    @inproceedings{Yulitehrnet21,
    title={Lite-HRNet: A Lightweight High-Resolution Network},
        author={Yu, Changqian and Xiao, Bin and Gao, Changxin and Yuan, Lu and Zhang, Lei and Sang, Nong and Wang, Jingdong},
        booktitle={CVPR},year={2021}
    }
    Args:
        network_type (str): the network_type should be one of ["lite_18", "lite_30", "naive", "wider_naive"],
            "naive": Simply combining the shuffle block in ShuffleNet and the high-resolution design pattern in HRNet.
            "wider_naive": Naive network with wider channels in each block.
            "lite_18": Lite-HRNet-18, which replaces the pointwise convolution in a shuffle block by conditional channel weighting.
            "lite_30": Lite-HRNet-30, with more blocks compared with Lite-HRNet-18.
        freeze_at (int): index of the head output whose ``stop_gradient`` is set to True
        freeze_norm (bool): whether to freeze norm in HRNet
        norm_decay (float): weight decay for normalization layer weights
        return_idx (List): indices of the head outputs to return; a single int is also accepted
    """

    def __init__(self,
                 network_type,
                 freeze_at=0,
                 freeze_norm=True,
                 norm_decay=0.,
                 return_idx=[0, 1, 2, 3]):
        super(LiteHRNet, self).__init__()
        if isinstance(return_idx, Integral):
            return_idx = [return_idx]
        assert network_type in ["lite_18", "lite_30", "naive", "wider_naive"], \
            "the network_type should be one of [lite_18, lite_30, naive, wider_naive]"
        assert len(return_idx) > 0, "need one or more return index"
        self.freeze_at = freeze_at
        self.freeze_norm = freeze_norm
        self.norm_decay = norm_decay
        self.return_idx = return_idx
        self.norm_type = 'bn'

        self.module_configs = {
            "lite_18": {
                "num_modules": [2, 4, 2],
                "num_branches": [2, 3, 4],
                "num_blocks": [2, 2, 2],
                "module_type": ["LITE", "LITE", "LITE"],
                "reduce_ratios": [8, 8, 8],
                "num_channels": [[40, 80], [40, 80, 160], [40, 80, 160, 320]],
            },
            "lite_30": {
                "num_modules": [3, 8, 3],
                "num_branches": [2, 3, 4],
                "num_blocks": [2, 2, 2],
                "module_type": ["LITE", "LITE", "LITE"],
                "reduce_ratios": [8, 8, 8],
                "num_channels": [[40, 80], [40, 80, 160], [40, 80, 160, 320]],
            },
            "naive": {
                "num_modules": [2, 4, 2],
                "num_branches": [2, 3, 4],
                "num_blocks": [2, 2, 2],
                "module_type": ["NAIVE", "NAIVE", "NAIVE"],
                "reduce_ratios": [1, 1, 1],
                "num_channels": [[30, 60], [30, 60, 120], [30, 60, 120, 240]],
            },
            "wider_naive": {
                "num_modules": [2, 4, 2],
                "num_branches": [2, 3, 4],
                "num_blocks": [2, 2, 2],
                "module_type": ["NAIVE", "NAIVE", "NAIVE"],
                "reduce_ratios": [1, 1, 1],
                "num_channels": [[40, 80], [40, 80, 160], [40, 80, 160, 320]],
            },
        }

        self.stages_config = self.module_configs[network_type]

        self.stem = Stem(3, 32, 32, 1)
        num_channels_pre_layer = [32]
        for stage_idx in range(3):
            num_channels = self.stages_config["num_channels"][stage_idx]
            setattr(self, 'transition{}'.format(stage_idx),
                    self._make_transition_layer(num_channels_pre_layer,
                                                num_channels, self.freeze_norm,
                                                self.norm_decay))
            stage, num_channels_pre_layer = self._make_stage(
                self.stages_config, stage_idx, num_channels, True,
                self.freeze_norm, self.norm_decay)
            setattr(self, 'stage{}'.format(stage_idx), stage)
        self.head_layer = IterativeHead(num_channels_pre_layer, 'bn',
                                        self.freeze_norm, self.norm_decay)

        # Metadata read by ``out_shape``; previously never assigned, so the
        # property raised AttributeError.
        # IterativeHead projects every branch onto the width of the next
        # finer branch, while the finest branch keeps its own width.
        final_channels = list(num_channels_pre_layer)
        self._out_channels = final_channels[:1] + final_channels[:-1]
        # The stem applies two stride-2 stages and every transition adds one
        # stride-2 step per new branch; the head keeps spatial sizes.
        # NOTE(review): assumes the stride-2 convolutions halve the spatial
        # size (i.e. ConvNormLayer pads accordingly) -- confirm.
        self._out_strides = [4 * 2**i for i in range(len(final_channels))]

    def _make_transition_layer(self,
                               num_channels_pre_layer,
                               num_channels_cur_layer,
                               freeze_norm=False,
                               norm_decay=0.):
        """Build the layers adapting the previous stage's branches.

        Existing branches get a depthwise-separable convolution when their
        channel count changes and ``None`` otherwise. Each additional branch
        is derived from the last previous branch through stride-2
        depthwise-separable convolutions. ``freeze_norm`` and ``norm_decay``
        are accepted but not used here.
        """
        num_branches_pre = len(num_channels_pre_layer)
        num_branches_cur = len(num_channels_cur_layer)
        transition_layers = []
        for i in range(num_branches_cur):
            if i < num_branches_pre:
                if num_channels_cur_layer[i] != num_channels_pre_layer[i]:
                    transition_layers.append(
                        nn.Sequential(
                            L.Conv2d(
                                num_channels_pre_layer[i],
                                num_channels_pre_layer[i],
                                kernel_size=3,
                                stride=1,
                                padding=1,
                                groups=num_channels_pre_layer[i],
                                bias=False),
                            nn.BatchNorm2D(num_channels_pre_layer[i]),
                            L.Conv2d(
                                num_channels_pre_layer[i],
                                num_channels_cur_layer[i],
                                kernel_size=1,
                                stride=1,
                                padding=0,
                                bias=False, ),
                            nn.BatchNorm2D(num_channels_cur_layer[i]),
                            nn.ReLU()))
                else:
                    transition_layers.append(None)
            else:
                conv_downsamples = []
                for j in range(i + 1 - num_branches_pre):
                    conv_downsamples.append(
                        nn.Sequential(
                            L.Conv2d(
                                num_channels_pre_layer[-1],
                                num_channels_pre_layer[-1],
                                groups=num_channels_pre_layer[-1],
                                kernel_size=3,
                                stride=2,
                                padding=1,
                                bias=False, ),
                            nn.BatchNorm2D(num_channels_pre_layer[-1]),
                            L.Conv2d(
                                num_channels_pre_layer[-1],
                                num_channels_cur_layer[i]
                                if j == i - num_branches_pre else
                                num_channels_pre_layer[-1],
                                kernel_size=1,
                                stride=1,
                                padding=0,
                                bias=False, ),
                            nn.BatchNorm2D(num_channels_cur_layer[i]
                                           if j == i - num_branches_pre else
                                           num_channels_pre_layer[-1]),
                            nn.ReLU()))
                transition_layers.append(nn.Sequential(*conv_downsamples))
        return nn.LayerList(transition_layers)

    def _make_stage(self,
                    stages_config,
                    stage_idx,
                    in_channels,
                    multiscale_output,
                    freeze_norm=False,
                    norm_decay=0.):
        """Build stage ``stage_idx`` as a sequence of ``LiteHRNetModule``.

        Returns:
            tuple: ``(nn.Sequential of modules, channel list of the last
            module)``.
        """
        num_modules = stages_config["num_modules"][stage_idx]
        num_branches = stages_config["num_branches"][stage_idx]
        num_blocks = stages_config["num_blocks"][stage_idx]
        reduce_ratio = stages_config['reduce_ratios'][stage_idx]
        module_type = stages_config['module_type'][stage_idx]

        modules = []
        for i in range(num_modules):
            # Only the last module of a stage may drop to a single output.
            if not multiscale_output and i == num_modules - 1:
                reset_multiscale_output = False
            else:
                reset_multiscale_output = True
            modules.append(
                LiteHRNetModule(
                    num_branches,
                    num_blocks,
                    in_channels,
                    reduce_ratio,
                    module_type,
                    multiscale_output=reset_multiscale_output,
                    with_fuse=True,
                    freeze_norm=freeze_norm,
                    norm_decay=norm_decay))
            in_channels = modules[-1].in_channels
        return nn.Sequential(*modules), in_channels

    def forward(self, inputs):
        """Run the backbone on ``inputs['image']``.

        Returns:
            list[Tensor]: The head outputs whose index is in ``return_idx``.
        """
        x = inputs['image']
        x = self.stem(x)
        y_list = [x]
        for stage_idx in range(3):
            x_list = []
            transition = getattr(self, 'transition{}'.format(stage_idx))
            for j in range(self.stages_config["num_branches"][stage_idx]):
                if transition[j] is not None:
                    if j >= len(y_list):
                        # New branch: derived from the last existing branch.
                        x_list.append(transition[j](y_list[-1]))
                    else:
                        x_list.append(transition[j](y_list[j]))
                else:
                    x_list.append(y_list[j])
            y_list = getattr(self, 'stage{}'.format(stage_idx))(x_list)
        x = self.head_layer(y_list)
        res = []
        for i, layer in enumerate(x):
            if i == self.freeze_at:
                layer.stop_gradient = True
            if i in self.return_idx:
                res.append(layer)
        return res

    @property
    def out_shape(self):
        """Channel and stride description of every returned output."""
        return [
            ShapeSpec(
                channels=self._out_channels[i], stride=self._out_strides[i])
            for i in self.return_idx
        ]
@ -0,0 +1,411 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
import paddle.nn as nn |
||||
import paddle.nn.functional as F |
||||
from paddle import ParamAttr |
||||
from paddle.regularizer import L2Decay |
||||
from paddle.nn.initializer import KaimingNormal |
||||
from paddlers.models.ppdet.core.workspace import register, serializable |
||||
from numbers import Integral |
||||
from ..shape_spec import ShapeSpec |
||||
|
||||
__all__ = ['MobileNet'] |
||||
|
||||
|
||||
class ConvBNLayer(nn.Layer):
    """Bias-free 2D convolution followed by batch norm and an activation.

    Args:
        in_channels (int): Input channel count.
        out_channels (int): Output channel count.
        kernel_size (int): Convolution kernel size.
        stride (int): Convolution stride.
        padding (int): Convolution padding.
        num_groups (int): Number of convolution groups.
        act (str): 'relu' or 'relu6'; any other value applies no activation.
        conv_lr (float): Learning-rate factor of the convolution weight.
        conv_decay (float): L2 decay of the convolution weight.
        norm_decay (float): L2 decay of the batch-norm weight and bias.
        norm_type (str): The batch-norm layer is only created for 'bn' and
            'sync_bn'; with any other value ``forward`` fails because
            ``_batch_norm`` does not exist.
        name (str): Accepted but not used by this layer.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 padding,
                 num_groups=1,
                 act='relu',
                 conv_lr=1.,
                 conv_decay=0.,
                 norm_decay=0.,
                 norm_type='bn',
                 name=None):
        super().__init__()
        self.act = act

        conv_weight_attr = ParamAttr(
            learning_rate=conv_lr,
            initializer=KaimingNormal(),
            regularizer=L2Decay(conv_decay))
        self._conv = nn.Conv2D(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            weight_attr=conv_weight_attr,
            bias_attr=False)

        norm_weight_attr = ParamAttr(regularizer=L2Decay(norm_decay))
        norm_bias_attr = ParamAttr(regularizer=L2Decay(norm_decay))
        if norm_type in ('sync_bn', 'bn'):
            self._batch_norm = nn.BatchNorm2D(
                out_channels,
                weight_attr=norm_weight_attr,
                bias_attr=norm_bias_attr)

    def forward(self, x):
        """Apply convolution, batch norm and the configured activation."""
        out = self._batch_norm(self._conv(x))
        if self.act == "relu":
            return F.relu(out)
        if self.act == "relu6":
            return F.relu6(out)
        return out
||||
|
||||
|
||||
class DepthwiseSeparable(nn.Layer):
    """3x3 grouped ``ConvBNLayer`` followed by a 1x1 ``ConvBNLayer``.

    Args:
        in_channels (int): Input channel count (already scaled by the caller).
        out_channels1 (int): Unscaled output width of the 3x3 convolution.
        out_channels2 (int): Unscaled output width of the 1x1 convolution.
        num_groups (int): Unscaled group count of the 3x3 convolution.
        stride (int): Stride of the 3x3 convolution.
        scale (float): Multiplier applied to ``out_channels1``,
            ``out_channels2`` and ``num_groups`` (result truncated to int).
        conv_lr (float): Forwarded to ``ConvBNLayer``.
        conv_decay (float): Forwarded to ``ConvBNLayer``.
        norm_decay (float): Forwarded to ``ConvBNLayer``.
        norm_type (str): Forwarded to ``ConvBNLayer``.
        name (str|None): Optional name prefix; "_dw" and "_sep" are appended
            for the two sub-layers.
    """

    def __init__(self,
                 in_channels,
                 out_channels1,
                 out_channels2,
                 num_groups,
                 stride,
                 scale,
                 conv_lr=1.,
                 conv_decay=0.,
                 norm_decay=0.,
                 norm_type='bn',
                 name=None):
        super(DepthwiseSeparable, self).__init__()

        # ``name`` defaults to None; concatenating onto None used to raise
        # TypeError, so the suffixes are only added when a name is given.
        dw_name = None if name is None else name + "_dw"
        sep_name = None if name is None else name + "_sep"

        self._depthwise_conv = ConvBNLayer(
            in_channels,
            int(out_channels1 * scale),
            kernel_size=3,
            stride=stride,
            padding=1,
            num_groups=int(num_groups * scale),
            conv_lr=conv_lr,
            conv_decay=conv_decay,
            norm_decay=norm_decay,
            norm_type=norm_type,
            name=dw_name)

        self._pointwise_conv = ConvBNLayer(
            int(out_channels1 * scale),
            int(out_channels2 * scale),
            kernel_size=1,
            stride=1,
            padding=0,
            conv_lr=conv_lr,
            conv_decay=conv_decay,
            norm_decay=norm_decay,
            norm_type=norm_type,
            name=sep_name)

    def forward(self, x):
        """Apply the 3x3 grouped convolution, then the 1x1 convolution."""
        x = self._depthwise_conv(x)
        x = self._pointwise_conv(x)
        return x
||||
|
||||
|
||||
class ExtraBlock(nn.Layer):
    """1x1 ``ConvBNLayer`` followed by a 3x3 ``ConvBNLayer``, both ReLU6.

    Args:
        in_channels (int): Input channel count.
        out_channels1 (int): Output width of the 1x1 convolution.
        out_channels2 (int): Output width of the 3x3 convolution.
        num_groups (int): Group count used by both convolutions.
        stride (int): Stride of the 3x3 convolution.
        conv_lr (float): Forwarded to ``ConvBNLayer``.
        conv_decay (float): Forwarded to ``ConvBNLayer``.
        norm_decay (float): Forwarded to ``ConvBNLayer``.
        norm_type (str): Forwarded to ``ConvBNLayer``.
        name (str|None): Optional name prefix; "_extra1" and "_extra2" are
            appended for the two sub-layers.
    """

    def __init__(self,
                 in_channels,
                 out_channels1,
                 out_channels2,
                 num_groups=1,
                 stride=2,
                 conv_lr=1.,
                 conv_decay=0.,
                 norm_decay=0.,
                 norm_type='bn',
                 name=None):
        super(ExtraBlock, self).__init__()

        # ``name`` defaults to None; concatenating onto None used to raise
        # TypeError, so the suffixes are only added when a name is given.
        first_name = None if name is None else name + "_extra1"
        second_name = None if name is None else name + "_extra2"

        self.pointwise_conv = ConvBNLayer(
            in_channels,
            int(out_channels1),
            kernel_size=1,
            stride=1,
            padding=0,
            num_groups=int(num_groups),
            act='relu6',
            conv_lr=conv_lr,
            conv_decay=conv_decay,
            norm_decay=norm_decay,
            norm_type=norm_type,
            name=first_name)

        self.normal_conv = ConvBNLayer(
            int(out_channels1),
            int(out_channels2),
            kernel_size=3,
            stride=stride,
            padding=1,
            num_groups=int(num_groups),
            act='relu6',
            conv_lr=conv_lr,
            conv_decay=conv_decay,
            norm_decay=norm_decay,
            norm_type=norm_type,
            name=second_name)

    def forward(self, x):
        """Apply the 1x1 convolution, then the 3x3 convolution."""
        x = self.pointwise_conv(x)
        x = self.normal_conv(x)
        return x
||||
|
||||
|
||||
@register |
||||
@serializable |
||||
class MobileNet(nn.Layer): |
||||
__shared__ = ['norm_type'] |
||||
|
||||
def __init__(self, |
||||
norm_type='bn', |
||||
norm_decay=0., |
||||
conv_decay=0., |
||||
scale=1, |
||||
conv_learning_rate=1.0, |
||||
feature_maps=[4, 6, 13], |
||||
with_extra_blocks=False, |
||||
extra_block_filters=[[256, 512], [128, 256], [128, 256], |
||||
[64, 128]]): |
||||
super(MobileNet, self).__init__() |
||||
if isinstance(feature_maps, Integral): |
||||
feature_maps = [feature_maps] |
||||
self.feature_maps = feature_maps |
||||
self.with_extra_blocks = with_extra_blocks |
||||
self.extra_block_filters = extra_block_filters |
||||
|
||||
self._out_channels = [] |
||||
|
||||
self.conv1 = ConvBNLayer( |
||||
in_channels=3, |
||||
out_channels=int(32 * scale), |
||||
kernel_size=3, |
||||
stride=2, |
||||
padding=1, |
||||
conv_lr=conv_learning_rate, |
||||
conv_decay=conv_decay, |
||||
norm_decay=norm_decay, |
||||
norm_type=norm_type, |
||||
name="conv1") |
||||
|
||||
self.dwsl = [] |
||||
dws21 = self.add_sublayer( |
||||
"conv2_1", |
||||
sublayer=DepthwiseSeparable( |
||||
in_channels=int(32 * scale), |
||||
out_channels1=32, |
||||
out_channels2=64, |
||||
num_groups=32, |
||||
stride=1, |
||||
scale=scale, |
||||
conv_lr=conv_learning_rate, |
||||
conv_decay=conv_decay, |
||||
norm_decay=norm_decay, |
||||
norm_type=norm_type, |
||||
name="conv2_1")) |
||||
self.dwsl.append(dws21) |
||||
self._update_out_channels( |
||||
int(64 * scale), len(self.dwsl), feature_maps) |
||||
dws22 = self.add_sublayer( |
||||
"conv2_2", |
||||
sublayer=DepthwiseSeparable( |
||||
in_channels=int(64 * scale), |
||||
out_channels1=64, |
||||
out_channels2=128, |
||||
num_groups=64, |
||||
stride=2, |
||||
scale=scale, |
||||
conv_lr=conv_learning_rate, |
||||
conv_decay=conv_decay, |
||||
norm_decay=norm_decay, |
||||
norm_type=norm_type, |
||||
name="conv2_2")) |
||||
self.dwsl.append(dws22) |
||||
self._update_out_channels( |
||||
int(128 * scale), len(self.dwsl), feature_maps) |
||||
# 1/4 |
||||
dws31 = self.add_sublayer( |
||||
"conv3_1", |
||||
sublayer=DepthwiseSeparable( |
||||
in_channels=int(128 * scale), |
||||
out_channels1=128, |
||||
out_channels2=128, |
||||
num_groups=128, |
||||
stride=1, |
||||
scale=scale, |
||||
conv_lr=conv_learning_rate, |
||||
conv_decay=conv_decay, |
||||
norm_decay=norm_decay, |
||||
norm_type=norm_type, |
||||
name="conv3_1")) |
||||
self.dwsl.append(dws31) |
||||
self._update_out_channels( |
||||
int(128 * scale), len(self.dwsl), feature_maps) |
||||
dws32 = self.add_sublayer( |
||||
"conv3_2", |
||||
sublayer=DepthwiseSeparable( |
||||
in_channels=int(128 * scale), |
||||
out_channels1=128, |
||||
out_channels2=256, |
||||
num_groups=128, |
||||
stride=2, |
||||
scale=scale, |
||||
conv_lr=conv_learning_rate, |
||||
conv_decay=conv_decay, |
||||
norm_decay=norm_decay, |
||||
norm_type=norm_type, |
||||
name="conv3_2")) |
||||
self.dwsl.append(dws32) |
||||
self._update_out_channels( |
||||
int(256 * scale), len(self.dwsl), feature_maps) |
||||
# 1/8 |
||||
dws41 = self.add_sublayer( |
||||
"conv4_1", |
||||
sublayer=DepthwiseSeparable( |
||||
in_channels=int(256 * scale), |
||||
out_channels1=256, |
||||
out_channels2=256, |
||||
num_groups=256, |
||||
stride=1, |
||||
scale=scale, |
||||
conv_lr=conv_learning_rate, |
||||
conv_decay=conv_decay, |
||||
norm_decay=norm_decay, |
||||
norm_type=norm_type, |
||||
name="conv4_1")) |
||||
self.dwsl.append(dws41) |
||||
self._update_out_channels( |
||||
int(256 * scale), len(self.dwsl), feature_maps) |
||||
dws42 = self.add_sublayer( |
||||
"conv4_2", |
||||
sublayer=DepthwiseSeparable( |
||||
in_channels=int(256 * scale), |
||||
out_channels1=256, |
||||
out_channels2=512, |
||||
num_groups=256, |
||||
stride=2, |
||||
scale=scale, |
||||
conv_lr=conv_learning_rate, |
||||
conv_decay=conv_decay, |
||||
norm_decay=norm_decay, |
||||
norm_type=norm_type, |
||||
name="conv4_2")) |
||||
self.dwsl.append(dws42) |
||||
self._update_out_channels( |
||||
int(512 * scale), len(self.dwsl), feature_maps) |
||||
# 1/16 |
||||
for i in range(5): |
||||
tmp = self.add_sublayer( |
||||
"conv5_" + str(i + 1), |
||||
sublayer=DepthwiseSeparable( |
||||
in_channels=int(512 * scale), |
||||
out_channels1=512, |
||||
out_channels2=512, |
||||
num_groups=512, |
||||
stride=1, |
||||
scale=scale, |
||||
conv_lr=conv_learning_rate, |
||||
conv_decay=conv_decay, |
||||
norm_decay=norm_decay, |
||||
norm_type=norm_type, |
||||
name="conv5_" + str(i + 1))) |
||||
self.dwsl.append(tmp) |
||||
self._update_out_channels( |
||||
int(512 * scale), len(self.dwsl), feature_maps) |
||||
dws56 = self.add_sublayer( |
||||
"conv5_6", |
||||
sublayer=DepthwiseSeparable( |
||||
in_channels=int(512 * scale), |
||||
out_channels1=512, |
||||
out_channels2=1024, |
||||
num_groups=512, |
||||
stride=2, |
||||
scale=scale, |
||||
conv_lr=conv_learning_rate, |
||||
conv_decay=conv_decay, |
||||
norm_decay=norm_decay, |
||||
norm_type=norm_type, |
||||
name="conv5_6")) |
||||
self.dwsl.append(dws56) |
||||
self._update_out_channels( |
||||
int(1024 * scale), len(self.dwsl), feature_maps) |
||||
# 1/32 |
||||
dws6 = self.add_sublayer( |
||||
"conv6", |
||||
sublayer=DepthwiseSeparable( |
||||
in_channels=int(1024 * scale), |
||||
out_channels1=1024, |
||||
out_channels2=1024, |
||||
num_groups=1024, |
||||
stride=1, |
||||
scale=scale, |
||||
conv_lr=conv_learning_rate, |
||||
conv_decay=conv_decay, |
||||
norm_decay=norm_decay, |
||||
norm_type=norm_type, |
||||
name="conv6")) |
||||
self.dwsl.append(dws6) |
||||
self._update_out_channels( |
||||
int(1024 * scale), len(self.dwsl), feature_maps) |
||||
|
||||
if self.with_extra_blocks: |
||||
self.extra_blocks = [] |
||||
for i, block_filter in enumerate(self.extra_block_filters): |
||||
in_c = 1024 if i == 0 else self.extra_block_filters[i - 1][1] |
||||
conv_extra = self.add_sublayer( |
||||
"conv7_" + str(i + 1), |
||||
sublayer=ExtraBlock( |
||||
in_c, |
||||
block_filter[0], |
||||
block_filter[1], |
||||
conv_lr=conv_learning_rate, |
||||
conv_decay=conv_decay, |
||||
norm_decay=norm_decay, |
||||
norm_type=norm_type, |
||||
name="conv7_" + str(i + 1))) |
||||
self.extra_blocks.append(conv_extra) |
||||
self._update_out_channels( |
||||
block_filter[1], |
||||
len(self.dwsl) + len(self.extra_blocks), feature_maps) |
||||
|
||||
def _update_out_channels(self, channel, feature_idx, feature_maps): |
||||
if feature_idx in feature_maps: |
||||
self._out_channels.append(channel) |
||||
|
||||
def forward(self, inputs):
    """Run the backbone and return the feature maps named in ``self.feature_maps``.

    Args:
        inputs: Mapping whose ``'image'`` entry is fed to ``self.conv1``.

    Returns:
        List of the selected block outputs, in execution order. Blocks are
        numbered from 1; extra blocks continue the numbering after
        ``self.dwsl``.
    """
    outs = []
    y = self.conv1(inputs['image'])
    for i, block in enumerate(self.dwsl):
        y = block(y)
        if i + 1 in self.feature_maps:
            outs.append(y)

    if not self.with_extra_blocks:
        return outs

    # The extra blocks start from the last *collected* feature map, not from
    # the raw output of the final depthwise block.
    y = outs[-1]
    for i, block in enumerate(self.extra_blocks):
        idx = i + len(self.dwsl)
        y = block(y)
        if idx + 1 in self.feature_maps:
            outs.append(y)
    return outs
||||
|
||||
@property
def out_shape(self):
    """Return one ``ShapeSpec`` per recorded output channel count."""
    return [ShapeSpec(channels=c) for c in self._out_channels]
@ -0,0 +1,479 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
import paddle |
||||
import paddle.nn as nn |
||||
import paddle.nn.functional as F |
||||
from paddle import ParamAttr |
||||
from paddle.regularizer import L2Decay |
||||
from paddlers.models.ppdet.core.workspace import register, serializable |
||||
from numbers import Integral |
||||
from ..shape_spec import ShapeSpec |
||||
|
||||
__all__ = ['MobileNetV3'] |
||||
|
||||
|
||||
def make_divisible(v, divisor=8, min_value=None):
    """Round ``v`` to a nearby multiple of ``divisor``.

    Args:
        v: Value to round (typically a scaled channel count).
        divisor: The result is a multiple of this, unless ``min_value`` wins.
        min_value: Lower bound for the result; defaults to ``divisor``.

    Returns:
        The rounded value. If rounding would drop it below 90% of ``v``,
        one more ``divisor`` step is added.
    """
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v
||||
|
||||
|
||||
class ConvBNLayer(nn.Layer):
    """Bias-free ``nn.Conv2D`` followed by ``nn.BatchNorm2D`` and an optional activation.

    Args:
        in_c: Input channels of the convolution.
        out_c: Output channels of the convolution and width of the norm layer.
        filter_size: Convolution kernel size.
        stride: Convolution stride.
        padding: Padding, forwarded unchanged to ``nn.Conv2D``.
        num_groups: Convolution groups.
        act: ``None``, ``"relu"``, ``"relu6"`` or ``"hard_swish"``.
        lr_mult: Learning-rate multiplier of the conv weight, and of the
            norm parameters unless ``freeze_norm`` is set.
        conv_decay: L2 decay coefficient of the conv weight.
        norm_type: ``'bn'`` or ``'sync_bn'``; both build ``nn.BatchNorm2D``.
        norm_decay: L2 decay coefficient of the norm scale and bias.
        freeze_norm: If true, norm parameters get learning rate 0, are marked
            non-trainable with ``stop_gradient`` set, and the norm layer is
            built with ``use_global_stats=True``.
        name: Accepted for call-site compatibility; not used by this layer.

    Raises:
        ValueError: If ``norm_type`` is neither ``'bn'`` nor ``'sync_bn'``.
    """

    def __init__(self,
                 in_c,
                 out_c,
                 filter_size,
                 stride,
                 padding,
                 num_groups=1,
                 act=None,
                 lr_mult=1.,
                 conv_decay=0.,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=False,
                 name=""):
        super(ConvBNLayer, self).__init__()
        # Fail early: without a supported norm type ``self.bn`` would never be
        # created and the layer would break later with an opaque error.
        if norm_type not in ('sync_bn', 'bn'):
            raise ValueError(
                "norm_type must be 'bn' or 'sync_bn', but got {!r}".format(
                    norm_type))
        self.act = act
        self.conv = nn.Conv2D(
            in_channels=in_c,
            out_channels=out_c,
            kernel_size=filter_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            weight_attr=ParamAttr(
                learning_rate=lr_mult, regularizer=L2Decay(conv_decay)),
            bias_attr=False)

        norm_lr = 0. if freeze_norm else lr_mult
        param_attr = ParamAttr(
            learning_rate=norm_lr,
            regularizer=L2Decay(norm_decay),
            trainable=False if freeze_norm else True)
        bias_attr = ParamAttr(
            learning_rate=norm_lr,
            regularizer=L2Decay(norm_decay),
            trainable=False if freeze_norm else True)
        global_stats = True if freeze_norm else None
        self.bn = nn.BatchNorm2D(
            out_c,
            weight_attr=param_attr,
            bias_attr=bias_attr,
            use_global_stats=global_stats)
        norm_params = self.bn.parameters()
        if freeze_norm:
            for param in norm_params:
                param.stop_gradient = True

    def forward(self, x):
        """Apply conv, batch norm and the configured activation.

        Raises:
            NotImplementedError: If ``act`` is set to an unsupported name.
        """
        x = self.conv(x)
        x = self.bn(x)
        if self.act is not None:
            if self.act == "relu":
                x = F.relu(x)
            elif self.act == "relu6":
                x = F.relu6(x)
            elif self.act == "hard_swish":
                x = F.hardswish(x)
            else:
                raise NotImplementedError(
                    "The activation function is selected incorrectly.")
        return x
||||
|
||||
|
||||
class ResidualUnit(nn.Layer):
    """Inverted-residual block: 1x1 expand, depthwise conv, optional SE, 1x1 linear.

    Args:
        in_c: Input channels.
        mid_c: Channels after the expansion conv (also the depthwise width).
        out_c: Output channels of the final linear conv.
        filter_size: Kernel size of the depthwise conv.
        stride: Stride of the depthwise conv.
        use_se: Insert an ``SEModule`` after the depthwise conv.
        lr_mult: Learning-rate multiplier forwarded to every sub-layer.
        conv_decay: L2 decay coefficient for conv weights.
        norm_type: Forwarded to ``ConvBNLayer``.
        norm_decay: Forwarded to ``ConvBNLayer``.
        freeze_norm: Forwarded to ``ConvBNLayer``.
        act: Activation of the expand and depthwise convs; the linear conv
            has none.
        return_list: If true, ``forward`` returns ``[expanded, output]``
            instead of just ``output``.
        name: Prefix used to build sub-layer names.
    """

    def __init__(self,
                 in_c,
                 mid_c,
                 out_c,
                 filter_size,
                 stride,
                 use_se,
                 lr_mult,
                 conv_decay=0.,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=False,
                 act=None,
                 return_list=False,
                 name=''):
        super(ResidualUnit, self).__init__()
        # The identity shortcut is only valid when the shape is preserved.
        self.if_shortcut = stride == 1 and in_c == out_c
        self.use_se = use_se
        self.return_list = return_list

        self.expand_conv = ConvBNLayer(
            in_c=in_c,
            out_c=mid_c,
            filter_size=1,
            stride=1,
            padding=0,
            act=act,
            lr_mult=lr_mult,
            conv_decay=conv_decay,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name=name + "_expand")
        self.bottleneck_conv = ConvBNLayer(
            in_c=mid_c,
            out_c=mid_c,
            filter_size=filter_size,
            stride=stride,
            padding=int((filter_size - 1) // 2),
            num_groups=mid_c,
            act=act,
            lr_mult=lr_mult,
            conv_decay=conv_decay,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name=name + "_depthwise")
        if self.use_se:
            self.mid_se = SEModule(
                mid_c, lr_mult, conv_decay, name=name + "_se")
        self.linear_conv = ConvBNLayer(
            in_c=mid_c,
            out_c=out_c,
            filter_size=1,
            stride=1,
            padding=0,
            act=None,
            lr_mult=lr_mult,
            conv_decay=conv_decay,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name=name + "_linear")

    def forward(self, inputs):
        """Return the block output, or ``[expanded, output]`` when ``return_list`` is set."""
        y = self.expand_conv(inputs)
        x = self.bottleneck_conv(y)
        if self.use_se:
            x = self.mid_se(x)
        x = self.linear_conv(x)
        if self.if_shortcut:
            x = paddle.add(inputs, x)
        if self.return_list:
            return [y, x]
        else:
            return x
||||
|
||||
|
||||
class SEModule(nn.Layer):
    """Channel gating: global average pool, two 1x1 convs, hard-sigmoid, multiply.

    Args:
        channel: Number of input (and output) channels.
        lr_mult: Learning-rate multiplier for both convs' weights and biases.
        conv_decay: L2 decay coefficient for both convs' weights and biases.
        reduction: The hidden width is ``channel // reduction``.
        name: Accepted for call-site compatibility; not used by this layer.
    """

    def __init__(self, channel, lr_mult, conv_decay, reduction=4, name=""):
        super(SEModule, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2D(1)
        mid_channels = int(channel // reduction)
        self.conv1 = nn.Conv2D(
            in_channels=channel,
            out_channels=mid_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            weight_attr=ParamAttr(
                learning_rate=lr_mult, regularizer=L2Decay(conv_decay)),
            bias_attr=ParamAttr(
                learning_rate=lr_mult, regularizer=L2Decay(conv_decay)))
        self.conv2 = nn.Conv2D(
            in_channels=mid_channels,
            out_channels=channel,
            kernel_size=1,
            stride=1,
            padding=0,
            weight_attr=ParamAttr(
                learning_rate=lr_mult, regularizer=L2Decay(conv_decay)),
            bias_attr=ParamAttr(
                learning_rate=lr_mult, regularizer=L2Decay(conv_decay)))

    def forward(self, inputs):
        """Scale ``inputs`` by the gate computed from its pooled statistics."""
        outputs = self.avg_pool(inputs)
        outputs = self.conv1(outputs)
        outputs = F.relu(outputs)
        outputs = self.conv2(outputs)
        outputs = F.hardsigmoid(outputs, slope=0.2, offset=0.5)
        return paddle.multiply(x=inputs, y=outputs)
||||
|
||||
|
||||
class ExtraBlockDW(nn.Layer):
    """Extra head block: 1x1 conv, grouped 3x3 conv, 1x1 conv, all with relu6.

    Args:
        in_c: Input channels.
        ch_1: Output channels of the first 1x1 conv; also the group count of
            the 3x3 conv.
        ch_2: Output channels of the 3x3 conv and of the final 1x1 conv.
        stride: Stride of the 3x3 conv.
        lr_mult: Learning-rate multiplier forwarded to every sub-layer.
        conv_decay: L2 decay coefficient for conv weights.
        norm_type: Forwarded to ``ConvBNLayer``.
        norm_decay: Forwarded to ``ConvBNLayer``.
        freeze_norm: Forwarded to ``ConvBNLayer``.
        name: Prefix for the sub-layer names; ``None`` is treated as ``""``.
    """

    def __init__(self,
                 in_c,
                 ch_1,
                 ch_2,
                 stride,
                 lr_mult,
                 conv_decay=0.,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=False,
                 name=None):
        super(ExtraBlockDW, self).__init__()
        # The default ``None`` cannot be concatenated with the suffixes below.
        if name is None:
            name = ""
        self.pointwise_conv = ConvBNLayer(
            in_c=in_c,
            out_c=ch_1,
            filter_size=1,
            stride=1,
            padding='SAME',
            act='relu6',
            lr_mult=lr_mult,
            conv_decay=conv_decay,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name=name + "_extra1")
        self.depthwise_conv = ConvBNLayer(
            in_c=ch_1,
            out_c=ch_2,
            filter_size=3,
            stride=stride,
            padding='SAME',
            num_groups=int(ch_1),
            act='relu6',
            lr_mult=lr_mult,
            conv_decay=conv_decay,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name=name + "_extra2_dw")
        self.normal_conv = ConvBNLayer(
            in_c=ch_2,
            out_c=ch_2,
            filter_size=1,
            stride=1,
            padding='SAME',
            act='relu6',
            lr_mult=lr_mult,
            conv_decay=conv_decay,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name=name + "_extra2_sep")

    def forward(self, inputs):
        """Apply the three convolutions in sequence."""
        x = self.pointwise_conv(inputs)
        x = self.depthwise_conv(x)
        x = self.normal_conv(x)
        return x
||||
|
||||
|
||||
@register
@serializable
class MobileNetV3(nn.Layer):
    """MobileNetV3 backbone returning the feature maps selected by ``feature_maps``.

    Args:
        scale: Width multiplier applied, through ``make_divisible``, to every
            channel count of the block table.
        model_name: ``"large"`` or ``"small"``; selects the block table.
        feature_maps: Layer indices to export (a single int is wrapped in a
            list). The first residual block has index 2.
        with_extra_blocks: Append a 1x1 conv and one ``ExtraBlockDW`` per
            entry of ``extra_block_filters``. Residual blocks selected by
            ``feature_maps`` then export their expansion output instead of
            their final output.
        extra_block_filters: One ``[ch_1, ch_2]`` pair per extra block.
        lr_mult_list: Learning-rate multipliers. Entry 0 is used by the stem;
            block ``i`` uses entry ``min(i // 3, len(lr_mult_list) - 1)``.
        conv_decay: L2 decay coefficient for conv weights.
        multiplier: Additional factor applied to the output channels of the
            third-from-last block and to the expansion and output channels
            of the last two blocks.
        norm_type: Forwarded to every ``ConvBNLayer``.
        norm_decay: Forwarded to every ``ConvBNLayer``.
        freeze_norm: Forwarded to every ``ConvBNLayer``.

    Raises:
        ValueError: If ``norm_type`` is ``'sync_bn'`` while ``freeze_norm``
            is true.
        NotImplementedError: If ``model_name`` is not ``"large"`` or
            ``"small"``.
    """
    __shared__ = ['norm_type']

    def __init__(self,
                 scale=1.0,
                 model_name="large",
                 feature_maps=[6, 12, 15],
                 with_extra_blocks=False,
                 extra_block_filters=[[256, 512], [128, 256], [128, 256],
                                      [64, 128]],
                 lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0],
                 conv_decay=0.0,
                 multiplier=1.0,
                 norm_type='bn',
                 norm_decay=0.0,
                 freeze_norm=False):
        super(MobileNetV3, self).__init__()
        if isinstance(feature_maps, Integral):
            feature_maps = [feature_maps]
        if norm_type == 'sync_bn' and freeze_norm:
            raise ValueError(
                "The norm_type should not be sync_bn when freeze_norm is True")
        self.feature_maps = feature_maps
        self.with_extra_blocks = with_extra_blocks
        self.extra_block_filters = extra_block_filters

        inplanes = 16
        if model_name == "large":
            self.cfg = [
                # k, exp, c, se, nl, s,
                [3, 16, 16, False, "relu", 1],
                [3, 64, 24, False, "relu", 2],
                [3, 72, 24, False, "relu", 1],
                [5, 72, 40, True, "relu", 2],  # RCNN output
                [5, 120, 40, True, "relu", 1],
                [5, 120, 40, True, "relu", 1],  # YOLOv3 output
                [3, 240, 80, False, "hard_swish", 2],  # RCNN output
                [3, 200, 80, False, "hard_swish", 1],
                [3, 184, 80, False, "hard_swish", 1],
                [3, 184, 80, False, "hard_swish", 1],
                [3, 480, 112, True, "hard_swish", 1],
                [3, 672, 112, True, "hard_swish", 1],  # YOLOv3 output
                [5, 672, 160, True, "hard_swish",
                 2],  # SSD/SSDLite/RCNN output
                [5, 960, 160, True, "hard_swish", 1],
                [5, 960, 160, True, "hard_swish", 1],  # YOLOv3 output
            ]
        elif model_name == "small":
            self.cfg = [
                # k, exp, c, se, nl, s,
                [3, 16, 16, True, "relu", 2],
                [3, 72, 24, False, "relu", 2],  # RCNN output
                [3, 88, 24, False, "relu", 1],  # YOLOv3 output
                [5, 96, 40, True, "hard_swish", 2],  # RCNN output
                [5, 240, 40, True, "hard_swish", 1],
                [5, 240, 40, True, "hard_swish", 1],
                [5, 120, 48, True, "hard_swish", 1],
                [5, 144, 48, True, "hard_swish", 1],  # YOLOv3 output
                [5, 288, 96, True, "hard_swish", 2],  # SSD/SSDLite/RCNN output
                [5, 576, 96, True, "hard_swish", 1],
                [5, 576, 96, True, "hard_swish", 1],  # YOLOv3 output
            ]
        else:
            raise NotImplementedError(
                "mode[{}_model] is not implemented!".format(model_name))

        # Only the tail of the table is affected by ``multiplier``.
        if multiplier != 1.0:
            self.cfg[-3][2] = int(self.cfg[-3][2] * multiplier)
            self.cfg[-2][1] = int(self.cfg[-2][1] * multiplier)
            self.cfg[-2][2] = int(self.cfg[-2][2] * multiplier)
            self.cfg[-1][1] = int(self.cfg[-1][1] * multiplier)
            self.cfg[-1][2] = int(self.cfg[-1][2] * multiplier)

        self.conv1 = ConvBNLayer(
            in_c=3,
            out_c=make_divisible(inplanes * scale),
            filter_size=3,
            stride=2,
            padding=1,
            num_groups=1,
            act="hard_swish",
            lr_mult=lr_mult_list[0],
            conv_decay=conv_decay,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name="conv1")

        self._out_channels = []
        self.block_list = []
        i = 0
        inplanes = make_divisible(inplanes * scale)
        for (k, exp, c, se, nl, s) in self.cfg:
            lr_idx = min(i // 3, len(lr_mult_list) - 1)
            lr_mult = lr_mult_list[lr_idx]

            # for SSD/SSDLite, first head input is after ResidualUnit expand_conv
            return_list = self.with_extra_blocks and i + 2 in self.feature_maps

            block = self.add_sublayer(
                "conv" + str(i + 2),
                sublayer=ResidualUnit(
                    in_c=inplanes,
                    mid_c=make_divisible(scale * exp),
                    out_c=make_divisible(scale * c),
                    filter_size=k,
                    stride=s,
                    use_se=se,
                    act=nl,
                    lr_mult=lr_mult,
                    conv_decay=conv_decay,
                    norm_type=norm_type,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    return_list=return_list,
                    name="conv" + str(i + 2)))
            self.block_list.append(block)
            inplanes = make_divisible(scale * c)
            i += 1
            # When the expansion output is exported, its width is recorded
            # instead of the block's final width.
            self._update_out_channels(
                make_divisible(scale * exp)
                if return_list else inplanes, i + 1, feature_maps)

        if self.with_extra_blocks:
            self.extra_block_list = []
            extra_out_c = make_divisible(scale * self.cfg[-1][1])
            lr_idx = min(i // 3, len(lr_mult_list) - 1)
            lr_mult = lr_mult_list[lr_idx]

            conv_extra = self.add_sublayer(
                "conv" + str(i + 2),
                sublayer=ConvBNLayer(
                    in_c=inplanes,
                    out_c=extra_out_c,
                    filter_size=1,
                    stride=1,
                    padding=0,
                    num_groups=1,
                    act="hard_swish",
                    lr_mult=lr_mult,
                    conv_decay=conv_decay,
                    norm_type=norm_type,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    name="conv" + str(i + 2)))
            self.extra_block_list.append(conv_extra)
            i += 1
            self._update_out_channels(extra_out_c, i + 1, feature_maps)

            for j, block_filter in enumerate(self.extra_block_filters):
                in_c = extra_out_c if j == 0 else self.extra_block_filters[
                    j - 1][1]
                conv_extra = self.add_sublayer(
                    "conv" + str(i + 2),
                    sublayer=ExtraBlockDW(
                        in_c,
                        block_filter[0],
                        block_filter[1],
                        stride=2,
                        lr_mult=lr_mult,
                        conv_decay=conv_decay,
                        norm_type=norm_type,
                        norm_decay=norm_decay,
                        freeze_norm=freeze_norm,
                        name='conv' + str(i + 2)))
                self.extra_block_list.append(conv_extra)
                i += 1
                self._update_out_channels(block_filter[1], i + 1, feature_maps)

    def _update_out_channels(self, channel, feature_idx, feature_maps):
        """Record ``channel`` when ``feature_idx`` is one of ``feature_maps``."""
        if feature_idx in feature_maps:
            self._out_channels.append(channel)

    def forward(self, inputs):
        """Run the backbone on ``inputs['image']`` and return the selected feature maps."""
        x = self.conv1(inputs['image'])
        outs = []
        for idx, block in enumerate(self.block_list):
            x = block(x)
            if idx + 2 in self.feature_maps:
                if isinstance(x, list):
                    # Block built with return_list: export the expansion
                    # output and continue with the block's final output.
                    outs.append(x[0])
                    x = x[1]
                else:
                    outs.append(x)

        if not self.with_extra_blocks:
            return outs

        for i, block in enumerate(self.extra_block_list):
            idx = i + len(self.block_list)
            x = block(x)
            if idx + 2 in self.feature_maps:
                outs.append(x)
        return outs

    @property
    def out_shape(self):
        """Return one ``ShapeSpec`` per recorded output channel count."""
        return [ShapeSpec(channels=c) for c in self._out_channels]
@ -0,0 +1,69 @@ |
||||
class NameAdapter(object):
    """Build backbone variable names that match pretrained weight files.

    The wrapped ``model`` is only inspected for its ``_model_type`` and
    ``variant`` attributes; either may be missing.
    """

    def __init__(self, model):
        super(NameAdapter, self).__init__()
        self.model = model

    @property
    def model_type(self):
        """The model's ``_model_type`` attribute, or ``''`` if absent."""
        return getattr(self.model, '_model_type', '')

    @property
    def variant(self):
        """The model's ``variant`` attribute, or ``''`` if absent."""
        return getattr(self.model, 'variant', '')

    def fix_conv_norm_name(self, name):
        """Return the norm-layer name that belongs to conv layer ``name``."""
        if name == "conv1":
            bn_name = "bn_" + name
        else:
            # Replace the first three characters of the name with "bn".
            bn_name = "bn" + name[3:]
        # the naming rule is same as pretrained weight
        if self.model_type == 'SEResNeXt':
            bn_name = name + "_bn"
        return bn_name

    def fix_shortcut_name(self, name):
        """Return the shortcut conv name; only SEResNeXt names are rewritten."""
        if self.model_type == 'SEResNeXt':
            name = 'conv' + name + '_prj'
        return name

    def fix_bottleneck_name(self, name):
        """Return ``(conv1, conv2, conv3, shortcut)`` names for a bottleneck block."""
        if self.model_type == 'SEResNeXt':
            conv_name1 = 'conv' + name + '_x1'
            conv_name2 = 'conv' + name + '_x2'
            conv_name3 = 'conv' + name + '_x3'
            shortcut_name = name
        else:
            conv_name1 = name + "_branch2a"
            conv_name2 = name + "_branch2b"
            conv_name3 = name + "_branch2c"
            shortcut_name = name + "_branch1"
        return conv_name1, conv_name2, conv_name3, shortcut_name

    def fix_basicblock_name(self, name):
        """Return ``(conv1, conv2, shortcut)`` names for a basic block."""
        if self.model_type == 'SEResNeXt':
            conv_name1 = 'conv' + name + '_x1'
            conv_name2 = 'conv' + name + '_x2'
            shortcut_name = name
        else:
            conv_name1 = name + "_branch2a"
            conv_name2 = name + "_branch2b"
            shortcut_name = name + "_branch1"
        return conv_name1, conv_name2, shortcut_name

    def fix_layer_warp_name(self, stage_num, count, i):
        """Return the name of block ``i`` within stage ``stage_num``.

        Args:
            stage_num: Stage number used in the ``res<stage_num>`` prefix.
            count: Number of blocks in the stage.
            i: Zero-based block index within the stage.
        """
        name = 'res' + str(stage_num)
        if count > 10 and stage_num == 4:
            # Long stage 4: "res4a", then "res4b1", "res4b2", ...
            if i == 0:
                conv_name = name + "a"
            else:
                conv_name = name + "b" + str(i)
        else:
            conv_name = name + chr(ord("a") + i)
        if self.model_type == 'SEResNeXt':
            conv_name = str(stage_num + 2) + '_' + str(i + 1)
        return conv_name

    def fix_c1_stage_name(self):
        """Return the stem conv name (``"res_conv1"`` for ResNeXt, else ``"conv1"``)."""
        return "res_conv1" if self.model_type == 'ResNeXt' else "conv1"
@ -0,0 +1,358 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from numbers import Integral |
||||
|
||||
import paddle |
||||
import paddle.nn as nn |
||||
import paddle.nn.functional as F |
||||
from paddlers.models.ppdet.core.workspace import register, serializable |
||||
from ..shape_spec import ShapeSpec |
||||
from .resnet import ConvNormLayer |
||||
|
||||
__all__ = ['Res2Net', 'Res2NetC5'] |
||||
|
||||
# Number of bottleneck blocks in each of the four stages, keyed by depth.
Res2Net_cfg = {
    50: [3, 4, 6, 3],
    101: [3, 4, 23, 3],
    152: [3, 8, 36, 3],
    200: [3, 12, 48, 3]
}
||||
|
||||
|
||||
class BottleNeck(nn.Layer):
    """Res2Net bottleneck: 1x1 conv, ``scales``-way split with 3x3 convs, 1x1 conv.

    Args:
        ch_in: Input channels.
        ch_out: Output channels.
        stride: Block stride.
        shortcut: If true the input is added as-is; otherwise it goes through
            a projection branch (``branch1``).
        width: Channels of each split; the first conv outputs
            ``width * scales`` channels.
        scales: Number of splits of the first conv's output.
        variant: ``'a'`` puts the stride on the first 1x1 conv, other values
            put it on the 3x3 convs; ``'d'`` with stride 2 additionally uses
            an average-pool projection shortcut.
        groups: Groups of the 3x3 convs.
        lr: Learning-rate multiplier forwarded to every ``ConvNormLayer``.
        norm_type: Forwarded to ``ConvNormLayer``.
        norm_decay: Forwarded to ``ConvNormLayer``.
        freeze_norm: Forwarded to ``ConvNormLayer``.
        dcn_v2: Forwarded to the 3x3 ``ConvNormLayer`` instances.
    """

    def __init__(self,
                 ch_in,
                 ch_out,
                 stride,
                 shortcut,
                 width,
                 scales=4,
                 variant='b',
                 groups=1,
                 lr=1.0,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=True,
                 dcn_v2=False):
        super(BottleNeck, self).__init__()

        self.shortcut = shortcut
        self.scales = scales
        self.stride = stride
        if not shortcut:
            if variant == 'd' and stride == 2:
                # Downsample with average pooling, then project with a
                # stride-1 1x1 conv.
                self.branch1 = nn.Sequential()
                self.branch1.add_sublayer(
                    'pool',
                    nn.AvgPool2D(
                        kernel_size=2, stride=2, padding=0, ceil_mode=True))
                self.branch1.add_sublayer(
                    'conv',
                    ConvNormLayer(
                        ch_in=ch_in,
                        ch_out=ch_out,
                        filter_size=1,
                        stride=1,
                        norm_type=norm_type,
                        norm_decay=norm_decay,
                        freeze_norm=freeze_norm,
                        lr=lr))
            else:
                self.branch1 = ConvNormLayer(
                    ch_in=ch_in,
                    ch_out=ch_out,
                    filter_size=1,
                    stride=stride,
                    norm_type=norm_type,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    lr=lr)

        self.branch2a = ConvNormLayer(
            ch_in=ch_in,
            ch_out=width * scales,
            filter_size=1,
            stride=stride if variant == 'a' else 1,
            groups=1,
            act='relu',
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            lr=lr)

        # One 3x3 conv per split except the last, which bypasses the convs.
        self.branch2b = nn.LayerList([
            ConvNormLayer(
                ch_in=width,
                ch_out=width,
                filter_size=3,
                stride=1 if variant == 'a' else stride,
                groups=groups,
                act='relu',
                norm_type=norm_type,
                norm_decay=norm_decay,
                freeze_norm=freeze_norm,
                lr=lr,
                dcn_v2=dcn_v2) for _ in range(self.scales - 1)
        ])

        self.branch2c = ConvNormLayer(
            ch_in=width * scales,
            ch_out=ch_out,
            filter_size=1,
            stride=1,
            groups=1,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            lr=lr)

    def forward(self, inputs):
        """Apply the multi-scale bottleneck and the residual connection."""
        out = self.branch2a(inputs)
        feature_split = paddle.split(out, self.scales, 1)
        out_split = []
        for i in range(self.scales - 1):
            if i == 0 or self.stride == 2:
                out_split.append(self.branch2b[i](feature_split[i]))
            else:
                # Feed the previous split's output into the current split.
                out_split.append(self.branch2b[i](paddle.add(feature_split[i],
                                                             out_split[-1])))
        if self.stride == 1:
            out_split.append(feature_split[-1])
        else:
            out_split.append(
                F.avg_pool2d(feature_split[-1], 3, self.stride, 1))
        out = self.branch2c(paddle.concat(out_split, 1))

        if self.shortcut:
            short = inputs
        else:
            short = self.branch1(inputs)

        out = paddle.add(out, short)
        out = F.relu(out)

        return out
||||
|
||||
|
||||
class Blocks(nn.Layer):
    """One Res2Net stage: ``count`` ``BottleNeck`` blocks applied in sequence.

    Args:
        ch_in: Input channels of the first block.
        ch_out: Output channels of every block.
        count: Number of blocks in the stage.
        stage_num: Stage number. The first block uses stride 2 unless
            ``stage_num`` is 2, and the split width is
            ``width * 2 ** (stage_num - 2)``.
        width: Base split width.
        scales: Forwarded to ``BottleNeck``.
        variant: Forwarded to ``BottleNeck``.
        groups: Forwarded to ``BottleNeck``.
        lr: Forwarded to ``BottleNeck``.
        norm_type: Forwarded to ``BottleNeck``.
        norm_decay: Forwarded to ``BottleNeck``.
        freeze_norm: Forwarded to ``BottleNeck``.
        dcn_v2: Forwarded to ``BottleNeck``.
    """

    def __init__(self,
                 ch_in,
                 ch_out,
                 count,
                 stage_num,
                 width,
                 scales=4,
                 variant='b',
                 groups=1,
                 lr=1.0,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=True,
                 dcn_v2=False):
        super(Blocks, self).__init__()

        self.blocks = nn.Sequential()
        for i in range(count):
            # Only the first block changes channels/stride and therefore
            # needs a projection instead of an identity shortcut.
            self.blocks.add_sublayer(
                str(i),
                BottleNeck(
                    ch_in=ch_in if i == 0 else ch_out,
                    ch_out=ch_out,
                    stride=2 if i == 0 and stage_num != 2 else 1,
                    shortcut=False if i == 0 else True,
                    width=width * (2**(stage_num - 2)),
                    scales=scales,
                    variant=variant,
                    groups=groups,
                    lr=lr,
                    norm_type=norm_type,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    dcn_v2=dcn_v2))

    def forward(self, inputs):
        """Run all blocks of the stage on ``inputs``."""
        return self.blocks(inputs)
||||
|
||||
|
||||
@register
@serializable
class Res2Net(nn.Layer):
    """
    Res2Net, see https://arxiv.org/abs/1904.01169
    Args:
        depth (int): Res2Net depth, should be 50, 101, 152, 200.
        width (int): Res2Net width
        scales (int): Res2Net scale
        variant (str): Res2Net variant, supports 'a', 'b', 'c', 'd' currently
        lr_mult_list (list): learning rate ratio of the four stages
            (2, 3, 4, 5); must have exactly 4 entries. A lower ratio is
            needed for pretrained models obtained by distillation
            (default [1.0, 1.0, 1.0, 1.0]).
        groups (int): The groups number of the Conv Layer.
        norm_type (str): normalization type, 'bn' or 'sync_bn'
        norm_decay (float): weight decay for normalization layer weights
        freeze_norm (bool): freeze normalization layers
        freeze_at (int): index of the stage whose output has its gradient
            stopped
        return_idx (list): index of stages whose feature maps are returned,
            index 0 stands for res2
        dcn_v2_stages (list): index of stages that use deformable conv v2
        num_stages (int): number of stages created (1 to 4)

    """
    __shared__ = ['norm_type']

    def __init__(self,
                 depth=50,
                 width=26,
                 scales=4,
                 variant='b',
                 lr_mult_list=[1.0, 1.0, 1.0, 1.0],
                 groups=1,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=True,
                 freeze_at=0,
                 return_idx=[0, 1, 2, 3],
                 dcn_v2_stages=[-1],
                 num_stages=4):
        super(Res2Net, self).__init__()

        self._model_type = 'Res2Net' if groups == 1 else 'Res2NeXt'

        # The placeholder is now filled in so the message names the bad value.
        assert depth in [50, 101, 152, 200], \
            "depth {} not in [50, 101, 152, 200]".format(depth)
        assert variant in ['a', 'b', 'c', 'd'], "invalid Res2Net variant"
        assert num_stages >= 1 and num_stages <= 4

        self.depth = depth
        self.variant = variant
        self.norm_type = norm_type
        self.norm_decay = norm_decay
        self.freeze_norm = freeze_norm
        self.freeze_at = freeze_at
        if isinstance(return_idx, Integral):
            return_idx = [return_idx]
        assert max(return_idx) < num_stages, \
            'the maximum return index must smaller than num_stages, ' \
            'but received maximum return index is {} and num_stages ' \
            'is {}'.format(max(return_idx), num_stages)
        self.return_idx = return_idx
        self.num_stages = num_stages
        assert len(lr_mult_list) == 4, \
            "lr_mult_list length must be 4 but got {}".format(len(lr_mult_list))
        if isinstance(dcn_v2_stages, Integral):
            dcn_v2_stages = [dcn_v2_stages]
        assert max(dcn_v2_stages) < num_stages
        self.dcn_v2_stages = dcn_v2_stages

        block_nums = Res2Net_cfg[depth]

        # C1 stage
        if self.variant in ['c', 'd']:
            conv_def = [
                [3, 32, 3, 2, "conv1_1"],
                [32, 32, 3, 1, "conv1_2"],
                [32, 64, 3, 1, "conv1_3"],
            ]
        else:
            conv_def = [[3, 64, 7, 2, "conv1"]]
        self.res1 = nn.Sequential()
        for (c_in, c_out, k, s, _name) in conv_def:
            self.res1.add_sublayer(
                _name,
                ConvNormLayer(
                    ch_in=c_in,
                    ch_out=c_out,
                    filter_size=k,
                    stride=s,
                    groups=1,
                    act='relu',
                    norm_type=norm_type,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    lr=1.0))

        self._in_channels = [64, 256, 512, 1024]
        self._out_channels = [256, 512, 1024, 2048]
        self._out_strides = [4, 8, 16, 32]

        # C2-C5 stages
        # NOTE(review): ``variant`` is not forwarded to ``Blocks``, so the
        # bottlenecks always use Blocks' default variant regardless of the
        # value given here. Confirm against the pretrained weights before
        # changing this.
        self.res_layers = []
        for i in range(num_stages):
            lr_mult = lr_mult_list[i]
            stage_num = i + 2
            self.res_layers.append(
                self.add_sublayer(
                    "res{}".format(stage_num),
                    Blocks(
                        self._in_channels[i],
                        self._out_channels[i],
                        count=block_nums[i],
                        stage_num=stage_num,
                        width=width,
                        scales=scales,
                        groups=groups,
                        lr=lr_mult,
                        norm_type=norm_type,
                        norm_decay=norm_decay,
                        freeze_norm=freeze_norm,
                        dcn_v2=(i in self.dcn_v2_stages))))

    @property
    def out_shape(self):
        """Return a ``ShapeSpec`` (channels and stride) for every index in ``return_idx``."""
        return [
            ShapeSpec(
                channels=self._out_channels[i], stride=self._out_strides[i])
            for i in self.return_idx
        ]

    def forward(self, inputs):
        """Run the stem and all stages on ``inputs['image']``; return the stages in ``return_idx``."""
        x = inputs['image']
        res1 = self.res1(x)
        x = F.max_pool2d(res1, kernel_size=3, stride=2, padding=1)
        outs = []
        for idx, stage in enumerate(self.res_layers):
            x = stage(x)
            if idx == self.freeze_at:
                x.stop_gradient = True
            if idx in self.return_idx:
                outs.append(x)
        return outs
||||
|
||||
|
||||
@register
class Res2NetC5(nn.Layer):
    """Stand-alone fifth Res2Net stage (three blocks, 1024 -> 2048 channels).

    Args:
        depth: Accepted for configuration compatibility; not used here.
        width: Base split width forwarded to ``Blocks``.
        scales: Forwarded to ``Blocks``.
        variant: Forwarded to ``Blocks``.
    """

    def __init__(self, depth=50, width=26, scales=4, variant='b'):
        super(Res2NetC5, self).__init__()
        feat_in, feat_out = [1024, 2048]
        self.res5 = Blocks(
            feat_in,
            feat_out,
            count=3,
            stage_num=5,
            width=width,
            scales=scales,
            variant=variant)
        self.feat_out = feat_out

    @property
    def out_shape(self):
        """Return a single ``ShapeSpec`` with ``feat_out`` channels and stride 32."""
        return [ShapeSpec(
            channels=self.feat_out,
            stride=32, )]

    def forward(self, roi_feat, stage=0):
        """Apply the stage to ``roi_feat``; ``stage`` is accepted but unused."""
        y = self.res5(roi_feat)
        return y
@ -0,0 +1,609 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
import math |
||||
from numbers import Integral |
||||
|
||||
import paddle |
||||
import paddle.nn as nn |
||||
import paddle.nn.functional as F |
||||
from paddlers.models.ppdet.core.workspace import register, serializable |
||||
from paddle.regularizer import L2Decay |
||||
from paddle.nn.initializer import Uniform |
||||
from paddle import ParamAttr |
||||
from paddle.nn.initializer import Constant |
||||
from paddle.vision.ops import DeformConv2D |
||||
from .name_adapter import NameAdapter |
||||
from ..shape_spec import ShapeSpec |
||||
|
||||
__all__ = ['ResNet', 'Res5Head', 'Blocks', 'BasicBlock', 'BottleNeck'] |
||||
|
||||
# Number of blocks in each of the four stages, keyed by depth.
ResNet_cfg = {
    18: [2, 2, 2, 2],
    34: [3, 4, 6, 3],
    50: [3, 4, 6, 3],
    101: [3, 4, 23, 3],
    152: [3, 8, 36, 3],
}
||||
|
||||
|
||||
class ConvNormLayer(nn.Layer): |
||||
def __init__(self, |
||||
ch_in, |
||||
ch_out, |
||||
filter_size, |
||||
stride, |
||||
groups=1, |
||||
act=None, |
||||
norm_type='bn', |
||||
norm_decay=0., |
||||
freeze_norm=True, |
||||
lr=1.0, |
||||
dcn_v2=False): |
||||
super(ConvNormLayer, self).__init__() |
||||
assert norm_type in ['bn', 'sync_bn'] |
||||
self.norm_type = norm_type |
||||
self.act = act |
||||
self.dcn_v2 = dcn_v2 |
||||
|
||||
if not self.dcn_v2: |
||||
self.conv = nn.Conv2D( |
||||
in_channels=ch_in, |
||||
out_channels=ch_out, |
||||
kernel_size=filter_size, |
||||
stride=stride, |
||||
padding=(filter_size - 1) // 2, |
||||
groups=groups, |
||||
weight_attr=ParamAttr(learning_rate=lr), |
||||
bias_attr=False) |
||||
else: |
||||
self.offset_channel = 2 * filter_size**2 |
||||
self.mask_channel = filter_size**2 |
||||
|
||||
self.conv_offset = nn.Conv2D( |
||||
in_channels=ch_in, |
||||
out_channels=3 * filter_size**2, |
||||
kernel_size=filter_size, |
||||
stride=stride, |
||||
padding=(filter_size - 1) // 2, |
||||
weight_attr=ParamAttr(initializer=Constant(0.)), |
||||
bias_attr=ParamAttr(initializer=Constant(0.))) |
||||
self.conv = DeformConv2D( |
||||
in_channels=ch_in, |
||||
out_channels=ch_out, |
||||
kernel_size=filter_size, |
||||
stride=stride, |
||||
padding=(filter_size - 1) // 2, |
||||
dilation=1, |
||||
groups=groups, |
||||
weight_attr=ParamAttr(learning_rate=lr), |
||||
bias_attr=False) |
||||
|
||||
norm_lr = 0. if freeze_norm else lr |
||||
param_attr = ParamAttr( |
||||
learning_rate=norm_lr, |
||||
regularizer=L2Decay(norm_decay), |
||||
trainable=False if freeze_norm else True) |
||||
bias_attr = ParamAttr( |
||||
learning_rate=norm_lr, |
||||
regularizer=L2Decay(norm_decay), |
||||
trainable=False if freeze_norm else True) |
||||
|
||||
global_stats = True if freeze_norm else None |
||||
if norm_type in ['sync_bn', 'bn']: |
||||
self.norm = nn.BatchNorm2D( |
||||
ch_out, |
||||
weight_attr=param_attr, |
||||
bias_attr=bias_attr, |
||||
use_global_stats=global_stats) |
||||
norm_params = self.norm.parameters() |
||||
|
||||
if freeze_norm: |
||||
for param in norm_params: |
||||
param.stop_gradient = True |
||||
|
||||
def forward(self, inputs):
    """Apply the convolution, then normalization and the optional activation.

    Args:
        inputs: Feature map handed to ``self.conv`` (and to
            ``self.conv_offset`` when deformable convolution is enabled).

    Returns:
        The convolved feature map after normalization and, if ``self.act``
        is set, after the ``paddle.nn.functional`` op of that name.
    """
    if self.dcn_v2:
        # conv_offset emits offsets and mask logits stacked along axis 1;
        # split them back apart using the channel counts stored in __init__.
        stacked = self.conv_offset(inputs)
        sections = [self.offset_channel, self.mask_channel]
        offset, mask_logits = paddle.split(
            stacked, num_or_sections=sections, axis=1)
        modulation = F.sigmoid(mask_logits)
        feat = self.conv(inputs, offset, mask=modulation)
    else:
        feat = self.conv(inputs)

    if self.norm_type in ['bn', 'sync_bn']:
        feat = self.norm(feat)

    if not self.act:
        return feat
    # The activation is looked up by name on paddle.nn.functional.
    activation = getattr(F, self.act)
    return activation(feat)
||||
|
||||
|
||||
class SELayer(nn.Layer):
    """Squeeze-and-Excitation gate that rescales the channels of its input.

    Args:
        ch (int): Number of channels of the gated feature map.
        reduction_ratio (int): The hidden width of the two-layer gate is
            ``ch // reduction_ratio``.
    """

    def __init__(self, ch, reduction_ratio=16):
        super(SELayer, self).__init__()
        self.pool = nn.AdaptiveAvgPool2D(1)

        hidden = ch // reduction_ratio

        # Each linear layer is initialised uniformly in +-1/sqrt(fan_in).
        squeeze_bound = 1.0 / math.sqrt(ch)
        squeeze_init = Uniform(-squeeze_bound, squeeze_bound)
        self.squeeze = nn.Linear(
            ch,
            hidden,
            weight_attr=paddle.ParamAttr(initializer=squeeze_init),
            bias_attr=True)

        extract_bound = 1.0 / math.sqrt(hidden)
        extract_init = Uniform(-extract_bound, extract_bound)
        self.extract = nn.Linear(
            hidden,
            ch,
            weight_attr=paddle.ParamAttr(initializer=extract_init),
            bias_attr=True)

    def forward(self, inputs):
        """Return ``inputs`` multiplied by a per-channel sigmoid gate."""
        # Pool to 1x1, then drop axes 2 and 3 so the linear layers see
        # one vector per sample.
        pooled = paddle.squeeze(self.pool(inputs), axis=[2, 3])
        hidden = F.relu(self.squeeze(pooled))
        gate = F.sigmoid(self.extract(hidden))
        # Restore the two trailing axes so the gate broadcasts over inputs.
        gate = paddle.unsqueeze(gate, axis=[2, 3])
        return gate * inputs
||||
|
||||
|
||||
class BasicBlock(nn.Layer):
    """Two-convolution (3x3, 3x3) residual block.

    Args:
        ch_in (int): Input channels.
        ch_out (int): Output channels of both convolutions.
        stride (int): Stride of the first 3x3 convolution and of the
            projection shortcut.
        shortcut (bool): When True the input is added back unchanged;
            when False a 1x1 projection is built for the skip path.
        variant (str): With ``'d'`` and ``stride == 2`` the projection is a
            2x2 average pool followed by a stride-1 1x1 convolution.
        groups (int): Must be 1.
        base_width (int): Must be 64.
        lr (float): Learning-rate factor passed to every ConvNormLayer.
        norm_type (str): Passed to every ConvNormLayer.
        norm_decay (float): Passed to every ConvNormLayer.
        freeze_norm (bool): Passed to every ConvNormLayer.
        dcn_v2 (bool): Passed to the second convolution only.
        std_senet (bool): Append an SELayer to the residual branch.
    """

    expansion = 1

    def __init__(self,
                 ch_in,
                 ch_out,
                 stride,
                 shortcut,
                 variant='b',
                 groups=1,
                 base_width=64,
                 lr=1.0,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=True,
                 dcn_v2=False,
                 std_senet=False):
        super(BasicBlock, self).__init__()
        assert groups == 1 and base_width == 64, 'BasicBlock only supports groups=1 and base_width=64'

        # Settings every ConvNormLayer of this block has in common.
        common = dict(
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            lr=lr)

        self.shortcut = shortcut
        if not shortcut:
            pool_then_project = variant == 'd' and stride == 2
            if pool_then_project:
                self.short = nn.Sequential()
                self.short.add_sublayer(
                    'pool',
                    nn.AvgPool2D(
                        kernel_size=2, stride=2, padding=0, ceil_mode=True))
                self.short.add_sublayer(
                    'conv',
                    ConvNormLayer(
                        ch_in=ch_in,
                        ch_out=ch_out,
                        filter_size=1,
                        stride=1,
                        **common))
            else:
                self.short = ConvNormLayer(
                    ch_in=ch_in,
                    ch_out=ch_out,
                    filter_size=1,
                    stride=stride,
                    **common)

        self.branch2a = ConvNormLayer(
            ch_in=ch_in,
            ch_out=ch_out,
            filter_size=3,
            stride=stride,
            act='relu',
            **common)

        self.branch2b = ConvNormLayer(
            ch_in=ch_out,
            ch_out=ch_out,
            filter_size=3,
            stride=1,
            act=None,
            dcn_v2=dcn_v2,
            **common)

        self.std_senet = std_senet
        if self.std_senet:
            self.se = SELayer(ch_out)

    def forward(self, inputs):
        """Return ``relu(residual(inputs) + skip(inputs))``."""
        residual = self.branch2b(self.branch2a(inputs))
        if self.std_senet:
            residual = self.se(residual)

        skip = inputs if self.shortcut else self.short(inputs)

        return F.relu(paddle.add(x=residual, y=skip))
||||
|
||||
|
||||
class BottleNeck(nn.Layer):
    """Three-convolution (1x1, 3x3, 1x1) residual block.

    The block outputs ``ch_out * expansion`` channels.

    Args:
        ch_in (int): Input channels.
        ch_out (int): Base channel count.
        stride (int): Block stride. Variant ``'a'`` applies it in the first
            1x1 convolution; every other variant applies it in the 3x3.
        shortcut (bool): When True the input is added back unchanged;
            when False a 1x1 projection is built for the skip path.
        variant (str): With ``'d'`` and ``stride == 2`` the projection is a
            2x2 average pool followed by a stride-1 1x1 convolution.
        groups (int): Groups of the 3x3 convolution.
        base_width (int): Inner width is
            ``int(ch_out * (base_width / 64.)) * groups``.
        lr (float): Learning-rate factor passed to every ConvNormLayer.
        norm_type (str): Passed to every ConvNormLayer.
        norm_decay (float): Passed to every ConvNormLayer.
        freeze_norm (bool): Passed to every ConvNormLayer.
        dcn_v2 (bool): Passed to the 3x3 convolution only.
        std_senet (bool): Append an SELayer to the residual branch.
    """

    expansion = 4

    def __init__(self,
                 ch_in,
                 ch_out,
                 stride,
                 shortcut,
                 variant='b',
                 groups=1,
                 base_width=4,
                 lr=1.0,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=True,
                 dcn_v2=False,
                 std_senet=False):
        super(BottleNeck, self).__init__()

        # Decide which convolution of the residual branch carries the stride.
        stride1, stride2 = (stride, 1) if variant == 'a' else (1, stride)

        # ResNeXt
        width = int(ch_out * (base_width / 64.)) * groups
        expanded = ch_out * self.expansion

        # Settings every ConvNormLayer of this block has in common.
        common = dict(
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            lr=lr)

        self.shortcut = shortcut
        if not shortcut:
            pool_then_project = variant == 'd' and stride == 2
            if pool_then_project:
                self.short = nn.Sequential()
                self.short.add_sublayer(
                    'pool',
                    nn.AvgPool2D(
                        kernel_size=2, stride=2, padding=0, ceil_mode=True))
                self.short.add_sublayer(
                    'conv',
                    ConvNormLayer(
                        ch_in=ch_in,
                        ch_out=expanded,
                        filter_size=1,
                        stride=1,
                        **common))
            else:
                self.short = ConvNormLayer(
                    ch_in=ch_in,
                    ch_out=expanded,
                    filter_size=1,
                    stride=stride,
                    **common)

        self.branch2a = ConvNormLayer(
            ch_in=ch_in,
            ch_out=width,
            filter_size=1,
            stride=stride1,
            groups=1,
            act='relu',
            **common)

        self.branch2b = ConvNormLayer(
            ch_in=width,
            ch_out=width,
            filter_size=3,
            stride=stride2,
            groups=groups,
            act='relu',
            dcn_v2=dcn_v2,
            **common)

        self.branch2c = ConvNormLayer(
            ch_in=width,
            ch_out=expanded,
            filter_size=1,
            stride=1,
            groups=1,
            **common)

        self.std_senet = std_senet
        if self.std_senet:
            self.se = SELayer(expanded)

    def forward(self, inputs):
        """Return ``relu(residual(inputs) + skip(inputs))``."""
        residual = self.branch2a(inputs)
        residual = self.branch2b(residual)
        residual = self.branch2c(residual)

        if self.std_senet:
            residual = self.se(residual)

        skip = inputs if self.shortcut else self.short(inputs)

        return F.relu(paddle.add(x=residual, y=skip))
||||
|
||||
|
||||
class Blocks(nn.Layer):
    """One stage: ``count`` residual blocks of the same type run in sequence.

    Args:
        block: Block class to instantiate (it must expose ``expansion``).
        ch_in (int): Input channels of the first block.
        ch_out (int): Base channel count passed to every block.
        count (int): Number of blocks in the stage.
        name_adapter: Object whose ``fix_layer_warp_name`` supplies the
            sublayer name of each block.
        stage_num (int): Stage index. The first block uses stride 2 unless
            ``stage_num`` is 2.
        variant, groups, base_width, lr, norm_type, norm_decay, freeze_norm,
        dcn_v2, std_senet: Forwarded unchanged to every block.
    """

    def __init__(self,
                 block,
                 ch_in,
                 ch_out,
                 count,
                 name_adapter,
                 stage_num,
                 variant='b',
                 groups=1,
                 base_width=64,
                 lr=1.0,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=True,
                 dcn_v2=False,
                 std_senet=False):
        super(Blocks, self).__init__()

        self.blocks = []
        block_in = ch_in
        for idx in range(count):
            is_first = idx == 0
            sublayer_name = name_adapter.fix_layer_warp_name(stage_num, count,
                                                             idx)
            unit = block(
                ch_in=block_in,
                ch_out=ch_out,
                # Only the first block downsamples, and never in stage 2.
                stride=2 if (is_first and stage_num != 2) else 1,
                # The first block changes the channel count, so it needs a
                # projection instead of an identity skip.
                shortcut=not is_first,
                variant=variant,
                groups=groups,
                base_width=base_width,
                lr=lr,
                norm_type=norm_type,
                norm_decay=norm_decay,
                freeze_norm=freeze_norm,
                dcn_v2=dcn_v2,
                std_senet=std_senet)
            self.blocks.append(self.add_sublayer(sublayer_name, unit))
            if is_first:
                # Later blocks consume the expanded output of the first one.
                block_in = ch_out * block.expansion

    def forward(self, inputs):
        """Run ``inputs`` through every block in order."""
        feat = inputs
        for unit in self.blocks:
            feat = unit(feat)
        return feat
||||
|
||||
|
||||
@register
@serializable
class ResNet(nn.Layer):
    __shared__ = ['norm_type']

    def __init__(self,
                 depth=50,
                 ch_in=64,
                 variant='b',
                 lr_mult_list=[1.0, 1.0, 1.0, 1.0],
                 groups=1,
                 base_width=64,
                 norm_type='bn',
                 norm_decay=0,
                 freeze_norm=True,
                 freeze_at=0,
                 return_idx=[0, 1, 2, 3],
                 dcn_v2_stages=[-1],
                 num_stages=4,
                 std_senet=False):
        """
        Residual Network, see https://arxiv.org/abs/1512.03385

        Args:
            depth (int): ResNet depth; used as a key into ResNet_cfg and
                selects BottleNeck blocks when >= 50, BasicBlock otherwise.
            ch_in (int): output channel of the stem (conv1), default 64
            variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently
            lr_mult_list (list): learning rate ratio of different resnet stages(2,3,4,5),
                lower learning rate ratio is need for pretrained model
                got using distillation(default as [1.0, 1.0, 1.0, 1.0]).
                Must have exactly 4 entries.
            groups (int): group convolution cardinality
            base_width (int): base width of each group convolution
            norm_type (str): normalization type, 'bn' or 'sync_bn'
                (ConvNormLayer asserts on any other value)
            norm_decay (float): weight decay for normalization layer weights
            freeze_norm (bool): freeze normalization layers
            freeze_at (int): when >= 0, the stem and stages 0..freeze_at are
                frozen (stop_gradient set on their parameters)
            return_idx (list|int): index of the stages whose feature maps are returned
            dcn_v2_stages (list|int): index of stages who select deformable conv v2
            num_stages (int): total num of stages, between 1 and 4
            std_senet (bool): whether use senet, default False
        """
        super(ResNet, self).__init__()
        self._model_type = 'ResNet' if groups == 1 else 'ResNeXt'
        assert num_stages >= 1 and num_stages <= 4
        self.depth = depth
        self.variant = variant
        self.groups = groups
        self.base_width = base_width
        self.norm_type = norm_type
        self.norm_decay = norm_decay
        self.freeze_norm = freeze_norm
        self.freeze_at = freeze_at
        if isinstance(return_idx, Integral):
            return_idx = [return_idx]
        assert max(return_idx) < num_stages, \
            'the maximum return index must smaller than num_stages, ' \
            'but received maximum return index is {} and num_stages ' \
            'is {}'.format(max(return_idx), num_stages)
        self.return_idx = return_idx
        self.num_stages = num_stages
        assert len(lr_mult_list) == 4, \
            "lr_mult_list length must be 4 but got {}".format(len(lr_mult_list))
        # Accept a single stage index as well as a list. This check used to
        # appear twice in a row; one pass is sufficient.
        if isinstance(dcn_v2_stages, Integral):
            dcn_v2_stages = [dcn_v2_stages]
        assert max(dcn_v2_stages) < num_stages
        self.dcn_v2_stages = dcn_v2_stages

        block_nums = ResNet_cfg[depth]
        na = NameAdapter(self)

        conv1_name = na.fix_c1_stage_name()
        if variant in ['c', 'd']:
            # Variants 'c' and 'd' replace the single 7x7 stem convolution
            # with three 3x3 convolutions.
            conv_def = [
                [3, ch_in // 2, 3, 2, "conv1_1"],
                [ch_in // 2, ch_in // 2, 3, 1, "conv1_2"],
                [ch_in // 2, ch_in, 3, 1, "conv1_3"],
            ]
        else:
            conv_def = [[3, ch_in, 7, 2, conv1_name]]
        self.conv1 = nn.Sequential()
        for (c_in, c_out, k, s, _name) in conv_def:
            self.conv1.add_sublayer(
                _name,
                ConvNormLayer(
                    ch_in=c_in,
                    ch_out=c_out,
                    filter_size=k,
                    stride=s,
                    groups=1,
                    act='relu',
                    norm_type=norm_type,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    lr=1.0))

        self.ch_in = ch_in
        ch_out_list = [64, 128, 256, 512]
        block = BottleNeck if depth >= 50 else BasicBlock

        self._out_channels = [block.expansion * v for v in ch_out_list]
        self._out_strides = [4, 8, 16, 32]

        self.res_layers = []
        for i in range(num_stages):
            lr_mult = lr_mult_list[i]
            # Stages are numbered from 2 (res2 .. res5).
            stage_num = i + 2
            res_name = "res{}".format(stage_num)
            res_layer = self.add_sublayer(
                res_name,
                Blocks(
                    block,
                    self.ch_in,
                    ch_out_list[i],
                    count=block_nums[i],
                    name_adapter=na,
                    stage_num=stage_num,
                    variant=variant,
                    groups=groups,
                    base_width=base_width,
                    lr=lr_mult,
                    norm_type=norm_type,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    dcn_v2=(i in self.dcn_v2_stages),
                    std_senet=std_senet))
            self.res_layers.append(res_layer)
            # The next stage consumes this stage's output channels.
            self.ch_in = self._out_channels[i]

        if freeze_at >= 0:
            self._freeze_parameters(self.conv1)
            for i in range(min(freeze_at + 1, num_stages)):
                self._freeze_parameters(self.res_layers[i])

    def _freeze_parameters(self, m):
        """Set stop_gradient on every parameter of layer ``m``."""
        for p in m.parameters():
            p.stop_gradient = True

    @property
    def out_shape(self):
        """ShapeSpec (channels, stride) for each stage listed in return_idx."""
        return [
            ShapeSpec(
                channels=self._out_channels[i], stride=self._out_strides[i])
            for i in self.return_idx
        ]

    def forward(self, inputs):
        """Return the feature maps of the stages listed in return_idx.

        Args:
            inputs (dict): Must contain the key 'image'.

        Returns:
            list: Stage outputs, in stage order, for indices in return_idx.
        """
        x = inputs['image']
        conv1 = self.conv1(x)
        x = F.max_pool2d(conv1, kernel_size=3, stride=2, padding=1)
        outs = []
        for idx, stage in enumerate(self.res_layers):
            x = stage(x)
            if idx in self.return_idx:
                outs.append(x)
        return outs
||||
|
||||
|
||||
@register
class Res5Head(nn.Layer):
    def __init__(self, depth=50):
        """
        Head made of a single stage-5 ``Blocks`` group of three blocks.

        Args:
            depth (int): Selects BottleNeck blocks (1024 input channels)
                when >= 50, otherwise BasicBlock (256 input channels).
        """
        super(Res5Head, self).__init__()
        use_bottleneck = depth >= 50
        feat_out = 512
        feat_in = 1024 if use_bottleneck else 256
        na = NameAdapter(self)
        block = BottleNeck if use_bottleneck else BasicBlock
        self.res5 = Blocks(
            block, feat_in, feat_out, count=3, name_adapter=na, stage_num=5)
        # BottleNeck blocks expand their output channels fourfold.
        self.feat_out = feat_out * 4 if use_bottleneck else feat_out

    @property
    def out_shape(self):
        """Single ShapeSpec with the head's output channels and stride 16."""
        return [ShapeSpec(
            channels=self.feat_out,
            stride=16, )]

    def forward(self, roi_feat, stage=0):
        """Run ``roi_feat`` through the res5 blocks; ``stage`` is unused."""
        return self.res5(roi_feat)
@ -0,0 +1,139 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
import paddle.nn as nn

from paddlers.models.ppdet.core.workspace import register, serializable
from .resnet import ResNet, Blocks, BasicBlock, BottleNeck
from .name_adapter import NameAdapter
from ..shape_spec import ShapeSpec
||||
|
||||
__all__ = ['SENet', 'SERes5Head'] |
||||
|
||||
|
||||
@register
@serializable
class SENet(ResNet):
    __shared__ = ['norm_type']

    def __init__(self,
                 depth=50,
                 variant='b',
                 lr_mult_list=[1.0, 1.0, 1.0, 1.0],
                 groups=1,
                 base_width=64,
                 norm_type='bn',
                 norm_decay=0,
                 freeze_norm=True,
                 freeze_at=0,
                 return_idx=[0, 1, 2, 3],
                 dcn_v2_stages=[-1],
                 std_senet=True,
                 num_stages=4):
        """
        Squeeze-and-Excitation Networks, see https://arxiv.org/abs/1709.01507

        A ResNet whose stem width is fixed at 128 channels and whose
        ``std_senet`` flag defaults to True; every other argument is
        forwarded unchanged to ``ResNet.__init__``.

        Args:
            depth (int): SENet depth, should be 50, 101, 152
            variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently
            lr_mult_list (list): learning rate ratio of different resnet stages(2,3,4,5),
                lower learning rate ratio is need for pretrained model
                got using distillation(default as [1.0, 1.0, 1.0, 1.0]).
            groups (int): group convolution cardinality
            base_width (int): base width of each group convolution
            norm_type (str): normalization type, forwarded to ResNet
            norm_decay (float): weight decay for normalization layer weights
            freeze_norm (bool): freeze normalization layers
            freeze_at (int): freeze the backbone at which stage
            return_idx (list): index of the stages whose feature maps are returned
            dcn_v2_stages (list): index of stages who select deformable conv v2
            std_senet (bool): whether use senet, default True
            num_stages (int): total num of stages
        """
        resnet_kwargs = dict(
            depth=depth,
            variant=variant,
            lr_mult_list=lr_mult_list,
            groups=groups,
            base_width=base_width,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            freeze_at=freeze_at,
            return_idx=return_idx,
            dcn_v2_stages=dcn_v2_stages,
            std_senet=std_senet,
            num_stages=num_stages)
        # The stem width is not configurable for SENet.
        super(SENet, self).__init__(ch_in=128, **resnet_kwargs)
||||
|
||||
|
||||
@register
class SERes5Head(nn.Layer):
    def __init__(self,
                 depth=50,
                 variant='b',
                 lr_mult=1.0,
                 groups=1,
                 base_width=64,
                 norm_type='bn',
                 norm_decay=0,
                 dcn_v2=False,
                 freeze_norm=False,
                 std_senet=True):
        """
        SERes5Head layer

        Builds a single stage-5 ``Blocks`` group of three blocks. This
        relies on ``NameAdapter`` and ``ShapeSpec`` being imported at module
        level.

        Args:
            depth (int): SENet depth, should be 50, 101, 152; BottleNeck
                blocks (1024 input channels) are used when >= 50,
                otherwise BasicBlock (256 input channels)
            variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently
            lr_mult (float): learning rate ratio of SERes5Head, default as 1.0.
            groups (int): group convolution cardinality
            base_width (int): base width of each group convolution
            norm_type (str): normalization type, forwarded to the blocks
            norm_decay (float): weight decay for normalization layer weights
            dcn_v2 (bool): whether the blocks use deformable conv v2
            freeze_norm (bool): freeze normalization layers
            std_senet (bool): whether use senet, default True

        """
        super(SERes5Head, self).__init__()
        ch_out = 512
        ch_in = 256 if depth < 50 else 1024
        na = NameAdapter(self)
        block = BottleNeck if depth >= 50 else BasicBlock
        self.res5 = Blocks(
            block,
            ch_in,
            ch_out,
            count=3,
            name_adapter=na,
            stage_num=5,
            variant=variant,
            groups=groups,
            base_width=base_width,
            lr=lr_mult,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            dcn_v2=dcn_v2,
            std_senet=std_senet)
        # Output width follows the block type's channel expansion factor.
        self.ch_out = ch_out * block.expansion

    @property
    def out_shape(self):
        """Single ShapeSpec with the head's output channels and stride 16."""
        return [ShapeSpec(
            channels=self.ch_out,
            stride=16, )]

    def forward(self, roi_feat):
        """Run ``roi_feat`` through the res5 blocks and return the result."""
        y = self.res5(roi_feat)
        return y
@ -0,0 +1,251 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
import paddle |
||||
import paddle.nn as nn |
||||
from paddle import ParamAttr |
||||
import paddle.nn.functional as F |
||||
from paddle.nn import Conv2D, MaxPool2D, AdaptiveAvgPool2D, BatchNorm2D |
||||
from paddle.nn.initializer import KaimingNormal |
||||
from paddle.regularizer import L2Decay |
||||
|
||||
from paddlers.models.ppdet.core.workspace import register, serializable |
||||
from numbers import Integral |
||||
from ..shape_spec import ShapeSpec |
||||
from paddlers.models.ppdet.modeling.ops import channel_shuffle |
||||
|
||||
__all__ = ['ShuffleNetV2'] |
||||
|
||||
|
||||
class ConvBNLayer(nn.Layer):
    """Bias-free Conv2D followed by BatchNorm2D and an optional activation.

    Args:
        in_channels (int): Input channels of the convolution.
        out_channels (int): Output channels of the convolution.
        kernel_size (int): Convolution kernel size.
        stride (int): Convolution stride.
        padding (int): Convolution padding.
        groups (int): Convolution groups.
        act (str|None): Name of a ``paddle.nn.functional`` activation;
            ``"hard_swish"`` is translated to ``'hardswish'``. A falsy value
            disables the activation.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 padding,
                 groups=1,
                 act=None):
        super(ConvBNLayer, self).__init__()
        conv_weight_attr = ParamAttr(initializer=KaimingNormal())
        self._conv = Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            weight_attr=conv_weight_attr,
            bias_attr=False)

        # Batch-norm scale and shift get an L2 coefficient of 0.0.
        self._batch_norm = BatchNorm2D(
            out_channels,
            weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
            bias_attr=ParamAttr(regularizer=L2Decay(0.0)))

        self.act = 'hardswish' if act == "hard_swish" else act

    def forward(self, inputs):
        """Return ``act(batch_norm(conv(inputs)))`` (no ``act`` when unset)."""
        normed = self._batch_norm(self._conv(inputs))
        if not self.act:
            return normed
        activation = getattr(F, self.act)
        return activation(normed)
||||
|
||||
|
||||
class InvertedResidual(nn.Layer):
    """ShuffleNetV2 unit that transforms only half of the input channels.

    The input is split in two along axis 1. The first half passes through
    untouched; the second half goes through 1x1 -> depthwise 3x3 -> 1x1.
    The halves are concatenated and channel-shuffled with 2 groups.

    Args:
        in_channels (int): Channels of the block input.
        out_channels (int): Channels of the block output.
        stride (int): Stride of the depthwise convolution.
        act (str): Activation name for the two 1x1 convolutions.
    """

    def __init__(self, in_channels, out_channels, stride, act="relu"):
        super(InvertedResidual, self).__init__()
        branch_in = in_channels // 2
        branch_out = out_channels // 2

        self._conv_pw = ConvBNLayer(
            in_channels=branch_in,
            out_channels=branch_out,
            kernel_size=1,
            stride=1,
            padding=0,
            groups=1,
            act=act)
        # groups == channels makes this a depthwise convolution.
        self._conv_dw = ConvBNLayer(
            in_channels=branch_out,
            out_channels=branch_out,
            kernel_size=3,
            stride=stride,
            padding=1,
            groups=branch_out,
            act=None)
        self._conv_linear = ConvBNLayer(
            in_channels=branch_out,
            out_channels=branch_out,
            kernel_size=1,
            stride=1,
            padding=0,
            groups=1,
            act=act)

    def forward(self, inputs):
        """Split, transform the second half, concatenate and shuffle."""
        half = inputs.shape[1] // 2
        untouched, branch = paddle.split(
            inputs, num_or_sections=[half, half], axis=1)
        branch = self._conv_pw(branch)
        branch = self._conv_dw(branch)
        branch = self._conv_linear(branch)
        merged = paddle.concat([untouched, branch], axis=1)
        return channel_shuffle(merged, 2)
||||
|
||||
|
||||
class InvertedResidualDS(nn.Layer):
    """ShuffleNetV2 unit in which both branches see the full input.

    Branch 1 is depthwise 3x3 -> 1x1; branch 2 is 1x1 -> depthwise 3x3 -> 1x1.
    Each branch produces ``out_channels // 2`` channels; the results are
    concatenated along axis 1 and channel-shuffled with 2 groups.

    Args:
        in_channels (int): Channels of the block input.
        out_channels (int): Channels of the block output.
        stride (int): Stride of both depthwise convolutions.
        act (str): Activation name for the 1x1 convolutions.
    """

    def __init__(self, in_channels, out_channels, stride, act="relu"):
        super(InvertedResidualDS, self).__init__()
        branch_out = out_channels // 2

        # branch1
        self._conv_dw_1 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=in_channels,
            kernel_size=3,
            stride=stride,
            padding=1,
            groups=in_channels,
            act=None)
        self._conv_linear_1 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=branch_out,
            kernel_size=1,
            stride=1,
            padding=0,
            groups=1,
            act=act)
        # branch2
        self._conv_pw_2 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=branch_out,
            kernel_size=1,
            stride=1,
            padding=0,
            groups=1,
            act=act)
        self._conv_dw_2 = ConvBNLayer(
            in_channels=branch_out,
            out_channels=branch_out,
            kernel_size=3,
            stride=stride,
            padding=1,
            groups=branch_out,
            act=None)
        self._conv_linear_2 = ConvBNLayer(
            in_channels=branch_out,
            out_channels=branch_out,
            kernel_size=1,
            stride=1,
            padding=0,
            groups=1,
            act=act)

    def forward(self, inputs):
        """Run both branches on ``inputs``, concatenate and shuffle."""
        first = self._conv_linear_1(self._conv_dw_1(inputs))

        second = self._conv_pw_2(inputs)
        second = self._conv_dw_2(second)
        second = self._conv_linear_2(second)

        merged = paddle.concat([first, second], axis=1)
        return channel_shuffle(merged, 2)
||||
|
||||
|
||||
@register
@serializable
class ShuffleNetV2(nn.Layer):
    def __init__(self, scale=1.0, act="relu", feature_maps=[5, 13, 17]):
        """
        ShuffleNetV2 backbone.

        Args:
            scale (float): Width multiplier; one of 0.25, 0.33, 0.5, 1.0,
                1.5, 2.0. Any other value raises NotImplementedError.
            act (str): Activation name passed to every building block.
            feature_maps (list|int): Feature indices to return. The stem
                convolution is index 1 and the k-th inverted-residual block
                (counting from 0) is index k + 2.
        """
        super(ShuffleNetV2, self).__init__()
        self.scale = scale
        if isinstance(feature_maps, Integral):
            feature_maps = [feature_maps]
        self.feature_maps = feature_maps
        stage_repeats = [4, 8, 4]

        # Per-scale channel plan; entry 0 is a placeholder, entry 1 the stem.
        channel_plans = (
            (0.25, [-1, 24, 24, 48, 96, 512]),
            (0.33, [-1, 24, 32, 64, 128, 512]),
            (0.5, [-1, 24, 48, 96, 192, 1024]),
            (1.0, [-1, 24, 116, 232, 464, 1024]),
            (1.5, [-1, 24, 176, 352, 704, 1024]),
            (2.0, [-1, 24, 224, 488, 976, 2048]),
        )
        for supported_scale, plan in channel_plans:
            if scale == supported_scale:
                stage_out_channels = plan
                break
        else:
            raise NotImplementedError("This scale size:[" + str(scale) +
                                      "] is not implemented!")

        self._out_channels = []
        self._feature_idx = 0
        # 1. conv1
        self._conv1 = ConvBNLayer(
            in_channels=3,
            out_channels=stage_out_channels[1],
            kernel_size=3,
            stride=2,
            padding=1,
            act=act)
        self._max_pool = MaxPool2D(kernel_size=3, stride=2, padding=1)
        self._feature_idx += 1

        # 2. bottleneck sequences
        self._block_list = []
        for stage_id, num_repeat in enumerate(stage_repeats):
            stage_in = stage_out_channels[stage_id + 1]
            stage_out = stage_out_channels[stage_id + 2]
            for i in range(num_repeat):
                layer_name = str(stage_id + 2) + '_' + str(i + 1)
                if i == 0:
                    # The first unit of a stage downsamples and changes width.
                    unit = InvertedResidualDS(
                        in_channels=stage_in,
                        out_channels=stage_out,
                        stride=2,
                        act=act)
                else:
                    unit = InvertedResidual(
                        in_channels=stage_out,
                        out_channels=stage_out,
                        stride=1,
                        act=act)
                block = self.add_sublayer(name=layer_name, sublayer=unit)
                self._block_list.append(block)
                self._feature_idx += 1
                self._update_out_channels(stage_out, self._feature_idx,
                                          self.feature_maps)

    def _update_out_channels(self, channel, feature_idx, feature_maps):
        """Record ``channel`` when ``feature_idx`` is a requested feature."""
        if feature_idx not in feature_maps:
            return
        self._out_channels.append(channel)

    def forward(self, inputs):
        """Return the outputs of the blocks selected by ``feature_maps``.

        Args:
            inputs (dict): Must contain the key 'image'.
        """
        y = self._max_pool(self._conv1(inputs['image']))
        outs = []
        # Block k corresponds to feature index k + 2 (see __init__).
        for feature_idx, inv in enumerate(self._block_list, start=2):
            y = inv(y)
            if feature_idx in self.feature_maps:
                outs.append(y)

        return outs

    @property
    def out_shape(self):
        """One ShapeSpec (channels only) per recorded output feature."""
        return [ShapeSpec(channels=c) for c in self._out_channels]
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue