diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index f9d3d3f..b4a1701 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -26,16 +26,16 @@ jobs:
include:
- python-version: "3.7"
os: windows-latest
- gdal-whl-url: https://download.lfd.uci.edu/pythonlibs/archived/cp37/GDAL-3.3.3-cp37-cp37m-win_amd64.whl
+ gdal-whl-url: https://paddlers.bj.bcebos.com/dependencies/gdal/GDAL-3.3.3-cp37-cp37m-win_amd64.whl
- python-version: "3.7"
os: ubuntu-latest
- gdal-whl-url: https://versaweb.dl.sourceforge.net/project/gdal-wheels-for-linux/GDAL-3.4.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl
+ gdal-whl-url: https://paddlers.bj.bcebos.com/dependencies/gdal/GDAL-3.4.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl
- python-version: "3.8"
os: windows-latest
- gdal-whl-url: https://download.lfd.uci.edu/pythonlibs/archived/GDAL-3.3.3-cp38-cp38-win_amd64.whl
+ gdal-whl-url: https://paddlers.bj.bcebos.com/dependencies/gdal/GDAL-3.3.3-cp38-cp38-win_amd64.whl
- python-version: "3.8"
os: ubuntu-latest
- gdal-whl-url: https://versaweb.dl.sourceforge.net/project/gdal-wheels-for-linux/GDAL-3.4.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl
+ gdal-whl-url: https://paddlers.bj.bcebos.com/dependencies/gdal/GDAL-3.4.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl
fail-fast: false
steps:
- uses: actions/checkout@v3
diff --git a/README.md b/README.md
index 013710d..98b0951 100644
--- a/README.md
+++ b/README.md
@@ -48,7 +48,7 @@ PaddleRS has the following five major features:
* If you find any problems with PaddleRS or have suggestions for it, feel free to report them to us via [GitHub Issues](https://github.com/PaddlePaddle/PaddleRS/issues).
* Welcome to join the PaddleRS WeChat group:
-
+
## Product Matrix
diff --git a/docs/images/whole_picture.png b/docs/images/whole_picture.png
index f99934e..963b854 100644
Binary files a/docs/images/whole_picture.png and b/docs/images/whole_picture.png differ
diff --git a/paddlers/models/hash.txt b/paddlers/models/hash.txt
index 3307510..922cec8 100644
--- a/paddlers/models/hash.txt
+++ b/paddlers/models/hash.txt
@@ -1 +1,2 @@
+ppdet ba2aad26e6bc1e5c2dad76ca96692a0d63eccfac
ppseg f6c73b478cdf00f40ae69edd35bf6bce2a1687ef
\ No newline at end of file
diff --git a/paddlers/models/ppdet/core/workspace.py b/paddlers/models/ppdet/core/workspace.py
index ec33b64..231532b 100644
--- a/paddlers/models/ppdet/core/workspace.py
+++ b/paddlers/models/ppdet/core/workspace.py
@@ -210,9 +210,17 @@ def create(cls_or_name, **kwargs):
assert type(cls_or_name) in [type, str
], "should be a class or name of a class"
name = type(cls_or_name) == str and cls_or_name or cls_or_name.__name__
- assert name in global_config and \
- isinstance(global_config[name], SchemaDict), \
- "the module {} is not registered".format(name)
+ if name in global_config:
+ if isinstance(global_config[name], SchemaDict):
+ pass
+ elif hasattr(global_config[name], "__dict__"):
+ # return a registered instance directly
+ return global_config[name]
+ else:
+ raise ValueError("The module {} is not registered".format(name))
+ else:
+ raise ValueError("The module {} is not registered".format(name))
+
config = global_config[name]
cls = getattr(config.pymodule, name)
cls_kwargs = {}
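# --- Illustrative sketch (not part of the patch): what the relaxed create()
# --- above allows. Anything registered in global_config that is a plain
# --- instance (has __dict__, not a SchemaDict) is now returned as-is instead
# --- of raising. DummyLogger is a hypothetical class for illustration.
from paddlers.models.ppdet.core.workspace import global_config, create

class DummyLogger(object):
    def log(self, msg):
        print(msg)

global_config['DummyLogger'] = DummyLogger()   # register a bare instance
obj = create('DummyLogger')                    # hits the new __dict__ branch
assert obj is global_config['DummyLogger']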
diff --git a/paddlers/models/ppdet/data/__init__.py b/paddlers/models/ppdet/data/__init__.py
index 69dd9a7..11bc9e4 100644
--- a/paddlers/models/ppdet/data/__init__.py
+++ b/paddlers/models/ppdet/data/__init__.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
from . import source
diff --git a/paddlers/models/ppdet/data/crop_utils/__init__.py b/paddlers/models/ppdet/data/crop_utils/__init__.py
index 97043fd..c747d3e 100644
--- a/paddlers/models/ppdet/data/crop_utils/__init__.py
+++ b/paddlers/models/ppdet/data/crop_utils/__init__.py
@@ -10,4 +10,4 @@
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
-# limitations under the License.
+# limitations under the License.
\ No newline at end of file
diff --git a/paddlers/models/ppdet/data/reader.py b/paddlers/models/ppdet/data/reader.py
index 8c7845f..a1922d0 100644
--- a/paddlers/models/ppdet/data/reader.py
+++ b/paddlers/models/ppdet/data/reader.py
@@ -23,7 +23,7 @@ else:
import numpy as np
from paddle.io import DataLoader, DistributedBatchSampler
-from paddle.fluid.dataloader.collate import default_collate_fn
+from .utils import default_collate_fn
from paddlers.models.ppdet.core.workspace import register
from . import transform
@@ -118,7 +118,7 @@ class BaseDataLoader(object):
collate_batch (bool): whether to collate batch in dataloader.
If set to True, the samples will collate into batch according
to the batch size. Otherwise, the ground-truth will not collate,
- which is used when the number of ground-truch is different in
+ which is used when the number of ground-truth is different in
samples.
use_shared_memory (bool): whether to use shared memory to
accelerate data loading, enable this only if you
@@ -144,7 +144,7 @@ class BaseDataLoader(object):
self._sample_transforms = Compose(
sample_transforms, num_classes=num_classes)
- # batch transfrom
+ # batch transform
self._batch_transforms = BatchCompose(batch_transforms, num_classes,
collate_batch)
self.batch_size = batch_size
diff --git a/paddlers/models/ppdet/data/shm_utils.py b/paddlers/models/ppdet/data/shm_utils.py
index 16e948c..5ff72eb 100644
--- a/paddlers/models/ppdet/data/shm_utils.py
+++ b/paddlers/models/ppdet/data/shm_utils.py
@@ -34,7 +34,10 @@ SHM_DEFAULT_MOUNT = '/dev/shm'
def _parse_size_in_M(size_str):
- num, unit = size_str[:-1], size_str[-1]
+ if size_str[-1] == 'B':
+ num, unit = size_str[:-2], size_str[-2]
+ else:
+ num, unit = size_str[:-1], size_str[-1]
assert unit in SIZE_UNIT, \
"unknown shm size unit {}".format(unit)
return float(num) * \
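# --- Illustrative sketch (not part of the patch): what the 'B'-suffix branch
# --- above fixes. Standalone re-implementation for demonstration; SIZE_UNIT
# --- is assumed to mirror the module's ['K', 'M', 'G', 'T'], with results in MB.
SIZE_UNIT = ['K', 'M', 'G', 'T']

def parse_size_in_M(size_str):
    if size_str[-1] == 'B':                  # accept '1GB'-style strings
        num, unit = size_str[:-2], size_str[-2]
    else:                                    # plain '1G'-style strings
        num, unit = size_str[:-1], size_str[-1]
    assert unit in SIZE_UNIT, "unknown shm size unit {}".format(unit)
    return float(num) * 1024**SIZE_UNIT.index(unit) / 1024

print(parse_size_in_M('64M'))    # 64.0
print(parse_size_in_M('1GB'))    # 1024.0 -- previously failed on unit 'B'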
diff --git a/paddlers/models/ppdet/data/source/__init__.py b/paddlers/models/ppdet/data/source/__init__.py
index ad593c4..a0ca322 100644
--- a/paddlers/models/ppdet/data/source/__init__.py
+++ b/paddlers/models/ppdet/data/source/__init__.py
@@ -27,3 +27,4 @@ from .category import *
from .keypoint_coco import *
from .mot import *
from .sniper_coco import SniperCOCODataSet
+from .dataset import ImageFolder
diff --git a/paddlers/models/ppdet/data/source/category.py b/paddlers/models/ppdet/data/source/category.py
index cf03d8a..73628a6 100644
--- a/paddlers/models/ppdet/data/source/category.py
+++ b/paddlers/models/ppdet/data/source/category.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
@@ -39,24 +39,49 @@ def get_categories(metric_type, anno_file=None, arch=None):
if arch == 'keypoint_arch':
return (None, {'id': 'keypoint'})
+ if anno_file is None or (not os.path.isfile(anno_file)):
+ logger.warning(
+ "anno_file '{}' is None, not set, or does not exist; "
+ "please recheck TrainDataset/EvalDataset/TestDataset.anno_path, "
+ "otherwise the default categories will be used by metric_type.".
+ format(anno_file))
+
if metric_type.lower() == 'coco' or metric_type.lower(
) == 'rbox' or metric_type.lower() == 'snipercoco':
if anno_file and os.path.isfile(anno_file):
- # lazy import pycocotools here
- from pycocotools.coco import COCO
-
- coco = COCO(anno_file)
- cats = coco.loadCats(coco.getCatIds())
-
- clsid2catid = {i: cat['id'] for i, cat in enumerate(cats)}
- catid2name = {cat['id']: cat['name'] for cat in cats}
+ if anno_file.endswith('json'):
+ # lazy import pycocotools here
+ from pycocotools.coco import COCO
+ coco = COCO(anno_file)
+ cats = coco.loadCats(coco.getCatIds())
+
+ clsid2catid = {i: cat['id'] for i, cat in enumerate(cats)}
+ catid2name = {cat['id']: cat['name'] for cat in cats}
+
+ elif anno_file.endswith('txt'):
+ cats = []
+ with open(anno_file) as f:
+ for line in f.readlines():
+ cats.append(line.strip())
+ if cats[0] == 'background': cats = cats[1:]
+
+ clsid2catid = {i: i for i in range(len(cats))}
+ catid2name = {i: name for i, name in enumerate(cats)}
+
+ else:
+ raise ValueError("anno_file {} should be json or txt.".format(
+ anno_file))
return clsid2catid, catid2name
# anno file not exist, load default categories of COCO17
else:
if metric_type.lower() == 'rbox':
+ logger.warning(
+ "metric_type: {}, load default categories of DOTA.".format(
+ metric_type))
return _dota_category()
-
+ logger.warning("metric_type: {}, load default categories of COCO.".
+ format(metric_type))
return _coco17_category()
elif metric_type.lower() == 'voc':
@@ -77,6 +102,8 @@ def get_categories(metric_type, anno_file=None, arch=None):
# anno file not exist, load default categories of
# VOC all 20 categories
else:
+ logger.warning("metric_type: {}, load default categories of VOC.".
+ format(metric_type))
return _vocall_category()
elif metric_type.lower() == 'oid':
@@ -104,6 +131,9 @@ def get_categories(metric_type, anno_file=None, arch=None):
return clsid2catid, catid2name
# anno file not exist, load default category 'pedestrian'.
else:
+ logger.warning(
+ "metric_type: {}, load default categories of pedestrian MOT.".
+ format(metric_type))
return _mot_category(category='pedestrian')
elif metric_type.lower() in ['kitti', 'bdd100kmot']:
@@ -122,6 +152,9 @@ def get_categories(metric_type, anno_file=None, arch=None):
return clsid2catid, catid2name
# anno file not exist, load default categories of visdrone all 10 categories
else:
+ logger.warning(
+ "metric_type: {}, load default categories of VisDrone.".format(
+ metric_type))
return _visdrone_category()
else:
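# --- Illustrative sketch (not part of the patch): the new .txt branch of
# --- get_categories() above, on a hypothetical label file with one class
# --- name per line; a leading 'background' row is dropped.
cats = ['background', 'person', 'car']       # as read from label_list.txt
if cats[0] == 'background':
    cats = cats[1:]
clsid2catid = {i: i for i in range(len(cats))}           # {0: 0, 1: 1}
catid2name = {i: name for i, name in enumerate(cats)}    # {0: 'person', 1: 'car'}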
diff --git a/paddlers/models/ppdet/data/source/coco.py b/paddlers/models/ppdet/data/source/coco.py
index efaf61f..0024009 100644
--- a/paddlers/models/ppdet/data/source/coco.py
+++ b/paddlers/models/ppdet/data/source/coco.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
import os
@@ -33,12 +33,13 @@ class COCODataSet(DetDataset):
anno_path (str): coco annotation file path.
data_fields (list): key name of data dictionary, at least have 'image'.
sample_num (int): number of samples to load, -1 means all.
- load_crowd (bool): whether to load crowded ground-truth.
+ load_crowd (bool): whether to load crowded ground-truth.
False as default
allow_empty (bool): whether to load empty entry. False as default
- empty_ratio (float): the ratio of empty record number to total
- record's, if empty_ratio is out of [0. ,1.), do not sample the
+ empty_ratio (float): the ratio of empty records to total
+ records; if empty_ratio is out of [0., 1.), do not sample the
records and use all the empty entries. 1. as default
+ repeat (int): number of times to repeat the dataset, used in benchmarks.
"""
def __init__(self,
@@ -49,9 +50,15 @@ class COCODataSet(DetDataset):
sample_num=-1,
load_crowd=False,
allow_empty=False,
- empty_ratio=1.):
- super(COCODataSet, self).__init__(dataset_dir, image_dir, anno_path,
- data_fields, sample_num)
+ empty_ratio=1.,
+ repeat=1):
+ super(COCODataSet, self).__init__(
+ dataset_dir,
+ image_dir,
+ anno_path,
+ data_fields,
+ sample_num,
+ repeat=repeat)
self.load_image_only = False
self.load_semantic = False
self.load_crowd = load_crowd
@@ -138,25 +145,14 @@ class COCODataSet(DetDataset):
if not any(np.array(inst['bbox'])):
continue
- # read rbox anno or not
- is_rbox_anno = True if len(inst['bbox']) == 5 else False
- if is_rbox_anno:
- xc, yc, box_w, box_h, angle = inst['bbox']
- x1 = xc - box_w / 2.0
- y1 = yc - box_h / 2.0
- x2 = x1 + box_w
- y2 = y1 + box_h
- else:
- x1, y1, box_w, box_h = inst['bbox']
- x2 = x1 + box_w
- y2 = y1 + box_h
+ x1, y1, box_w, box_h = inst['bbox']
+ x2 = x1 + box_w
+ y2 = y1 + box_h
eps = 1e-5
if inst['area'] > 0 and x2 - x1 > eps and y2 - y1 > eps:
inst['clean_bbox'] = [
round(float(x), 3) for x in [x1, y1, x2, y2]
]
- if is_rbox_anno:
- inst['clean_rbox'] = [xc, yc, box_w, box_h, angle]
bboxes.append(inst)
else:
logger.warning(
@@ -171,9 +167,6 @@ class COCODataSet(DetDataset):
is_empty = True
gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32)
- if is_rbox_anno:
- gt_rbox = np.zeros((num_bbox, 5), dtype=np.float32)
- gt_theta = np.zeros((num_bbox, 1), dtype=np.int32)
gt_class = np.zeros((num_bbox, 1), dtype=np.int32)
is_crowd = np.zeros((num_bbox, 1), dtype=np.int32)
gt_poly = [None] * num_bbox
@@ -183,13 +176,10 @@ class COCODataSet(DetDataset):
catid = box['category_id']
gt_class[i][0] = self.catid2clsid[catid]
gt_bbox[i, :] = box['clean_bbox']
- # xc, yc, w, h, theta
- if is_rbox_anno:
- gt_rbox[i, :] = box['clean_rbox']
is_crowd[i][0] = box['iscrowd']
- # check RLE format
+ # check RLE format
if 'segmentation' in box and box['iscrowd'] == 1:
- gt_poly[i] = [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]
+ gt_poly[i] = [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]
elif 'segmentation' in box and box['segmentation']:
if not np.array(box['segmentation']
).size > 0 and not self.allow_empty:
@@ -206,21 +196,12 @@ class COCODataSet(DetDataset):
gt_poly) and not self.allow_empty:
continue
- if is_rbox_anno:
- gt_rec = {
- 'is_crowd': is_crowd,
- 'gt_class': gt_class,
- 'gt_bbox': gt_bbox,
- 'gt_rbox': gt_rbox,
- 'gt_poly': gt_poly,
- }
- else:
- gt_rec = {
- 'is_crowd': is_crowd,
- 'gt_class': gt_class,
- 'gt_bbox': gt_bbox,
- 'gt_poly': gt_poly,
- }
+ gt_rec = {
+ 'is_crowd': is_crowd,
+ 'gt_class': gt_class,
+ 'gt_bbox': gt_bbox,
+ 'gt_poly': gt_poly,
+ }
for k, v in gt_rec.items():
if k in self.data_fields:
@@ -247,3 +228,126 @@ class COCODataSet(DetDataset):
empty_records = self._sample_empty(empty_records, len(records))
records += empty_records
self.roidbs = records
+
+
+@register
+@serializable
+class SlicedCOCODataSet(COCODataSet):
+ """Sliced COCODataSet"""
+
+ def __init__(
+ self,
+ dataset_dir=None,
+ image_dir=None,
+ anno_path=None,
+ data_fields=['image'],
+ sample_num=-1,
+ load_crowd=False,
+ allow_empty=False,
+ empty_ratio=1.,
+ repeat=1,
+ sliced_size=[640, 640],
+ overlap_ratio=[0.25, 0.25], ):
+ super(SlicedCOCODataSet, self).__init__(
+ dataset_dir=dataset_dir,
+ image_dir=image_dir,
+ anno_path=anno_path,
+ data_fields=data_fields,
+ sample_num=sample_num,
+ load_crowd=load_crowd,
+ allow_empty=allow_empty,
+ empty_ratio=empty_ratio,
+ repeat=repeat, )
+ self.sliced_size = sliced_size
+ self.overlap_ratio = overlap_ratio
+
+ def parse_dataset(self):
+ anno_path = os.path.join(self.dataset_dir, self.anno_path)
+ image_dir = os.path.join(self.dataset_dir, self.image_dir)
+
+ assert anno_path.endswith('.json'), \
+ 'invalid coco annotation file: ' + anno_path
+ from pycocotools.coco import COCO
+ coco = COCO(anno_path)
+ img_ids = coco.getImgIds()
+ img_ids.sort()
+ cat_ids = coco.getCatIds()
+ records = []
+ empty_records = []
+ ct = 0
+ ct_sub = 0
+
+ self.catid2clsid = dict({catid: i for i, catid in enumerate(cat_ids)})
+ self.cname2cid = dict({
+ coco.loadCats(catid)[0]['name']: clsid
+ for catid, clsid in self.catid2clsid.items()
+ })
+
+ if 'annotations' not in coco.dataset:
+ self.load_image_only = True
+ logger.warning('Annotation file: {} does not contain ground truth '
+ 'and load image information only.'.format(anno_path))
+ try:
+ import sahi
+ from sahi.slicing import slice_image
+ except Exception as e:
+ logger.error(
+ 'sahi not found, please install sahi. '
+ 'for example: `pip install sahi`, see https://github.com/obss/sahi.'
+ )
+ raise e
+
+ sub_img_ids = 0
+ for img_id in img_ids:
+ img_anno = coco.loadImgs([img_id])[0]
+ im_fname = img_anno['file_name']
+ im_w = float(img_anno['width'])
+ im_h = float(img_anno['height'])
+
+ im_path = os.path.join(image_dir,
+ im_fname) if image_dir else im_fname
+ is_empty = False
+ if not os.path.exists(im_path):
+ logger.warning('Illegal image file: {}, and it will be '
+ 'ignored'.format(im_path))
+ continue
+
+ if im_w < 0 or im_h < 0:
+ logger.warning('Illegal width: {} or height: {} in annotation, '
+ 'and im_id: {} will be ignored'.format(
+ im_w, im_h, img_id))
+ continue
+
+ slice_image_result = sahi.slicing.slice_image(
+ image=im_path,
+ slice_height=self.sliced_size[0],
+ slice_width=self.sliced_size[1],
+ overlap_height_ratio=self.overlap_ratio[0],
+ overlap_width_ratio=self.overlap_ratio[1])
+
+ sub_img_num = len(slice_image_result)
+ for _ind in range(sub_img_num):
+ im = slice_image_result.images[_ind]
+ coco_rec = {
+ 'image': im,
+ 'im_id': np.array([sub_img_ids + _ind]),
+ 'h': im.shape[0],
+ 'w': im.shape[1],
+ 'ori_im_id': np.array([img_id]),
+ 'st_pix': np.array(
+ slice_image_result.starting_pixels[_ind],
+ dtype=np.float32),
+ 'is_last': 1 if _ind == sub_img_num - 1 else 0,
+ } if 'image' in self.data_fields else {}
+ records.append(coco_rec)
+ ct_sub += sub_img_num
+ ct += 1
+ if self.sample_num > 0 and ct >= self.sample_num:
+ break
+ assert ct > 0, 'not found any coco record in %s' % (anno_path)
+ logger.info('{} samples sliced into {} sub_samples in file {}'.format(
+ ct, ct_sub, anno_path))
+ if self.allow_empty and len(empty_records) > 0:
+ empty_records = self._sample_empty(empty_records, len(records))
+ records += empty_records
+ self.roidbs = records
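# --- Illustrative sketch (not part of the patch): minimal SlicedCOCODataSet
# --- usage, assuming a standard COCO layout (paths are placeholders) and
# --- that sahi and pycocotools are installed. Each 640x640 slice with 25%
# --- overlap becomes one record carrying its 'st_pix' starting pixel.
from paddlers.models.ppdet.data.source.coco import SlicedCOCODataSet

dataset = SlicedCOCODataSet(
    dataset_dir='dataset/coco',
    image_dir='val2017',
    anno_path='annotations/instances_val2017.json',
    sliced_size=[640, 640],
    overlap_ratio=[0.25, 0.25])
dataset.parse_dataset()
print(len(dataset.roidbs))    # number of slices, not of original images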
diff --git a/paddlers/models/ppdet/data/source/dataset.py b/paddlers/models/ppdet/data/source/dataset.py
index 7345d2b..bf3c0c7 100644
--- a/paddlers/models/ppdet/data/source/dataset.py
+++ b/paddlers/models/ppdet/data/source/dataset.py
@@ -1,20 +1,20 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
import os
+import copy
import numpy as np
-
try:
from collections.abc import Sequence
except Exception:
@@ -22,7 +22,10 @@ except Exception:
from paddle.io import Dataset
from paddlers.models.ppdet.core.workspace import register, serializable
from paddlers.models.ppdet.utils.download import get_dataset_path
-import copy
+from paddlers.models.ppdet.data import source
+
+from paddlers.models.ppdet.utils.logger import setup_logger
+logger = setup_logger(__name__)
@serializable
@@ -37,6 +40,7 @@ class DetDataset(Dataset):
data_fields (list): key name of data dictionary, at least have 'image'.
sample_num (int): number of samples to load, -1 means all.
use_default_label (bool): whether to load default label list.
+ repeat (int): number of times to repeat the dataset, used in benchmarks.
"""
def __init__(self,
@@ -46,6 +50,7 @@ class DetDataset(Dataset):
data_fields=['image'],
sample_num=-1,
use_default_label=None,
+ repeat=1,
**kwargs):
super(DetDataset, self).__init__()
self.dataset_dir = dataset_dir if dataset_dir is not None else ''
@@ -54,28 +59,32 @@ class DetDataset(Dataset):
self.data_fields = data_fields
self.sample_num = sample_num
self.use_default_label = use_default_label
+ self.repeat = repeat
self._epoch = 0
self._curr_iter = 0
def __len__(self, ):
- return len(self.roidbs)
+ return len(self.roidbs) * self.repeat
+
+ def __call__(self, *args, **kwargs):
+ return self
def __getitem__(self, idx):
+ n = len(self.roidbs)
+ if self.repeat > 1:
+ idx %= n
# data batch
roidb = copy.deepcopy(self.roidbs[idx])
if self.mixup_epoch == 0 or self._epoch < self.mixup_epoch:
- n = len(self.roidbs)
idx = np.random.randint(n)
roidb = [roidb, copy.deepcopy(self.roidbs[idx])]
elif self.cutmix_epoch == 0 or self._epoch < self.cutmix_epoch:
- n = len(self.roidbs)
idx = np.random.randint(n)
roidb = [roidb, copy.deepcopy(self.roidbs[idx])]
elif self.mosaic_epoch == 0 or self._epoch < self.mosaic_epoch:
- n = len(self.roidbs)
roidb = [roidb, ] + [
copy.deepcopy(self.roidbs[np.random.randint(n)])
- for _ in range(3)
+ for _ in range(4)
]
if isinstance(roidb, Sequence):
for r in roidb:
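# --- Illustrative sketch (not part of the patch): the index folding added to
# --- __len__/__getitem__ above, with hypothetical sizes. repeat=2 doubles the
# --- reported length while records are stored only once.
n, repeat = 100, 2            # len(self.roidbs) == 100
length = n * repeat           # __len__() -> 200
idx = 150
idx %= n                      # __getitem__(150) serves record 50 again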
@@ -149,12 +158,15 @@ class ImageFolder(DetDataset):
self.sample_num = sample_num
def check_or_download_dataset(self):
+ return
+
+ def get_anno(self):
+ if self.anno_path is None:
+ return
if self.dataset_dir:
- # NOTE: ImageFolder is only used for prediction, in
- # infer mode, image_dir is set by set_images
- # so we only check anno_path here
- self.dataset_dir = get_dataset_path(self.dataset_dir,
- self.anno_path, None)
+ return os.path.join(self.dataset_dir, self.anno_path)
+ else:
+ return self.anno_path
def parse_dataset(self, ):
if not self.roidbs:
@@ -195,3 +207,93 @@ class ImageFolder(DetDataset):
def set_images(self, images):
self.image_dir = images
self.roidbs = self._load_images()
+
+ def set_slice_images(self,
+ images,
+ slice_size=[640, 640],
+ overlap_ratio=[0.25, 0.25]):
+ self.image_dir = images
+ ori_records = self._load_images()
+ try:
+ import sahi
+ from sahi.slicing import slice_image
+ except Exception as e:
+ logger.error(
+ 'sahi not found, please install sahi. '
+ 'for example: `pip install sahi`, see https://github.com/obss/sahi.'
+ )
+ raise e
+
+ sub_img_ids = 0
+ ct = 0
+ ct_sub = 0
+ records = []
+ for i, ori_rec in enumerate(ori_records):
+ im_path = ori_rec['im_file']
+ slice_image_result = sahi.slicing.slice_image(
+ image=im_path,
+ slice_height=slice_size[0],
+ slice_width=slice_size[1],
+ overlap_height_ratio=overlap_ratio[0],
+ overlap_width_ratio=overlap_ratio[1])
+
+ sub_img_num = len(slice_image_result)
+ for _ind in range(sub_img_num):
+ im = slice_image_result.images[_ind]
+ rec = {
+ 'image': im,
+ 'im_id': np.array([sub_img_ids + _ind]),
+ 'h': im.shape[0],
+ 'w': im.shape[1],
+ 'ori_im_id': np.array([ori_rec['im_id'][0]]),
+ 'st_pix': np.array(
+ slice_image_result.starting_pixels[_ind],
+ dtype=np.float32),
+ 'is_last': 1 if _ind == sub_img_num - 1 else 0,
+ } if 'image' in self.data_fields else {}
+ records.append(rec)
+ ct_sub += sub_img_num
+ ct += 1
+ print('{} samples sliced into {} sub_samples'.format(ct, ct_sub))
+ self.roidbs = records
+
+ def get_label_list(self):
+ # Only the VOC dataset needs a label list in ImageFolder
+ return self.anno_path
+
+
+@register
+class CommonDataset(object):
+ def __init__(self, **dataset_args):
+ super(CommonDataset, self).__init__()
+ dataset_args = copy.deepcopy(dataset_args)
+ type = dataset_args.pop("name")
+ self.dataset = getattr(source, type)(**dataset_args)
+
+ def __call__(self):
+ return self.dataset
+
+
+@register
+class TrainDataset(CommonDataset):
+ pass
+
+
+@register
+class EvalMOTDataset(CommonDataset):
+ pass
+
+
+@register
+class TestMOTDataset(CommonDataset):
+ pass
+
+
+@register
+class EvalDataset(CommonDataset):
+ pass
+
+
+@register
+class TestDataset(CommonDataset):
+ pass
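# --- Illustrative sketch (not part of the patch): the CommonDataset
# --- indirection above. The 'name' key selects a class from
# --- paddlers.models.ppdet.data.source by attribute lookup, and calling the
# --- wrapper hands back the built dataset. Arguments are placeholders.
from paddlers.models.ppdet.data.source.dataset import TrainDataset

train = TrainDataset(
    name='COCODataSet',
    dataset_dir='dataset/coco',
    image_dir='train2017',
    anno_path='annotations/instances_train2017.json')
coco_dataset = train()    # __call__ returns the underlying COCODataSet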
diff --git a/paddlers/models/ppdet/data/source/keypoint_coco.py b/paddlers/models/ppdet/data/source/keypoint_coco.py
index d51e674..e2c36d7 100644
--- a/paddlers/models/ppdet/data/source/keypoint_coco.py
+++ b/paddlers/models/ppdet/data/source/keypoint_coco.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
"""
this code is based on https://github.com/open-mmlab/mmpose
@@ -27,7 +27,7 @@ from paddlers.models.ppdet.core.workspace import register, serializable
@serializable
class KeypointBottomUpBaseDataset(DetDataset):
- """Base class for bottom-up datasets.
+ """Base class for bottom-up datasets.
All datasets should subclass it.
All subclasses should overwrite:
@@ -91,7 +91,7 @@ class KeypointBottomUpBaseDataset(DetDataset):
@register
@serializable
class KeypointBottomUpCocoDataset(KeypointBottomUpBaseDataset):
- """COCO dataset for bottom-up pose estimation.
+ """COCO dataset for bottom-up pose estimation.
The dataset loads raw features and apply specified transforms
to return a dict containing the image tensors and other information.
@@ -262,7 +262,7 @@ class KeypointBottomUpCocoDataset(KeypointBottomUpBaseDataset):
@register
@serializable
class KeypointBottomUpCrowdPoseDataset(KeypointBottomUpCocoDataset):
- """CrowdPose dataset for bottom-up pose estimation.
+ """CrowdPose dataset for bottom-up pose estimation.
The dataset loads raw features and apply specified transforms
to return a dict containing the image tensors and other information.
@@ -386,7 +386,7 @@ class KeypointTopDownBaseDataset(DetDataset):
@register
@serializable
class KeypointTopDownCocoDataset(KeypointTopDownBaseDataset):
- """COCO dataset for top-down pose estimation.
+ """COCO dataset for top-down pose estimation.
The dataset loads raw features and apply specified transforms
to return a dict containing the image tensors and other information.
diff --git a/paddlers/models/ppdet/data/source/mot.py b/paddlers/models/ppdet/data/source/mot.py
index 3e96ba0..0386387 100644
--- a/paddlers/models/ppdet/data/source/mot.py
+++ b/paddlers/models/ppdet/data/source/mot.py
@@ -39,15 +39,16 @@ class MOTDataSet(DetDataset):
image_lists (str|list): mot data image lists, multi-source mot dataset.
data_fields (list): key name of data dictionary, at least have 'image'.
sample_num (int): number of samples to load, -1 means all.
+ repeat (int): number of times to repeat the dataset, used in benchmarks.
Notes:
MOT datasets root directory following this:
dataset/mot
|——————image_lists
- | |——————caltech.train
- | |——————caltech.val
- | |——————mot16.train
- | |——————mot17.train
+ | |——————caltech.train
+ | |——————caltech.val
+ | |——————mot16.train
+ | |——————mot17.train
| ......
|——————Caltech
|——————MOT17
@@ -77,11 +78,13 @@ class MOTDataSet(DetDataset):
dataset_dir=None,
image_lists=[],
data_fields=['image'],
- sample_num=-1):
+ sample_num=-1,
+ repeat=1):
super(MOTDataSet, self).__init__(
dataset_dir=dataset_dir,
data_fields=data_fields,
- sample_num=sample_num)
+ sample_num=sample_num,
+ repeat=repeat)
self.dataset_dir = dataset_dir
self.image_lists = image_lists
if isinstance(self.image_lists, str):
@@ -243,8 +246,8 @@ class MCMOTDataSet(DetDataset):
MCMOT datasets root directory following this:
dataset/mot
|——————image_lists
- | |——————visdrone_mcmot.train
- | |——————visdrone_mcmot.val
+ | |——————visdrone_mcmot.train
+ | |——————visdrone_mcmot.val
visdrone_mcmot
|——————images
| └——————train
@@ -348,10 +351,10 @@ class MCMOTDataSet(DetDataset):
self.num_imgs_each_data = [len(x) for x in self.img_files.values()]
self.total_imgs = sum(self.num_imgs_each_data)
- # cname2cid and cid2cname
+ # cname2cid and cid2cname
cname2cid = {}
if self.label_list is not None:
- # if use label_list for multi source mix dataset,
+ # if use label_list for multi source mix dataset,
# please make sure label_list in the first sub_dataset at least.
sub_dataset = self.image_lists[0].split('.')[0]
label_path = os.path.join(self.dataset_dir, sub_dataset,
@@ -461,7 +464,7 @@ class MOTImageFolder(DetDataset):
video_file (str): path of the video file, default ''.
frame_rate (int): frame rate of the video, use cv2 VideoCapture if not set.
dataset_dir (str): root directory for dataset.
- keep_ori_im (bool): whether to keep original image, default False.
+ keep_ori_im (bool): whether to keep original image, default False.
Set True when used during MOT model inference while saving
images or video, or used in DeepSORT.
"""
@@ -474,6 +477,7 @@ class MOTImageFolder(DetDataset):
image_dir=None,
sample_num=-1,
keep_ori_im=False,
+ anno_path=None,
**kwargs):
super(MOTImageFolder, self).__init__(
dataset_dir, image_dir, sample_num=sample_num)
@@ -483,6 +487,7 @@ class MOTImageFolder(DetDataset):
self._imid2path = {}
self.roidbs = None
self.frame_rate = frame_rate
+ self.anno_path = anno_path
def check_or_download_dataset(self):
return
@@ -573,6 +578,9 @@ class MOTImageFolder(DetDataset):
"wrong or unsupported file format: {}".format(self.video_file)
self.roidbs = self._load_video_images()
+ def get_anno(self):
+ return self.anno_path
+
def _is_valid_video(f, extensions=('.mp4', '.avi', '.mov', '.rmvb', 'flv')):
return f.lower().endswith(extensions)
diff --git a/paddlers/models/ppdet/data/source/voc.py b/paddlers/models/ppdet/data/source/voc.py
index 41324c3..5a1e0cf 100644
--- a/paddlers/models/ppdet/data/source/voc.py
+++ b/paddlers/models/ppdet/data/source/voc.py
@@ -43,9 +43,10 @@ class VOCDataSet(DetDataset):
label_list (str): if use_default_label is False, will load
mapping between category and class index.
allow_empty (bool): whether to load empty entry. False as default
- empty_ratio (float): the ratio of empty record number to total
- record's, if empty_ratio is out of [0. ,1.), do not sample the
+ empty_ratio (float): the ratio of empty records to total
+ records; if empty_ratio is out of [0., 1.), do not sample the
records and use all the empty entries. 1. as default
+ repeat (int): number of times to repeat the dataset, used in benchmarks.
"""
def __init__(self,
@@ -56,13 +57,15 @@ class VOCDataSet(DetDataset):
sample_num=-1,
label_list=None,
allow_empty=False,
- empty_ratio=1.):
+ empty_ratio=1.,
+ repeat=1):
super(VOCDataSet, self).__init__(
dataset_dir=dataset_dir,
image_dir=image_dir,
anno_path=anno_path,
data_fields=data_fields,
- sample_num=sample_num)
+ sample_num=sample_num,
+ repeat=repeat)
self.label_list = label_list
self.allow_empty = allow_empty
self.empty_ratio = empty_ratio
diff --git a/paddlers/models/ppdet/data/transform/__init__.py b/paddlers/models/ppdet/data/transform/__init__.py
index b6af6ae..58cec84 100644
--- a/paddlers/models/ppdet/data/transform/__init__.py
+++ b/paddlers/models/ppdet/data/transform/__init__.py
@@ -16,11 +16,13 @@ from . import operators
from . import batch_operators
from . import keypoint_operators
from . import mot_operators
+from . import rotated_operators
from .operators import *
from .batch_operators import *
from .keypoint_operators import *
from .mot_operators import *
+from .rotated_operators import *
__all__ = []
__all__ += registered_ops
diff --git a/paddlers/models/ppdet/data/transform/autoaugment_utils.py b/paddlers/models/ppdet/data/transform/autoaugment_utils.py
index 4fbfa4e..094f827 100644
--- a/paddlers/models/ppdet/data/transform/autoaugment_utils.py
+++ b/paddlers/models/ppdet/data/transform/autoaugment_utils.py
@@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-# Reference:
+# Reference:
# https://github.com/tensorflow/tpu/blob/master/models/official/detection/utils/autoaugment_utils.py
"""AutoAugment util file."""
@@ -65,7 +65,7 @@ def policy_v1():
[('ShearY_Only_BBoxes', 0.8, 2), ('Flip_Only_BBoxes', 0.0, 10)],
[('Equalize', 0.6, 10), ('TranslateX_BBox', 0.2, 2)],
[('Color', 1.0, 10), ('TranslateY_Only_BBoxes', 0.4, 6)],
- [('Rotate_BBox', 0.8, 10), ('Contrast', 0.0, 10)], # ,
+ [('Rotate_BBox', 0.8, 10), ('Contrast', 0.0, 10)], # ,
[('Cutout', 0.2, 2), ('Brightness', 0.8, 10)],
[('Color', 1.0, 6), ('Equalize', 1.0, 2)],
[('Cutout_Only_BBoxes', 0.4, 6), ('TranslateY_Only_BBoxes', 0.8, 2)],
diff --git a/paddlers/models/ppdet/data/transform/batch_operators.py b/paddlers/models/ppdet/data/transform/batch_operators.py
index 69a3e88..d64b94d 100644
--- a/paddlers/models/ppdet/data/transform/batch_operators.py
+++ b/paddlers/models/ppdet/data/transform/batch_operators.py
@@ -47,6 +47,8 @@ __all__ = [
'PadMaskBatch',
'Gt2GFLTarget',
'Gt2CenterNetTarget',
+ 'PadGT',
+ 'PadRGT',
]
@@ -108,12 +110,6 @@ class PadBatch(BaseOperator):
padding_segm[:, :im_h, :im_w] = gt_segm
data['gt_segm'] = padding_segm
- if 'gt_rbox2poly' in data and data['gt_rbox2poly'] is not None:
- # ploy to rbox
- polys = data['gt_rbox2poly']
- rbox = bbox_utils.poly2rbox(polys)
- data['gt_rbox'] = rbox
-
return samples
@@ -233,7 +229,7 @@ class Gt2YoloTarget(BaseOperator):
gi = int(gx * grid_w)
gj = int(gy * grid_h)
- # gtbox should be regresed in this layes if best match
+ # gt bbox should be regressed in this layer if the best match
# anchor index in anchor mask of this layer
if best_idx in mask:
best_n = mask.index(best_idx)
@@ -253,7 +249,7 @@ class Gt2YoloTarget(BaseOperator):
# classification
target[best_n, 6 + cls, gj, gi] = 1.
- # For non-matched anchors, calculate the target if the iou
+ # For non-matched anchors, calculate the target if the iou
# between anchor and gt is larger than iou_thresh
if self.iou_thresh < 1:
for idx, mask_i in enumerate(mask):
@@ -618,7 +614,7 @@ class Gt2TTFTarget(BaseOperator):
"""
Gt2TTFTarget
Generate TTFNet targets by ground truth data
-
+
Args:
num_classes(int): the number of classes.
down_ratio(int): the down ratio from images to heatmap, 4 by default.
@@ -980,12 +976,6 @@ class PadMaskBatch(BaseOperator):
padding_mask[:im_h, :im_w] = 1.
data['pad_mask'] = padding_mask
- if 'gt_rbox2poly' in data and data['gt_rbox2poly'] is not None:
- # ploy to rbox
- polys = data['gt_rbox2poly']
- rbox = bbox_utils.poly2rbox(polys)
- data['gt_rbox'] = rbox
-
return samples
@@ -994,7 +984,7 @@ class Gt2CenterNetTarget(BaseOperator):
"""Gt2CenterNetTarget
Generate CenterNet targets by ground-truth
Args:
- down_ratio (int): The down sample ratio between output feature and
+ down_ratio (int): The down sample ratio between output feature and
input image.
num_classes (int): The number of classes, 80 by default.
max_objs (int): The maximum objects detected, 128 by default.
@@ -1068,3 +1058,110 @@ class Gt2CenterNetTarget(BaseOperator):
sample['size'] = wh
sample['offset'] = reg
return sample
+
+
+@register_op
+class PadGT(BaseOperator):
+ """
+ Pad 0 to `gt_class`, `gt_bbox`, `gt_score`...
+ num_max_boxes is the largest box count in the batch.
+ Args:
+ return_gt_mask (bool): If true, return `pad_gt_mask`,
+ 1 means bbox, 0 means no bbox.
+ """
+
+ def __init__(self, return_gt_mask=True):
+ super(PadGT, self).__init__()
+ self.return_gt_mask = return_gt_mask
+
+ def __call__(self, samples, context=None):
+ num_max_boxes = max([len(s['gt_bbox']) for s in samples])
+ for sample in samples:
+ if self.return_gt_mask:
+ sample['pad_gt_mask'] = np.zeros(
+ (num_max_boxes, 1), dtype=np.float32)
+ if num_max_boxes == 0:
+ continue
+
+ num_gt = len(sample['gt_bbox'])
+ pad_gt_class = np.zeros((num_max_boxes, 1), dtype=np.int32)
+ pad_gt_bbox = np.zeros((num_max_boxes, 4), dtype=np.float32)
+ if num_gt > 0:
+ pad_gt_class[:num_gt] = sample['gt_class']
+ pad_gt_bbox[:num_gt] = sample['gt_bbox']
+ sample['gt_class'] = pad_gt_class
+ sample['gt_bbox'] = pad_gt_bbox
+ # pad_gt_mask
+ if 'pad_gt_mask' in sample:
+ sample['pad_gt_mask'][:num_gt] = 1
+ # gt_score
+ if 'gt_score' in sample:
+ pad_gt_score = np.zeros((num_max_boxes, 1), dtype=np.float32)
+ if num_gt > 0:
+ pad_gt_score[:num_gt] = sample['gt_score']
+ sample['gt_score'] = pad_gt_score
+ if 'is_crowd' in sample:
+ pad_is_crowd = np.zeros((num_max_boxes, 1), dtype=np.int32)
+ if num_gt > 0:
+ pad_is_crowd[:num_gt] = sample['is_crowd']
+ sample['is_crowd'] = pad_is_crowd
+ if 'difficult' in sample:
+ pad_diff = np.zeros((num_max_boxes, 1), dtype=np.int32)
+ if num_gt > 0:
+ pad_diff[:num_gt] = sample['difficult']
+ sample['difficult'] = pad_diff
+ return samples
+
+
+@register_op
+class PadRGT(BaseOperator):
+ """
+ Pad 0 to `gt_class`, `gt_bbox`, `gt_score`...
+ num_max_boxes is the largest box count in the batch.
+ Args:
+ return_gt_mask (bool): If true, return `pad_gt_mask`,
+ 1 means bbox, 0 means no bbox.
+ """
+
+ def __init__(self, return_gt_mask=True):
+ super(PadRGT, self).__init__()
+ self.return_gt_mask = return_gt_mask
+
+ def pad_field(self, sample, field, num_gt):
+ name, shape, dtype = field
+ if name in sample:
+ pad_v = np.zeros(shape, dtype=dtype)
+ if num_gt > 0:
+ pad_v[:num_gt] = sample[name]
+ sample[name] = pad_v
+
+ def __call__(self, samples, context=None):
+ num_max_boxes = max([len(s['gt_bbox']) for s in samples])
+ for sample in samples:
+ if self.return_gt_mask:
+ sample['pad_gt_mask'] = np.zeros(
+ (num_max_boxes, 1), dtype=np.float32)
+ if num_max_boxes == 0:
+ continue
+
+ num_gt = len(sample['gt_bbox'])
+ pad_gt_class = np.zeros((num_max_boxes, 1), dtype=np.int32)
+ pad_gt_bbox = np.zeros((num_max_boxes, 4), dtype=np.float32)
+ if num_gt > 0:
+ pad_gt_class[:num_gt] = sample['gt_class']
+ pad_gt_bbox[:num_gt] = sample['gt_bbox']
+ sample['gt_class'] = pad_gt_class
+ sample['gt_bbox'] = pad_gt_bbox
+ # pad_gt_mask
+ if 'pad_gt_mask' in sample:
+ sample['pad_gt_mask'][:num_gt] = 1
+ # gt_score
+ names = ['gt_score', 'is_crowd', 'difficult', 'gt_poly', 'gt_rbox']
+ dims = [1, 1, 1, 8, 5]
+ dtypes = [np.float32, np.int32, np.int32, np.float32, np.float32]
+
+ for name, dim, dtype in zip(names, dims, dtypes):
+ self.pad_field(sample, [name, (num_max_boxes, dim), dtype],
+ num_gt)
+
+ return samples
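# --- Illustrative sketch (not part of the patch): PadGT on a toy two-sample
# --- batch. Boxes are zero-padded up to the batch maximum (2 here) and
# --- pad_gt_mask flags the real rows.
import numpy as np
from paddlers.models.ppdet.data.transform.batch_operators import PadGT

samples = [
    {'gt_class': np.array([[1], [3]], dtype=np.int32),
     'gt_bbox': np.ones((2, 4), dtype=np.float32)},
    {'gt_class': np.array([[2]], dtype=np.int32),
     'gt_bbox': np.ones((1, 4), dtype=np.float32)},
]
samples = PadGT(return_gt_mask=True)(samples)
print(samples[1]['gt_bbox'].shape)    # (2, 4) -- padded to num_max_boxes
print(samples[1]['pad_gt_mask'].T)    # [[1. 0.]] -- second row is padding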
diff --git a/paddlers/models/ppdet/data/transform/keypoint_operators.py b/paddlers/models/ppdet/data/transform/keypoint_operators.py
index c9b9648..b33e33b 100644
--- a/paddlers/models/ppdet/data/transform/keypoint_operators.py
+++ b/paddlers/models/ppdet/data/transform/keypoint_operators.py
@@ -511,18 +511,18 @@ class RandomFlipHalfBodyTransform(object):
@register_keypointop
class AugmentationbyInformantionDropping(object):
- """AID: Augmentation by Informantion Dropping. Please refer
- to https://arxiv.org/abs/2008.07139
-
+ """AID: Augmentation by Informantion Dropping. Please refer
+ to https://arxiv.org/abs/2008.07139
+
Args:
prob_cutout (float): The probability of the Cutout augmentation.
offset_factor (float): Offset factor of cutout center.
- num_patch (int): Number of patches to be cutout.
+ num_patch (int): Number of patches to be cutout.
records (dict): the dict containing the image and coords
-
+
Returns:
records (dict): contain the image and coords after transformation
-
+
"""
def __init__(self,
@@ -698,8 +698,8 @@ class ToHeatmapsTopDown(object):
tmp_size = self.sigma * 3
feat_stride = image_size / self.hmsize
for joint_id in range(num_joints):
- mu_x = int(joints[joint_id][0] + 0.5) / feat_stride[0]
- mu_y = int(joints[joint_id][1] + 0.5) / feat_stride[1]
+ mu_x = int(joints[joint_id][0] / feat_stride[0] + 0.5)
+ mu_y = int(joints[joint_id][1] / feat_stride[1] + 0.5)
# Check that any part of the gaussian is in-bounds
ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)]
br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)]
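# --- Numeric check (not part of the patch) of the rounding fix above, on a
# --- hypothetical joint at x = 37.0 with feat_stride = 4: the old code
# --- rounded in image space and then divided, yielding a fractional heatmap
# --- center; the new code divides first and rounds once in heatmap space.
x, stride = 37.0, 4.0
old_mu_x = int(x + 0.5) / stride      # 9.25 -- not a valid heatmap index
new_mu_x = int(x / stride + 0.5)      # 9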
@@ -796,7 +796,7 @@ class ToHeatmapsTopDown_DARK(object):
class ToHeatmapsTopDown_UDP(object):
"""This code is based on:
https://github.com/HuangJunJie2017/UDP-Pose/blob/master/deep-high-resolution-net.pytorch/lib/dataset/JointsDataset.py
-
+
to generate the gaussian heatmaps of keypoint for heatmap loss.
ref: Huang et al. The Devil is in the Details: Delving into Unbiased Data Processing
for Human Pose Estimation (CVPR 2020).
diff --git a/paddlers/models/ppdet/data/transform/mot_operators.py b/paddlers/models/ppdet/data/transform/mot_operators.py
index 43bdac1..da9c23d 100644
--- a/paddlers/models/ppdet/data/transform/mot_operators.py
+++ b/paddlers/models/ppdet/data/transform/mot_operators.py
@@ -132,7 +132,7 @@ class LetterBoxResize(BaseOperator):
@register_op
class MOTRandomAffine(BaseOperator):
- """
+ """
Affine transform to image and coords to achieve the rotate, scale and
shift effect for training image.
@@ -271,7 +271,7 @@ class Gt2JDETargetThres(BaseOperator):
anchors (list): anchors of JDE model
anchor_masks (list): anchor_masks of JDE model
downsample_ratios (list): downsample ratios of JDE model
- ide_thresh (float): thresh of identity, higher is groud truth
+ ide_thresh (float): thresh of identity, higher is ground truth
fg_thresh (float): thresh of foreground, higher is foreground
bg_thresh (float): thresh of background, lower is background
num_classes (int): number of classes
@@ -529,8 +529,8 @@ class Gt2FairMOTTarget(Gt2TTFTarget):
Generate FairMOT targets by ground truth data.
Difference between Gt2FairMOTTarget and Gt2TTFTarget are:
1. the gaussian kernel radius to generate a heatmap.
- 2. the targets needed during traing.
-
+ 2. the targets needed during training.
+
Args:
num_classes(int): the number of classes.
down_ratio(int): the down ratio from images to heatmap, 4 by default.
diff --git a/paddlers/models/ppdet/data/transform/operators.py b/paddlers/models/ppdet/data/transform/operators.py
index 8e09902..078ba2e 100644
--- a/paddlers/models/ppdet/data/transform/operators.py
+++ b/paddlers/models/ppdet/data/transform/operators.py
@@ -41,7 +41,6 @@ import threading
MUTEX = threading.Lock()
from paddlers.models.ppdet.core.workspace import serializable
-from paddlers.models.ppdet.modeling import bbox_utils
from ..reader import Compose
from .op_helper import (satisfy_sample_constraint, filter_and_process,
@@ -123,12 +122,15 @@ class Decode(BaseOperator):
sample['image'] = f.read()
sample.pop('im_file')
- im = sample['image']
- data = np.frombuffer(im, dtype='uint8')
- im = cv2.imdecode(data, 1) # BGR mode, but need RGB mode
- if 'keep_ori_im' in sample and sample['keep_ori_im']:
- sample['ori_image'] = im
- im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
+ try:
+ im = sample['image']
+ data = np.frombuffer(im, dtype='uint8')
+ im = cv2.imdecode(data, 1) # BGR mode, but need RGB mode
+ if 'keep_ori_im' in sample and sample['keep_ori_im']:
+ sample['ori_image'] = im
+ im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
+ except Exception:
+ # the sample may already hold a decoded array (e.g. produced by
+ # the sliced datasets); keep it as-is when decoding fails
+ im = sample['image']
sample['image'] = im
if 'h' not in sample:
@@ -357,19 +359,26 @@ class RandomErasingImage(BaseOperator):
@register_op
class NormalizeImage(BaseOperator):
- def __init__(self, mean=[0.485, 0.456, 0.406], std=[1, 1, 1],
- is_scale=True):
+ def __init__(self,
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225],
+ is_scale=True,
+ norm_type='mean_std'):
"""
Args:
mean (list): the pixel mean
std (list): the pixel variance
+ is_scale (bool): scale the pixel to [0,1]
+ norm_type (str): type in ['mean_std', 'none']
"""
super(NormalizeImage, self).__init__()
self.mean = mean
self.std = std
self.is_scale = is_scale
+ self.norm_type = norm_type
if not (isinstance(self.mean, list) and isinstance(self.std, list) and
- isinstance(self.is_scale, bool)):
+ isinstance(self.is_scale, bool) and
+ self.norm_type in ['mean_std', 'none']):
raise TypeError("{}: input type is invalid.".format(self))
from functools import reduce
if reduce(lambda x, y: x * y, self.std) == 0:
@@ -378,20 +387,20 @@ class NormalizeImage(BaseOperator):
def apply(self, sample, context=None):
"""Normalize the image.
Operators:
- 1.(optional) Scale the image to [0,1]
- 2. Each pixel minus mean and is divided by std
+ 1.(optional) Scale the pixel to [0,1]
+ 2.(optional) Each pixel minus mean and is divided by std
"""
im = sample['image']
im = im.astype(np.float32, copy=False)
- mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
- std = np.array(self.std)[np.newaxis, np.newaxis, :]
-
if self.is_scale:
- im = im / 255.0
-
- im -= mean
- im /= std
-
+ scale = 1.0 / 255.0
+ im *= scale
+
+ if self.norm_type == 'mean_std':
+ mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
+ std = np.array(self.std)[np.newaxis, np.newaxis, :]
+ im -= mean
+ im /= std
sample['image'] = im
return sample
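# --- Illustrative sketch (not part of the patch): the two NormalizeImage
# --- modes after this change, on a single channel-0 value with the defaults
# --- above (toy arithmetic only).
import numpy as np
px = np.float32(128.0)
scaled = px * (1.0 / 255.0)              # is_scale=True        -> ~0.502
mean_std = (scaled - 0.485) / 0.229      # norm_type='mean_std' -> ~0.074
plain = scaled                           # norm_type='none' skips mean/std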
@@ -448,6 +457,10 @@ class GridMask(BaseOperator):
@register_op
class RandomDistort(BaseOperator):
"""Random color distortion.
+ Note:
+ In this operator (RandomDistort), the 'probability' in [lower, upper, probability]
+ is the probability of skipping the transformation, not of applying it.
+ In other BaseOperators, 'probability' means the probability of applying the transformation.
Args:
hue (list): hue settings. in [lower, upper, probability] format.
saturation (list): saturation settings. in [lower, upper, probability] format.
@@ -657,18 +670,6 @@ class RandomFlip(BaseOperator):
bbox[:, 2] = width - oldx1
return bbox
- def apply_rbox(self, bbox, width):
- oldx1 = bbox[:, 0].copy()
- oldx2 = bbox[:, 2].copy()
- oldx3 = bbox[:, 4].copy()
- oldx4 = bbox[:, 6].copy()
- bbox[:, 0] = width - oldx1
- bbox[:, 2] = width - oldx2
- bbox[:, 4] = width - oldx3
- bbox[:, 6] = width - oldx4
- bbox = [bbox_utils.get_best_begin_point_single(e) for e in bbox]
- return bbox
-
def apply(self, sample, context=None):
"""Filp the image and bounding box.
Operators:
@@ -700,10 +701,6 @@ class RandomFlip(BaseOperator):
if 'gt_segm' in sample and sample['gt_segm'].any():
sample['gt_segm'] = sample['gt_segm'][:, :, ::-1]
- if 'gt_rbox2poly' in sample and sample['gt_rbox2poly'].any():
- sample['gt_rbox2poly'] = self.apply_rbox(sample['gt_rbox2poly'],
- width)
-
sample['flipped'] = True
sample['image'] = im
return sample
@@ -713,7 +710,7 @@ class RandomFlip(BaseOperator):
class Resize(BaseOperator):
def __init__(self, target_size, keep_ratio, interp=cv2.INTER_LINEAR):
"""
- Resize image to target size. if keep_ratio is True,
+ Resize image to target size. if keep_ratio is True,
resize the image's long side to the maximum of target_size
if keep_ratio is False, resize the image to target size(h, w)
Args:
@@ -824,7 +821,7 @@ class Resize(BaseOperator):
im_scale_x = resize_w / im_shape[1]
im = self.apply_image(sample['image'], [im_scale_x, im_scale_y])
- sample['image'] = im
+ sample['image'] = im.astype(np.float32)
sample['im_shape'] = np.asarray([resize_h, resize_w], dtype=np.float32)
if 'scale_factor' in sample:
scale_factor = sample['scale_factor']
@@ -841,16 +838,6 @@ class Resize(BaseOperator):
[im_scale_x, im_scale_y],
[resize_w, resize_h])
- # apply rbox
- if 'gt_rbox2poly' in sample:
- if np.array(sample['gt_rbox2poly']).shape[1] != 8:
- logger.warning(
- "gt_rbox2poly's length shoule be 8, but actually is {}".
- format(len(sample['gt_rbox2poly'])))
- sample['gt_rbox2poly'] = self.apply_bbox(sample['gt_rbox2poly'],
- [im_scale_x, im_scale_y],
- [resize_w, resize_h])
-
# apply polygon
if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
sample['gt_poly'] = self.apply_segm(sample['gt_poly'], im_shape[:2],
@@ -1054,7 +1041,7 @@ class CropWithSampling(BaseOperator):
[max sample, max trial, min scale, max scale,
min aspect ratio, max aspect ratio,
min overlap, max overlap]
- avoid_no_bbox (bool): whether to to avoid the
+ avoid_no_bbox (bool): whether to avoid the
situation where the box does not appear.
"""
super(CropWithSampling, self).__init__()
@@ -1145,7 +1132,7 @@ class CropWithDataAchorSampling(BaseOperator):
das_anchor_scales (list[float]): a list of anchor scales in data
anchor sampling.
min_size (float): minimum size of sampled bbox.
- avoid_no_bbox (bool): whether to to avoid the
+ avoid_no_bbox (bool): whether to avoid the
situation where the box does not appear.
"""
super(CropWithDataAchorSampling, self).__init__()
@@ -1504,6 +1491,11 @@ class RandomCrop(BaseOperator):
if 'is_crowd' in sample:
sample['is_crowd'] = np.take(
sample['is_crowd'], valid_ids, axis=0)
+
+ if 'difficult' in sample:
+ sample['difficult'] = np.take(
+ sample['difficult'], valid_ids, axis=0)
+
return sample
return sample
@@ -1604,7 +1596,7 @@ class RandomScaledCrop(BaseOperator):
@register_op
class Cutmix(BaseOperator):
def __init__(self, alpha=1.5, beta=1.5):
- """
+ """
CutMix: Regularization Strategy to Train Strong Classifiers with Localizable Features, see https://arxiv.org/abs/1905.04899
Cutmix image and gt_bbox/gt_score
Args:
@@ -1747,7 +1739,7 @@ class Mixup(BaseOperator):
gt_score2 = np.ones_like(sample[1]['gt_class'])
gt_score = np.concatenate(
(gt_score1 * factor, gt_score2 * (1. - factor)), axis=0)
- result['gt_score'] = gt_score
+ result['gt_score'] = gt_score.astype('float32')
if 'is_crowd' in sample[0]:
is_crowd1 = sample[0]['is_crowd']
is_crowd2 = sample[1]['is_crowd']
@@ -2029,13 +2021,14 @@ class Pad(BaseOperator):
if self.size:
h, w = self.size
assert (
- im_h < h and im_w < w
+ im_h <= h and im_w <= w
), '(h, w) of target size should be no less than (im_h, im_w)'
else:
- h = np.ceil(im_h / self.size_divisor) * self.size_divisor
- w = np.ceil(im_w / self.size_divisor) * self.size_divisor
+ h = int(np.ceil(im_h / self.size_divisor) * self.size_divisor)
+ w = int(np.ceil(im_w / self.size_divisor) * self.size_divisor)
if h == im_h and w == im_w:
+ sample['image'] = im.astype(np.float32)
return sample
if self.pad_mode == -1:
@@ -2106,44 +2099,30 @@ class Poly2Mask(BaseOperator):
@register_op
-class Rbox2Poly(BaseOperator):
- """
- Convert rbbox format to poly format.
+class AugmentHSV(BaseOperator):
+ """
+ Augment the SV channel of image data.
+ Args:
+ fraction (float): the fraction for augment. Default: 0.5.
+ is_bgr (bool): whether the image is BGR mode. Default: True.
+ hgain (float): H channel gains
+ sgain (float): S channel gains
+ vgain (float): V channel gains
"""
- def __init__(self):
- super(Rbox2Poly, self).__init__()
-
- def apply(self, sample, context=None):
- assert 'gt_rbox' in sample
- assert sample['gt_rbox'].shape[1] == 5
- rrects = sample['gt_rbox']
- x_ctr = rrects[:, 0]
- y_ctr = rrects[:, 1]
- width = rrects[:, 2]
- height = rrects[:, 3]
- x1 = x_ctr - width / 2.0
- y1 = y_ctr - height / 2.0
- x2 = x_ctr + width / 2.0
- y2 = y_ctr + height / 2.0
- sample['gt_bbox'] = np.stack([x1, y1, x2, y2], axis=1)
- polys = bbox_utils.rbox2poly_np(rrects)
- sample['gt_rbox2poly'] = polys
- return sample
-
-
-@register_op
-class AugmentHSV(BaseOperator):
- def __init__(self, fraction=0.50, is_bgr=True):
- """
- Augment the SV channel of image data.
- Args:
- fraction (float): the fraction for augment. Default: 0.5.
- is_bgr (bool): whether the image is BGR mode. Default: True.
- """
+ def __init__(self,
+ fraction=0.50,
+ is_bgr=True,
+ hgain=None,
+ sgain=None,
+ vgain=None):
super(AugmentHSV, self).__init__()
self.fraction = fraction
self.is_bgr = is_bgr
+ self.hgain = hgain
+ self.sgain = sgain
+ self.vgain = vgain
+ self.use_hsvgain = False if hgain is None else True
def apply(self, sample, context=None):
img = sample['image']
@@ -2151,27 +2130,39 @@ class AugmentHSV(BaseOperator):
img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
else:
img_hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
- S = img_hsv[:, :, 1].astype(np.float32)
- V = img_hsv[:, :, 2].astype(np.float32)
- a = (random.random() * 2 - 1) * self.fraction + 1
- S *= a
- if a > 1:
- np.clip(S, a_min=0, a_max=255, out=S)
+ if self.use_hsvgain:
+ hsv_augs = np.random.uniform(
+ -1, 1, 3) * [self.hgain, self.sgain, self.vgain]
+ # random selection of h, s, v
+ hsv_augs *= np.random.randint(0, 2, 3)
+ img_hsv[..., 0] = (img_hsv[..., 0] + hsv_augs[0]) % 180
+ img_hsv[..., 1] = np.clip(img_hsv[..., 1] + hsv_augs[1], 0, 255)
+ img_hsv[..., 2] = np.clip(img_hsv[..., 2] + hsv_augs[2], 0, 255)
+
+ else:
+ S = img_hsv[:, :, 1].astype(np.float32)
+ V = img_hsv[:, :, 2].astype(np.float32)
+
+ a = (random.random() * 2 - 1) * self.fraction + 1
+ S *= a
+ if a > 1:
+ np.clip(S, a_min=0, a_max=255, out=S)
+
+ a = (random.random() * 2 - 1) * self.fraction + 1
+ V *= a
+ if a > 1:
+ np.clip(V, a_min=0, a_max=255, out=V)
- a = (random.random() * 2 - 1) * self.fraction + 1
- V *= a
- if a > 1:
- np.clip(V, a_min=0, a_max=255, out=V)
+ img_hsv[:, :, 1] = S.astype(np.uint8)
+ img_hsv[:, :, 2] = V.astype(np.uint8)
- img_hsv[:, :, 1] = S.astype(np.uint8)
- img_hsv[:, :, 2] = V.astype(np.uint8)
if self.is_bgr:
cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)
else:
cv2.cvtColor(img_hsv, cv2.COLOR_HSV2RGB, dst=img)
- sample['image'] = img
+ sample['image'] = img.astype(np.float32)
return sample
@@ -2223,7 +2214,7 @@ class RandomResizeCrop(BaseOperator):
'long', resize the image's long side to the maximum of target_size, if keep_ratio is
True and mode is 'short', resize the image's short side to the minimum of target_size.
cropsizes (list): crop sizes after resize, [(min_crop_1, max_crop_1), ...]
- mode (str): resize mode, `long` or `short`. Details see resizes.
+ mode (str): resize mode, `long` or `short`. See `resizes` for details.
prob (float): probability of this op.
keep_ratio (bool): whether keep_ratio or not, default true
interp (int): the interpolation method
@@ -2425,16 +2416,6 @@ class RandomResizeCrop(BaseOperator):
[im_scale_x, im_scale_y],
[resize_w, resize_h])
- # apply rbox
- if 'gt_rbox2poly' in sample:
- if np.array(sample['gt_rbox2poly']).shape[1] != 8:
- logger.warn(
- "gt_rbox2poly's length shoule be 8, but actually is {}".
- format(len(sample['gt_rbox2poly'])))
- sample['gt_rbox2poly'] = self.apply_bbox(sample['gt_rbox2poly'],
- [im_scale_x, im_scale_y],
- [resize_w, resize_h])
-
# apply polygon
if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
sample['gt_poly'] = self.apply_segm(sample['gt_poly'], im_shape[:2],
@@ -2892,7 +2873,7 @@ class FlipWarpAffine(BaseOperator):
"""FlipWarpAffine
1. Random Crop
2. Flip the image horizontal
- 3. Warp affine the image
+        3. Apply a warp affine transform to the image
"""
super(FlipWarpAffine, self).__init__()
self.keep_res = keep_res
@@ -3013,3 +2994,409 @@ class CenterRandColor(BaseOperator):
img = func(img, img_gray)
sample['image'] = img
return sample
+
+
+@register_op
+class Mosaic(BaseOperator):
+ """ Mosaic operator for image and gt_bboxes
+ The code is based on https://github.com/Megvii-BaseDetection/YOLOX/blob/main/yolox/data/datasets/mosaicdetection.py
+
+ 1. get mosaic coords
+ 2. clip bbox and get mosaic_labels
+ 3. random_affine augment
+    4. Mixup augment as CopyPaste (optional), not used in tiny/nano
+
+ Args:
+ prob (float): probability of using Mosaic, 1.0 as default
+ input_dim (list[int]): input shape
+ degrees (list[2]): the rotate range to apply, transform range is [min, max]
+ translate (list[2]): the translate range to apply, transform range is [min, max]
+ scale (list[2]): the scale range to apply, transform range is [min, max]
+ shear (list[2]): the shear range to apply, transform range is [min, max]
+ enable_mixup (bool): whether to enable Mixup or not
+ mixup_prob (float): probability of using Mixup, 1.0 as default
+ mixup_scale (list[int]): scale range of Mixup
+        remove_outside_box (bool): whether to remove boxes that fall outside
+            the image. False as default for COCO datasets, True for MOT.
+ """
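+
+    # Example (a sketch): expects a list of five sample dicts, four for the
+    # mosaic and one for mixup, each with 'image', 'gt_bbox', 'gt_class'
+    # and 'im_shape':
+    #   mosaic = Mosaic(prob=1.0, input_dim=[640, 640])
+    #   merged = mosaic([s0, s1, s2, s3, s_mix])  # returns one merged sample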
+
+ def __init__(self,
+ prob=1.0,
+ input_dim=[640, 640],
+ degrees=[-10, 10],
+ translate=[-0.1, 0.1],
+ scale=[0.1, 2],
+ shear=[-2, 2],
+ enable_mixup=True,
+ mixup_prob=1.0,
+ mixup_scale=[0.5, 1.5],
+ remove_outside_box=False):
+ super(Mosaic, self).__init__()
+ self.prob = prob
+ if isinstance(input_dim, Integral):
+ input_dim = [input_dim, input_dim]
+ self.input_dim = input_dim
+ self.degrees = degrees
+ self.translate = translate
+ self.scale = scale
+ self.shear = shear
+ self.enable_mixup = enable_mixup
+ self.mixup_prob = mixup_prob
+ self.mixup_scale = mixup_scale
+ self.remove_outside_box = remove_outside_box
+
+ def get_mosaic_coords(self, mosaic_idx, xc, yc, w, h, input_h, input_w):
+ # (x1, y1, x2, y2) means coords in large image,
+ # small_coords means coords in small image in mosaic aug.
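+        #
+        # The (2 * input_h, 2 * input_w) canvas is split by the random center
+        # (xc, yc) into four quadrants, filled in index order:
+        #     +------------+------------+
+        #     | 0 top-left | 1 top-right|
+        #     +--------(xc, yc)---------+
+        #     | 2 bot-left | 3 bot-right|
+        #     +------------+------------+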
+ if mosaic_idx == 0:
+ # top left
+ x1, y1, x2, y2 = max(xc - w, 0), max(yc - h, 0), xc, yc
+ small_coords = w - (x2 - x1), h - (y2 - y1), w, h
+ elif mosaic_idx == 1:
+ # top right
+ x1, y1, x2, y2 = xc, max(yc - h, 0), min(xc + w, input_w * 2), yc
+ small_coords = 0, h - (y2 - y1), min(w, x2 - x1), h
+ elif mosaic_idx == 2:
+ # bottom left
+ x1, y1, x2, y2 = max(xc - w, 0), yc, xc, min(input_h * 2, yc + h)
+ small_coords = w - (x2 - x1), 0, w, min(y2 - y1, h)
+ elif mosaic_idx == 3:
+ # bottom right
+ x1, y1, x2, y2 = xc, yc, min(xc + w, input_w * 2), min(input_h * 2,
+ yc + h)
+ small_coords = 0, 0, min(w, x2 - x1), min(y2 - y1, h)
+
+ return (x1, y1, x2, y2), small_coords
+
+ def random_affine_augment(self,
+ img,
+ labels=[],
+ input_dim=[640, 640],
+ degrees=[-10, 10],
+ scales=[0.1, 2],
+ shears=[-2, 2],
+ translates=[-0.1, 0.1]):
+ # random rotation and scale
+ degree = random.uniform(degrees[0], degrees[1])
+ scale = random.uniform(scales[0], scales[1])
+ assert scale > 0, "Argument scale should be positive."
+ R = cv2.getRotationMatrix2D(angle=degree, center=(0, 0), scale=scale)
+ M = np.ones([2, 3])
+
+ # random shear
+ shear = random.uniform(shears[0], shears[1])
+ shear_x = math.tan(shear * math.pi / 180)
+ shear_y = math.tan(shear * math.pi / 180)
+ M[0] = R[0] + shear_y * R[1]
+ M[1] = R[1] + shear_x * R[0]
+
+ # random translation
+ translate = random.uniform(translates[0], translates[1])
+ translation_x = translate * input_dim[0]
+ translation_y = translate * input_dim[1]
+ M[0, 2] = translation_x
+ M[1, 2] = translation_y
+
+ # warpAffine
+ img = cv2.warpAffine(
+ img, M, dsize=tuple(input_dim), borderValue=(114, 114, 114))
+
+ num_gts = len(labels)
+ if num_gts > 0:
+ # warp corner points
+ corner_points = np.ones((4 * num_gts, 3))
+ corner_points[:, :2] = labels[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(
+ 4 * num_gts, 2) # x1y1, x2y2, x1y2, x2y1
+ # apply affine transform
+            corner_points = corner_points @ M.T
+ corner_points = corner_points.reshape(num_gts, 8)
+
+ # create new boxes
+ corner_xs = corner_points[:, 0::2]
+ corner_ys = corner_points[:, 1::2]
+ new_bboxes = np.concatenate((corner_xs.min(1), corner_ys.min(1),
+ corner_xs.max(1), corner_ys.max(1)))
+ new_bboxes = new_bboxes.reshape(4, num_gts).T
+
+ # clip boxes
+ new_bboxes[:, 0::2] = np.clip(new_bboxes[:, 0::2], 0, input_dim[0])
+ new_bboxes[:, 1::2] = np.clip(new_bboxes[:, 1::2], 0, input_dim[1])
+ labels[:, :4] = new_bboxes
+
+ return img, labels
+
+ def __call__(self, sample, context=None):
+ if not isinstance(sample, Sequence):
+ return sample
+
+ assert len(
+ sample) == 5, "Mosaic needs 5 samples, 4 for mosaic and 1 for mixup."
+ if np.random.uniform(0., 1.) > self.prob:
+ return sample[0]
+
+ mosaic_gt_bbox, mosaic_gt_class, mosaic_is_crowd, mosaic_difficult = [], [], [], []
+ input_h, input_w = self.input_dim
+ yc = int(random.uniform(0.5 * input_h, 1.5 * input_h))
+ xc = int(random.uniform(0.5 * input_w, 1.5 * input_w))
+ mosaic_img = np.full((input_h * 2, input_w * 2, 3), 114, dtype=np.uint8)
+
+ # 1. get mosaic coords
+ for mosaic_idx, sp in enumerate(sample[:4]):
+ img = sp['image']
+ gt_bbox = sp['gt_bbox']
+ h0, w0 = img.shape[:2]
+ scale = min(1. * input_h / h0, 1. * input_w / w0)
+ img = cv2.resize(
+ img, (int(w0 * scale), int(h0 * scale)),
+ interpolation=cv2.INTER_LINEAR)
+ (h, w, c) = img.shape[:3]
+
+ # suffix l means large image, while s means small image in mosaic aug.
+ (l_x1, l_y1, l_x2, l_y2), (
+ s_x1, s_y1, s_x2, s_y2) = self.get_mosaic_coords(
+ mosaic_idx, xc, yc, w, h, input_h, input_w)
+
+ mosaic_img[l_y1:l_y2, l_x1:l_x2] = img[s_y1:s_y2, s_x1:s_x2]
+ padw, padh = l_x1 - s_x1, l_y1 - s_y1
+
+ # Normalized xywh to pixel xyxy format
+ _gt_bbox = gt_bbox.copy()
+ if len(gt_bbox) > 0:
+ _gt_bbox[:, 0] = scale * gt_bbox[:, 0] + padw
+ _gt_bbox[:, 1] = scale * gt_bbox[:, 1] + padh
+ _gt_bbox[:, 2] = scale * gt_bbox[:, 2] + padw
+ _gt_bbox[:, 3] = scale * gt_bbox[:, 3] + padh
+
+ mosaic_gt_bbox.append(_gt_bbox)
+ mosaic_gt_class.append(sp['gt_class'])
+ if 'is_crowd' in sp:
+ mosaic_is_crowd.append(sp['is_crowd'])
+ if 'difficult' in sp:
+ mosaic_difficult.append(sp['difficult'])
+
+ # 2. clip bbox and get mosaic_labels([gt_bbox, gt_class, is_crowd])
+ if len(mosaic_gt_bbox):
+ mosaic_gt_bbox = np.concatenate(mosaic_gt_bbox, 0)
+ mosaic_gt_class = np.concatenate(mosaic_gt_class, 0)
+ if mosaic_is_crowd:
+ mosaic_is_crowd = np.concatenate(mosaic_is_crowd, 0)
+ mosaic_labels = np.concatenate([
+ mosaic_gt_bbox,
+ mosaic_gt_class.astype(mosaic_gt_bbox.dtype),
+ mosaic_is_crowd.astype(mosaic_gt_bbox.dtype)
+ ], 1)
+ elif mosaic_difficult:
+ mosaic_difficult = np.concatenate(mosaic_difficult, 0)
+ mosaic_labels = np.concatenate([
+ mosaic_gt_bbox,
+ mosaic_gt_class.astype(mosaic_gt_bbox.dtype),
+ mosaic_difficult.astype(mosaic_gt_bbox.dtype)
+ ], 1)
+ else:
+ mosaic_labels = np.concatenate([
+ mosaic_gt_bbox, mosaic_gt_class.astype(mosaic_gt_bbox.dtype)
+ ], 1)
+ if self.remove_outside_box:
+ # for MOT dataset
+ flag1 = mosaic_gt_bbox[:, 0] < 2 * input_w
+ flag2 = mosaic_gt_bbox[:, 2] > 0
+ flag3 = mosaic_gt_bbox[:, 1] < 2 * input_h
+ flag4 = mosaic_gt_bbox[:, 3] > 0
+ flag_all = flag1 * flag2 * flag3 * flag4
+ mosaic_labels = mosaic_labels[flag_all]
+ else:
+ mosaic_labels[:, 0] = np.clip(mosaic_labels[:, 0], 0,
+ 2 * input_w)
+ mosaic_labels[:, 1] = np.clip(mosaic_labels[:, 1], 0,
+ 2 * input_h)
+ mosaic_labels[:, 2] = np.clip(mosaic_labels[:, 2], 0,
+ 2 * input_w)
+ mosaic_labels[:, 3] = np.clip(mosaic_labels[:, 3], 0,
+ 2 * input_h)
+ else:
+ mosaic_labels = np.zeros((1, 6))
+
+ # 3. random_affine augment
+ mosaic_img, mosaic_labels = self.random_affine_augment(
+ mosaic_img,
+ mosaic_labels,
+ input_dim=self.input_dim,
+ degrees=self.degrees,
+ translates=self.translate,
+ scales=self.scale,
+ shears=self.shear)
+
+ # 4. Mixup augment as copypaste, https://arxiv.org/abs/2012.07177
+        # optional, not used (enable_mixup=False) in tiny/nano
+ if (self.enable_mixup and not len(mosaic_labels) == 0 and
+ random.random() < self.mixup_prob):
+ sample_mixup = sample[4]
+ mixup_img = sample_mixup['image']
+ if 'is_crowd' in sample_mixup:
+ cp_labels = np.concatenate([
+ sample_mixup['gt_bbox'],
+ sample_mixup['gt_class'].astype(mosaic_labels.dtype),
+ sample_mixup['is_crowd'].astype(mosaic_labels.dtype)
+ ], 1)
+ elif 'difficult' in sample_mixup:
+ cp_labels = np.concatenate([
+ sample_mixup['gt_bbox'],
+ sample_mixup['gt_class'].astype(mosaic_labels.dtype),
+ sample_mixup['difficult'].astype(mosaic_labels.dtype)
+ ], 1)
+ else:
+ cp_labels = np.concatenate([
+ sample_mixup['gt_bbox'],
+ sample_mixup['gt_class'].astype(mosaic_labels.dtype)
+ ], 1)
+ mosaic_img, mosaic_labels = self.mixup_augment(
+ mosaic_img, mosaic_labels, self.input_dim, cp_labels, mixup_img)
+
+ sample0 = sample[0]
+        sample0['image'] = mosaic_img.astype(np.uint8)  # cannot be float32
+ sample0['h'] = float(mosaic_img.shape[0])
+ sample0['w'] = float(mosaic_img.shape[1])
+ sample0['im_shape'][0] = sample0['h']
+ sample0['im_shape'][1] = sample0['w']
+ sample0['gt_bbox'] = mosaic_labels[:, :4].astype(np.float32)
+ sample0['gt_class'] = mosaic_labels[:, 4:5].astype(np.float32)
+ if 'is_crowd' in sample[0]:
+ sample0['is_crowd'] = mosaic_labels[:, 5:6].astype(np.float32)
+ if 'difficult' in sample[0]:
+ sample0['difficult'] = mosaic_labels[:, 5:6].astype(np.float32)
+ return sample0
+
+ def mixup_augment(self, origin_img, origin_labels, input_dim, cp_labels,
+ img):
+ jit_factor = random.uniform(*self.mixup_scale)
+ FLIP = random.uniform(0, 1) > 0.5
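+        # jit_factor rescales the pasted image around the letterboxed size and
+        # FLIP mirrors it horizontally; the boxes are adjusted to match below.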
+ if len(img.shape) == 3:
+ cp_img = np.ones(
+ (input_dim[0], input_dim[1], 3), dtype=np.uint8) * 114
+ else:
+ cp_img = np.ones(input_dim, dtype=np.uint8) * 114
+
+ cp_scale_ratio = min(input_dim[0] / img.shape[0],
+ input_dim[1] / img.shape[1])
+ resized_img = cv2.resize(
+ img, (int(img.shape[1] * cp_scale_ratio),
+ int(img.shape[0] * cp_scale_ratio)),
+ interpolation=cv2.INTER_LINEAR)
+
+ cp_img[:int(img.shape[0] * cp_scale_ratio), :int(img.shape[
+ 1] * cp_scale_ratio)] = resized_img
+
+ cp_img = cv2.resize(cp_img, (int(cp_img.shape[1] * jit_factor),
+ int(cp_img.shape[0] * jit_factor)))
+ cp_scale_ratio *= jit_factor
+
+ if FLIP:
+ cp_img = cp_img[:, ::-1, :]
+
+ origin_h, origin_w = cp_img.shape[:2]
+ target_h, target_w = origin_img.shape[:2]
+ padded_img = np.zeros(
+ (max(origin_h, target_h), max(origin_w, target_w), 3),
+ dtype=np.uint8)
+ padded_img[:origin_h, :origin_w] = cp_img
+
+ x_offset, y_offset = 0, 0
+ if padded_img.shape[0] > target_h:
+ y_offset = random.randint(0, padded_img.shape[0] - target_h - 1)
+ if padded_img.shape[1] > target_w:
+ x_offset = random.randint(0, padded_img.shape[1] - target_w - 1)
+ padded_cropped_img = padded_img[y_offset:y_offset + target_h, x_offset:
+ x_offset + target_w]
+
+ # adjust boxes
+ cp_bboxes_origin_np = cp_labels[:, :4].copy()
+ cp_bboxes_origin_np[:, 0::2] = np.clip(cp_bboxes_origin_np[:, 0::2] *
+ cp_scale_ratio, 0, origin_w)
+ cp_bboxes_origin_np[:, 1::2] = np.clip(cp_bboxes_origin_np[:, 1::2] *
+ cp_scale_ratio, 0, origin_h)
+
+ if FLIP:
+ cp_bboxes_origin_np[:, 0::2] = (
+ origin_w - cp_bboxes_origin_np[:, 0::2][:, ::-1])
+ cp_bboxes_transformed_np = cp_bboxes_origin_np.copy()
+ if self.remove_outside_box:
+ # for MOT dataset
+ cp_bboxes_transformed_np[:, 0::2] -= x_offset
+ cp_bboxes_transformed_np[:, 1::2] -= y_offset
+ else:
+ cp_bboxes_transformed_np[:, 0::2] = np.clip(
+ cp_bboxes_transformed_np[:, 0::2] - x_offset, 0, target_w)
+ cp_bboxes_transformed_np[:, 1::2] = np.clip(
+ cp_bboxes_transformed_np[:, 1::2] - y_offset, 0, target_h)
+
+ cls_labels = cp_labels[:, 4:5].copy()
+ box_labels = cp_bboxes_transformed_np
+ if cp_labels.shape[-1] == 6:
+ crd_labels = cp_labels[:, 5:6].copy()
+ labels = np.hstack((box_labels, cls_labels, crd_labels))
+ else:
+ labels = np.hstack((box_labels, cls_labels))
+ if self.remove_outside_box:
+ labels = labels[labels[:, 0] < target_w]
+ labels = labels[labels[:, 2] > 0]
+ labels = labels[labels[:, 1] < target_h]
+ labels = labels[labels[:, 3] > 0]
+
+ origin_labels = np.vstack((origin_labels, labels))
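+        # plain 50/50 convex blend of the mosaic image and the pasted crop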
+ origin_img = origin_img.astype(np.float32)
+ origin_img = 0.5 * origin_img + 0.5 * padded_cropped_img.astype(
+ np.float32)
+
+ return origin_img.astype(np.uint8), origin_labels
+
+
+@register_op
+class PadResize(BaseOperator):
+    """ PadResize for image and gt_bbox
+
+ Args:
+ target_size (list[int]): input shape
+ fill_value (float): pixel value of padded image
+ """
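+
+    # Example (a sketch): letterbox-style resize then pad to a square input:
+    #   op = PadResize(target_size=640, fill_value=114)
+    #   sample = op({'image': img, 'gt_bbox': boxes, 'gt_class': labels})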
+
+ def __init__(self, target_size, fill_value=114):
+ super(PadResize, self).__init__()
+ if isinstance(target_size, Integral):
+ target_size = [target_size, target_size]
+ self.target_size = target_size
+ self.fill_value = fill_value
+
+ def _resize(self, img, bboxes, labels):
+ ratio = min(self.target_size[0] / img.shape[0],
+ self.target_size[1] / img.shape[1])
+ w, h = int(img.shape[1] * ratio), int(img.shape[0] * ratio)
+ resized_img = cv2.resize(img, (w, h), interpolation=cv2.INTER_LINEAR)
+
+ if len(bboxes) > 0:
+ bboxes *= ratio
+ mask = np.minimum(bboxes[:, 2] - bboxes[:, 0],
+ bboxes[:, 3] - bboxes[:, 1]) > 1
+ bboxes = bboxes[mask]
+ labels = labels[mask]
+ return resized_img, bboxes, labels
+
+ def _pad(self, img):
+ h, w, _ = img.shape
+ if h == self.target_size[0] and w == self.target_size[1]:
+ return img
+ padded_img = np.full(
+ (self.target_size[0], self.target_size[1], 3),
+ self.fill_value,
+ dtype=np.uint8)
+ padded_img[:h, :w] = img
+ return padded_img
+
+ def apply(self, sample, context=None):
+ image = sample['image']
+ bboxes = sample['gt_bbox']
+ labels = sample['gt_class']
+ image, bboxes, labels = self._resize(image, bboxes, labels)
+ sample['image'] = self._pad(image).astype(np.float32)
+ sample['gt_bbox'] = bboxes
+ sample['gt_class'] = labels
+ return sample
diff --git a/paddlers/models/ppdet/data/transform/rotated_operators.py b/paddlers/models/ppdet/data/transform/rotated_operators.py
new file mode 100644
index 0000000..e643d37
--- /dev/null
+++ b/paddlers/models/ppdet/data/transform/rotated_operators.py
@@ -0,0 +1,479 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import print_function
+from __future__ import division
+
+try:
+ from collections.abc import Sequence
+except Exception:
+ from collections import Sequence
+
+from numbers import Number, Integral
+
+import os
+import math
+import copy
+
+import cv2
+import numpy as np
+from PIL import Image, ImageDraw  # used by VisibleRBox below
+
+from .operators import register_op, BaseOperator, ImageError  # ImageError is defined alongside BaseOperator
+from paddlers.models.ppdet.modeling.rbox_utils import poly2rbox_le135_np, poly2rbox_oc_np, rbox2poly_np
+from paddlers.models.ppdet.utils.logger import setup_logger
+logger = setup_logger(__name__)
+
+
+@register_op
+class RRotate(BaseOperator):
+ """ Rotate Image, Polygon, Box
+
+ Args:
+ scale (float): rotate scale
+ angle (float): rotate angle
+ fill_value (int, tuple): fill color
+ auto_bound (bool): whether auto bound or not
+ """
+
+ def __init__(self, scale=1.0, angle=0., fill_value=0., auto_bound=True):
+ super(RRotate, self).__init__()
+ self.scale = scale
+ self.angle = angle
+ self.fill_value = fill_value
+ self.auto_bound = auto_bound
+
+ def get_rotated_matrix(self, angle, scale, h, w):
+ center = ((w - 1) * 0.5, (h - 1) * 0.5)
+ matrix = cv2.getRotationMatrix2D(center, -angle, scale)
+ # calculate the new size
+ cos = np.abs(matrix[0, 0])
+ sin = np.abs(matrix[0, 1])
+ new_w = h * sin + w * cos
+ new_h = h * cos + w * sin
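+        # e.g. angle=90 on an (h=100, w=200) image bounds to new_w=100, new_h=200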
+ # calculate offset
+ n_w = int(np.round(new_w))
+ n_h = int(np.round(new_h))
+ if self.auto_bound:
+ ratio = min(w / n_w, h / n_h)
+ matrix = cv2.getRotationMatrix2D(center, -angle, ratio)
+ else:
+ matrix[0, 2] += (new_w - w) * 0.5
+ matrix[1, 2] += (new_h - h) * 0.5
+ w = n_w
+ h = n_h
+ return matrix, h, w
+
+ def get_rect_from_pts(self, pts, h, w):
+ """ get minimum rectangle of points
+ """
+ assert pts.shape[-1] % 2 == 0, 'the dim of input [pts] is not correct'
+ min_x, min_y = np.min(pts[:, 0::2], axis=1), np.min(pts[:, 1::2],
+ axis=1)
+ max_x, max_y = np.max(pts[:, 0::2], axis=1), np.max(pts[:, 1::2],
+ axis=1)
+ min_x, min_y = np.clip(min_x, 0, w), np.clip(min_y, 0, h)
+ max_x, max_y = np.clip(max_x, 0, w), np.clip(max_y, 0, h)
+ boxes = np.stack([min_x, min_y, max_x, max_y], axis=-1)
+ return boxes
+
+ def apply_image(self, image, matrix, h, w):
+ return cv2.warpAffine(
+ image, matrix, (w, h), borderValue=self.fill_value)
+
+ def apply_pts(self, pts, matrix, h, w):
+ assert pts.shape[-1] % 2 == 0, 'the dim of input [pts] is not correct'
+        # m is two times the number of points per box due to the (x, y) pairs
+        _, m = pts.shape
+ # transpose points
+ pts_ = pts.reshape(-1, 2).T
+ # pad 1 to convert the points to homogeneous coordinates
+ padding = np.ones((1, pts_.shape[1]), pts.dtype)
+ rotated_pts = np.matmul(matrix, np.concatenate((pts_, padding), axis=0))
+ return rotated_pts[:2, :].T.reshape(-1, m)
+
+ def apply(self, sample, context=None):
+ image = sample['image']
+ h, w = image.shape[:2]
+ matrix, h, w = self.get_rotated_matrix(self.angle, self.scale, h, w)
+ sample['image'] = self.apply_image(image, matrix, h, w)
+ polys = sample['gt_poly']
+ # TODO: segment or keypoint to be processed
+ if len(polys) > 0:
+ pts = self.apply_pts(polys, matrix, h, w)
+ sample['gt_poly'] = pts
+ sample['gt_bbox'] = self.get_rect_from_pts(pts, h, w)
+
+ return sample
+
+
+@register_op
+class RandomRRotate(BaseOperator):
+ """ Random Rotate Image
+ Args:
+ scale (float, tuple, list): rotate scale
+ scale_mode (str): mode of scale, [range, value, None]
+ angle (float, tuple, list): rotate angle
+ angle_mode (str): mode of angle, [range, value, None]
+ fill_value (float, tuple, list): fill value
+ rotate_prob (float): probability of rotation
+ auto_bound (bool): whether auto bound or not
+ """
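+
+    # Example (a sketch): rotate by a random choice of right angles half the
+    # time, a common setting for aerial imagery:
+    #   RandomRRotate(angle=[0, 90, 180, -90], angle_mode='value', rotate_prob=0.5)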
+
+ def __init__(self,
+ scale=1.0,
+ scale_mode=None,
+ angle=0.,
+ angle_mode=None,
+ fill_value=0.,
+ rotate_prob=1.0,
+ auto_bound=True):
+ super(RandomRRotate, self).__init__()
+ self.scale = scale
+ self.scale_mode = scale_mode
+ self.angle = angle
+ self.angle_mode = angle_mode
+ self.fill_value = fill_value
+ self.rotate_prob = rotate_prob
+ self.auto_bound = auto_bound
+
+ def get_angle(self, angle, angle_mode):
+ assert not angle_mode or angle_mode in [
+ 'range', 'value'
+ ], 'angle mode should be in [range, value, None]'
+ if not angle_mode:
+ return angle
+ elif angle_mode == 'range':
+ low, high = angle
+ return np.random.rand() * (high - low) + low
+ elif angle_mode == 'value':
+ return np.random.choice(angle)
+
+ def get_scale(self, scale, scale_mode):
+ assert not scale_mode or scale_mode in [
+ 'range', 'value'
+ ], 'scale mode should be in [range, value, None]'
+ if not scale_mode:
+ return scale
+ elif scale_mode == 'range':
+ low, high = scale
+ return np.random.rand() * (high - low) + low
+ elif scale_mode == 'value':
+ return np.random.choice(scale)
+
+ def apply(self, sample, context=None):
+ if np.random.rand() > self.rotate_prob:
+ return sample
+
+ angle = self.get_angle(self.angle, self.angle_mode)
+ scale = self.get_scale(self.scale, self.scale_mode)
+ rotator = RRotate(scale, angle, self.fill_value, self.auto_bound)
+ return rotator(sample)
+
+
+@register_op
+class Poly2RBox(BaseOperator):
+ """ Polygon to Rotated Box, using new OpenCV definition since 4.5.1
+
+ Args:
+ filter_threshold (int, float): threshold to filter annotations
+ filter_mode (str): filter mode, ['area', 'edge']
+ rbox_type (str): rbox type, ['le135', 'oc']
+
+ """
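+
+    # Example (a sketch): drop boxes whose shorter edge is under 4 pixels:
+    #   Poly2RBox(filter_threshold=4, filter_mode='edge', rbox_type='le135')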
+
+ def __init__(self, filter_threshold=4, filter_mode=None, rbox_type='le135'):
+ super(Poly2RBox, self).__init__()
+ self.filter_fn = lambda size: self.filter(size, filter_threshold, filter_mode)
+ self.rbox_fn = poly2rbox_le135_np if rbox_type == 'le135' else poly2rbox_oc_np
+
+ def filter(self, size, threshold, mode):
+ if mode == 'area':
+ if size[0] * size[1] < threshold:
+ return True
+ elif mode == 'edge':
+ if min(size) < threshold:
+ return True
+ return False
+
+ def get_rbox(self, polys):
+ valid_ids, rboxes, bboxes = [], [], []
+ for i, poly in enumerate(polys):
+ cx, cy, w, h, angle = self.rbox_fn(poly)
+ if self.filter_fn((w, h)):
+ continue
+ rboxes.append(np.array([cx, cy, w, h, angle], dtype=np.float32))
+ valid_ids.append(i)
+ xmin, ymin = min(poly[0::2]), min(poly[1::2])
+ xmax, ymax = max(poly[0::2]), max(poly[1::2])
+ bboxes.append(np.array([xmin, ymin, xmax, ymax], dtype=np.float32))
+
+ if len(valid_ids) == 0:
+ rboxes = np.zeros((0, 5), dtype=np.float32)
+ bboxes = np.zeros((0, 4), dtype=np.float32)
+ else:
+ rboxes = np.stack(rboxes)
+ bboxes = np.stack(bboxes)
+
+ return rboxes, bboxes, valid_ids
+
+ def apply(self, sample, context=None):
+ rboxes, bboxes, valid_ids = self.get_rbox(sample['gt_poly'])
+ sample['gt_rbox'] = rboxes
+ sample['gt_bbox'] = bboxes
+ for k in ['gt_class', 'gt_score', 'gt_poly', 'is_crowd', 'difficult']:
+ if k in sample:
+ sample[k] = sample[k][valid_ids]
+
+ return sample
+
+
+@register_op
+class Poly2Array(BaseOperator):
+    """ Convert gt_poly to np.array for rotated bboxes.
+ """
+
+ def __init__(self):
+ super(Poly2Array, self).__init__()
+
+ def apply(self, sample, context=None):
+ if 'gt_poly' in sample:
+ sample['gt_poly'] = np.array(
+ sample['gt_poly'], dtype=np.float32).reshape((-1, 8))
+
+ return sample
+
+
+@register_op
+class RResize(BaseOperator):
+ def __init__(self, target_size, keep_ratio, interp=cv2.INTER_LINEAR):
+ """
+        Resize the image to the target size. If keep_ratio is True,
+        resize the image's long side to the maximum of target_size;
+        if keep_ratio is False, resize the image to target_size (h, w).
+ Args:
+ target_size (int|list): image target size
+ keep_ratio (bool): whether keep_ratio or not, default true
+ interp (int): the interpolation method
+ """
+ super(RResize, self).__init__()
+ self.keep_ratio = keep_ratio
+ self.interp = interp
+ if not isinstance(target_size, (Integral, Sequence)):
+ raise TypeError(
+ "Type of target_size is invalid. Must be Integer or List or Tuple, now is {}".
+ format(type(target_size)))
+ if isinstance(target_size, Integral):
+ target_size = [target_size, target_size]
+ self.target_size = target_size
+
+ def apply_image(self, image, scale):
+ im_scale_x, im_scale_y = scale
+
+ return cv2.resize(
+ image,
+ None,
+ None,
+ fx=im_scale_x,
+ fy=im_scale_y,
+ interpolation=self.interp)
+
+ def apply_pts(self, pts, scale, size):
+ im_scale_x, im_scale_y = scale
+ resize_w, resize_h = size
+ pts[:, 0::2] *= im_scale_x
+ pts[:, 1::2] *= im_scale_y
+ pts[:, 0::2] = np.clip(pts[:, 0::2], 0, resize_w)
+ pts[:, 1::2] = np.clip(pts[:, 1::2], 0, resize_h)
+ return pts
+
+ def apply(self, sample, context=None):
+        """ Resize the image (a numpy array).
+ """
+ im = sample['image']
+ if not isinstance(im, np.ndarray):
+ raise TypeError("{}: image type is not numpy.".format(self))
+ if len(im.shape) != 3:
+ raise ImageError('{}: image is not 3-dimensional.'.format(self))
+
+ # apply image
+ im_shape = im.shape
+ if self.keep_ratio:
+
+ im_size_min = np.min(im_shape[0:2])
+ im_size_max = np.max(im_shape[0:2])
+
+ target_size_min = np.min(self.target_size)
+ target_size_max = np.max(self.target_size)
+
+ im_scale = min(target_size_min / im_size_min,
+ target_size_max / im_size_max)
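+            # e.g. target_size=[1024, 1024] and a 500x800 image give
+            # im_scale = min(1024 / 500, 1024 / 800) = 1.28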
+
+ resize_h = im_scale * float(im_shape[0])
+ resize_w = im_scale * float(im_shape[1])
+
+ im_scale_x = im_scale
+ im_scale_y = im_scale
+ else:
+ resize_h, resize_w = self.target_size
+ im_scale_y = resize_h / im_shape[0]
+ im_scale_x = resize_w / im_shape[1]
+
+ im = self.apply_image(sample['image'], [im_scale_x, im_scale_y])
+ sample['image'] = im.astype(np.float32)
+ sample['im_shape'] = np.asarray([resize_h, resize_w], dtype=np.float32)
+ if 'scale_factor' in sample:
+ scale_factor = sample['scale_factor']
+ sample['scale_factor'] = np.asarray(
+ [scale_factor[0] * im_scale_y, scale_factor[1] * im_scale_x],
+ dtype=np.float32)
+ else:
+ sample['scale_factor'] = np.asarray(
+ [im_scale_y, im_scale_x], dtype=np.float32)
+
+ # apply bbox
+ if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
+ sample['gt_bbox'] = self.apply_pts(sample['gt_bbox'],
+ [im_scale_x, im_scale_y],
+ [resize_w, resize_h])
+
+ # apply polygon
+ if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
+ sample['gt_poly'] = self.apply_pts(sample['gt_poly'],
+ [im_scale_x, im_scale_y],
+ [resize_w, resize_h])
+
+ return sample
+
+
+@register_op
+class RandomRFlip(BaseOperator):
+ def __init__(self, prob=0.5):
+ """
+ Args:
+ prob (float): the probability of flipping image
+ """
+ super(RandomRFlip, self).__init__()
+ self.prob = prob
+        if not isinstance(self.prob, float):
+            raise TypeError("{}: input type is invalid.".format(self))
+
+ def apply_image(self, image):
+ return image[:, ::-1, :]
+
+ def apply_pts(self, pts, width):
+ oldx = pts[:, 0::2].copy()
+ pts[:, 0::2] = width - oldx - 1
+ return pts
+
+ def apply(self, sample, context=None):
+        """Flip the image and bounding box.
+ Operators:
+ 1. Flip the image numpy.
+ 2. Transform the bboxes' x coordinates.
+ (Must judge whether the coordinates are normalized!)
+ 3. Transform the segmentations' x coordinates.
+ (Must judge whether the coordinates are normalized!)
+ Output:
+ sample: the image, bounding box and segmentation part
+ in sample are flipped.
+ """
+ if np.random.uniform(0, 1) < self.prob:
+ im = sample['image']
+ height, width = im.shape[:2]
+ im = self.apply_image(im)
+ if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
+ sample['gt_bbox'] = self.apply_pts(sample['gt_bbox'], width)
+ if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
+ sample['gt_poly'] = self.apply_pts(sample['gt_poly'], width)
+
+ sample['flipped'] = True
+ sample['image'] = im
+ return sample
+
+
+@register_op
+class VisibleRBox(BaseOperator):
+ """
+    In debug mode, visualize images according to `gt_poly`.
+    (Currently only supported when the image is not cropped or flipped.)
+ """
+
+    def __init__(self, output_dir='debug', is_normalized=False):
+        super(VisibleRBox, self).__init__()
+        self.output_dir = output_dir
+        # whether gt_keypoint coordinates are normalized to [0, 1]
+        self.is_normalized = is_normalized
+        if not os.path.isdir(output_dir):
+            os.makedirs(output_dir)
+
+ def apply(self, sample, context=None):
+ image = Image.fromarray(sample['image'].astype(np.uint8))
+ out_file_name = '{:012d}.jpg'.format(sample['im_id'][0])
+ width = sample['w']
+ height = sample['h']
+ # gt_poly = sample['gt_rbox']
+ gt_poly = sample['gt_poly']
+ gt_class = sample['gt_class']
+ draw = ImageDraw.Draw(image)
+ for i in range(gt_poly.shape[0]):
+ x1, y1, x2, y2, x3, y3, x4, y4 = gt_poly[i]
+ draw.line(
+ [(x1, y1), (x2, y2), (x3, y3), (x4, y4), (x1, y1)],
+ width=2,
+ fill='green')
+ # draw label
+ xmin = min(x1, x2, x3, x4)
+ ymin = min(y1, y2, y3, y4)
+ text = str(gt_class[i][0])
+ tw, th = draw.textsize(text)
+ draw.rectangle(
+ [(xmin + 1, ymin - th), (xmin + tw + 1, ymin)], fill='green')
+ draw.text((xmin + 1, ymin - th), text, fill=(255, 255, 255))
+
+ if 'gt_keypoint' in sample.keys():
+ gt_keypoint = sample['gt_keypoint']
+ if self.is_normalized:
+ for i in range(gt_keypoint.shape[1]):
+ if i % 2:
+ gt_keypoint[:, i] = gt_keypoint[:, i] * height
+ else:
+ gt_keypoint[:, i] = gt_keypoint[:, i] * width
+ for i in range(gt_keypoint.shape[0]):
+ keypoint = gt_keypoint[i]
+ for j in range(int(keypoint.shape[0] / 2)):
+                    x1 = int(round(keypoint[2 * j]))
+                    y1 = int(round(keypoint[2 * j + 1]))
+ draw.ellipse(
+ (x1, y1, x1 + 5, y1 + 5), fill='green', outline='green')
+ save_path = os.path.join(self.output_dir, out_file_name)
+ image.save(save_path, quality=95)
+ return sample
+
+
+@register_op
+class Rbox2Poly(BaseOperator):
+ """
+    Convert rbox format to poly format.
+ """
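+
+    # Each 'gt_rbox' row is [cx, cy, w, h, angle]; rbox2poly_np expands it to
+    # the 8 corner coordinates [x1, y1, x2, y2, x3, y3, x4, y4] per box.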
+
+ def __init__(self):
+ super(Rbox2Poly, self).__init__()
+
+ def apply(self, sample, context=None):
+ assert 'gt_rbox' in sample
+ assert sample['gt_rbox'].shape[1] == 5
+ rboxes = sample['gt_rbox']
+ polys = rbox2poly_np(rboxes)
+ sample['gt_poly'] = polys
+ xmin, ymin = polys[:, 0::2].min(1), polys[:, 1::2].min(1)
+ xmax, ymax = polys[:, 0::2].max(1), polys[:, 1::2].max(1)
+        sample['gt_bbox'] = np.stack([xmin, ymin, xmax, ymax], axis=1)
+ return sample
diff --git a/paddlers/models/ppdet/data/utils.py b/paddlers/models/ppdet/data/utils.py
new file mode 100644
index 0000000..02573e6
--- /dev/null
+++ b/paddlers/models/ppdet/data/utils.py
@@ -0,0 +1,72 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import numbers
+import numpy as np
+
+try:
+ from collections.abc import Sequence, Mapping
+except Exception:
+ from collections import Sequence, Mapping
+
+
+def default_collate_fn(batch):
+ """
+    Default batch collating function for :code:`paddle.io.DataLoader`.
+    It takes the input data as a list of samples, where each element
+    in the list is the data of one sample, and sample data should be
+    composed of lists, dictionaries, strings, numbers and numpy arrays.
+    This function parses the input data recursively and stacks numbers,
+    numpy arrays and paddle.Tensor data into batch data, e.g. for the
+    following input data:
+ [{'image': np.array(shape=[3, 224, 224]), 'label': 1},
+ {'image': np.array(shape=[3, 224, 224]), 'label': 3},
+ {'image': np.array(shape=[3, 224, 224]), 'label': 4},
+ {'image': np.array(shape=[3, 224, 224]), 'label': 5},]
+
+
+    This default collate function zips the number and numpy array
+    fields together and stacks each field into a batch field as follows:
+ {'image': np.array(shape=[4, 3, 224, 224]), 'label': np.array([1, 3, 4, 5])}
+ Args:
+ batch(list of sample data): batch should be a list of sample data.
+
+ Returns:
+ Batched data: batched each number, numpy array and paddle.Tensor
+ in input data.
+ """
+ sample = batch[0]
+ if isinstance(sample, np.ndarray):
+ batch = np.stack(batch, axis=0)
+ return batch
+ elif isinstance(sample, numbers.Number):
+ batch = np.array(batch)
+ return batch
+ elif isinstance(sample, (str, bytes)):
+ return batch
+ elif isinstance(sample, Mapping):
+ return {
+ key: default_collate_fn([d[key] for d in batch])
+ for key in sample
+ }
+ elif isinstance(sample, Sequence):
+ sample_fields_num = len(sample)
+ if not all(len(sample) == sample_fields_num for sample in iter(batch)):
+ raise RuntimeError(
+                "field number not the same among samples in a batch")
+ return [default_collate_fn(fields) for fields in zip(*batch)]
+
+    raise TypeError("batch data can only contain: tensor, numpy.ndarray, "
+ "dict, list, number, but got {}".format(type(sample)))
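+
+
+# Minimal usage sketch (hypothetical `dataset` yielding dicts of numpy arrays):
+#   loader = paddle.io.DataLoader(dataset, batch_size=4,
+#                                 collate_fn=default_collate_fn)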
diff --git a/paddlers/models/ppdet/engine/__init__.py b/paddlers/models/ppdet/engine/__init__.py
index 038bb0f..0074a7e 100644
--- a/paddlers/models/ppdet/engine/__init__.py
+++ b/paddlers/models/ppdet/engine/__init__.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
from . import trainer
diff --git a/paddlers/models/ppdet/engine/callbacks.py b/paddlers/models/ppdet/engine/callbacks.py
index 6af2e82..e4a25f7 100644
--- a/paddlers/models/ppdet/engine/callbacks.py
+++ b/paddlers/models/ppdet/engine/callbacks.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
@@ -182,7 +182,7 @@ class Checkpointer(Callback):
) % self.model.cfg.snapshot_epoch == 0 or epoch_id == end_epoch - 1:
save_name = str(
epoch_id) if epoch_id != end_epoch - 1 else "model_final"
- weight = self.weight
+ weight = self.weight.state_dict()
elif mode == 'eval':
if 'save_best_model' in status and status['save_best_model']:
for metric in self.model._metrics:
@@ -198,15 +198,25 @@ class Checkpointer(Callback):
"training iterations being too few or not " \
"loading the correct weights.")
return
- if map_res[key][0] > self.best_ap:
+ if map_res[key][0] >= self.best_ap:
self.best_ap = map_res[key][0]
save_name = 'best_model'
- weight = self.weight
+ weight = self.weight.state_dict()
logger.info("Best test {} ap is {:0.3f}.".format(
key, self.best_ap))
if weight:
- save_model(weight, self.model.optimizer, self.save_dir,
- save_name, epoch_id + 1)
+ if self.model.use_ema:
+ # save model and ema_model
+ save_model(
+ status['weight'],
+ self.model.optimizer,
+ self.save_dir,
+ save_name,
+ epoch_id + 1,
+ ema_model=weight)
+ else:
+ save_model(weight, self.model.optimizer, self.save_dir,
+ save_name, epoch_id + 1)
class WiferFaceEval(Callback):
@@ -251,7 +261,7 @@ class VisualDLWriter(Callback):
for loss_name, loss_value in training_staus.get().items():
self.vdl_writer.add_scalar(loss_name, loss_value,
self.vdl_loss_step)
- self.vdl_loss_step += 1
+ self.vdl_loss_step += 1
elif mode == 'test':
ori_image = status['original_image']
result_image = status['result_image']
@@ -279,6 +289,157 @@ class VisualDLWriter(Callback):
self.vdl_mAP_step += 1
+class WandbCallback(Callback):
+ def __init__(self, model):
+ super(WandbCallback, self).__init__(model)
+
+ try:
+ import wandb
+ self.wandb = wandb
+ except Exception as e:
+ logger.error('wandb not found, please install wandb. '
+ 'Use: `pip install wandb`.')
+ raise e
+
+ self.wandb_params = model.cfg.get('wandb', None)
+ self.save_dir = os.path.join(self.model.cfg.save_dir,
+ self.model.cfg.filename)
+ if self.wandb_params is None:
+ self.wandb_params = {}
+ for k, v in model.cfg.items():
+ if k.startswith("wandb_"):
+                # strip the "wandb_" prefix (str.lstrip removes a set of
+                # characters, not just the prefix)
+                self.wandb_params.update({k[len("wandb_"):]: v})
+
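+        # The wandb settings may also come from a `wandb` dict in the YAML
+        # config, e.g. (a sketch):
+        #   wandb:
+        #     project: my_project
+        #     entity: my_team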
+ self._run = None
+ if dist.get_world_size() < 2 or dist.get_rank() == 0:
+ _ = self.run
+ self.run.config.update(self.model.cfg)
+ self.run.define_metric("epoch")
+ self.run.define_metric("eval/*", step_metric="epoch")
+
+ self.best_ap = 0
+
+ @property
+ def run(self):
+ if self._run is None:
+ if self.wandb.run is not None:
+ logger.info(
+                    "There is an ongoing wandb run which will be used "
+                    "for logging. Please use `wandb.finish()` to end that "
+                    "if the behaviour is not intended.")
+ self._run = self.wandb.run
+ else:
+ self._run = self.wandb.init(**self.wandb_params)
+ return self._run
+
+ def save_model(self,
+ optimizer,
+ save_dir,
+ save_name,
+ last_epoch,
+ ema_model=None,
+ ap=None,
+ tags=None):
+ if dist.get_world_size() < 2 or dist.get_rank() == 0:
+ model_path = os.path.join(save_dir, save_name)
+ metadata = {}
+ metadata["last_epoch"] = last_epoch
+ if ap:
+ metadata["ap"] = ap
+            if ema_model:
+                # save both the EMA weights and the regular weights
+ ema_artifact = self.wandb.Artifact(
+ name="ema_model-{}".format(self.run.id),
+ type="model",
+ metadata=metadata)
+ model_artifact = self.wandb.Artifact(
+ name="model-{}".format(self.run.id),
+ type="model",
+ metadata=metadata)
+
+ ema_artifact.add_file(model_path + ".pdema", name="model_ema")
+ model_artifact.add_file(model_path + ".pdparams", name="model")
+
+ self.run.log_artifact(ema_artifact, aliases=tags)
+                self.run.log_artifact(model_artifact, aliases=tags)
+ else:
+ model_artifact = self.wandb.Artifact(
+ name="model-{}".format(self.run.id),
+ type="model",
+ metadata=metadata)
+ model_artifact.add_file(model_path + ".pdparams", name="model")
+ self.run.log_artifact(model_artifact, aliases=tags)
+
+ def on_step_end(self, status):
+
+ mode = status['mode']
+ if dist.get_world_size() < 2 or dist.get_rank() == 0:
+ if mode == 'train':
+ training_status = status['training_staus'].get()
+ for k, v in training_status.items():
+ training_status[k] = float(v)
+ metrics = {"train/" + k: v for k, v in training_status.items()}
+ self.run.log(metrics)
+
+ def on_epoch_end(self, status):
+ mode = status['mode']
+ epoch_id = status['epoch_id']
+ save_name = None
+ if dist.get_world_size() < 2 or dist.get_rank() == 0:
+ if mode == 'train':
+ end_epoch = self.model.cfg.epoch
+ if (
+ epoch_id + 1
+ ) % self.model.cfg.snapshot_epoch == 0 or epoch_id == end_epoch - 1:
+ save_name = str(
+ epoch_id) if epoch_id != end_epoch - 1 else "model_final"
+ tags = ["latest", "epoch_{}".format(epoch_id)]
+ self.save_model(
+ self.model.optimizer,
+ self.save_dir,
+ save_name,
+ epoch_id + 1,
+ self.model.use_ema,
+ tags=tags)
+ if mode == 'eval':
+ merged_dict = {}
+ for metric in self.model._metrics:
+ for key, map_value in metric.get_results().items():
+ merged_dict["eval/{}-mAP".format(key)] = map_value[0]
+ merged_dict["epoch"] = status["epoch_id"]
+ self.run.log(merged_dict)
+
+ if 'save_best_model' in status and status['save_best_model']:
+ for metric in self.model._metrics:
+ map_res = metric.get_results()
+ if 'bbox' in map_res:
+ key = 'bbox'
+ elif 'keypoint' in map_res:
+ key = 'keypoint'
+ else:
+ key = 'mask'
+ if key not in map_res:
+ logger.warning("Evaluation results empty, this may be due to " \
+ "training iterations being too few or not " \
+ "loading the correct weights.")
+ return
+ if map_res[key][0] >= self.best_ap:
+ self.best_ap = map_res[key][0]
+ save_name = 'best_model'
+ tags = ["best", "epoch_{}".format(epoch_id)]
+
+                        # save only when a new best AP is reached; otherwise
+                        # `save_name` and `tags` would be unbound here
+                        self.save_model(
+                            self.model.optimizer,
+                            self.save_dir,
+                            save_name,
+                            last_epoch=epoch_id + 1,
+                            ema_model=self.model.use_ema,
+                            ap=self.best_ap,
+                            tags=tags)
+
+ def on_train_end(self, status):
+ self.run.finish()
+
+
class SniperProposalsGenerator(Callback):
def __init__(self, model):
super(SniperProposalsGenerator, self).__init__(model)
diff --git a/paddlers/models/ppdet/engine/env.py b/paddlers/models/ppdet/engine/env.py
index 9a378dc..9e1a7e8 100644
--- a/paddlers/models/ppdet/engine/env.py
+++ b/paddlers/models/ppdet/engine/env.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
diff --git a/paddlers/models/ppdet/engine/export_utils.py b/paddlers/models/ppdet/engine/export_utils.py
index 79fe86a..5b0a997 100644
--- a/paddlers/models/ppdet/engine/export_utils.py
+++ b/paddlers/models/ppdet/engine/export_utils.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
@@ -41,22 +41,26 @@ TRT_MIN_SUBGRAPH = {
'HigherHRNet': 3,
'HRNet': 3,
'DeepSORT': 3,
+ 'ByteTrack': 10,
'JDE': 10,
'FairMOT': 5,
'GFL': 16,
'PicoDet': 3,
'CenterNet': 5,
'TOOD': 5,
+ 'YOLOX': 8,
}
KEYPOINT_ARCH = ['HigherHRNet', 'TopDownHRNet']
-MOT_ARCH = ['DeepSORT', 'JDE', 'FairMOT']
+MOT_ARCH = ['DeepSORT', 'JDE', 'FairMOT', 'ByteTrack']
def _prune_input_spec(input_spec, program, targets):
# try to prune static program to figure out pruned input spec
# so we perform following operations in static mode
+ device = paddle.get_device()
paddle.enable_static()
+ paddle.set_device(device)
pruned_input_spec = [{}]
program = program.clone()
program = program._prune(targets=targets)
@@ -67,7 +71,7 @@ def _prune_input_spec(input_spec, program, targets):
pruned_input_spec[0][name] = spec
except Exception:
pass
- paddle.disable_static()
+ paddle.disable_static(place=device)
return pruned_input_spec
@@ -88,6 +92,7 @@ def _parse_reader(reader_cfg, dataset_cfg, metric, arch, image_shape):
if key == 'Resize':
if int(image_shape[1]) != -1:
value['target_size'] = image_shape[1:]
+ value['interp'] = value.get('interp', 1) # cv2.INTER_LINEAR
if fuse_normalize and key == 'NormalizeImage':
continue
p.update(value)
@@ -120,12 +125,20 @@ def _dump_infer_config(config, path, image_shape, model):
setup_orderdict()
use_dynamic_shape = True if image_shape[2] == -1 else False
infer_cfg = OrderedDict({
- 'mode': 'fluid',
+ 'mode': 'paddle',
'draw_threshold': 0.5,
'metric': config['metric'],
'use_dynamic_shape': use_dynamic_shape
})
+ export_onnx = config.get('export_onnx', False)
+ export_eb = config.get('export_eb', False)
+
infer_arch = config['architecture']
+ if 'RCNN' in infer_arch and export_onnx:
+ logger.warning(
+ "Exporting RCNN model to ONNX only support batch_size = 1")
+ infer_cfg['export_onnx'] = True
+ infer_cfg['export_eb'] = export_eb
if infer_arch in MOT_ARCH:
if infer_arch == 'DeepSORT':
@@ -140,6 +153,12 @@ def _dump_infer_config(config, path, image_shape, model):
infer_cfg['min_subgraph_size'] = min_subgraph_size
arch_state = True
break
+
+ if infer_arch == 'YOLOX':
+ infer_cfg['arch'] = infer_arch
+ infer_cfg['min_subgraph_size'] = TRT_MIN_SUBGRAPH[infer_arch]
+ arch_state = True
+
if not arch_state:
logger.error(
'Architecture: {} is not supported for exporting model now.\n'.
@@ -165,12 +184,17 @@ def _dump_infer_config(config, path, image_shape, model):
reader_cfg, dataset_cfg, config['metric'], label_arch, image_shape[1:])
if infer_arch == 'PicoDet':
- infer_cfg['NMS'] = config['PicoHead']['nms']
- # In order to speed up the prediction, the threshold of nms
+ if hasattr(config, 'export') and config['export'].get(
+ 'post_process',
+ False) and not config['export'].get('benchmark', False):
+ infer_cfg['arch'] = 'GFL'
+ head_name = 'PicoHeadV2' if config['PicoHeadV2'] else 'PicoHead'
+ infer_cfg['NMS'] = config[head_name]['nms']
+ # In order to speed up the prediction, the threshold of nms
# is adjusted here, which can be changed in infer_cfg.yml
- config['PicoHead']['nms']["score_threshold"] = 0.3
- config['PicoHead']['nms']["nms_threshold"] = 0.5
- infer_cfg['fpn_stride'] = config['PicoHead']['fpn_stride']
+ config[head_name]['nms']["score_threshold"] = 0.3
+ config[head_name]['nms']["nms_threshold"] = 0.5
+ infer_cfg['fpn_stride'] = config[head_name]['fpn_stride']
yaml.dump(infer_cfg, open(path, 'w'))
logger.info("Export inference config file to {}".format(os.path.join(path)))
diff --git a/paddlers/models/ppdet/engine/tracker.py b/paddlers/models/ppdet/engine/tracker.py
index ab358bc..6a1b7a2 100644
--- a/paddlers/models/ppdet/engine/tracker.py
+++ b/paddlers/models/ppdet/engine/tracker.py
@@ -17,27 +17,33 @@ from __future__ import division
from __future__ import print_function
import os
-import cv2
import glob
import re
import paddle
+import paddle.nn as nn
import numpy as np
-import os.path as osp
+from tqdm import tqdm
from collections import defaultdict
from paddlers.models.ppdet.core.workspace import create
from paddlers.models.ppdet.utils.checkpoint import load_weight, load_pretrain_weight
from paddlers.models.ppdet.modeling.mot.utils import Detection, get_crops, scale_coords, clip_box
from paddlers.models.ppdet.modeling.mot.utils import MOTTimer, load_det_results, write_mot_results, save_vis_results
-
-from paddlers.models.ppdet.metrics import Metric, MOTMetric, KITTIMOTMetric
-from paddlers.models.ppdet.metrics import MCMOTMetric
+from paddlers.models.ppdet.modeling.mot.tracker import JDETracker, DeepSORTTracker, OCSORTTracker
+from paddlers.models.ppdet.modeling.architectures import YOLOX
+from paddlers.models.ppdet.metrics import Metric, MOTMetric, KITTIMOTMetric, MCMOTMetric
+import paddlers.models.ppdet.utils.stats as stats
from .callbacks import Callback, ComposeCallback
from paddlers.models.ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
+MOT_ARCH = ['DeepSORT', 'JDE', 'FairMOT', 'ByteTrack']
+MOT_ARCH_JDE = ['JDE', 'FairMOT']
+MOT_ARCH_SDE = ['DeepSORT', 'ByteTrack']
+MOT_DATA_TYPE = ['mot', 'mcmot', 'kitti']
+
__all__ = ['Tracker']
@@ -55,6 +61,12 @@ class Tracker(object):
# build model
self.model = create(cfg.architecture)
+ if isinstance(self.model.detector, YOLOX):
+ for k, m in self.model.named_sublayers():
+ if isinstance(m, nn.BatchNorm2D):
+ m._epsilon = 1e-3 # for amp(fp16)
+ m._momentum = 0.97 # 0.03 in pytorch
+
self.status = {}
self.start_epoch = 0
@@ -108,11 +120,15 @@ class Tracker(object):
load_weight(self.model, weights, self.optimizer)
def load_weights_sde(self, det_weights, reid_weights):
- if self.model.detector:
+ with_detector = self.model.detector is not None
+ with_reid = self.model.reid is not None
+
+ if with_detector:
load_weight(self.model.detector, det_weights)
- load_weight(self.model.reid, reid_weights)
+ if with_reid:
+ load_weight(self.model.reid, reid_weights)
else:
- load_weight(self.model.reid, reid_weights, self.optimizer)
+ load_weight(self.model.reid, reid_weights)
def _eval_seq_jde(self,
dataloader,
@@ -131,11 +147,8 @@ class Tracker(object):
self.model.eval()
results = defaultdict(list) # support single class and multi classes
- for step_id, data in enumerate(dataloader):
+ for step_id, data in enumerate(tqdm(dataloader)):
self.status['step_id'] = step_id
- if frame_id % 40 == 0:
- logger.info('Processing frame {} ({:.2f} fps)'.format(
- frame_id, 1. / max(1e-5, timer.average_time)))
# forward
timer.tic()
pred_dets, pred_embs = self.model(data)
@@ -184,24 +197,23 @@ class Tracker(object):
if save_dir:
if not os.path.exists(save_dir): os.makedirs(save_dir)
use_detector = False if not self.model.detector else True
+ use_reid = False if not self.model.reid else True
timer = MOTTimer()
results = defaultdict(list)
frame_id = 0
self.status['mode'] = 'track'
self.model.eval()
- self.model.reid.eval()
+ if use_reid:
+ self.model.reid.eval()
if not use_detector:
dets_list = load_det_results(det_file, len(dataloader))
logger.info('Finish loading detection results file {}.'.format(
det_file))
- for step_id, data in enumerate(dataloader):
+ tracker = self.model.tracker
+ for step_id, data in enumerate(tqdm(dataloader)):
self.status['step_id'] = step_id
- if frame_id % 40 == 0:
- logger.info('Processing frame {} ({:.2f} fps)'.format(
- frame_id, 1. / max(1e-5, timer.average_time)))
-
ori_image = data['ori_image'] # [bs, H, W, 3]
ori_image_shape = data['ori_image'].shape[1:3]
# ori_image_shape: [H, W]
@@ -214,7 +226,7 @@ class Tracker(object):
scale_factor = data['scale_factor'][0].numpy()
empty_detections = False
- # when it has no detected bboxes, will not inference reid model
+            # when there are no detected bboxes, the reid model will not be run
# and if visualize, use original image instead
# forward
@@ -240,7 +252,7 @@ class Tracker(object):
outs['bbox'] = outs['bbox'].numpy()
outs['bbox_num'] = outs['bbox_num'].numpy()
- if outs['bbox_num'] > 0 and empty_detections == False:
+            if len(outs['bbox']) > 0 and not empty_detections:
# detector outputs: pred_cls_ids, pred_scores, pred_bboxes
pred_cls_ids = outs['bbox'][:, 0:1]
pred_scores = outs['bbox'][:, 1:2]
@@ -249,13 +261,15 @@ class Tracker(object):
# with LetterBoxResize and JDEBBoxPostProcess.
#
# 'scaled' means whether the coords after detector outputs
- # have been scaled back to the original image, set True
+ # have been scaled back to the original image, set True
# in general detector, set False in JDE YOLOv3.
pred_bboxes = scale_coords(outs['bbox'][:, 2:],
input_shape, im_shape,
scale_factor)
else:
pred_bboxes = outs['bbox'][:, 2:]
+ pred_dets_old = np.concatenate(
+ (pred_cls_ids, pred_scores, pred_bboxes), axis=1)
else:
logger.warning(
'Frame {} has not detected object, try to modify score threshold.'.
@@ -281,52 +295,104 @@ class Tracker(object):
# thus will not inference reid model
continue
- pred_scores = pred_scores[keep_idx[0]]
pred_cls_ids = pred_cls_ids[keep_idx[0]]
- pred_tlwhs = np.concatenate(
- (pred_xyxys[:, 0:2],
- pred_xyxys[:, 2:4] - pred_xyxys[:, 0:2] + 1),
- axis=1)
+ pred_scores = pred_scores[keep_idx[0]]
pred_dets = np.concatenate(
- (pred_tlwhs, pred_scores, pred_cls_ids), axis=1)
-
- tracker = self.model.tracker
- crops = get_crops(
- pred_xyxys,
- ori_image,
- w=tracker.input_size[0],
- h=tracker.input_size[1])
- crops = paddle.to_tensor(crops)
-
- data.update({'crops': crops})
- pred_embs = self.model(data).numpy()
-
- tracker.predict()
- online_targets = tracker.update(pred_dets, pred_embs)
-
- online_tlwhs, online_scores, online_ids = [], [], []
- for t in online_targets:
- if not t.is_confirmed() or t.time_since_update > 1:
- continue
- tlwh = t.to_tlwh()
- tscore = t.score
- tid = t.track_id
- if tscore < draw_threshold: continue
- if tlwh[2] * tlwh[3] <= tracker.min_box_area: continue
- if tracker.vertical_ratio > 0 and tlwh[2] / tlwh[
- 3] > tracker.vertical_ratio:
- continue
- online_tlwhs.append(tlwh)
- online_scores.append(tscore)
- online_ids.append(tid)
- timer.toc()
+ (pred_cls_ids, pred_scores, pred_xyxys), axis=1)
+
+ if use_reid:
+ crops = get_crops(
+ pred_xyxys,
+ ori_image,
+ w=tracker.input_size[0],
+ h=tracker.input_size[1])
+ crops = paddle.to_tensor(crops)
+
+ data.update({'crops': crops})
+ pred_embs = self.model(data)['embeddings'].numpy()
+ else:
+ pred_embs = None
- # save results
- results[0].append(
- (frame_id + 1, online_tlwhs, online_scores, online_ids))
- save_vis_results(data, frame_id, online_ids, online_tlwhs,
- online_scores, timer.average_time, show_image,
- save_dir, self.cfg.num_classes)
+ if isinstance(tracker, DeepSORTTracker):
+ online_tlwhs, online_scores, online_ids = [], [], []
+ tracker.predict()
+ online_targets = tracker.update(pred_dets, pred_embs)
+ for t in online_targets:
+ if not t.is_confirmed() or t.time_since_update > 1:
+ continue
+ tlwh = t.to_tlwh()
+ tscore = t.score
+ tid = t.track_id
+ if tscore < draw_threshold: continue
+ if tlwh[2] * tlwh[3] <= tracker.min_box_area: continue
+ if tracker.vertical_ratio > 0 and tlwh[2] / tlwh[
+ 3] > tracker.vertical_ratio:
+ continue
+ online_tlwhs.append(tlwh)
+ online_scores.append(tscore)
+ online_ids.append(tid)
+ timer.toc()
+
+ # save results
+ results[0].append(
+ (frame_id + 1, online_tlwhs, online_scores, online_ids))
+ save_vis_results(data, frame_id, online_ids, online_tlwhs,
+ online_scores, timer.average_time, show_image,
+ save_dir, self.cfg.num_classes)
+
+ elif isinstance(tracker, JDETracker):
+ # trick hyperparams only used for MOTChallenge (MOT17, MOT20) Test-set
+ tracker.track_buffer, tracker.conf_thres = get_trick_hyperparams(
+ seq_name, tracker.track_buffer, tracker.conf_thres)
+
+ online_targets_dict = tracker.update(pred_dets_old, pred_embs)
+ online_tlwhs = defaultdict(list)
+ online_scores = defaultdict(list)
+ online_ids = defaultdict(list)
+ for cls_id in range(self.cfg.num_classes):
+ online_targets = online_targets_dict[cls_id]
+ for t in online_targets:
+ tlwh = t.tlwh
+ tid = t.track_id
+ tscore = t.score
+ if tlwh[2] * tlwh[3] <= tracker.min_box_area: continue
+ if tracker.vertical_ratio > 0 and tlwh[2] / tlwh[
+ 3] > tracker.vertical_ratio:
+ continue
+ online_tlwhs[cls_id].append(tlwh)
+ online_ids[cls_id].append(tid)
+ online_scores[cls_id].append(tscore)
+ # save results
+ results[cls_id].append(
+ (frame_id + 1, online_tlwhs[cls_id],
+ online_scores[cls_id], online_ids[cls_id]))
+ timer.toc()
+ save_vis_results(data, frame_id, online_ids, online_tlwhs,
+ online_scores, timer.average_time, show_image,
+ save_dir, self.cfg.num_classes)
+ elif isinstance(tracker, OCSORTTracker):
+ # OC_SORT Tracker
+ online_targets = tracker.update(pred_dets_old, pred_embs)
+ online_tlwhs = []
+ online_ids = []
+ online_scores = []
+ for t in online_targets:
+ tlwh = [t[0], t[1], t[2] - t[0], t[3] - t[1]]
+ tscore = float(t[4])
+ tid = int(t[5])
+ if tlwh[2] * tlwh[3] > 0:
+ online_tlwhs.append(tlwh)
+ online_ids.append(tid)
+ online_scores.append(tscore)
+ timer.toc()
+ # save results
+ results[0].append(
+ (frame_id + 1, online_tlwhs, online_scores, online_ids))
+ save_vis_results(data, frame_id, online_ids, online_tlwhs,
+ online_scores, timer.average_time, show_image,
+ save_dir, self.cfg.num_classes)
+ else:
+ raise ValueError('Unsupported tracker type: {}'.format(type(tracker).__name__))
frame_id += 1
return results, frame_id, timer.average_time, timer.calls
@@ -345,10 +411,10 @@ class Tracker(object):
if not os.path.exists(output_dir): os.makedirs(output_dir)
result_root = os.path.join(output_dir, 'mot_results')
if not os.path.exists(result_root): os.makedirs(result_root)
- assert data_type in ['mot', 'mcmot', 'kitti'], \
+ assert data_type in MOT_DATA_TYPE, \
"data_type should be 'mot', 'mcmot' or 'kitti'"
- assert model_type in ['JDE', 'DeepSORT', 'FairMOT'], \
- "model_type should be 'JDE', 'DeepSORT' or 'FairMOT'"
+ assert model_type in MOT_ARCH, \
+ "model_type should be 'JDE', 'DeepSORT', 'FairMOT' or 'ByteTrack'"
# run tracking
n_frame = 0
@@ -371,7 +437,7 @@ class Tracker(object):
save_dir = os.path.join(output_dir, 'mot_outputs',
seq) if save_images or save_videos else None
- logger.info('start seq: {}'.format(seq))
+ logger.info('Evaluate seq: {}'.format(seq))
self.dataset.set_images(self.get_infer_images(infer_dir))
dataloader = create('EvalMOTReader')(self.dataset, 0)
@@ -379,13 +445,13 @@ class Tracker(object):
result_filename = os.path.join(result_root, '{}.txt'.format(seq))
with paddle.no_grad():
- if model_type in ['JDE', 'FairMOT']:
+ if model_type in MOT_ARCH_JDE:
results, nf, ta, tc = self._eval_seq_jde(
dataloader,
save_dir=save_dir,
show_image=show_image,
frame_rate=frame_rate)
- elif model_type in ['DeepSORT']:
+ elif model_type in MOT_ARCH_SDE:
results, nf, ta, tc = self._eval_seq_sde(
dataloader,
save_dir=save_dir,
@@ -412,7 +478,6 @@ class Tracker(object):
os.system(cmd_str)
logger.info('Save video in {}.'.format(output_video_path))
- logger.info('Evaluate seq: {}'.format(seq))
# update metrics
for metric in self._metrics:
metric.update(data_root, seq, data_type, result_root,
@@ -471,12 +536,12 @@ class Tracker(object):
if not os.path.exists(output_dir): os.makedirs(output_dir)
result_root = os.path.join(output_dir, 'mot_results')
if not os.path.exists(result_root): os.makedirs(result_root)
- assert data_type in ['mot', 'mcmot', 'kitti'], \
+ assert data_type in MOT_DATA_TYPE, \
"data_type should be 'mot', 'mcmot' or 'kitti'"
- assert model_type in ['JDE', 'DeepSORT', 'FairMOT'], \
- "model_type should be 'JDE', 'DeepSORT' or 'FairMOT'"
+ assert model_type in MOT_ARCH, \
+ "model_type should be 'JDE', 'DeepSORT', 'FairMOT' or 'ByteTrack'"
- # run tracking
+ # run tracking
if video_file:
seq = video_file.split('/')[-1].split('.')[0]
self.dataset.set_video(video_file, frame_rate)
@@ -504,14 +569,14 @@ class Tracker(object):
frame_rate = self.dataset.frame_rate
with paddle.no_grad():
- if model_type in ['JDE', 'FairMOT']:
+ if model_type in MOT_ARCH_JDE:
results, nf, ta, tc = self._eval_seq_jde(
dataloader,
save_dir=save_dir,
show_image=show_image,
frame_rate=frame_rate,
draw_threshold=draw_threshold)
- elif model_type in ['DeepSORT']:
+ elif model_type in MOT_ARCH_SDE:
results, nf, ta, tc = self._eval_seq_sde(
dataloader,
save_dir=save_dir,
@@ -535,3 +600,35 @@ class Tracker(object):
write_mot_results(result_filename, results, data_type,
self.cfg.num_classes)
+
+
+def get_trick_hyperparams(video_name, ori_buffer, ori_thresh):
+ if video_name[:3] != 'MOT':
+ # only used for MOTChallenge (MOT17, MOT20) Test-set
+ return ori_buffer, ori_thresh
+
+ video_name = video_name[:8]
+ if 'MOT17-05' in video_name:
+ track_buffer = 14
+ elif 'MOT17-13' in video_name:
+ track_buffer = 25
+ else:
+ track_buffer = ori_buffer
+
+ if 'MOT17-01' in video_name:
+ track_thresh = 0.65
+ elif 'MOT17-06' in video_name:
+ track_thresh = 0.65
+ elif 'MOT17-12' in video_name:
+ track_thresh = 0.7
+ elif 'MOT17-14' in video_name:
+ track_thresh = 0.67
+ else:
+ track_thresh = ori_thresh
+
+ if 'MOT20-06' in video_name or 'MOT20-08' in video_name:
+ track_thresh = 0.3
+
+ return track_buffer, track_thresh
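+
+
+# Illustrative usage sketch (hypothetical sequence name, not part of the
+# upstream API): the JDETracker branch above calls this helper once per
+# sequence before updating tracks, e.g.
+#
+#   buffer, thresh = get_trick_hyperparams('MOT17-05', 30, 0.6)
+#   # -> buffer == 14, thresh == 0.6 (no threshold override for MOT17-05)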
diff --git a/paddlers/models/ppdet/engine/trainer.py b/paddlers/models/ppdet/engine/trainer.py
index 2d6640e..93caa64 100644
--- a/paddlers/models/ppdet/engine/trainer.py
+++ b/paddlers/models/ppdet/engine/trainer.py
@@ -20,38 +20,44 @@ import os
import sys
import copy
import time
+from tqdm import tqdm
import numpy as np
import typing
-from PIL import Image, ImageOps
+from PIL import Image, ImageOps, ImageFile
+
+ImageFile.LOAD_TRUNCATED_IMAGES = True
import paddle
+import paddle.nn as nn
import paddle.distributed as dist
from paddle.distributed import fleet
-from paddle import amp
from paddle.static import InputSpec
from paddlers.models.ppdet.optimizer import ModelEMA
from paddlers.models.ppdet.core.workspace import create
-from paddlers.models.ppdet.modeling.architectures.meta_arch import BaseArch
from paddlers.models.ppdet.utils.checkpoint import load_weight, load_pretrain_weight
from paddlers.models.ppdet.utils.visualizer import visualize_results, save_result
from paddlers.models.ppdet.metrics import Metric, COCOMetric, VOCMetric, WiderFaceMetric, get_infer_results, KeyPointTopDownCOCOEval, KeyPointTopDownMPIIEval
from paddlers.models.ppdet.metrics import RBoxMetric, JDEDetMetric, SNIPERCOCOMetric
from paddlers.models.ppdet.data.source.sniper_coco import SniperCOCODataSet
from paddlers.models.ppdet.data.source.category import get_categories
-from paddlers.models.ppdet.utils import stats
+import paddlers.models.ppdet.utils.stats as stats
+from paddlers.models.ppdet.utils.fuse_utils import fuse_conv_bn
from paddlers.models.ppdet.utils import profiler
+from paddlers.models.ppdet.modeling.post_process import multiclass_nms
-from .callbacks import Callback, ComposeCallback, LogPrinter, Checkpointer, WiferFaceEval, VisualDLWriter, SniperProposalsGenerator
+from .callbacks import Callback, ComposeCallback, LogPrinter, Checkpointer, WiferFaceEval, VisualDLWriter, SniperProposalsGenerator, WandbCallback
from .export_utils import _dump_infer_config, _prune_input_spec
+from paddle.distributed.fleet.utils.hybrid_parallel_util import fused_allreduce_gradients
+
from paddlers.models.ppdet.utils.logger import setup_logger
logger = setup_logger('ppdet.engine')
__all__ = ['Trainer']
-MOT_ARCH = ['DeepSORT', 'JDE', 'FairMOT']
+MOT_ARCH = ['DeepSORT', 'JDE', 'FairMOT', 'ByteTrack']
class Trainer(object):
@@ -62,19 +68,30 @@ class Trainer(object):
self.mode = mode.lower()
self.optimizer = None
self.is_loaded_weights = False
+ self.use_amp = self.cfg.get('amp', False)
+ self.amp_level = self.cfg.get('amp_level', 'O1')
+ self.custom_white_list = self.cfg.get('custom_white_list', None)
+ self.custom_black_list = self.cfg.get('custom_black_list', None)
# build data loader
+ capital_mode = self.mode.capitalize()
if cfg.architecture in MOT_ARCH and self.mode in ['eval', 'test']:
- self.dataset = cfg['{}MOTDataset'.format(self.mode.capitalize())]
+ self.dataset = self.cfg['{}MOTDataset'.format(
+ capital_mode)] = create('{}MOTDataset'.format(capital_mode))()
else:
- self.dataset = cfg['{}Dataset'.format(self.mode.capitalize())]
+ self.dataset = self.cfg['{}Dataset'.format(capital_mode)] = create(
+ '{}Dataset'.format(capital_mode))()
if cfg.architecture == 'DeepSORT' and self.mode == 'train':
logger.error('DeepSORT has no need of training on mot dataset.')
sys.exit(1)
+ if cfg.architecture == 'FairMOT' and self.mode == 'eval':
+ images = self.parse_mot_images(cfg)
+ self.dataset.set_images(images)
+
if self.mode == 'train':
- self.loader = create('{}Reader'.format(self.mode.capitalize()))(
+ self.loader = create('{}Reader'.format(capital_mode))(
self.dataset, cfg.worker_num)
if cfg.architecture == 'JDE' and self.mode == 'train':
@@ -94,41 +111,73 @@ class Trainer(object):
self.model = self.cfg.model
self.is_loaded_weights = True
- #normalize params for deploy
- self.model.load_meanstd(cfg['TestReader']['sample_transforms'])
+ if cfg.architecture == 'YOLOX':
+ for k, m in self.model.named_sublayers():
+ if isinstance(m, nn.BatchNorm2D):
+ m._epsilon = 1e-3 # for amp(fp16)
+ m._momentum = 0.97 # 0.03 in pytorch
- self.use_ema = ('use_ema' in cfg and cfg['use_ema'])
- if self.use_ema:
- ema_decay = self.cfg.get('ema_decay', 0.9998)
- cycle_epoch = self.cfg.get('cycle_epoch', -1)
- self.ema = ModelEMA(
- self.model,
- decay=ema_decay,
- use_thres_step=True,
- cycle_epoch=cycle_epoch)
+ #normalize params for deploy
+ if 'slim' in cfg and cfg['slim_type'] == 'OFA':
+ self.model.model.load_meanstd(cfg['TestReader'][
+ 'sample_transforms'])
+ elif 'slim' in cfg and cfg['slim_type'] == 'Distill':
+ self.model.student_model.load_meanstd(cfg['TestReader'][
+ 'sample_transforms'])
+ elif 'slim' in cfg and cfg[
+ 'slim_type'] == 'DistillPrune' and self.mode == 'train':
+ self.model.student_model.load_meanstd(cfg['TestReader'][
+ 'sample_transforms'])
+ else:
+ self.model.load_meanstd(cfg['TestReader']['sample_transforms'])
# EvalDataset build with BatchSampler to evaluate in single device
# TODO: multi-device evaluate
if self.mode == 'eval':
- self._eval_batch_sampler = paddle.io.BatchSampler(
- self.dataset, batch_size=self.cfg.EvalReader['batch_size'])
- reader_name = '{}Reader'.format(self.mode.capitalize())
- # If metric is VOC, need to be set collate_batch=False.
- if cfg.metric == 'VOC':
- cfg[reader_name]['collate_batch'] = False
- self.loader = create(reader_name)(self.dataset, cfg.worker_num,
- self._eval_batch_sampler)
+ if cfg.architecture == 'FairMOT':
+ self.loader = create('EvalMOTReader')(self.dataset, 0)
+ else:
+ self._eval_batch_sampler = paddle.io.BatchSampler(
+ self.dataset, batch_size=self.cfg.EvalReader['batch_size'])
+ reader_name = '{}Reader'.format(self.mode.capitalize())
+ # If metric is VOC, need to be set collate_batch=False.
+ if cfg.metric == 'VOC':
+ cfg[reader_name]['collate_batch'] = False
+ self.loader = create(reader_name)(self.dataset, cfg.worker_num,
+ self._eval_batch_sampler)
# TestDataset build after user set images, skip loader creation here
# build optimizer in train mode
if self.mode == 'train':
steps_per_epoch = len(self.loader)
+ if steps_per_epoch < 1:
+ logger.warning(
+ "Samples in dataset are less than batch_size, please set smaller batch_size in TrainReader."
+ )
self.lr = create('LearningRate')(steps_per_epoch)
self.optimizer = create('OptimizerBuilder')(self.lr, self.model)
- if self.cfg.get('unstructured_prune'):
- self.pruner = create('UnstructuredPruner')(self.model,
- steps_per_epoch)
+ # Unstructured pruner is only enabled in the train mode.
+ if self.cfg.get('unstructured_prune'):
+ self.pruner = create('UnstructuredPruner')(self.model,
+ steps_per_epoch)
+ if self.use_amp and self.amp_level == 'O2':
+ self.model, self.optimizer = paddle.amp.decorate(
+ models=self.model,
+ optimizers=self.optimizer,
+ level=self.amp_level)
+ self.use_ema = ('use_ema' in cfg and cfg['use_ema'])
+ if self.use_ema:
+ ema_decay = self.cfg.get('ema_decay', 0.9998)
+ ema_decay_type = self.cfg.get('ema_decay_type', 'threshold')
+ cycle_epoch = self.cfg.get('cycle_epoch', -1)
+ ema_black_list = self.cfg.get('ema_black_list', None)
+ self.ema = ModelEMA(
+ self.model,
+ decay=ema_decay,
+ ema_decay_type=ema_decay_type,
+ cycle_epoch=cycle_epoch,
+ ema_black_list=ema_black_list)
self._nranks = dist.get_world_size()
self._local_rank = dist.get_rank()
@@ -152,6 +201,8 @@ class Trainer(object):
self._callbacks.append(VisualDLWriter(self))
if self.cfg.get('save_proposals', False):
self._callbacks.append(SniperProposalsGenerator(self))
+ if self.cfg.get('use_wandb', False) or 'wandb' in self.cfg:
+ self._callbacks.append(WandbCallback(self))
self._compose_callback = ComposeCallback(self._callbacks)
elif self.mode == 'eval':
self._callbacks = [LogPrinter(self)]
@@ -172,7 +223,7 @@ class Trainer(object):
classwise = self.cfg['classwise'] if 'classwise' in self.cfg else False
if self.cfg.metric == 'COCO' or self.cfg.metric == "SNIPERCOCO":
# TODO: bias should be unified
- bias = self.cfg['bias'] if 'bias' in self.cfg else 0
+ bias = 1 if self.cfg.get('bias', False) else 0
output_eval = self.cfg['output_eval'] \
if 'output_eval' in self.cfg else None
save_prediction_only = self.cfg.get('save_prediction_only', False)
@@ -184,13 +235,14 @@ class Trainer(object):
# when do validation in train, annotation file should be get from
# EvalReader instead of self.dataset(which is TrainReader)
- anno_file = self.dataset.get_anno()
- dataset = self.dataset
if self.mode == 'train' and validate:
eval_dataset = self.cfg['EvalDataset']
eval_dataset.check_or_download_dataset()
anno_file = eval_dataset.get_anno()
dataset = eval_dataset
+ else:
+ dataset = self.dataset
+ anno_file = dataset.get_anno()
IouType = self.cfg['IouType'] if 'IouType' in self.cfg else 'bbox'
if self.cfg.metric == "COCO":
@@ -222,11 +274,7 @@ class Trainer(object):
output_eval = self.cfg['output_eval'] \
if 'output_eval' in self.cfg else None
save_prediction_only = self.cfg.get('save_prediction_only', False)
-
- # pass clsid2catid info to metric instance to avoid multiple loading
- # annotation file
- clsid2catid = {v: k for k, v in self.dataset.catid2clsid.items()} \
- if self.mode == 'eval' else None
+ imid2path = self.cfg.get('imid2path', None)
# when do validation in train, annotation file should be get from
# EvalReader instead of self.dataset(which is TrainReader)
@@ -239,19 +287,25 @@ class Trainer(object):
self._metrics = [
RBoxMetric(
anno_file=anno_file,
- clsid2catid=clsid2catid,
classwise=classwise,
output_eval=output_eval,
bias=bias,
- save_prediction_only=save_prediction_only)
+ save_prediction_only=save_prediction_only,
+ imid2path=imid2path)
]
elif self.cfg.metric == 'VOC':
+ output_eval = self.cfg['output_eval'] \
+ if 'output_eval' in self.cfg else None
+ save_prediction_only = self.cfg.get('save_prediction_only', False)
+
self._metrics = [
VOCMetric(
label_list=self.dataset.get_label_list(),
class_num=self.cfg.num_classes,
map_type=self.cfg.map_type,
- classwise=classwise)
+ classwise=classwise,
+ output_eval=output_eval,
+ save_prediction_only=save_prediction_only)
]
elif self.cfg.metric == 'WiderFace':
multi_scale = self.cfg.multi_scale_eval if 'multi_scale_eval' in self.cfg else True
@@ -334,19 +388,29 @@ class Trainer(object):
self.start_epoch = load_weight(self.model.student_model, weights,
self.optimizer)
else:
- self.start_epoch = load_weight(self.model, weights, self.optimizer)
+ self.start_epoch = load_weight(self.model, weights, self.optimizer,
+ self.ema if self.use_ema else None)
logger.debug("Resume weights of epoch {}".format(self.start_epoch))
def train(self, validate=False):
assert self.mode == 'train', "Model not in 'train' mode"
Init_mark = False
+ if validate:
+ self.cfg['EvalDataset'] = self.cfg.EvalDataset = create(
+ "EvalDataset")()
- sync_bn = (getattr(self.cfg, 'norm_type', None) in [None, 'sync_bn'] and
+ model = self.model
+ sync_bn = (getattr(self.cfg, 'norm_type', None) == 'sync_bn' and
self.cfg.use_gpu and self._nranks > 1)
if sync_bn:
- self.model = BaseArch.convert_sync_batchnorm(self.model)
-
- model = self.model
+ model = paddle.nn.SyncBatchNorm.convert_sync_batchnorm(model)
+
+ # enable auto mixed precision mode
+ if self.use_amp:
+ scaler = paddle.amp.GradScaler(
+ enable=self.cfg.use_gpu or self.cfg.use_npu,
+ init_loss_scaling=self.cfg.get('init_loss_scaling', 1024))
+ # get distributed model
if self.cfg.get('fleet', False):
model = fleet.distributed_model(model)
self.optimizer = fleet.distributed_optimizer(self.optimizer)
@@ -354,12 +418,7 @@ class Trainer(object):
find_unused_parameters = self.cfg[
'find_unused_parameters'] if 'find_unused_parameters' in self.cfg else False
model = paddle.DataParallel(
- self.model, find_unused_parameters=find_unused_parameters)
-
- # initial fp16
- if self.cfg.get('fp16', False):
- scaler = amp.GradScaler(
- enable=self.cfg.use_gpu, init_loss_scaling=1024)
+ model, find_unused_parameters=find_unused_parameters)
self.status.update({
'epoch_id': self.start_epoch,
@@ -381,6 +440,9 @@ class Trainer(object):
self._compose_callback.on_train_begin(self.status)
+ use_fused_allreduce_gradients = self.cfg[
+ 'use_fused_allreduce_gradients'] if 'use_fused_allreduce_gradients' in self.cfg else False
+
for epoch_id in range(self.start_epoch, self.cfg.epoch):
self.status['mode'] = 'train'
self.status['epoch_id'] = epoch_id
@@ -395,23 +457,56 @@ class Trainer(object):
self._compose_callback.on_step_begin(self.status)
data['epoch_id'] = epoch_id
- if self.cfg.get('fp16', False):
- with amp.auto_cast(enable=self.cfg.use_gpu):
- # model forward
- outputs = model(data)
- loss = outputs['loss']
-
- # model backward
- scaled_loss = scaler.scale(loss)
- scaled_loss.backward()
+ if self.use_amp:
+ if isinstance(
+ model, paddle.
+ DataParallel) and use_fused_allreduce_gradients:
+ with model.no_sync():
+ with paddle.amp.auto_cast(
+ enable=self.cfg.use_gpu,
+ custom_white_list=self.custom_white_list,
+ custom_black_list=self.custom_black_list,
+ level=self.amp_level):
+ # model forward
+ outputs = model(data)
+ loss = outputs['loss']
+ # model backward
+ scaled_loss = scaler.scale(loss)
+ scaled_loss.backward()
+ fused_allreduce_gradients(
+ list(model.parameters()), None)
+ else:
+ with paddle.amp.auto_cast(
+ enable=self.cfg.use_gpu,
+ custom_white_list=self.custom_white_list,
+ custom_black_list=self.custom_black_list,
+ level=self.amp_level):
+ # model forward
+ outputs = model(data)
+ loss = outputs['loss']
+ # model backward
+ scaled_loss = scaler.scale(loss)
+ scaled_loss.backward()
# in dygraph mode, optimizer.minimize is equal to optimizer.step
scaler.minimize(self.optimizer, scaled_loss)
else:
- # model forward
- outputs = model(data)
- loss = outputs['loss']
- # model backward
- loss.backward()
+ if isinstance(
+ model, paddle.
+ DataParallel) and use_fused_allreduce_gradients:
+ with model.no_sync():
+ # model forward
+ outputs = model(data)
+ loss = outputs['loss']
+ # model backward
+ loss.backward()
+ fused_allreduce_gradients(
+ list(model.parameters()), None)
+ else:
+ # model forward
+ outputs = model(data)
+ loss = outputs['loss']
+ # model backward
+ loss.backward()
self.optimizer.step()
curr_lr = self.optimizer.get_lr()
self.lr.step()
@@ -426,21 +521,23 @@ class Trainer(object):
self.status['batch_time'].update(time.time() - iter_tic)
self._compose_callback.on_step_end(self.status)
if self.use_ema:
- self.ema.update(self.model)
+ self.ema.update()
iter_tic = time.time()
- # apply ema weight on model
- if self.use_ema:
- weight = copy.deepcopy(self.model.state_dict())
- self.model.set_dict(self.ema.apply())
if self.cfg.get('unstructured_prune'):
self.pruner.update_params()
+ is_snapshot = (self._nranks < 2 or self._local_rank == 0) \
+ and ((epoch_id + 1) % self.cfg.snapshot_epoch == 0 or epoch_id == self.end_epoch - 1)
+ if is_snapshot and self.use_ema:
+ # apply ema weight on model
+ weight = copy.deepcopy(self.model.state_dict())
+ self.model.set_dict(self.ema.apply())
+ self.status['weight'] = weight
+
self._compose_callback.on_epoch_end(self.status)
- if validate and (self._nranks < 2 or self._local_rank == 0) \
- and ((epoch_id + 1) % self.cfg.snapshot_epoch == 0 \
- or epoch_id == self.end_epoch - 1):
+ if validate and is_snapshot:
if not hasattr(self, '_eval_loader'):
# build evaluation dataset and loader
self._eval_dataset = self.cfg.EvalDataset
@@ -461,13 +558,15 @@ class Trainer(object):
Init_mark = True
self._init_metrics(validate=validate)
self._reset_metrics()
+
with paddle.no_grad():
self.status['save_best_model'] = True
self._eval_with_loader(self._eval_loader)
- # restore origin weight on model
- if self.use_ema:
+ if is_snapshot and self.use_ema:
+ # reset original weight
self.model.set_dict(weight)
+ self.status.pop('weight')
self._compose_callback.on_train_end(self.status)
@@ -485,7 +584,15 @@ class Trainer(object):
self.status['step_id'] = step_id
self._compose_callback.on_step_begin(self.status)
# forward
- outs = self.model(data)
+ if self.use_amp:
+ with paddle.amp.auto_cast(
+ enable=self.cfg.use_gpu,
+ custom_white_list=self.custom_white_list,
+ custom_black_list=self.custom_black_list,
+ level=self.amp_level):
+ outs = self.model(data)
+ else:
+ outs = self.model(data)
# update metrics
for metric in self._metrics:
@@ -513,32 +620,267 @@ class Trainer(object):
with paddle.no_grad():
self._eval_with_loader(self.loader)
+ def _eval_with_loader_slice(self,
+ loader,
+ slice_size=[640, 640],
+ overlap_ratio=[0.25, 0.25],
+ combine_method='nms',
+ match_threshold=0.6,
+ match_metric='iou'):
+ sample_num = 0
+ tic = time.time()
+ self._compose_callback.on_epoch_begin(self.status)
+ self.status['mode'] = 'eval'
+ self.model.eval()
+ if self.cfg.get('print_flops', False):
+ flops_loader = create('{}Reader'.format(self.mode.capitalize()))(
+ self.dataset, self.cfg.worker_num, self._eval_batch_sampler)
+ self._flops(flops_loader)
+
+ merged_bboxs = []
+ for step_id, data in enumerate(loader):
+ self.status['step_id'] = step_id
+ self._compose_callback.on_step_begin(self.status)
+ # forward
+ if self.use_amp:
+ with paddle.amp.auto_cast(
+ enable=self.cfg.use_gpu,
+ custom_white_list=self.custom_white_list,
+ custom_black_list=self.custom_black_list,
+ level=self.amp_level):
+ outs = self.model(data)
+ else:
+ outs = self.model(data)
+
+ shift_amount = data['st_pix']
+ outs['bbox'][:, 2:4] = outs['bbox'][:, 2:4] + shift_amount
+ outs['bbox'][:, 4:6] = outs['bbox'][:, 4:6] + shift_amount
+ merged_bboxs.append(outs['bbox'])
+
+ if data['is_last'] > 0:
+ # merge matching predictions
+ merged_results = {'bbox': []}
+ if combine_method == 'nms':
+ final_boxes = multiclass_nms(
+ np.concatenate(merged_bboxs), self.cfg.num_classes,
+ match_threshold, match_metric)
+ merged_results['bbox'] = np.concatenate(final_boxes)
+ elif combine_method == 'concat':
+ merged_results['bbox'] = np.concatenate(merged_bboxs)
+ else:
+ raise ValueError(
+ "Now only support 'nms' or 'concat' to fuse detection results."
+ )
+ merged_results['im_id'] = np.array([[0]])
+ merged_results['bbox_num'] = np.array(
+ [len(merged_results['bbox'])])
+
+ merged_bboxs = []
+ data['im_id'] = data['ori_im_id']
+ # update metrics
+ for metric in self._metrics:
+ metric.update(data, merged_results)
+
+ # multi-scale inputs: all inputs have same im_id
+ if isinstance(data, typing.Sequence):
+ sample_num += data[0]['im_id'].numpy().shape[0]
+ else:
+ sample_num += data['im_id'].numpy().shape[0]
+
+ self._compose_callback.on_step_end(self.status)
+
+ self.status['sample_num'] = sample_num
+ self.status['cost_time'] = time.time() - tic
+
+ # accumulate metric to log out
+ for metric in self._metrics:
+ metric.accumulate()
+ metric.log()
+ self._compose_callback.on_epoch_end(self.status)
+ # reset metric states for metric may performed multiple times
+ self._reset_metrics()
+
+ def evaluate_slice(self,
+ slice_size=[640, 640],
+ overlap_ratio=[0.25, 0.25],
+ combine_method='nms',
+ match_threshold=0.6,
+ match_metric='iou'):
+ with paddle.no_grad():
+ self._eval_with_loader_slice(self.loader, slice_size, overlap_ratio,
+ combine_method, match_threshold,
+ match_metric)
+
+ def slice_predict(self,
+ images,
+ slice_size=[640, 640],
+ overlap_ratio=[0.25, 0.25],
+ combine_method='nms',
+ match_threshold=0.6,
+ match_metric='iou',
+ draw_threshold=0.5,
+ output_dir='output',
+ save_results=False,
+ visualize=True):
+ self.dataset.set_slice_images(images, slice_size, overlap_ratio)
+ loader = create('TestReader')(self.dataset, 0)
+
+ imid2path = self.dataset.get_imid2path()
+
+ anno_file = self.dataset.get_anno()
+ clsid2catid, catid2name = get_categories(
+ self.cfg.metric, anno_file=anno_file)
+
+ # Run Infer
+ self.status['mode'] = 'test'
+ self.model.eval()
+ if self.cfg.get('print_flops', False):
+ flops_loader = create('TestReader')(self.dataset, 0)
+ self._flops(flops_loader)
+
+ results = [] # all images
+ merged_bboxs = [] # single image
+ for step_id, data in enumerate(tqdm(loader)):
+ self.status['step_id'] = step_id
+ # forward
+ outs = self.model(data)
+
+ outs['bbox'] = outs['bbox'].numpy() # only in test mode
+ shift_amount = data['st_pix']
+ outs['bbox'][:, 2:4] = outs['bbox'][:, 2:4] + shift_amount.numpy()
+ outs['bbox'][:, 4:6] = outs['bbox'][:, 4:6] + shift_amount.numpy()
+ merged_bboxs.append(outs['bbox'])
+
+ if data['is_last'] > 0:
+ # merge matching predictions
+ merged_results = {'bbox': []}
+ if combine_method == 'nms':
+ final_boxes = multiclass_nms(
+ np.concatenate(merged_bboxs), self.cfg.num_classes,
+ match_threshold, match_metric)
+ merged_results['bbox'] = np.concatenate(final_boxes)
+ elif combine_method == 'concat':
+ merged_results['bbox'] = np.concatenate(merged_bboxs)
+ else:
+ raise ValueError(
+ "Now only support 'nms' or 'concat' to fuse detection results."
+ )
+ merged_results['im_id'] = np.array([[0]])
+ merged_results['bbox_num'] = np.array(
+ [len(merged_results['bbox'])])
+
+ merged_bboxs = []
+ data['im_id'] = data['ori_im_id']
+
+ for key in ['im_shape', 'scale_factor', 'im_id']:
+ if isinstance(data, typing.Sequence):
+ merged_results[key] = data[0][key]
+ else:
+ merged_results[key] = data[key]
+ for key, value in merged_results.items():
+ if hasattr(value, 'numpy'):
+ merged_results[key] = value.numpy()
+ results.append(merged_results)
+
+ if visualize:
+ for outs in results:
+ batch_res = get_infer_results(outs, clsid2catid)
+ bbox_num = outs['bbox_num']
+ start = 0
+ for i, im_id in enumerate(outs['im_id']):
+ image_path = imid2path[int(im_id)]
+ image = Image.open(image_path).convert('RGB')
+ image = ImageOps.exif_transpose(image)
+ self.status['original_image'] = np.array(image.copy())
+ end = start + bbox_num[i]
+ bbox_res = batch_res['bbox'][start:end] \
+ if 'bbox' in batch_res else None
+ mask_res, segm_res, keypoint_res = None, None, None
+ image = visualize_results(
+ image, bbox_res, mask_res, segm_res, keypoint_res,
+ int(im_id), catid2name, draw_threshold)
+ self.status['result_image'] = np.array(image.copy())
+ if self._compose_callback:
+ self._compose_callback.on_step_end(self.status)
+ # save image with detection
+ save_name = self._get_save_image_name(output_dir,
+ image_path)
+ logger.info("Detection bbox results save in {}".format(
+ save_name))
+ image.save(save_name, quality=95)
+ start = end
+
def predict(self,
images,
draw_threshold=0.5,
output_dir='output',
- save_txt=False):
+ save_results=False,
+ visualize=True):
+ if not os.path.exists(output_dir):
+ os.makedirs(output_dir)
+
self.dataset.set_images(images)
loader = create('TestReader')(self.dataset, 0)
imid2path = self.dataset.get_imid2path()
+ def setup_metrics_for_loader():
+ # remember current settings so they can be restored afterwards
+ metrics = copy.deepcopy(self._metrics)
+ mode = self.mode
+ save_prediction_only = self.cfg[
+ 'save_prediction_only'] if 'save_prediction_only' in self.cfg else None
+ output_eval = self.cfg[
+ 'output_eval'] if 'output_eval' in self.cfg else None
+
+ # temporarily switch the config to prediction-saving mode
+ self.mode = '_test'
+ self.cfg['save_prediction_only'] = True
+ self.cfg['output_eval'] = output_dir
+ self.cfg['imid2path'] = imid2path
+ self._init_metrics()
+
+ # restore
+ self.mode = mode
+ self.cfg.pop('save_prediction_only')
+ if save_prediction_only is not None:
+ self.cfg['save_prediction_only'] = save_prediction_only
+
+ self.cfg.pop('output_eval')
+ if output_eval is not None:
+ self.cfg['output_eval'] = output_eval
+
+ self.cfg.pop('imid2path')
+
+ _metrics = copy.deepcopy(self._metrics)
+ self._metrics = metrics
+
+ return _metrics
+
+ if save_results:
+ metrics = setup_metrics_for_loader()
+ else:
+ metrics = []
+
anno_file = self.dataset.get_anno()
clsid2catid, catid2name = get_categories(
self.cfg.metric, anno_file=anno_file)
- # Run Infer
+ # Run Infer
self.status['mode'] = 'test'
self.model.eval()
if self.cfg.get('print_flops', False):
flops_loader = create('TestReader')(self.dataset, 0)
self._flops(flops_loader)
results = []
- for step_id, data in enumerate(loader):
+ for step_id, data in enumerate(tqdm(loader)):
self.status['step_id'] = step_id
# forward
outs = self.model(data)
+ for _m in metrics:
+ _m.update(data, outs)
+
for key in ['im_shape', 'scale_factor', 'im_id']:
if isinstance(data, typing.Sequence):
outs[key] = data[0][key]
@@ -548,64 +890,64 @@ class Trainer(object):
if hasattr(value, 'numpy'):
outs[key] = value.numpy()
results.append(outs)
+
# sniper
if type(self.dataset) == SniperCOCODataSet:
results = self.dataset.anno_cropper.aggregate_chips_detections(
results)
- for outs in results:
- batch_res = get_infer_results(outs, clsid2catid)
- bbox_num = outs['bbox_num']
-
- start = 0
- for i, im_id in enumerate(outs['im_id']):
- image_path = imid2path[int(im_id)]
- image = Image.open(image_path).convert('RGB')
- image = ImageOps.exif_transpose(image)
- self.status['original_image'] = np.array(image.copy())
-
- end = start + bbox_num[i]
- bbox_res = batch_res['bbox'][start:end] \
- if 'bbox' in batch_res else None
- mask_res = batch_res['mask'][start:end] \
- if 'mask' in batch_res else None
- segm_res = batch_res['segm'][start:end] \
- if 'segm' in batch_res else None
- keypoint_res = batch_res['keypoint'][start:end] \
- if 'keypoint' in batch_res else None
- image = visualize_results(
- image, bbox_res, mask_res, segm_res, keypoint_res,
- int(im_id), catid2name, draw_threshold)
- self.status['result_image'] = np.array(image.copy())
- if self._compose_callback:
- self._compose_callback.on_step_end(self.status)
- # save image with detection
- save_name = self._get_save_image_name(output_dir, image_path)
- logger.info("Detection bbox results save in {}".format(
- save_name))
- image.save(save_name, quality=95)
- if save_txt:
- save_path = os.path.splitext(save_name)[0] + '.txt'
- results = {}
- results["im_id"] = im_id
- if bbox_res:
- results["bbox_res"] = bbox_res
- if keypoint_res:
- results["keypoint_res"] = keypoint_res
- save_result(save_path, results, catid2name, draw_threshold)
- start = end
+ for _m in metrics:
+ _m.accumulate()
+ _m.reset()
+
+ if visualize:
+ for outs in results:
+ batch_res = get_infer_results(outs, clsid2catid)
+ bbox_num = outs['bbox_num']
+
+ start = 0
+ for i, im_id in enumerate(outs['im_id']):
+ image_path = imid2path[int(im_id)]
+ image = Image.open(image_path).convert('RGB')
+ image = ImageOps.exif_transpose(image)
+ self.status['original_image'] = np.array(image.copy())
+
+ end = start + bbox_num[i]
+ bbox_res = batch_res['bbox'][start:end] \
+ if 'bbox' in batch_res else None
+ mask_res = batch_res['mask'][start:end] \
+ if 'mask' in batch_res else None
+ segm_res = batch_res['segm'][start:end] \
+ if 'segm' in batch_res else None
+ keypoint_res = batch_res['keypoint'][start:end] \
+ if 'keypoint' in batch_res else None
+ image = visualize_results(
+ image, bbox_res, mask_res, segm_res, keypoint_res,
+ int(im_id), catid2name, draw_threshold)
+ self.status['result_image'] = np.array(image.copy())
+ if self._compose_callback:
+ self._compose_callback.on_step_end(self.status)
+ # save image with detection
+ save_name = self._get_save_image_name(output_dir,
+ image_path)
+ logger.info("Detection bbox results save in {}".format(
+ save_name))
+ image.save(save_name, quality=95)
+
+ start = end
def _get_save_image_name(self, output_dir, image_path):
"""
Get save image name from source image path.
"""
- if not os.path.exists(output_dir):
- os.makedirs(output_dir)
image_name = os.path.split(image_path)[-1]
name, ext = os.path.splitext(image_name)
return os.path.join(output_dir, "{}".format(name)) + ext
- def _get_infer_cfg_and_input_spec(self, save_dir, prune_input=True):
+ def _get_infer_cfg_and_input_spec(self,
+ save_dir,
+ prune_input=True,
+ kl_quant=False):
image_shape = None
im_shape = [None, 2]
scale_factor = [None, 2]
@@ -628,9 +970,27 @@ class Trainer(object):
if hasattr(self.model, 'deploy'):
self.model.deploy = True
+
+ if 'slim' not in self.cfg:
+ for layer in self.model.sublayers():
+ if hasattr(layer, 'convert_to_deploy'):
+ layer.convert_to_deploy()
+
+ export_post_process = self.cfg['export'].get(
+ 'post_process', False) if hasattr(self.cfg, 'export') else True
+ export_nms = self.cfg['export'].get('nms', False) if hasattr(
+ self.cfg, 'export') else True
+ export_benchmark = self.cfg['export'].get(
+ 'benchmark', False) if hasattr(self.cfg, 'export') else False
if hasattr(self.model, 'fuse_norm'):
self.model.fuse_norm = self.cfg['TestReader'].get('fuse_normalize',
False)
+ if hasattr(self.model, 'export_post_process'):
+ self.model.export_post_process = export_post_process if not export_benchmark else False
+ if hasattr(self.model, 'export_nms'):
+ self.model.export_nms = export_nms if not export_benchmark else False
+ if export_post_process and not export_benchmark:
+ image_shape = [None] + image_shape[1:]
# Save infer cfg
_dump_infer_config(self.cfg,
@@ -663,16 +1023,34 @@ class Trainer(object):
pruned_input_spec = input_spec
# TODO: Hard code, delete it when support prune input_spec.
- if self.cfg.architecture == 'PicoDet':
+ if self.cfg.architecture == 'PicoDet' and not export_post_process:
pruned_input_spec = [{
"image": InputSpec(
shape=image_shape, name='image')
}]
+ if kl_quant:
+ if self.cfg.architecture == 'PicoDet' or 'ppyoloe' in self.cfg.weights:
+ pruned_input_spec = [{
+ "image": InputSpec(
+ shape=image_shape, name='image'),
+ "scale_factor": InputSpec(
+ shape=scale_factor, name='scale_factor')
+ }]
+ elif 'tinypose' in self.cfg.weights:
+ pruned_input_spec = [{
+ "image": InputSpec(
+ shape=image_shape, name='image')
+ }]
return static_model, pruned_input_spec
def export(self, output_dir='output_inference'):
self.model.eval()
+
+ if hasattr(self.cfg, 'export') and 'fuse_conv_bn' in self.cfg[
+ 'export'] and self.cfg['export']['fuse_conv_bn']:
+ self.model = fuse_conv_bn(self.model)
+
model_name = os.path.splitext(os.path.split(self.cfg.filename)[-1])[0]
save_dir = os.path.join(output_dir, model_name)
if not os.path.exists(save_dir):
@@ -682,7 +1060,7 @@ class Trainer(object):
save_dir)
# dy2st and save model
- if 'slim' not in self.cfg or self.cfg['slim_type'] != 'QAT':
+ if 'slim' not in self.cfg or 'QAT' not in self.cfg['slim_type']:
paddle.jit.save(
static_model,
os.path.join(save_dir, 'model'),
@@ -706,8 +1084,9 @@ class Trainer(object):
break
# TODO: support prune input_spec
+ kl_quant = True if hasattr(self.cfg.slim, 'ptq') else False
_, pruned_input_spec = self._get_infer_cfg_and_input_spec(
- save_dir, prune_input=False)
+ save_dir, prune_input=False, kl_quant=kl_quant)
self.cfg.slim.save_quantized_model(
self.model,
@@ -739,3 +1118,29 @@ class Trainer(object):
flops = flops(self.model, input_spec) / (1000**3)
logger.info(" Model FLOPs : {:.6f}G. (image shape is {})".format(
flops, input_data['image'][0].unsqueeze(0).shape))
+
+ def parse_mot_images(self, cfg):
+ import glob
+ # for quant
+ dataset_dir = cfg['EvalMOTDataset'].dataset_dir
+ data_root = cfg['EvalMOTDataset'].data_root
+ data_root = '{}/{}'.format(dataset_dir, data_root)
+ seqs = os.listdir(data_root)
+ seqs.sort()
+ all_images = []
+ for seq in seqs:
+ infer_dir = os.path.join(data_root, seq)
+ assert os.path.isdir(infer_dir), \
+ "{} is not a directory".format(infer_dir)
+ images = set()
+ exts = ['jpg', 'jpeg', 'png', 'bmp']
+ exts += [ext.upper() for ext in exts]
+ for ext in exts:
+ images.update(glob.glob('{}/*.{}'.format(infer_dir, ext)))
+ images = list(images)
+ images.sort()
+ assert len(images) > 0, "no image found in {}".format(infer_dir)
+ all_images.extend(images)
+ logger.info("Found {} inference images in total.".format(
+ len(images)))
+ return all_images
diff --git a/paddlers/models/ppdet/ext_op/README.md b/paddlers/models/ppdet/ext_op/README.md
new file mode 100644
index 0000000..0d67062
--- /dev/null
+++ b/paddlers/models/ppdet/ext_op/README.md
@@ -0,0 +1,35 @@
+# Custom Op Compilation
+The rotated-box IoU ops are implemented following the [custom external operator](https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/custom_op/new_cpp_op_cn.html) tutorial.
+
+## 1. Requirements
+- Paddle >= 2.0.1
+- gcc 8.2
+
+## 2. Installation
+```
+python setup.py install
+```
+
+Once compiled, the ops are ready to use. Below is a usage example of `rbox_iou`:
+```
+import numpy as np
+import paddle
+
+# import the custom op
+from ext_op import rbox_iou
+
+paddle.set_device('gpu:0')
+paddle.disable_static()
+
+rbox1 = np.random.rand(13000, 5)
+rbox2 = np.random.rand(7, 5)
+
+pd_rbox1 = paddle.to_tensor(rbox1)
+pd_rbox2 = paddle.to_tensor(rbox2)
+
+iou = rbox_iou(pd_rbox1, pd_rbox2)
+print('iou', iou)
+```
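+
+The build also produces a `matched_rbox_iou` op, which computes IoU only for
+row-aligned pairs of boxes (both inputs must share the same first dimension).
+A minimal sketch, assuming it is imported from `ext_op` just like `rbox_iou`:
+```
+import numpy as np
+import paddle
+from ext_op import matched_rbox_iou
+
+paddle.set_device('gpu:0')
+
+# N aligned rotated boxes, each encoded as (x_ctr, y_ctr, w, h, angle)
+rbox1 = paddle.to_tensor(np.random.rand(1000, 5))
+rbox2 = paddle.to_tensor(np.random.rand(1000, 5))  # same first dim as rbox1
+
+iou = matched_rbox_iou(rbox1, rbox2)  # shape: [1000]
+print('matched iou', iou)
+```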
+
+## 3. Unit Tests
+Unit tests can be run to verify the correctness of the custom ops, for example:
+```
+python unittest/test_matched_rbox_iou.py
+```
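+
+The unit tests cross-check the ops against `shapely`. A minimal sketch of the
+reference computation, assuming each rotated box is given by its four corner
+points:
+```
+from shapely.geometry import Polygon
+
+def iou_poly(pts1, pts2):
+    # pts1 / pts2: lists of four (x, y) corners of a rotated box
+    p1, p2 = Polygon(pts1), Polygon(pts2)
+    inter = p1.intersection(p2).area
+    return inter / (p1.area + p2.area - inter)
+```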
diff --git a/paddlers/models/ppdet/ext_op/csrc/rbox_iou/matched_rbox_iou_op.cc b/paddlers/models/ppdet/ext_op/csrc/rbox_iou/matched_rbox_iou_op.cc
new file mode 100644
index 0000000..2c3c58b
--- /dev/null
+++ b/paddlers/models/ppdet/ext_op/csrc/rbox_iou/matched_rbox_iou_op.cc
@@ -0,0 +1,90 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// The code is based on
+// https://github.com/csuhan/s2anet/blob/master/mmdet/ops/box_iou_rotated
+
+#include "paddle/extension.h"
+#include "rbox_iou_op.h"
+
+template <typename T>
+void matched_rbox_iou_cpu_kernel(const int rbox_num, const T *rbox1_data_ptr,
+ const T *rbox2_data_ptr, T *output_data_ptr) {
+
+ int i;
+ for (i = 0; i < rbox_num; i++) {
+ output_data_ptr[i] =
+ rbox_iou_single<T>(rbox1_data_ptr + i * 5, rbox2_data_ptr + i * 5);
+ }
+}
+
+#define CHECK_INPUT_CPU(x) \
+ PD_CHECK(x.place() == paddle::PlaceType::kCPU, #x " must be a CPU Tensor.")
+
+std::vector<paddle::Tensor> MatchedRboxIouCPUForward(const paddle::Tensor &rbox1,
+ const paddle::Tensor &rbox2) {
+ CHECK_INPUT_CPU(rbox1);
+ CHECK_INPUT_CPU(rbox2);
+ PD_CHECK(rbox1.shape()[0] == rbox2.shape()[0], "inputs must have the same first dimension");
+
+ auto rbox_num = rbox1.shape()[0];
+ auto output = paddle::Tensor(paddle::PlaceType::kCPU, {rbox_num});
+
+ PD_DISPATCH_FLOATING_TYPES(rbox1.type(), "rotated_iou_cpu_kernel", ([&] {
+ matched_rbox_iou_cpu_kernel<data_t>(
+ rbox_num, rbox1.data<data_t>(),
+ rbox2.data<data_t>(),
+ output.mutable_data<data_t>());
+ }));
+
+ return {output};
+}
+
+#ifdef PADDLE_WITH_CUDA
+std::vector<paddle::Tensor> MatchedRboxIouCUDAForward(const paddle::Tensor &rbox1,
+ const paddle::Tensor &rbox2);
+#endif
+
+#define CHECK_INPUT_SAME(x1, x2) \
+ PD_CHECK(x1.place() == x2.place(), "inputs must be on the same place.")
+
+std::vector<paddle::Tensor> MatchedRboxIouForward(const paddle::Tensor &rbox1,
+ const paddle::Tensor &rbox2) {
+ CHECK_INPUT_SAME(rbox1, rbox2);
+ if (rbox1.place() == paddle::PlaceType::kCPU) {
+ return MatchedRboxIouCPUForward(rbox1, rbox2);
+#ifdef PADDLE_WITH_CUDA
+ } else if (rbox1.place() == paddle::PlaceType::kGPU) {
+ return MatchedRboxIouCUDAForward(rbox1, rbox2);
+#endif
+ }
+}
+
+std::vector<std::vector<int64_t>>
+MatchedRboxIouInferShape(std::vector<int64_t> rbox1_shape,
+ std::vector<int64_t> rbox2_shape) {
+ return {{rbox1_shape[0]}};
+}
+
+std::vector<paddle::DataType> MatchedRboxIouInferDtype(paddle::DataType t1,
+ paddle::DataType t2) {
+ return {t1};
+}
+
+PD_BUILD_OP(matched_rbox_iou)
+ .Inputs({"RBOX1", "RBOX2"})
+ .Outputs({"Output"})
+ .SetKernelFn(PD_KERNEL(MatchedRboxIouForward))
+ .SetInferShapeFn(PD_INFER_SHAPE(MatchedRboxIouInferShape))
+ .SetInferDtypeFn(PD_INFER_DTYPE(MatchedRboxIouInferDtype));
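+
+// Note: PD_BUILD_OP registers this kernel under the op name
+// `matched_rbox_iou`; once built via setup.py it is importable from Python,
+// e.g. `from ext_op import matched_rbox_iou` (see ext_op/README.md).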
diff --git a/paddlers/models/ppdet/ext_op/csrc/rbox_iou/matched_rbox_iou_op.cu b/paddlers/models/ppdet/ext_op/csrc/rbox_iou/matched_rbox_iou_op.cu
new file mode 100644
index 0000000..8d03ecc
--- /dev/null
+++ b/paddlers/models/ppdet/ext_op/csrc/rbox_iou/matched_rbox_iou_op.cu
@@ -0,0 +1,63 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// The code is based on
+// https://github.com/csuhan/s2anet/blob/master/mmdet/ops/box_iou_rotated
+
+#include "paddle/extension.h"
+#include "rbox_iou_op.h"
+
+/**
+ Computes ceil(a / b)
+*/
+
+static inline int CeilDiv(const int a, const int b) { return (a + b - 1) / b; }
+
+template <typename T>
+__global__ void
+matched_rbox_iou_cuda_kernel(const int rbox_num, const T *rbox1_data_ptr,
+ const T *rbox2_data_ptr, T *output_data_ptr) {
+ for (int tid = blockIdx.x * blockDim.x + threadIdx.x; tid < rbox_num;
+ tid += blockDim.x * gridDim.x) {
+ output_data_ptr[tid] =
+ rbox_iou_single<T>(rbox1_data_ptr + tid * 5, rbox2_data_ptr + tid * 5);
+ }
+}
+
+#define CHECK_INPUT_GPU(x) \
+ PD_CHECK(x.place() == paddle::PlaceType::kGPU, #x " must be a GPU Tensor.")
+
+std::vector<paddle::Tensor> MatchedRboxIouCUDAForward(const paddle::Tensor &rbox1,
+ const paddle::Tensor &rbox2) {
+ CHECK_INPUT_GPU(rbox1);
+ CHECK_INPUT_GPU(rbox2);
+ PD_CHECK(rbox1.shape()[0] == rbox2.shape()[0], "inputs must have the same first dimension");
+
+ auto rbox_num = rbox1.shape()[0];
+
+ auto output = paddle::Tensor(paddle::PlaceType::kGPU, {rbox_num});
+
+ const int thread_per_block = 512;
+ const int block_per_grid = CeilDiv(rbox_num, thread_per_block);
+
+ PD_DISPATCH_FLOATING_TYPES(
+ rbox1.type(), "matched_rbox_iou_cuda_kernel", ([&] {
+ matched_rbox_iou_cuda_kernel<
+ data_t><<<block_per_grid, thread_per_block, 0, rbox1.stream()>>>(
+ rbox_num, rbox1.data<data_t>(), rbox2.data<data_t>(),
+ output.mutable_data<data_t>());
+ }));
+
+ return {output};
+}
diff --git a/paddlers/models/ppdet/ext_op/csrc/rbox_iou/rbox_iou_op.cc b/paddlers/models/ppdet/ext_op/csrc/rbox_iou/rbox_iou_op.cc
new file mode 100644
index 0000000..d66004e
--- /dev/null
+++ b/paddlers/models/ppdet/ext_op/csrc/rbox_iou/rbox_iou_op.cc
@@ -0,0 +1,97 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// The code is based on https://github.com/csuhan/s2anet/blob/master/mmdet/ops/box_iou_rotated
+
+#include "rbox_iou_op.h"
+#include "paddle/extension.h"
+
+
+template <typename T>
+void rbox_iou_cpu_kernel(
+ const int rbox1_num,
+ const int rbox2_num,
+ const T* rbox1_data_ptr,
+ const T* rbox2_data_ptr,
+ T* output_data_ptr) {
+
+ int i, j;
+ for (i = 0; i < rbox1_num; i++) {
+ for (j = 0; j < rbox2_num; j++) {
+ int offset = i * rbox2_num + j;
+ output_data_ptr[offset] = rbox_iou_single<T>(rbox1_data_ptr + i * 5, rbox2_data_ptr + j * 5);
+ }
+ }
+}
+
+
+#define CHECK_INPUT_CPU(x) PD_CHECK(x.place() == paddle::PlaceType::kCPU, #x " must be a CPU Tensor.")
+
+std::vector<paddle::Tensor> RboxIouCPUForward(const paddle::Tensor& rbox1, const paddle::Tensor& rbox2) {
+ CHECK_INPUT_CPU(rbox1);
+ CHECK_INPUT_CPU(rbox2);
+
+ auto rbox1_num = rbox1.shape()[0];
+ auto rbox2_num = rbox2.shape()[0];
+
+ auto output = paddle::Tensor(paddle::PlaceType::kCPU, {rbox1_num, rbox2_num});
+
+ PD_DISPATCH_FLOATING_TYPES(
+ rbox1.type(),
+ "rbox_iou_cpu_kernel",
+ ([&] {
+ rbox_iou_cpu_kernel<data_t>(
+ rbox1_num,
+ rbox2_num,
+ rbox1.data<data_t>(),
+ rbox2.data<data_t>(),
+ output.mutable_data<data_t>());
+ }));
+
+ return {output};
+}
+
+
+#ifdef PADDLE_WITH_CUDA
+std::vector<paddle::Tensor> RboxIouCUDAForward(const paddle::Tensor& rbox1, const paddle::Tensor& rbox2);
+#endif
+
+
+#define CHECK_INPUT_SAME(x1, x2) PD_CHECK(x1.place() == x2.place(), "inputs must be on the same place.")
+
+std::vector<paddle::Tensor> RboxIouForward(const paddle::Tensor& rbox1, const paddle::Tensor& rbox2) {
+ CHECK_INPUT_SAME(rbox1, rbox2);
+ if (rbox1.place() == paddle::PlaceType::kCPU) {
+ return RboxIouCPUForward(rbox1, rbox2);
+#ifdef PADDLE_WITH_CUDA
+ } else if (rbox1.place() == paddle::PlaceType::kGPU) {
+ return RboxIouCUDAForward(rbox1, rbox2);
+#endif
+ }
+}
+
+std::vector<std::vector<int64_t>> InferShape(std::vector<int64_t> rbox1_shape, std::vector<int64_t> rbox2_shape) {
+ return {{rbox1_shape[0], rbox2_shape[0]}};
+}
+
+std::vector<paddle::DataType> InferDtype(paddle::DataType t1, paddle::DataType t2) {
+ return {t1};
+}
+
+PD_BUILD_OP(rbox_iou)
+ .Inputs({"RBOX1", "RBOX2"})
+ .Outputs({"Output"})
+ .SetKernelFn(PD_KERNEL(RboxIouForward))
+ .SetInferShapeFn(PD_INFER_SHAPE(InferShape))
+ .SetInferDtypeFn(PD_INFER_DTYPE(InferDtype));
diff --git a/paddlers/models/ppdet/ext_op/csrc/rbox_iou/rbox_iou_op.cu b/paddlers/models/ppdet/ext_op/csrc/rbox_iou/rbox_iou_op.cu
new file mode 100644
index 0000000..a61be13
--- /dev/null
+++ b/paddlers/models/ppdet/ext_op/csrc/rbox_iou/rbox_iou_op.cu
@@ -0,0 +1,114 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// The code is based on
+// https://github.com/csuhan/s2anet/blob/master/mmdet/ops/box_iou_rotated
+
+#include "paddle/extension.h"
+#include "rbox_iou_op.h"
+
+// 2D block with 32 * 16 = 512 threads per block
+const int BLOCK_DIM_X = 32;
+const int BLOCK_DIM_Y = 16;
+
+/**
+ Computes ceil(a / b)
+*/
+
+static inline int CeilDiv(const int a, const int b) { return (a + b - 1) / b; }
+
+template <typename T>
+__global__ void rbox_iou_cuda_kernel(const int rbox1_num, const int rbox2_num,
+ const T *rbox1_data_ptr,
+ const T *rbox2_data_ptr,
+ T *output_data_ptr) {
+
+ // get row_start and col_start
+ const int rbox1_block_idx = blockIdx.x * blockDim.x;
+ const int rbox2_block_idx = blockIdx.y * blockDim.y;
+
+ const int rbox1_thread_num = min(rbox1_num - rbox1_block_idx, blockDim.x);
+ const int rbox2_thread_num = min(rbox2_num - rbox2_block_idx, blockDim.y);
+
+ __shared__ T block_boxes1[BLOCK_DIM_X * 5];
+ __shared__ T block_boxes2[BLOCK_DIM_Y * 5];
+
+ // It's safe to copy using threadIdx.x since BLOCK_DIM_X >= BLOCK_DIM_Y
+ if (threadIdx.x < rbox1_thread_num && threadIdx.y == 0) {
+ block_boxes1[threadIdx.x * 5 + 0] =
+ rbox1_data_ptr[(rbox1_block_idx + threadIdx.x) * 5 + 0];
+ block_boxes1[threadIdx.x * 5 + 1] =
+ rbox1_data_ptr[(rbox1_block_idx + threadIdx.x) * 5 + 1];
+ block_boxes1[threadIdx.x * 5 + 2] =
+ rbox1_data_ptr[(rbox1_block_idx + threadIdx.x) * 5 + 2];
+ block_boxes1[threadIdx.x * 5 + 3] =
+ rbox1_data_ptr[(rbox1_block_idx + threadIdx.x) * 5 + 3];
+ block_boxes1[threadIdx.x * 5 + 4] =
+ rbox1_data_ptr[(rbox1_block_idx + threadIdx.x) * 5 + 4];
+ }
+
+ // threadIdx.x < BLOCK_DIM_Y=rbox2_thread_num, just use same condition as
+ // above: threadIdx.y == 0
+ if (threadIdx.x < rbox2_thread_num && threadIdx.y == 0) {
+ block_boxes2[threadIdx.x * 5 + 0] =
+ rbox2_data_ptr[(rbox2_block_idx + threadIdx.x) * 5 + 0];
+ block_boxes2[threadIdx.x * 5 + 1] =
+ rbox2_data_ptr[(rbox2_block_idx + threadIdx.x) * 5 + 1];
+ block_boxes2[threadIdx.x * 5 + 2] =
+ rbox2_data_ptr[(rbox2_block_idx + threadIdx.x) * 5 + 2];
+ block_boxes2[threadIdx.x * 5 + 3] =
+ rbox2_data_ptr[(rbox2_block_idx + threadIdx.x) * 5 + 3];
+ block_boxes2[threadIdx.x * 5 + 4] =
+ rbox2_data_ptr[(rbox2_block_idx + threadIdx.x) * 5 + 4];
+ }
+
+ // sync
+ __syncthreads();
+
+ if (threadIdx.x < rbox1_thread_num && threadIdx.y < rbox2_thread_num) {
+ int offset = (rbox1_block_idx + threadIdx.x) * rbox2_num + rbox2_block_idx +
+ threadIdx.y;
+ output_data_ptr[offset] = rbox_iou_single<T>(
+ block_boxes1 + threadIdx.x * 5, block_boxes2 + threadIdx.y * 5);
+ }
+}
+
+#define CHECK_INPUT_GPU(x) \
+ PD_CHECK(x.place() == paddle::PlaceType::kGPU, #x " must be a GPU Tensor.")
+
+std::vector<paddle::Tensor> RboxIouCUDAForward(const paddle::Tensor &rbox1,
+ const paddle::Tensor &rbox2) {
+ CHECK_INPUT_GPU(rbox1);
+ CHECK_INPUT_GPU(rbox2);
+
+ auto rbox1_num = rbox1.shape()[0];
+ auto rbox2_num = rbox2.shape()[0];
+
+ auto output = paddle::Tensor(paddle::PlaceType::kGPU, {rbox1_num, rbox2_num});
+
+ const int blocks_x = CeilDiv(rbox1_num, BLOCK_DIM_X);
+ const int blocks_y = CeilDiv(rbox2_num, BLOCK_DIM_Y);
+
+ dim3 blocks(blocks_x, blocks_y);
+ dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y);
+
+ PD_DISPATCH_FLOATING_TYPES(
+ rbox1.type(), "rbox_iou_cuda_kernel", ([&] {
+ rbox_iou_cuda_kernel<data_t><<<blocks, threads, 0, rbox1.stream()>>>(
+ rbox1_num, rbox2_num, rbox1.data<data_t>(), rbox2.data<data_t>(),
+ output.mutable_data<data_t>());
+ }));
+
+ return {output};
+}
diff --git a/paddlers/models/ppdet/ext_op/csrc/rbox_iou/rbox_iou_op.h b/paddlers/models/ppdet/ext_op/csrc/rbox_iou/rbox_iou_op.h
new file mode 100644
index 0000000..b592c39
--- /dev/null
+++ b/paddlers/models/ppdet/ext_op/csrc/rbox_iou/rbox_iou_op.h
@@ -0,0 +1,348 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// The code is based on
+// https://github.com/csuhan/s2anet/blob/master/mmdet/ops/box_iou_rotated
+
+#pragma once
+
+#include <cassert>
+#include <cmath>
+#include <vector>
+
+#ifdef __CUDACC__
+// Designates functions callable from the host (CPU) and the device (GPU)
+#define HOST_DEVICE __host__ __device__
+#define HOST_DEVICE_INLINE HOST_DEVICE __forceinline__
+#else
+#include <algorithm>
+#define HOST_DEVICE
+#define HOST_DEVICE_INLINE HOST_DEVICE inline
+#endif
+
+namespace {
+
+template <typename T> struct RotatedBox { T x_ctr, y_ctr, w, h, a; };
+
+template <typename T> struct Point {
+ T x, y;
+ HOST_DEVICE_INLINE Point(const T &px = 0, const T &py = 0) : x(px), y(py) {}
+ HOST_DEVICE_INLINE Point operator+(const Point &p) const {
+ return Point(x + p.x, y + p.y);
+ }
+ HOST_DEVICE_INLINE Point &operator+=(const Point &p) {
+ x += p.x;
+ y += p.y;
+ return *this;
+ }
+ HOST_DEVICE_INLINE Point operator-(const Point &p) const {
+ return Point(x - p.x, y - p.y);
+ }
+ HOST_DEVICE_INLINE Point operator*(const T coeff) const {
+ return Point(x * coeff, y * coeff);
+ }
+};
+
+template <typename T>
+HOST_DEVICE_INLINE T dot_2d(const Point<T> &A, const Point<T> &B) {
+ return A.x * B.x + A.y * B.y;
+}
+
+template <typename T>
+HOST_DEVICE_INLINE T cross_2d(const Point<T> &A, const Point<T> &B) {
+ return A.x * B.y - B.x * A.y;
+}
+
+template <typename T>
+HOST_DEVICE_INLINE void get_rotated_vertices(const RotatedBox<T> &box,
+ Point<T> (&pts)[4]) {
+ // M_PI / 180. == 0.01745329251
+ // double theta = box.a * 0.01745329251;
+ // MODIFIED
+ double theta = box.a;
+ T cosTheta2 = (T)cos(theta) * 0.5f;
+ T sinTheta2 = (T)sin(theta) * 0.5f;
+
+ // y: top --> down; x: left --> right
+ pts[0].x = box.x_ctr - sinTheta2 * box.h - cosTheta2 * box.w;
+ pts[0].y = box.y_ctr + cosTheta2 * box.h - sinTheta2 * box.w;
+ pts[1].x = box.x_ctr + sinTheta2 * box.h - cosTheta2 * box.w;
+ pts[1].y = box.y_ctr - cosTheta2 * box.h - sinTheta2 * box.w;
+ pts[2].x = 2 * box.x_ctr - pts[0].x;
+ pts[2].y = 2 * box.y_ctr - pts[0].y;
+ pts[3].x = 2 * box.x_ctr - pts[1].x;
+ pts[3].y = 2 * box.y_ctr - pts[1].y;
+}
+
+template <typename T>
+HOST_DEVICE_INLINE int get_intersection_points(const Point<T> (&pts1)[4],
+ const Point<T> (&pts2)[4],
+ Point<T> (&intersections)[24]) {
+ // Line vector
+ // A line from p1 to p2 is: p1 + (p2-p1)*t, t=[0,1]
+ Point<T> vec1[4], vec2[4];
+ for (int i = 0; i < 4; i++) {
+ vec1[i] = pts1[(i + 1) % 4] - pts1[i];
+ vec2[i] = pts2[(i + 1) % 4] - pts2[i];
+ }
+
+ // Line test - test all line combos for intersection
+ int num = 0; // number of intersections
+ for (int i = 0; i < 4; i++) {
+ for (int j = 0; j < 4; j++) {
+ // Solve for 2x2 Ax=b
+ T det = cross_2d<T>(vec2[j], vec1[i]);
+
+ // This takes care of parallel lines
+ if (fabs(det) <= 1e-14) {
+ continue;
+ }
+
+ auto vec12 = pts2[j] - pts1[i];
+
+ T t1 = cross_2d<T>(vec2[j], vec12) / det;
+ T t2 = cross_2d<T>(vec1[i], vec12) / det;
+
+ if (t1 >= 0.0f && t1 <= 1.0f && t2 >= 0.0f && t2 <= 1.0f) {
+ intersections[num++] = pts1[i] + vec1[i] * t1;
+ }
+ }
+ }
+
+ // Check for vertices of rect1 inside rect2
+ {
+ const auto &AB = vec2[0];
+ const auto &DA = vec2[3];
+ auto ABdotAB = dot_2d<T>(AB, AB);
+ auto ADdotAD = dot_2d<T>(DA, DA);
+ for (int i = 0; i < 4; i++) {
+ // assume ABCD is the rectangle, and P is the point to be judged
+ // P is inside ABCD iff. P's projection on AB lies within AB
+ // and P's projection on AD lies within AD
+
+ auto AP = pts1[i] - pts2[0];
+
+ auto APdotAB = dot_2d<T>(AP, AB);
+ auto APdotAD = -dot_2d<T>(AP, DA);
+
+ if ((APdotAB >= 0) && (APdotAD >= 0) && (APdotAB <= ABdotAB) &&
+ (APdotAD <= ADdotAD)) {
+ intersections[num++] = pts1[i];
+ }
+ }
+ }
+
+ // Reverse the check - check for vertices of rect2 inside rect1
+ {
+ const auto &AB = vec1[0];
+ const auto &DA = vec1[3];
+ auto ABdotAB = dot_2d<T>(AB, AB);
+ auto ADdotAD = dot_2d<T>(DA, DA);
+ for (int i = 0; i < 4; i++) {
+ auto AP = pts2[i] - pts1[0];
+
+ auto APdotAB = dot_2d<T>(AP, AB);
+ auto APdotAD = -dot_2d<T>(AP, DA);
+
+ if ((APdotAB >= 0) && (APdotAD >= 0) && (APdotAB <= ABdotAB) &&
+ (APdotAD <= ADdotAD)) {
+ intersections[num++] = pts2[i];
+ }
+ }
+ }
+
+ return num;
+}
+
+template <typename T>
+HOST_DEVICE_INLINE int convex_hull_graham(const Point<T> (&p)[24],
+ const int &num_in, Point<T> (&q)[24],
+ bool shift_to_zero = false) {
+ assert(num_in >= 2);
+
+ // Step 1:
+ // Find point with minimum y
+ // if more than 1 points have the same minimum y,
+ // pick the one with the minimum x.
+ int t = 0;
+ for (int i = 1; i < num_in; i++) {
+ if (p[i].y < p[t].y || (p[i].y == p[t].y && p[i].x < p[t].x)) {
+ t = i;
+ }
+ }
+ auto &start = p[t]; // starting point
+
+ // Step 2:
+ // Subtract starting point from every points (for sorting in the next step)
+ for (int i = 0; i < num_in; i++) {
+ q[i] = p[i] - start;
+ }
+
+ // Swap the starting point to position 0
+ auto tmp = q[0];
+ q[0] = q[t];
+ q[t] = tmp;
+
+ // Step 3:
+ // Sort point 1 ~ num_in according to their relative cross-product values
+ // (essentially sorting according to angles)
+ // If the angles are the same, sort according to their distance to origin
+ T dist[24];
+ for (int i = 0; i < num_in; i++) {
+ dist[i] = dot_2d<T>(q[i], q[i]);
+ }
+
+#ifdef __CUDACC__
+ // CUDA version
+ // In the future, we can potentially use thrust
+ // for sorting here to improve speed (though not guaranteed)
+ for (int i = 1; i < num_in - 1; i++) {
+ for (int j = i + 1; j < num_in; j++) {
+ T crossProduct = cross_2d<T>(q[i], q[j]);
+ if ((crossProduct < -1e-6) ||
+ (fabs(crossProduct) < 1e-6 && dist[i] > dist[j])) {
+ auto q_tmp = q[i];
+ q[i] = q[j];
+ q[j] = q_tmp;
+ auto dist_tmp = dist[i];
+ dist[i] = dist[j];
+ dist[j] = dist_tmp;
+ }
+ }
+ }
+#else
+ // CPU version
+ std::sort(q + 1, q + num_in,
+ [](const Point<T> &A, const Point<T> &B) -> bool {
+ T temp = cross_2d<T>(A, B);
+ if (fabs(temp) < 1e-6) {
+ return dot_2d<T>(A, A) < dot_2d<T>(B, B);
+ } else {
+ return temp > 0;
+ }
+ });
+#endif
+
+ // Step 4:
+ // Make sure there are at least 2 points (that don't overlap with each other)
+ // in the stack
+ int k; // index of the non-overlapped second point
+ for (k = 1; k < num_in; k++) {
+ if (dist[k] > 1e-8) {
+ break;
+ }
+ }
+ if (k == num_in) {
+ // We reach the end, which means the convex hull is just one point
+ q[0] = p[t];
+ return 1;
+ }
+ q[1] = q[k];
+ int m = 2; // 2 points in the stack
+ // Step 5:
+ // Finally we can start the scanning process.
+ // When a non-convex relationship between the 3 points is found
+ // (either concave shape or duplicated points),
+ // we pop the previous point from the stack
+ // until the 3-point relationship is convex again, or
+ // until the stack only contains two points
+ for (int i = k + 1; i < num_in; i++) {
+ while (m > 1 && cross_2d<T>(q[i] - q[m - 2], q[m - 1] - q[m - 2]) >= 0) {
+ m--;
+ }
+ q[m++] = q[i];
+ }
+
+ // Step 6 (Optional):
+ // In the general case we need the original coordinates, so we
+ // need to shift the points back (reverting Step 2).
+ // But if we're only interested in the area/perimeter of the shape,
+ // we can simply return.
+ if (!shift_to_zero) {
+ for (int i = 0; i < m; i++) {
+ q[i] += start;
+ }
+ }
+
+ return m;
+}
+
+template <typename T>
+HOST_DEVICE_INLINE T polygon_area(const Point<T> (&q)[24], const int &m) {
+ if (m <= 2) {
+ return 0;
+ }
+
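+ // Fan triangulation from q[0]: each |cross| is twice the area of the
+ // triangle (q[0], q[i], q[i + 1]); summing and halving gives the area
+ // of the convex, ordered polygon.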
+ T area = 0;
+ for (int i = 1; i < m - 1; i++) {
+ area += fabs(cross_2d<T>(q[i] - q[0], q[i + 1] - q[0]));
+ }
+
+ return area / 2.0;
+}
+
+template <typename T>
+HOST_DEVICE_INLINE T rboxes_intersection(const RotatedBox<T> &box1,
+ const RotatedBox<T> &box2) {
+ // There are up to 4 x 4 + 4 + 4 = 24 intersections (including dups) returned
+ // from get_intersection_points
+ Point<T> intersectPts[24], orderedPts[24];
+
+ Point<T> pts1[4];
+ Point<T> pts2[4];
+ get_rotated_vertices<T>(box1, pts1);
+ get_rotated_vertices<T>(box2, pts2);
+
+ int num = get_intersection_points<T>(pts1, pts2, intersectPts);
+
+ if (num <= 2) {
+ return 0.0;
+ }
+
+ // Convex Hull to order the intersection points in clockwise order and find
+ // the contour area.
+ int num_convex = convex_hull_graham<T>(intersectPts, num, orderedPts, true);
+ return polygon_area<T>(orderedPts, num_convex);
+}
+
+} // namespace
+
+template <typename T>
+HOST_DEVICE_INLINE T rbox_iou_single(T const *const box1_raw,
+ T const *const box2_raw) {
+ // Shift both centers to their midpoint to achieve higher precision in the result.
+ RotatedBox<T> box1, box2;
+ auto center_shift_x = (box1_raw[0] + box2_raw[0]) / 2.0;
+ auto center_shift_y = (box1_raw[1] + box2_raw[1]) / 2.0;
+ box1.x_ctr = box1_raw[0] - center_shift_x;
+ box1.y_ctr = box1_raw[1] - center_shift_y;
+ box1.w = box1_raw[2];
+ box1.h = box1_raw[3];
+ box1.a = box1_raw[4];
+ box2.x_ctr = box2_raw[0] - center_shift_x;
+ box2.y_ctr = box2_raw[1] - center_shift_y;
+ box2.w = box2_raw[2];
+ box2.h = box2_raw[3];
+ box2.a = box2_raw[4];
+
+ const T area1 = box1.w * box1.h;
+ const T area2 = box2.w * box2.h;
+ if (area1 < 1e-14 || area2 < 1e-14) {
+ return 0.f;
+ }
+
+ const T intersection = rboxes_intersection<T>(box1, box2);
+ const T iou = intersection / (area1 + area2 - intersection);
+ return iou;
+}
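+
+// Usage sketch (boxes given as [x_ctr, y_ctr, w, h, angle]):
+//   float b1[5] = {0.f, 0.f, 2.f, 2.f, 0.f};
+//   float b2[5] = {1.f, 0.f, 2.f, 2.f, 0.f};
+//   float iou = rbox_iou_single<float>(b1, b2);  // inter 2, union 6 -> 1/3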
diff --git a/paddlers/models/ppdet/ext_op/setup.py b/paddlers/models/ppdet/ext_op/setup.py
new file mode 100644
index 0000000..5892f46
--- /dev/null
+++ b/paddlers/models/ppdet/ext_op/setup.py
@@ -0,0 +1,33 @@
+import os
+import glob
+import paddle
+from paddle.utils.cpp_extension import CppExtension, CUDAExtension, setup
+
+
+def get_extensions():
+ root_dir = os.path.dirname(os.path.abspath(__file__))
+ ext_root_dir = os.path.join(root_dir, 'csrc')
+ sources = []
+ for ext_name in os.listdir(ext_root_dir):
+ ext_dir = os.path.join(ext_root_dir, ext_name)
+ source = glob.glob(os.path.join(ext_dir, '*.cc'))
+ kwargs = dict()
+ if paddle.device.is_compiled_with_cuda():
+ source += glob.glob(os.path.join(ext_dir, '*.cu'))
+
+ if not source:
+ continue
+
+ sources += source
+
+ if paddle.device.is_compiled_with_cuda():
+ extension = CUDAExtension(
+ sources, extra_compile_args={'cxx': ['-DPADDLE_WITH_CUDA']})
+ else:
+ extension = CppExtension(sources)
+
+ return extension
+
+
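+# A typical build/install invocation (run from this directory) would be
+# `python setup.py install`; the operators then become importable, e.g.
+# `from ext_op import rbox_iou`.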
+if __name__ == "__main__":
+ setup(name='ext_op', ext_modules=get_extensions())
diff --git a/paddlers/models/ppdet/ext_op/unittest/test_matched_rbox_iou.py b/paddlers/models/ppdet/ext_op/unittest/test_matched_rbox_iou.py
new file mode 100644
index 0000000..bff95b3
--- /dev/null
+++ b/paddlers/models/ppdet/ext_op/unittest/test_matched_rbox_iou.py
@@ -0,0 +1,149 @@
+import numpy as np
+import sys
+import time
+from shapely.geometry import Polygon
+import paddle
+import unittest
+
+from ext_op import matched_rbox_iou
+
+
+def rbox2poly_single(rrect, get_best_begin_point=False):
+ """
+ rrect:[x_ctr,y_ctr,w,h,angle]
+ to
+ poly:[x0,y0,x1,y1,x2,y2,x3,y3]
+ """
+ x_ctr, y_ctr, width, height, angle = rrect[:5]
+ tl_x, tl_y, br_x, br_y = -width / 2, -height / 2, width / 2, height / 2
+ # rect 2x4
+ rect = np.array([[tl_x, br_x, br_x, tl_x], [tl_y, tl_y, br_y, br_y]])
+ R = np.array([[np.cos(angle), -np.sin(angle)],
+ [np.sin(angle), np.cos(angle)]])
+ # poly
+ poly = R.dot(rect)
+ x0, x1, x2, x3 = poly[0, :4] + x_ctr
+ y0, y1, y2, y3 = poly[1, :4] + y_ctr
+ poly = np.array([x0, y0, x1, y1, x2, y2, x3, y3], dtype=np.float64)
+ return poly
+
+
+def intersection(g, p):
+ """
+ Compute the IoU of two quadrilaterals (intersection over union).
+ """
+
+ g = g[:8].reshape((4, 2))
+ p = p[:8].reshape((4, 2))
+
+ a = g
+ b = p
+
+ use_filter = True
+ if use_filter:
+ # step 1: reject pairs whose axis-aligned bounding boxes do not overlap
+ inter_x1 = np.maximum(np.min(a[:, 0]), np.min(b[:, 0]))
+ inter_x2 = np.minimum(np.max(a[:, 0]), np.max(b[:, 0]))
+ inter_y1 = np.maximum(np.min(a[:, 1]), np.min(b[:, 1]))
+ inter_y2 = np.minimum(np.max(a[:, 1]), np.max(b[:, 1]))
+ if inter_x1 >= inter_x2 or inter_y1 >= inter_y2:
+ return 0.
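+ # step 2: reject pairs whose union bounding box is degenerate
+ # (less than 2 units on a side)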
+ x1 = np.minimum(np.min(a[:, 0]), np.min(b[:, 0]))
+ x2 = np.maximum(np.max(a[:, 0]), np.max(b[:, 0]))
+ y1 = np.minimum(np.min(a[:, 1]), np.min(b[:, 1]))
+ y2 = np.maximum(np.max(a[:, 1]), np.max(b[:, 1]))
+ if x1 >= x2 or y1 >= y2 or (x2 - x1) < 2 or (y2 - y1) < 2:
+ return 0.
+
+ g = Polygon(g)
+ p = Polygon(p)
+ if not g.is_valid or not p.is_valid:
+ return 0
+
+ inter = Polygon(g).intersection(Polygon(p)).area
+ union = g.area + p.area - inter
+ if union == 0:
+ return 0
+ else:
+ return inter / union
+
+
+def matched_rbox_overlaps(anchors, gt_bboxes, use_cv2=False):
+ """
+
+ Args:
+ anchors: [M, 5] x_ctr, y_ctr, w, h, angle
+ gt_bboxes: [M, 5] x_ctr, y_ctr, w, h, angle
+
+ Returns:
+ matched_iou: [M]
+ """
+ assert anchors.shape[1] == 5
+ assert gt_bboxes.shape[1] == 5
+
+ gt_bboxes_ploy = [rbox2poly_single(e) for e in gt_bboxes]
+ anchors_ploy = [rbox2poly_single(e) for e in anchors]
+
+ num = len(anchors_ploy)
+ iou = np.zeros((num, ), dtype=np.float64)
+
+ start_time = time.time()
+ for i in range(num):
+ try:
+ iou[i] = intersection(gt_bboxes_ploy[i], anchors_ploy[i])
+ except Exception as e:
+ print('cur gt_bboxes_ploy[i]', gt_bboxes_ploy[i], 'anchors_ploy[i]',
+ anchors_ploy[i], e)
+ return iou
+
+
+def gen_sample(n):
+ rbox = np.random.rand(n, 5)
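+ # centers/sizes uniform in (0.001, 0.451); angles uniform in (-0.5, 0.5) rad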
+ rbox[:, 0:4] = rbox[:, 0:4] * 0.45 + 0.001
+ rbox[:, 4] = rbox[:, 4] - 0.5
+ return rbox
+
+
+class MatchedRBoxIoUTest(unittest.TestCase):
+ def setUp(self):
+ self.initTestCase()
+ self.rbox1 = gen_sample(self.n)
+ self.rbox2 = gen_sample(self.n)
+
+ def initTestCase(self):
+ self.n = 1000
+
+ def assertAllClose(self, x, y, msg, atol=5e-1, rtol=1e-2):
+ self.assertTrue(np.allclose(x, y, atol=atol, rtol=rtol), msg=msg)
+
+ def get_places(self):
+ places = [paddle.CPUPlace()]
+ if paddle.device.is_compiled_with_cuda():
+ places.append(paddle.CUDAPlace(0))
+
+ return places
+
+ def check_output(self, place):
+ paddle.disable_static()
+ pd_rbox1 = paddle.to_tensor(self.rbox1, place=place)
+ pd_rbox2 = paddle.to_tensor(self.rbox2, place=place)
+ actual_t = matched_rbox_iou(pd_rbox1, pd_rbox2).numpy()
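+ # IoU is scale-invariant: the Shapely reference runs on boxes scaled to
+ # pixel coordinates (x1024) while the op ran on the normalized boxes.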
+ poly_rbox1 = self.rbox1
+ poly_rbox2 = self.rbox2
+ poly_rbox1[:, 0:4] = self.rbox1[:, 0:4] * 1024
+ poly_rbox2[:, 0:4] = self.rbox2[:, 0:4] * 1024
+ expect_t = matched_rbox_overlaps(poly_rbox1, poly_rbox2, use_cv2=False)
+ self.assertAllClose(
+ actual_t,
+ expect_t,
+ msg="rbox_iou has diff at {} \nExpect {}\nBut got {}".format(
+ str(place), str(expect_t), str(actual_t)))
+
+ def test_output(self):
+ places = self.get_places()
+ for place in places:
+ self.check_output(place)
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/paddlers/models/ppdet/ext_op/unittest/test_rbox_iou.py b/paddlers/models/ppdet/ext_op/unittest/test_rbox_iou.py
new file mode 100644
index 0000000..8ef19ae
--- /dev/null
+++ b/paddlers/models/ppdet/ext_op/unittest/test_rbox_iou.py
@@ -0,0 +1,151 @@
+import numpy as np
+import sys
+import time
+from shapely.geometry import Polygon
+import paddle
+import unittest
+
+from ext_op import rbox_iou
+
+
+def rbox2poly_single(rrect, get_best_begin_point=False):
+ """
+ rrect:[x_ctr,y_ctr,w,h,angle]
+ to
+ poly:[x0,y0,x1,y1,x2,y2,x3,y3]
+ """
+ x_ctr, y_ctr, width, height, angle = rrect[:5]
+ tl_x, tl_y, br_x, br_y = -width / 2, -height / 2, width / 2, height / 2
+ # rect 2x4
+ rect = np.array([[tl_x, br_x, br_x, tl_x], [tl_y, tl_y, br_y, br_y]])
+ R = np.array([[np.cos(angle), -np.sin(angle)],
+ [np.sin(angle), np.cos(angle)]])
+ # poly
+ poly = R.dot(rect)
+ x0, x1, x2, x3 = poly[0, :4] + x_ctr
+ y0, y1, y2, y3 = poly[1, :4] + y_ctr
+ poly = np.array([x0, y0, x1, y1, x2, y2, x3, y3], dtype=np.float64)
+ return poly
+
+
+def intersection(g, p):
+ """
+ Compute the IoU of two quadrilaterals (intersection over union).
+ """
+
+ g = g[:8].reshape((4, 2))
+ p = p[:8].reshape((4, 2))
+
+ a = g
+ b = p
+
+ use_filter = True
+ if use_filter:
+ # step 1: reject pairs whose axis-aligned bounding boxes do not overlap
+ inter_x1 = np.maximum(np.min(a[:, 0]), np.min(b[:, 0]))
+ inter_x2 = np.minimum(np.max(a[:, 0]), np.max(b[:, 0]))
+ inter_y1 = np.maximum(np.min(a[:, 1]), np.min(b[:, 1]))
+ inter_y2 = np.minimum(np.max(a[:, 1]), np.max(b[:, 1]))
+ if inter_x1 >= inter_x2 or inter_y1 >= inter_y2:
+ return 0.
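+ # step 2: reject pairs whose union bounding box is degenerate
+ # (less than 2 units on a side)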
+ x1 = np.minimum(np.min(a[:, 0]), np.min(b[:, 0]))
+ x2 = np.maximum(np.max(a[:, 0]), np.max(b[:, 0]))
+ y1 = np.minimum(np.min(a[:, 1]), np.min(b[:, 1]))
+ y2 = np.maximum(np.max(a[:, 1]), np.max(b[:, 1]))
+ if x1 >= x2 or y1 >= y2 or (x2 - x1) < 2 or (y2 - y1) < 2:
+ return 0.
+
+ g = Polygon(g)
+ p = Polygon(p)
+ if not g.is_valid or not p.is_valid:
+ return 0
+
+ inter = Polygon(g).intersection(Polygon(p)).area
+ union = g.area + p.area - inter
+ if union == 0:
+ return 0
+ else:
+ return inter / union
+
+
+def rbox_overlaps(anchors, gt_bboxes, use_cv2=False):
+ """
+
+ Args:
+ anchors: [NA, 5] x_ctr, y_ctr, w, h, angle
+ gt_bboxes: [M, 5] x_ctr, y_ctr, w, h, angle
+
+ Returns:
+ iou: [NA, M]
+ """
+ assert anchors.shape[1] == 5
+ assert gt_bboxes.shape[1] == 5
+
+ gt_bboxes_ploy = [rbox2poly_single(e) for e in gt_bboxes]
+ anchors_ploy = [rbox2poly_single(e) for e in anchors]
+
+ num_gt, num_anchors = len(gt_bboxes_ploy), len(anchors_ploy)
+ iou = np.zeros((num_anchors, num_gt), dtype=np.float64)
+
+ start_time = time.time()
+ for i in range(num_anchors):
+ for j in range(num_gt):
+ try:
+ iou[i, j] = intersection(anchors_ploy[i], gt_bboxes_ploy[j])
+ except Exception as e:
+ print('cur anchors_ploy[i]', anchors_ploy[i],
+ 'gt_bboxes_ploy[j]', gt_bboxes_ploy[j], e)
+ return iou
+
+
+def gen_sample(n):
+ rbox = np.random.rand(n, 5)
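+ # centers/sizes uniform in (0.001, 0.451); angles uniform in (-0.5, 0.5) rad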
+ rbox[:, 0:4] = rbox[:, 0:4] * 0.45 + 0.001
+ rbox[:, 4] = rbox[:, 4] - 0.5
+ return rbox
+
+
+class RBoxIoUTest(unittest.TestCase):
+ def setUp(self):
+ self.initTestCase()
+ self.rbox1 = gen_sample(self.n)
+ self.rbox2 = gen_sample(self.m)
+
+ def initTestCase(self):
+ self.n = 13000
+ self.m = 7
+
+ def assertAllClose(self, x, y, msg, atol=5e-1, rtol=1e-2):
+ self.assertTrue(np.allclose(x, y, atol=atol, rtol=rtol), msg=msg)
+
+ def get_places(self):
+ places = [paddle.CPUPlace()]
+ if paddle.device.is_compiled_with_cuda():
+ places.append(paddle.CUDAPlace(0))
+
+ return places
+
+ def check_output(self, place):
+ paddle.disable_static()
+ pd_rbox1 = paddle.to_tensor(self.rbox1, place=place)
+ pd_rbox2 = paddle.to_tensor(self.rbox2, place=place)
+ actual_t = rbox_iou(pd_rbox1, pd_rbox2).numpy()
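+ # IoU is scale-invariant: the Shapely reference runs on boxes scaled to
+ # pixel coordinates (x1024) while the op ran on the normalized boxes.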
+ poly_rbox1 = self.rbox1
+ poly_rbox2 = self.rbox2
+ poly_rbox1[:, 0:4] = self.rbox1[:, 0:4] * 1024
+ poly_rbox2[:, 0:4] = self.rbox2[:, 0:4] * 1024
+ expect_t = rbox_overlaps(poly_rbox1, poly_rbox2, use_cv2=False)
+ self.assertAllClose(
+ actual_t,
+ expect_t,
+ msg="rbox_iou has diff at {} \nExpect {}\nBut got {}".format(
+ str(place), str(expect_t), str(actual_t)))
+
+ def test_output(self):
+ places = self.get_places()
+ for place in places:
+ self.check_output(place)
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/paddlers/models/ppdet/metrics/__init__.py b/paddlers/models/ppdet/metrics/__init__.py
index e0659bd..dc1a53b 100644
--- a/paddlers/models/ppdet/metrics/__init__.py
+++ b/paddlers/models/ppdet/metrics/__init__.py
@@ -26,4 +26,4 @@ __all__ = metrics.__all__ + mot_metrics.__all__
from . import mcmot_metrics
from .mcmot_metrics import *
-__all__ = metrics.__all__ + mcmot_metrics.__all__
+__all__ = metrics.__all__ + mcmot_metrics.__all__
\ No newline at end of file
diff --git a/paddlers/models/ppdet/metrics/coco_utils.py b/paddlers/models/ppdet/metrics/coco_utils.py
index c920fd4..b6a1cff 100644
--- a/paddlers/models/ppdet/metrics/coco_utils.py
+++ b/paddlers/models/ppdet/metrics/coco_utils.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
diff --git a/paddlers/models/ppdet/metrics/json_results.py b/paddlers/models/ppdet/metrics/json_results.py
old mode 100644
new mode 100755
index aab0fbe..b20c30e
--- a/paddlers/models/ppdet/metrics/json_results.py
+++ b/paddlers/models/ppdet/metrics/json_results.py
@@ -65,6 +65,14 @@ def get_det_poly_res(bboxes, bbox_nums, image_id, label_to_cat_id_map, bias=0):
return det_res
+def strip_mask(mask):
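+ # The mask batch is padded to a common size with -1; recover the real
+ # image height/width by counting the non-padding entries of one
+ # column/row, then crop the padding away.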
+ row = mask[0, 0, :]
+ col = mask[0, :, 0]
+ im_h = len(col) - np.count_nonzero(col == -1)
+ im_w = len(row) - np.count_nonzero(row == -1)
+ return mask[:, :im_h, :im_w]
+
+
def get_seg_res(masks, bboxes, mask_nums, image_id, label_to_cat_id_map):
import pycocotools.mask as mask_util
seg_res = []
@@ -72,8 +80,10 @@ def get_seg_res(masks, bboxes, mask_nums, image_id, label_to_cat_id_map):
for i in range(len(mask_nums)):
cur_image_id = int(image_id[i][0])
det_nums = mask_nums[i]
+ mask_i = masks[k:k + det_nums]
+ mask_i = strip_mask(mask_i)
for j in range(det_nums):
- mask = masks[k].astype(np.uint8)
+ mask = mask_i[j].astype(np.uint8)
score = float(bboxes[k][1])
label = int(bboxes[k][0])
k = k + 1
diff --git a/paddlers/models/ppdet/metrics/keypoint_metrics.py b/paddlers/models/ppdet/metrics/keypoint_metrics.py
index 54eeda2..6e10a0c 100644
--- a/paddlers/models/ppdet/metrics/keypoint_metrics.py
+++ b/paddlers/models/ppdet/metrics/keypoint_metrics.py
@@ -1,21 +1,22 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
import os
import json
from collections import defaultdict, OrderedDict
import numpy as np
+import paddle
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from ..modeling.keypoint_utils import oks_nms
@@ -70,15 +71,23 @@ class KeyPointTopDownCOCOEval(object):
self.results['all_preds'][self.idx:self.idx + num_images, :, 0:
3] = kpts[:, :, 0:3]
self.results['all_boxes'][self.idx:self.idx + num_images, 0:2] = inputs[
- 'center'].numpy()[:, 0:2]
+ 'center'].numpy()[:, 0:2] if isinstance(
+ inputs['center'], paddle.Tensor) else inputs['center'][:, 0:2]
self.results['all_boxes'][self.idx:self.idx + num_images, 2:4] = inputs[
- 'scale'].numpy()[:, 0:2]
+ 'scale'].numpy()[:, 0:2] if isinstance(
+ inputs['scale'], paddle.Tensor) else inputs['scale'][:, 0:2]
self.results['all_boxes'][self.idx:self.idx + num_images, 4] = np.prod(
- inputs['scale'].numpy() * 200, 1)
- self.results['all_boxes'][self.idx:self.idx + num_images,
- 5] = np.squeeze(inputs['score'].numpy())
- self.results['image_path'].extend(inputs['im_id'].numpy())
-
+ inputs['scale'].numpy() * 200,
+ 1) if isinstance(inputs['scale'], paddle.Tensor) else np.prod(
+ inputs['scale'] * 200, 1)
+ self.results['all_boxes'][
+ self.idx:self.idx + num_images,
+ 5] = np.squeeze(inputs['score'].numpy()) if isinstance(
+ inputs['score'], paddle.Tensor) else np.squeeze(inputs['score'])
+ if isinstance(inputs['im_id'], paddle.Tensor):
+ self.results['image_path'].extend(inputs['im_id'].numpy())
+ else:
+ self.results['image_path'].extend(inputs['im_id'])
self.idx += num_images
def _write_coco_keypoint_results(self, keypoints):
diff --git a/paddlers/models/ppdet/metrics/map_utils.py b/paddlers/models/ppdet/metrics/map_utils.py
index a7d786e..32c7020 100644
--- a/paddlers/models/ppdet/metrics/map_utils.py
+++ b/paddlers/models/ppdet/metrics/map_utils.py
@@ -22,7 +22,7 @@ import sys
import numpy as np
import itertools
import paddle
-from paddlers.models.ppdet.modeling.bbox_utils import poly2rbox, rbox2poly_np
+from paddlers.models.ppdet.modeling.rbox_utils import poly2rbox_np
from paddlers.models.ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
@@ -91,15 +91,13 @@ def jaccard_overlap(pred, gt, is_bbox_normalized=False):
return overlap
-def calc_rbox_iou(pred, gt_rbox):
+def calc_rbox_iou(pred, gt_poly):
"""
calc iou between rotated bbox
"""
# calc iou of bounding box for speedup
- pred = np.array(pred, np.float32).reshape(-1, 8)
- pred = pred.reshape(-1, 2)
- gt_poly = rbox2poly_np(np.array(gt_rbox).reshape(-1, 5))[0]
- gt_poly = gt_poly.reshape(-1, 2)
+ pred = np.array(pred, np.float32).reshape(-1, 2)
+ gt_poly = np.array(gt_poly, np.float32).reshape(-1, 2)
pred_rect = [
np.min(pred[:, 0]), np.min(pred[:, 1]), np.max(pred[:, 0]),
np.max(pred[:, 1])
@@ -114,20 +112,15 @@ def calc_rbox_iou(pred, gt_rbox):
return iou
# calc rbox iou
- pred = pred.reshape(-1, 8)
-
- pred = np.array(pred, np.float32).reshape(-1, 8)
- pred_rbox = poly2rbox(pred)
- pred_rbox = pred_rbox.reshape(-1, 5)
- pred_rbox = pred_rbox.reshape(-1, 5)
+ pred_rbox = poly2rbox_np(pred.reshape(-1, 8)).reshape(-1, 5)
+ gt_rbox = poly2rbox_np(gt_poly.reshape(-1, 8)).reshape(-1, 5)
try:
- from rbox_iou_ops import rbox_iou
+ from ext_op import rbox_iou
except Exception as e:
- print("import custom_ops error, try install rbox_iou_ops " \
+ print("import custom_ops error, try install ext_op " \
"following ppdet/ext_op/README.md", e)
sys.stdout.flush()
sys.exit(-1)
- gt_rbox = np.array(gt_rbox, np.float32).reshape(-1, 5)
pd_gt_rbox = paddle.to_tensor(gt_rbox, dtype='float32')
pd_pred_rbox = paddle.to_tensor(pred_rbox, dtype='float32')
iou = rbox_iou(pd_gt_rbox, pd_pred_rbox)
@@ -138,8 +131,7 @@ def calc_rbox_iou(pred, gt_rbox):
def prune_zero_padding(gt_box, gt_label, difficult=None):
valid_cnt = 0
for i in range(len(gt_box)):
- if gt_box[i, 0] == 0 and gt_box[i, 1] == 0 and \
- gt_box[i, 2] == 0 and gt_box[i, 3] == 0:
+ if (gt_box[i] == 0).all():
break
valid_cnt += 1
return (gt_box[:valid_cnt], gt_label[:valid_cnt], difficult[:valid_cnt]
@@ -154,8 +146,8 @@ class DetectionMAP(object):
Args:
class_num (int): The class number.
overlap_thresh (float): The threshold of overlap
- ratio between prediction bounding box and
- ground truth bounding box for deciding
+ ratio between prediction bounding box and
+ ground truth bounding box for deciding
true/false positive. Default 0.5.
map_type (str): Calculation method of mean average
precision, currently support '11point' and
@@ -212,7 +204,7 @@ class DetectionMAP(object):
max_overlap = -1.0
for i, gl in enumerate(gt_label):
if int(gl) == int(l):
- if len(gt_box[i]) == 5:
+ if len(gt_box[i]) == 8:
overlap = calc_rbox_iou(pred, gt_box[i])
else:
overlap = jaccard_overlap(pred, gt_box[i],
@@ -363,7 +355,7 @@ def ap_per_class(tp, conf, pred_cls, target_cls):
"""
Computes the average precision, given the recall and precision curves.
Method originally from https://github.com/rafaelpadilla/Object-Detection-Metrics.
-
+
Args:
tp (list): True positives.
conf (list): Objectness value from 0-1.
@@ -417,7 +409,7 @@ def compute_ap(recall, precision):
"""
Computes the average precision, given the recall and precision curves.
Code originally from https://github.com/rbgirshick/py-faster-rcnn.
-
+
Args:
recall (list): The recall curve.
precision (list): The precision curve.
diff --git a/paddlers/models/ppdet/metrics/mcmot_metrics.py b/paddlers/models/ppdet/metrics/mcmot_metrics.py
index 75bbca5..50c6aa3 100644
--- a/paddlers/models/ppdet/metrics/mcmot_metrics.py
+++ b/paddlers/models/ppdet/metrics/mcmot_metrics.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
@@ -21,18 +21,21 @@ import copy
import sys
import math
from collections import defaultdict
-from motmetrics.math_util import quiet_divide
import numpy as np
import pandas as pd
-import paddle
-import paddle.nn.functional as F
from .metrics import Metric
-import motmetrics as mm
-import openpyxl
-metrics = mm.metrics.motchallenge_metrics
-mh = mm.metrics.create()
+try:
+ import motmetrics as mm
+ from motmetrics.math_util import quiet_divide
+ metrics = mm.metrics.motchallenge_metrics
+ mh = mm.metrics.create()
+except:
+ print(
+ 'Warning: Unable to use MCMOT metric, please install motmetrics, for example: `pip install motmetrics`, see https://github.com/longcw/py-motmetrics'
+ )
+ pass
from paddlers.models.ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
@@ -78,7 +81,7 @@ NAME_MAP = {
def parse_accs_metrics(seq_acc, index_name, verbose=False):
"""
- Parse the evaluation indicators of multiple MOTAccumulator
+ Parse the evaluation indicators of multiple MOTAccumulator
"""
mh = mm.metrics.create()
summary = MCMOTEvaluator.get_summary(seq_acc, index_name, METRICS_LIST)
@@ -302,24 +305,30 @@ class MCMOTEvaluator(object):
self.num_classes = num_classes
self.load_annotations()
+ try:
+ import motmetrics as mm
+ mm.lap.default_solver = 'lap'
+ except Exception as e:
+ raise RuntimeError(
+ 'Unable to use MCMOT metric, please install motmetrics, for example: `pip install motmetrics`, see https://github.com/longcw/py-motmetrics'
+ )
self.reset_accumulator()
self.class_accs = []
def load_annotations(self):
assert self.data_type == 'mcmot'
- self.gt_filename = os.path.join(self.data_root, '../', '../',
- 'sequences',
+ self.gt_filename = os.path.join(self.data_root, '../', 'sequences',
'{}.txt'.format(self.seq_name))
+ if not os.path.exists(self.gt_filename):
+ logger.warning(
+ "gt_filename '{}' of MCMOTEvaluator does not exist, so the MOTA will be -INF.".
+ format(self.gt_filename))
def reset_accumulator(self):
- import motmetrics as mm
- mm.lap.default_solver = 'lap'
self.acc = mm.MOTAccumulator(auto_id=True)
def eval_frame_dict(self, trk_objs, gt_objs, rtn_events=False, union=False):
- import motmetrics as mm
- mm.lap.default_solver = 'lap'
if union:
trk_tlwhs, trk_ids, trk_cls = unzip_objs_cls(trk_objs)[:3]
gt_tlwhs, gt_ids, gt_cls = unzip_objs_cls(gt_objs)[:3]
@@ -393,9 +402,6 @@ class MCMOTEvaluator(object):
names,
metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1',
'precision', 'recall')):
- import motmetrics as mm
- mm.lap.default_solver = 'lap'
-
names = copy.deepcopy(names)
if metrics is None:
metrics = mm.metrics.motchallenge_metrics
diff --git a/paddlers/models/ppdet/metrics/metrics.py b/paddlers/models/ppdet/metrics/metrics.py
index a23e5cc..577bf6b 100644
--- a/paddlers/models/ppdet/metrics/metrics.py
+++ b/paddlers/models/ppdet/metrics/metrics.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
@@ -22,11 +22,14 @@ import json
import paddle
import numpy as np
import typing
+from collections import defaultdict
+from pathlib import Path
from .map_utils import prune_zero_padding, DetectionMAP
from .coco_utils import get_infer_results, cocoapi_eval
from .widerface_utils import face_eval_run
from paddlers.models.ppdet.data.source.category import get_categories
+from paddlers.models.ppdet.modeling.rbox_utils import poly2rbox_np
from paddlers.models.ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
@@ -69,8 +72,6 @@ class Metric(paddle.metric.Metric):
class COCOMetric(Metric):
def __init__(self, anno_file, **kwargs):
- assert os.path.isfile(anno_file), \
- "anno_file {} not a file".format(anno_file)
self.anno_file = anno_file
self.clsid2catid = kwargs.get('clsid2catid', None)
if self.clsid2catid is None:
@@ -81,6 +82,14 @@ class COCOMetric(Metric):
self.bias = kwargs.get('bias', 0)
self.save_prediction_only = kwargs.get('save_prediction_only', False)
self.iou_type = kwargs.get('IouType', 'bbox')
+
+ if not self.save_prediction_only:
+ assert os.path.isfile(anno_file), \
+ "anno_file {} not a file".format(anno_file)
+
+ if self.output_eval is not None:
+ Path(self.output_eval).mkdir(exist_ok=True)
+
self.reset()
def reset(self):
@@ -218,7 +227,9 @@ class VOCMetric(Metric):
map_type='11point',
is_bbox_normalized=False,
evaluate_difficult=False,
- classwise=False):
+ classwise=False,
+ output_eval=None,
+ save_prediction_only=False):
assert os.path.isfile(label_list), \
"label_list {} not a file".format(label_list)
self.clsid2catid, self.catid2name = get_categories('VOC', label_list)
@@ -226,6 +237,8 @@ class VOCMetric(Metric):
self.overlap_thresh = overlap_thresh
self.map_type = map_type
self.evaluate_difficult = evaluate_difficult
+ self.output_eval = output_eval
+ self.save_prediction_only = save_prediction_only
self.detection_map = DetectionMAP(
class_num=class_num,
overlap_thresh=overlap_thresh,
@@ -238,34 +251,52 @@ class VOCMetric(Metric):
self.reset()
def reset(self):
+ self.results = {'bbox': [], 'score': [], 'label': []}
self.detection_map.reset()
def update(self, inputs, outputs):
- bbox_np = outputs['bbox'].numpy()
+ bbox_np = outputs['bbox'].numpy() if isinstance(
+ outputs['bbox'], paddle.Tensor) else outputs['bbox']
bboxes = bbox_np[:, 2:]
scores = bbox_np[:, 1]
labels = bbox_np[:, 0]
- bbox_lengths = outputs['bbox_num'].numpy()
+ bbox_lengths = outputs['bbox_num'].numpy() if isinstance(
+ outputs['bbox_num'], paddle.Tensor) else outputs['bbox_num']
+
+ self.results['bbox'].append(bboxes.tolist())
+ self.results['score'].append(scores.tolist())
+ self.results['label'].append(labels.tolist())
if bboxes.shape == (1, 1) or bboxes is None:
return
+ if self.save_prediction_only:
+ return
+
gt_boxes = inputs['gt_bbox']
gt_labels = inputs['gt_class']
difficults = inputs['difficult'] if not self.evaluate_difficult \
else None
- scale_factor = inputs['scale_factor'].numpy(
- ) if 'scale_factor' in inputs else np.ones(
- (gt_boxes.shape[0], 2)).astype('float32')
+ if 'scale_factor' in inputs:
+ scale_factor = inputs['scale_factor'].numpy() if isinstance(
+ inputs['scale_factor'],
+ paddle.Tensor) else inputs['scale_factor']
+ else:
+ scale_factor = np.ones((gt_boxes.shape[0], 2)).astype('float32')
bbox_idx = 0
for i in range(len(gt_boxes)):
- gt_box = gt_boxes[i].numpy()
+ gt_box = gt_boxes[i].numpy() if isinstance(
+ gt_boxes[i], paddle.Tensor) else gt_boxes[i]
h, w = scale_factor[i]
gt_box = gt_box / np.array([w, h, w, h])
- gt_label = gt_labels[i].numpy()
- difficult = None if difficults is None \
- else difficults[i].numpy()
+ gt_label = gt_labels[i].numpy() if isinstance(
+ gt_labels[i], paddle.Tensor) else gt_labels[i]
+ if difficults is not None:
+ difficult = difficults[i].numpy() if isinstance(
+ difficults[i], paddle.Tensor) else difficults[i]
+ else:
+ difficult = None
bbox_num = bbox_lengths[i]
bbox = bboxes[bbox_idx:bbox_idx + bbox_num]
score = scores[bbox_idx:bbox_idx + bbox_num]
@@ -277,6 +308,15 @@ class VOCMetric(Metric):
bbox_idx += bbox_num
def accumulate(self):
+ output = "bbox.json"
+ if self.output_eval:
+ output = os.path.join(self.output_eval, output)
+ with open(output, 'w') as f:
+ json.dump(self.results, f)
+ logger.info('The bbox result is saved to {}.'.format(output))
+ if self.save_prediction_only:
+ return
+
logger.info("Accumulating evaluatation results...")
self.detection_map.accumulate()
@@ -309,25 +349,16 @@ class WiderFaceMetric(Metric):
class RBoxMetric(Metric):
def __init__(self, anno_file, **kwargs):
- assert os.path.isfile(anno_file), \
- "anno_file {} not a file".format(anno_file)
- assert os.path.exists(anno_file), "anno_file {} not exists".format(
- anno_file)
self.anno_file = anno_file
- self.gt_anno = json.load(open(self.anno_file))
- cats = self.gt_anno['categories']
- self.clsid2catid = {i: cat['id'] for i, cat in enumerate(cats)}
- self.catid2clsid = {cat['id']: i for i, cat in enumerate(cats)}
- self.catid2name = {cat['id']: cat['name'] for cat in cats}
+ self.clsid2catid, self.catid2name = get_categories('COCO', anno_file)
+ self.catid2clsid = {v: k for k, v in self.clsid2catid.items()}
self.classwise = kwargs.get('classwise', False)
self.output_eval = kwargs.get('output_eval', None)
- # TODO: bias should be unified
- self.bias = kwargs.get('bias', 0)
self.save_prediction_only = kwargs.get('save_prediction_only', False)
- self.iou_type = kwargs.get('IouType', 'bbox')
self.overlap_thresh = kwargs.get('overlap_thresh', 0.5)
self.map_type = kwargs.get('map_type', '11point')
self.evaluate_difficult = kwargs.get('evaluate_difficult', False)
+ self.imid2path = kwargs.get('imid2path', None)
class_num = len(self.catid2name)
self.detection_map = DetectionMAP(
class_num=class_num,
@@ -341,7 +372,7 @@ class RBoxMetric(Metric):
self.reset()
def reset(self):
- self.result_bbox = []
+ self.results = []
self.detection_map.reset()
def update(self, inputs, outputs):
@@ -351,43 +382,83 @@ class RBoxMetric(Metric):
outs[k] = v.numpy() if isinstance(v, paddle.Tensor) else v
im_id = inputs['im_id']
- outs['im_id'] = im_id.numpy() if isinstance(im_id,
- paddle.Tensor) else im_id
+ im_id = im_id.numpy() if isinstance(im_id, paddle.Tensor) else im_id
+ outs['im_id'] = im_id
- infer_results = get_infer_results(
- outs, self.clsid2catid, bias=self.bias)
- self.result_bbox += infer_results[
- 'bbox'] if 'bbox' in infer_results else []
- bbox = [b['bbox'] for b in self.result_bbox]
- score = [b['score'] for b in self.result_bbox]
- label = [b['category_id'] for b in self.result_bbox]
- label = [self.catid2clsid[e] for e in label]
- gt_box = [
- e['bbox'] for e in self.gt_anno['annotations']
- if e['image_id'] == outs['im_id']
- ]
- gt_label = [
- e['category_id'] for e in self.gt_anno['annotations']
- if e['image_id'] == outs['im_id']
- ]
- gt_label = [self.catid2clsid[e] for e in gt_label]
- self.detection_map.update(bbox, score, label, gt_box, gt_label)
+ infer_results = get_infer_results(outs, self.clsid2catid)
+ infer_results = infer_results['bbox'] if 'bbox' in infer_results else []
+ self.results += infer_results
+ if self.save_prediction_only:
+ return
- def accumulate(self):
- if len(self.result_bbox) > 0:
- output = "bbox.json"
- if self.output_eval:
- output = os.path.join(self.output_eval, output)
+ gt_boxes = inputs['gt_poly']
+ gt_labels = inputs['gt_class']
+
+ if 'scale_factor' in inputs:
+ scale_factor = inputs['scale_factor'].numpy() if isinstance(
+ inputs['scale_factor'],
+ paddle.Tensor) else inputs['scale_factor']
+ else:
+ scale_factor = np.ones((gt_boxes.shape[0], 2)).astype('float32')
+
+ for i in range(len(gt_boxes)):
+ gt_box = gt_boxes[i].numpy() if isinstance(
+ gt_boxes[i], paddle.Tensor) else gt_boxes[i]
+ h, w = scale_factor[i]
+ gt_box = gt_box / np.array([w, h, w, h, w, h, w, h])
+ gt_label = gt_labels[i].numpy() if isinstance(
+ gt_labels[i], paddle.Tensor) else gt_labels[i]
+ gt_box, gt_label, _ = prune_zero_padding(gt_box, gt_label)
+ bbox = [
+ res['bbox'] for res in infer_results
+ if int(res['image_id']) == int(im_id[i])
+ ]
+ score = [
+ res['score'] for res in infer_results
+ if int(res['image_id']) == int(im_id[i])
+ ]
+ label = [
+ self.catid2clsid[int(res['category_id'])]
+ for res in infer_results
+ if int(res['image_id']) == int(im_id[i])
+ ]
+ self.detection_map.update(bbox, score, label, gt_box, gt_label)
+
+ def save_results(self, results, output_dir, imid2path):
+ if imid2path:
+ data_dicts = defaultdict(list)
+ for result in results:
+ image_id = result['image_id']
+ data_dicts[image_id].append(result)
+
+ for image_id, image_path in imid2path.items():
+ basename = os.path.splitext(os.path.split(image_path)[-1])[0]
+ output = os.path.join(output_dir, "{}.txt".format(basename))
+ dets = data_dicts.get(image_id, [])
+ with open(output, 'w') as f:
+ for det in dets:
+ catid, bbox, score = det['category_id'], det[
+ 'bbox'], det['score']
+ bbox_pred = '{} {} '.format(self.catid2name[catid],
+ score) + ' '.join(
+ [str(e) for e in bbox])
+ f.write(bbox_pred + '\n')
+
+ logger.info('The bbox result is saved to {}.'.format(output_dir))
+ else:
+ output = os.path.join(output_dir, "bbox.json")
with open(output, 'w') as f:
- json.dump(self.result_bbox, f)
- logger.info('The bbox result is saved to bbox.json.')
+ json.dump(results, f)
- if self.save_prediction_only:
- logger.info('The bbox result is saved to {} and do not '
- 'evaluate the mAP.'.format(output))
- else:
- logger.info("Accumulating evaluatation results...")
- self.detection_map.accumulate()
+ logger.info('The bbox result is saved to {}.'.format(output))
+
+ def accumulate(self):
+ if self.output_eval:
+ self.save_results(self.results, self.output_eval, self.imid2path)
+
+ if not self.save_prediction_only:
+ logger.info("Accumulating evaluatation results...")
+ self.detection_map.accumulate()
def log(self):
map_stat = 100. * self.detection_map.get_map()
diff --git a/paddlers/models/ppdet/metrics/mot_metrics.py b/paddlers/models/ppdet/metrics/mot_metrics.py
index 1935840..e369edf 100644
--- a/paddlers/models/ppdet/metrics/mot_metrics.py
+++ b/paddlers/models/ppdet/metrics/mot_metrics.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
@@ -22,13 +22,21 @@ import sys
import math
from collections import defaultdict
import numpy as np
-import paddle
-import paddle.nn.functional as F
+
from paddlers.models.ppdet.modeling.bbox_utils import bbox_iou_np_expand
from .map_utils import ap_per_class
from .metrics import Metric
from .munkres import Munkres
+try:
+ import motmetrics as mm
+ mm.lap.default_solver = 'lap'
+except:
+ print(
+ 'Warning: Unable to use MOT metric, please install motmetrics, for example: `pip install motmetrics`, see https://github.com/longcw/py-motmetrics'
+ )
+ pass
+
from paddlers.models.ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
@@ -36,8 +44,13 @@ __all__ = ['MOTEvaluator', 'MOTMetric', 'JDEDetMetric', 'KITTIMOTMetric']
def read_mot_results(filename, is_gt=False, is_ignore=False):
- valid_labels = {1}
- ignore_labels = {2, 7, 8, 12} # only in motchallenge datasets like 'MOT16'
+ valid_label = [1]
+ ignore_labels = [2, 7, 8, 12] # only in motchallenge datasets like 'MOT16'
+ if is_gt:
+ logger.info(
+ "In MOT16/17 dataset the valid_label of ground truth is '{}', "
+ "in other dataset it should be '0' for single classs MOT.".format(
+ valid_label[0]))
results_dict = dict()
if os.path.isfile(filename):
with open(filename, 'r') as f:
@@ -50,12 +63,10 @@ def read_mot_results(filename, is_gt=False, is_ignore=False):
continue
results_dict.setdefault(fid, list())
- box_size = float(linelist[4]) * float(linelist[5])
-
if is_gt:
label = int(float(linelist[7]))
mark = int(float(linelist[6]))
- if mark == 0 or label not in valid_labels:
+ if mark == 0 or label not in valid_label:
continue
score = 1
elif is_ignore:
@@ -112,24 +123,31 @@ class MOTEvaluator(object):
self.data_type = data_type
self.load_annotations()
+ try:
+ import motmetrics as mm
+ mm.lap.default_solver = 'lap'
+ except Exception as e:
+ raise RuntimeError(
+ 'Unable to use MOT metric, please install motmetrics, for example: `pip install motmetrics`, see https://github.com/longcw/py-motmetrics'
+ )
self.reset_accumulator()
def load_annotations(self):
assert self.data_type == 'mot'
gt_filename = os.path.join(self.data_root, self.seq_name, 'gt',
'gt.txt')
+ if not os.path.exists(gt_filename):
+ logger.warning(
+ "gt_filename '{}' of MOTEvaluator does not exist, so the MOTA will be -INF.".
+ format(gt_filename))
self.gt_frame_dict = read_mot_results(gt_filename, is_gt=True)
self.gt_ignore_frame_dict = read_mot_results(
gt_filename, is_ignore=True)
def reset_accumulator(self):
- import motmetrics as mm
- mm.lap.default_solver = 'lap'
self.acc = mm.MOTAccumulator(auto_id=True)
def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False):
- import motmetrics as mm
- mm.lap.default_solver = 'lap'
# results
trk_tlwhs = np.copy(trk_tlwhs)
trk_ids = np.copy(trk_ids)
@@ -187,8 +205,6 @@ class MOTEvaluator(object):
names,
metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1',
'precision', 'recall')):
- import motmetrics as mm
- mm.lap.default_solver = 'lap'
names = copy.deepcopy(names)
if metrics is None:
metrics = mm.metrics.motchallenge_metrics
@@ -225,8 +241,6 @@ class MOTMetric(Metric):
self.result_root = result_root
def accumulate(self):
- import motmetrics as mm
- import openpyxl
metrics = mm.metrics.motchallenge_metrics
mh = mm.metrics.create()
summary = self.MOTEvaluator.get_summary(self.accs, self.seqs, metrics)
@@ -422,7 +436,7 @@ class KITTIEvaluation(object):
self.ifn = 0 # number of ignored false negatives
self.ifns = [] # number of ignored false negatives PER SEQUENCE
self.fp = 0 # number of false positives
- # a bit tricky, the number of ignored false negatives and ignored true positives
+ # a bit tricky, the number of ignored false negatives and ignored true positives
# is subtracted, but if both tracker detection and ground truth detection
# are ignored this number is added again to avoid double counting
self.fps = [] # above PER SEQUENCE
@@ -551,7 +565,7 @@ class KITTIEvaluation(object):
"track ids are not unique for sequence %d: frame %d"
% (seq, t_data.frame))
logger.info(
- "track id %d occured at least twice for this frame"
+ "track id %d occurred at least twice for this frame"
% t_data.track_id)
logger.info("Exiting...")
#continue # this allows to evaluate non-unique result files
diff --git a/paddlers/models/ppdet/metrics/munkres.py b/paddlers/models/ppdet/metrics/munkres.py
index 58c95d6..307028b 100644
--- a/paddlers/models/ppdet/metrics/munkres.py
+++ b/paddlers/models/ppdet/metrics/munkres.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is borrow from https://github.com/xingyizhou/CenterTrack/blob/master/src/tools/eval_kitti_track/munkres.py
diff --git a/paddlers/models/ppdet/model_zoo/.gitignore b/paddlers/models/ppdet/model_zoo/.gitignore
new file mode 100644
index 0000000..f296851
--- /dev/null
+++ b/paddlers/models/ppdet/model_zoo/.gitignore
@@ -0,0 +1 @@
+MODEL_ZOO
diff --git a/paddlers/models/ppdet/model_zoo/__init__.py b/paddlers/models/ppdet/model_zoo/__init__.py
index caffdb5..8c3b37d 100644
--- a/paddlers/models/ppdet/model_zoo/__init__.py
+++ b/paddlers/models/ppdet/model_zoo/__init__.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
from . import model_zoo
diff --git a/paddlers/models/ppdet/model_zoo/model_zoo.py b/paddlers/models/ppdet/model_zoo/model_zoo.py
index 2d0dbef..bbc9e9f 100644
--- a/paddlers/models/ppdet/model_zoo/model_zoo.py
+++ b/paddlers/models/ppdet/model_zoo/model_zoo.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
import os.path as osp
diff --git a/paddlers/models/ppdet/model_zoo/tests/__init__.py b/paddlers/models/ppdet/model_zoo/tests/__init__.py
new file mode 100644
index 0000000..5135585
--- /dev/null
+++ b/paddlers/models/ppdet/model_zoo/tests/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/paddlers/models/ppdet/model_zoo/tests/test_get_model.py b/paddlers/models/ppdet/model_zoo/tests/test_get_model.py
new file mode 100644
index 0000000..ee47df5
--- /dev/null
+++ b/paddlers/models/ppdet/model_zoo/tests/test_get_model.py
@@ -0,0 +1,48 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import paddle
+import paddlers.models.ppdet as ppdet
+import unittest
+
+# NOTE: downloading weights takes time, so we choose
+# a small model for unit testing
+MODEL_NAME = 'ppyolo/ppyolo_tiny_650e_coco'
+
+
+class TestGetConfigFile(unittest.TestCase):
+ def test_main(self):
+ try:
+ cfg_file = ppdet.model_zoo.get_config_file(MODEL_NAME)
+ assert os.path.isfile(cfg_file)
+ except:
+ self.assertTrue(False)
+
+
+class TestGetModel(unittest.TestCase):
+ def test_main(self):
+ try:
+ model = ppdet.model_zoo.get_model(MODEL_NAME)
+ assert isinstance(model, paddle.nn.Layer)
+ except:
+ self.assertTrue(False)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/paddlers/models/ppdet/model_zoo/tests/test_list_model.py b/paddlers/models/ppdet/model_zoo/tests/test_list_model.py
new file mode 100644
index 0000000..3dca71f
--- /dev/null
+++ b/paddlers/models/ppdet/model_zoo/tests/test_list_model.py
@@ -0,0 +1,68 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import unittest
+import paddlers.models.ppdet as ppdet
+
+
+class TestListModel(unittest.TestCase):
+ def setUp(self):
+ self._filter = []
+
+ def test_main(self):
+ try:
+ ppdet.model_zoo.list_model(self._filter)
+ self.assertTrue(True)
+ except:
+ self.assertTrue(False)
+
+
+class TestListModelYOLO(TestListModel):
+ def setUp(self):
+ self._filter = ['yolo']
+
+
+class TestListModelRCNN(TestListModel):
+ def setUp(self):
+ self._filter = ['rcnn']
+
+
+class TestListModelSSD(TestListModel):
+ def setUp(self):
+ self._filter = ['ssd']
+
+
+class TestListModelMultiFilter(TestListModel):
+ def setUp(self):
+ self._filter = ['yolo', 'darknet']
+
+
+class TestListModelError(unittest.TestCase):
+ def setUp(self):
+ self._filter = ['xxx']
+
+ def test_main(self):
+ try:
+ ppdet.model_zoo.list_model(self._filter)
+ self.assertTrue(False)
+ except ValueError:
+ self.assertTrue(True)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/paddlers/models/ppdet/modeling/__init__.py b/paddlers/models/ppdet/modeling/__init__.py
index 815d089..823602a 100644
--- a/paddlers/models/ppdet/modeling/__init__.py
+++ b/paddlers/models/ppdet/modeling/__init__.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
import warnings
@@ -29,6 +29,7 @@ from . import reid
from . import mot
from . import transformers
from . import assigners
+from . import rbox_utils
from .ops import *
from .backbones import *
@@ -43,3 +44,4 @@ from .reid import *
from .mot import *
from .transformers import *
from .assigners import *
+from .rbox_utils import *
diff --git a/paddlers/models/ppdet/modeling/architectures/__init__.py b/paddlers/models/ppdet/modeling/architectures/__init__.py
index 8d34367..c4528e6 100644
--- a/paddlers/models/ppdet/modeling/architectures/__init__.py
+++ b/paddlers/models/ppdet/modeling/architectures/__init__.py
@@ -1,10 +1,17 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
from . import meta_arch
from . import faster_rcnn
from . import mask_rcnn
@@ -26,6 +33,9 @@ from . import picodet
from . import detr
from . import sparse_rcnn
from . import tood
+from . import retinanet
+from . import bytetrack
+from . import yolox
from .meta_arch import *
from .faster_rcnn import *
@@ -49,3 +59,6 @@ from .picodet import *
from .detr import *
from .sparse_rcnn import *
from .tood import *
+from .retinanet import *
+from .bytetrack import *
+from .yolox import *
diff --git a/paddlers/models/ppdet/modeling/architectures/bytetrack.py b/paddlers/models/ppdet/modeling/architectures/bytetrack.py
new file mode 100644
index 0000000..435f953
--- /dev/null
+++ b/paddlers/models/ppdet/modeling/architectures/bytetrack.py
@@ -0,0 +1,79 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from paddlers.models.ppdet.core.workspace import register, create
+from .meta_arch import BaseArch
+
+__all__ = ['ByteTrack']
+
+
+@register
+class ByteTrack(BaseArch):
+ """
+ ByteTrack network, see https://arxiv.org/abs/2110.06864
+
+ Args:
+ detector (object): detector model instance
+ reid (object): reid model instance, default None
+ tracker (object): tracker instance
+ """
+ __category__ = 'architecture'
+
+ def __init__(self, detector='YOLOX', reid=None, tracker='JDETracker'):
+ super(ByteTrack, self).__init__()
+ self.detector = detector
+ self.reid = reid
+ self.tracker = tracker
+
+ @classmethod
+ def from_config(cls, cfg, *args, **kwargs):
+ detector = create(cfg['detector'])
+
+ if cfg['reid'] != 'None':
+ reid = create(cfg['reid'])
+ else:
+ reid = None
+
+ tracker = create(cfg['tracker'])
+
+ return {
+ "detector": detector,
+ "reid": reid,
+ "tracker": tracker,
+ }
+
+ def _forward(self):
+ det_outs = self.detector(self.inputs)
+
+ if self.training:
+ return det_outs
+ else:
+ if self.reid is not None:
+ assert 'crops' in self.inputs
+ crops = self.inputs['crops']
+ pred_embs = self.reid(crops)
+ else:
+ pred_embs = None
+ det_outs['embeddings'] = pred_embs
+ return det_outs
+
+ def get_loss(self):
+ return self._forward()
+
+ def get_pred(self):
+ return self._forward()
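
The new ByteTrack architecture returns the raw detector outputs at train time and, at inference, attaches ReID embeddings (or None) under an 'embeddings' key. A minimal sketch of that contract, using hypothetical stub modules (StubDetector and StubReID are illustrative, not part of ppdet):

class StubDetector:
    def __call__(self, inputs):
        # pretend detection output in ppdet's usual dict form
        return {'bbox': [[0, 0.9, 10, 10, 50, 50]], 'bbox_num': [1]}

class StubReID:
    def __call__(self, crops):
        return [[0.1] * 128 for _ in crops]  # one 128-d embedding per crop

def bytetrack_forward(inputs, detector, reid=None, training=False):
    det_outs = detector(inputs)
    if training:
        return det_outs
    det_outs['embeddings'] = reid(inputs['crops']) if reid is not None else None
    return det_outs

outs = bytetrack_forward({'image': None, 'crops': ['crop0']},
                         StubDetector(), StubReID())
print(sorted(outs))  # ['bbox', 'bbox_num', 'embeddings']
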
diff --git a/paddlers/models/ppdet/modeling/architectures/cascade_rcnn.py b/paddlers/models/ppdet/modeling/architectures/cascade_rcnn.py
index db1a100..6e8c330 100644
--- a/paddlers/models/ppdet/modeling/architectures/cascade_rcnn.py
+++ b/paddlers/models/ppdet/modeling/architectures/cascade_rcnn.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
@@ -111,14 +111,14 @@ class CascadeRCNN(BaseArch):
bbox, bbox_num = self.bbox_post_process(
preds, (refined_rois, rois_num), im_shape, scale_factor)
# rescale the prediction back to origin image
- bbox_pred = self.bbox_post_process.get_pred(bbox, bbox_num,
- im_shape, scale_factor)
+ bbox, bbox_pred, bbox_num = self.bbox_post_process.get_pred(
+ bbox, bbox_num, im_shape, scale_factor)
if not self.with_mask:
return bbox_pred, bbox_num, None
mask_out = self.mask_head(body_feats, bbox, bbox_num, self.inputs)
origin_shape = self.bbox_post_process.get_origin_shape()
- mask_pred = self.mask_post_process(mask_out[:, 0, :, :], bbox_pred,
- bbox_num, origin_shape)
+ mask_pred = self.mask_post_process(mask_out, bbox_pred, bbox_num,
+ origin_shape)
return bbox_pred, bbox_num, mask_pred
def get_loss(self, ):
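
As of this change, bbox_post_process.get_pred returns three values (the boxes in network coordinates, the rescaled prediction, and bbox_num), so callers unpack a 3-tuple instead of a single tensor; the same pattern recurs in faster_rcnn.py and mask_rcnn.py below. A hedged sketch of the new calling convention with a hypothetical stand-in for the post-process object:

def get_pred_stub(bbox, bbox_num, im_shape, scale_factor):
    # hypothetical stand-in for BBoxPostProcess.get_pred: rescale boxes
    # back to the original image and return all three pieces
    bbox_pred = [[c / scale_factor for c in box] for box in bbox]
    return bbox, bbox_pred, bbox_num

bbox, bbox_pred, bbox_num = get_pred_stub(
    [[2.0, 2.0, 8.0, 8.0]], [1], (16, 16), 2.0)
print(bbox_pred)  # [[1.0, 1.0, 4.0, 4.0]]
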
diff --git a/paddlers/models/ppdet/modeling/architectures/centernet.py b/paddlers/models/ppdet/modeling/architectures/centernet.py
old mode 100644
new mode 100755
index e534fdd..52916ac
--- a/paddlers/models/ppdet/modeling/architectures/centernet.py
+++ b/paddlers/models/ppdet/modeling/architectures/centernet.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
diff --git a/paddlers/models/ppdet/modeling/architectures/deepsort.py b/paddlers/models/ppdet/modeling/architectures/deepsort.py
index 14ed6cd..3caf4fd 100644
--- a/paddlers/models/ppdet/modeling/architectures/deepsort.py
+++ b/paddlers/models/ppdet/modeling/architectures/deepsort.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
@@ -62,8 +62,9 @@ class DeepSORT(BaseArch):
def _forward(self):
crops = self.inputs['crops']
- features = self.reid(crops)
- return features
+ outs = {}
+ outs['embeddings'] = self.reid(crops)
+ return outs
def get_pred(self):
return self._forward()
diff --git a/paddlers/models/ppdet/modeling/architectures/fairmot.py b/paddlers/models/ppdet/modeling/architectures/fairmot.py
old mode 100644
new mode 100755
index cf1127f..897bc46
--- a/paddlers/models/ppdet/modeling/architectures/fairmot.py
+++ b/paddlers/models/ppdet/modeling/architectures/fairmot.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
diff --git a/paddlers/models/ppdet/modeling/architectures/faster_rcnn.py b/paddlers/models/ppdet/modeling/architectures/faster_rcnn.py
index 23fe0e0..49e5ad5 100644
--- a/paddlers/models/ppdet/modeling/architectures/faster_rcnn.py
+++ b/paddlers/models/ppdet/modeling/architectures/faster_rcnn.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
@@ -87,8 +87,8 @@ class FasterRCNN(BaseArch):
im_shape, scale_factor)
# rescale the prediction back to origin image
- bbox_pred = self.bbox_post_process.get_pred(bbox, bbox_num,
- im_shape, scale_factor)
+ bboxes, bbox_pred, bbox_num = self.bbox_post_process.get_pred(
+ bbox, bbox_num, im_shape, scale_factor)
return bbox_pred, bbox_num
def get_loss(self, ):
diff --git a/paddlers/models/ppdet/modeling/architectures/fcos.py b/paddlers/models/ppdet/modeling/architectures/fcos.py
index c851416..5a3447b 100644
--- a/paddlers/models/ppdet/modeling/architectures/fcos.py
+++ b/paddlers/models/ppdet/modeling/architectures/fcos.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
diff --git a/paddlers/models/ppdet/modeling/architectures/gfl.py b/paddlers/models/ppdet/modeling/architectures/gfl.py
index fabd3d6..86471ea 100644
--- a/paddlers/models/ppdet/modeling/architectures/gfl.py
+++ b/paddlers/models/ppdet/modeling/architectures/gfl.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
diff --git a/paddlers/models/ppdet/modeling/architectures/jde.py b/paddlers/models/ppdet/modeling/architectures/jde.py
index 7210eeb..b0fd65c 100644
--- a/paddlers/models/ppdet/modeling/architectures/jde.py
+++ b/paddlers/models/ppdet/modeling/architectures/jde.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
diff --git a/paddlers/models/ppdet/modeling/architectures/keypoint_hrhrnet.py b/paddlers/models/ppdet/modeling/architectures/keypoint_hrhrnet.py
index 7c81727..cc980ee 100644
--- a/paddlers/models/ppdet/modeling/architectures/keypoint_hrhrnet.py
+++ b/paddlers/models/ppdet/modeling/architectures/keypoint_hrhrnet.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
@@ -153,7 +153,7 @@ class HrHRNetPostProcess(object):
heat_thresh (float): topk values below this threshold will be ignored
tag_thresh (float): coords whose sampled tagmap value falls below this threshold are grouped as the same person at init
- inputs(list[heatmap]): the output list of modle, [heatmap, heatmap_maxpool, tagmap], heatmap_maxpool used to get topk
+ inputs(list[heatmap]): the output list of model, [heatmap, heatmap_maxpool, tagmap], heatmap_maxpool used to get topk
original_height, original_width (float): the original image size
'''
diff --git a/paddlers/models/ppdet/modeling/architectures/keypoint_hrnet.py b/paddlers/models/ppdet/modeling/architectures/keypoint_hrnet.py
index 254a15b..6a4751d 100644
--- a/paddlers/models/ppdet/modeling/architectures/keypoint_hrnet.py
+++ b/paddlers/models/ppdet/modeling/architectures/keypoint_hrnet.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
diff --git a/paddlers/models/ppdet/modeling/architectures/mask_rcnn.py b/paddlers/models/ppdet/modeling/architectures/mask_rcnn.py
index 186cce6..f3a949e 100644
--- a/paddlers/models/ppdet/modeling/architectures/mask_rcnn.py
+++ b/paddlers/models/ppdet/modeling/architectures/mask_rcnn.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
@@ -112,11 +112,11 @@ class MaskRCNN(BaseArch):
body_feats, bbox, bbox_num, self.inputs, feat_func=feat_func)
# rescale the prediction back to origin image
- bbox_pred = self.bbox_post_process.get_pred(bbox, bbox_num,
- im_shape, scale_factor)
+ bbox, bbox_pred, bbox_num = self.bbox_post_process.get_pred(
+ bbox, bbox_num, im_shape, scale_factor)
origin_shape = self.bbox_post_process.get_origin_shape()
- mask_pred = self.mask_post_process(mask_out[:, 0, :, :], bbox_pred,
- bbox_num, origin_shape)
+ mask_pred = self.mask_post_process(mask_out, bbox_pred, bbox_num,
+ origin_shape)
return bbox_pred, bbox_num, mask_pred
def get_loss(self, ):
diff --git a/paddlers/models/ppdet/modeling/architectures/meta_arch.py b/paddlers/models/ppdet/modeling/architectures/meta_arch.py
index 77db52b..f32b542 100644
--- a/paddlers/models/ppdet/modeling/architectures/meta_arch.py
+++ b/paddlers/models/ppdet/modeling/architectures/meta_arch.py
@@ -22,22 +22,23 @@ class BaseArch(nn.Layer):
self.fuse_norm = False
def load_meanstd(self, cfg_transform):
- self.scale = 1.
- self.mean = paddle.to_tensor([0.485, 0.456, 0.406]).reshape(
- (1, 3, 1, 1))
- self.std = paddle.to_tensor([0.229, 0.224, 0.225]).reshape((1, 3, 1, 1))
+ scale = 1.
+ mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
+ std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
for item in cfg_transform:
if 'NormalizeImage' in item:
- self.mean = paddle.to_tensor(item['NormalizeImage'][
- 'mean']).reshape((1, 3, 1, 1))
- self.std = paddle.to_tensor(item['NormalizeImage'][
- 'std']).reshape((1, 3, 1, 1))
+ mean = np.array(
+ item['NormalizeImage']['mean'], dtype=np.float32)
+ std = np.array(item['NormalizeImage']['std'], dtype=np.float32)
if item['NormalizeImage'].get('is_scale', True):
- self.scale = 1. / 255.
+ scale = 1. / 255.
break
if self.data_format == 'NHWC':
- self.mean = self.mean.reshape(1, 1, 1, 3)
- self.std = self.std.reshape(1, 1, 1, 3)
+ self.scale = paddle.to_tensor(scale / std).reshape((1, 1, 1, 3))
+ self.bias = paddle.to_tensor(-mean / std).reshape((1, 1, 1, 3))
+ else:
+ self.scale = paddle.to_tensor(scale / std).reshape((1, 3, 1, 1))
+ self.bias = paddle.to_tensor(-mean / std).reshape((1, 3, 1, 1))
def forward(self, inputs):
if self.data_format == 'NHWC':
@@ -46,7 +47,7 @@ class BaseArch(nn.Layer):
if self.fuse_norm:
image = inputs['image']
- self.inputs['image'] = (image * self.scale - self.mean) / self.std
+ self.inputs['image'] = image * self.scale + self.bias
self.inputs['im_shape'] = inputs['im_shape']
self.inputs['scale_factor'] = inputs['scale_factor']
else:
@@ -63,10 +64,14 @@ class BaseArch(nn.Layer):
inputs_list.append(inputs)
else:
inputs_list.extend(inputs)
-
outs = []
for inp in inputs_list:
- self.inputs = inp
+ if self.fuse_norm:
+ self.inputs['image'] = inp['image'] * self.scale + self.bias
+ self.inputs['im_shape'] = inp['im_shape']
+ self.inputs['scale_factor'] = inp['scale_factor']
+ else:
+ self.inputs = inp
outs.append(self.get_pred())
# multi-scale test
@@ -124,16 +129,3 @@ class BaseArch(nn.Layer):
def get_pred(self, ):
raise NotImplementedError("Should implement get_pred method!")
-
- @classmethod
- def convert_sync_batchnorm(cls, layer):
- layer_output = layer
- if getattr(layer, 'norm_type', None) == 'sync_bn':
- layer_output = nn.SyncBatchNorm.convert_sync_batchnorm(layer)
- else:
- for name, sublayer in layer.named_children():
- layer_output.add_sublayer(name,
- cls.convert_sync_batchnorm(sublayer))
-
- del layer
- return layer_output
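
The load_meanstd refactor folds mean/std normalization into a single affine transform: since (x * s - mean) / std == x * (s / std) + (-mean / std), precomputing scale = s / std and bias = -mean / std turns per-image normalization into one fused multiply-add. A quick numerical check of that algebra with numpy (NCHW shapes, ImageNet statistics as in the defaults):

import numpy as np

s = 1.0 / 255.0
mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
std = np.array([0.229, 0.224, 0.225], dtype=np.float32)

image = np.random.rand(1, 3, 4, 4).astype(np.float32) * 255.0

old = (image * s - mean.reshape(1, 3, 1, 1)) / std.reshape(1, 3, 1, 1)
scale = (s / std).reshape(1, 3, 1, 1)
bias = (-mean / std).reshape(1, 3, 1, 1)
new = image * scale + bias

print(np.allclose(old, new, atol=1e-6))  # True
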
diff --git a/paddlers/models/ppdet/modeling/architectures/picodet.py b/paddlers/models/ppdet/modeling/architectures/picodet.py
index baff894..f2a091b 100644
--- a/paddlers/models/ppdet/modeling/architectures/picodet.py
+++ b/paddlers/models/ppdet/modeling/architectures/picodet.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
@@ -41,7 +41,8 @@ class PicoDet(BaseArch):
self.backbone = backbone
self.neck = neck
self.head = head
- self.deploy = False
+ self.export_post_process = True
+ self.export_nms = True
@classmethod
def from_config(cls, cfg, *args, **kwargs):
@@ -62,14 +63,13 @@ class PicoDet(BaseArch):
def _forward(self):
body_feats = self.backbone(self.inputs)
fpn_feats = self.neck(body_feats)
- head_outs = self.head(fpn_feats, self.deploy)
- if self.training or self.deploy:
+ head_outs = self.head(fpn_feats, self.export_post_process)
+ if self.training or not self.export_post_process:
return head_outs, None
else:
- im_shape = self.inputs['im_shape']
scale_factor = self.inputs['scale_factor']
- bboxes, bbox_num = self.head.post_process(head_outs, im_shape,
- scale_factor)
+ bboxes, bbox_num = self.head.post_process(
+ head_outs, scale_factor, export_nms=self.export_nms)
return bboxes, bbox_num
def get_loss(self, ):
@@ -83,9 +83,13 @@ class PicoDet(BaseArch):
return loss
def get_pred(self):
- if self.deploy:
+ if not self.export_post_process:
return {'picodet': self._forward()[0]}
- else:
+ elif self.export_nms:
bbox_pred, bbox_num = self._forward()
output = {'bbox': bbox_pred, 'bbox_num': bbox_num}
return output
+ else:
+ bboxes, mlvl_scores = self._forward()
+ output = {'bbox': bboxes, 'scores': mlvl_scores}
+ return output
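
PicoDet's get_pred now distinguishes three export modes instead of the single deploy flag. A hedged decision-table sketch (the flags mirror the new attributes; the forward outputs are placeholders):

def picodet_get_pred(forward_outs, export_post_process=True, export_nms=True):
    # forward_outs mimics PicoDet._forward()'s return value in each mode
    if not export_post_process:
        return {'picodet': forward_outs[0]}        # raw head outputs
    if export_nms:
        bbox_pred, bbox_num = forward_outs
        return {'bbox': bbox_pred, 'bbox_num': bbox_num}
    bboxes, mlvl_scores = forward_outs
    return {'bbox': bboxes, 'scores': mlvl_scores}  # NMS left to the deploy side

print(picodet_get_pred(('raw', None), export_post_process=False))
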
diff --git a/paddlers/models/ppdet/modeling/architectures/retinanet.py b/paddlers/models/ppdet/modeling/architectures/retinanet.py
new file mode 100644
index 0000000..fcba467
--- /dev/null
+++ b/paddlers/models/ppdet/modeling/architectures/retinanet.py
@@ -0,0 +1,68 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from paddlers.models.ppdet.core.workspace import register, create
+from .meta_arch import BaseArch
+import paddle
+
+__all__ = ['RetinaNet']
+
+
+@register
+class RetinaNet(BaseArch):
+ __category__ = 'architecture'
+
+ def __init__(self, backbone, neck, head):
+ super(RetinaNet, self).__init__()
+ self.backbone = backbone
+ self.neck = neck
+ self.head = head
+
+ @classmethod
+ def from_config(cls, cfg, *args, **kwargs):
+ backbone = create(cfg['backbone'])
+
+ kwargs = {'input_shape': backbone.out_shape}
+ neck = create(cfg['neck'], **kwargs)
+
+ kwargs = {'input_shape': neck.out_shape}
+ head = create(cfg['head'], **kwargs)
+
+ return {
+ 'backbone': backbone,
+ 'neck': neck,
+ 'head': head,
+ }
+
+ def _forward(self):
+ body_feats = self.backbone(self.inputs)
+ neck_feats = self.neck(body_feats)
+
+ if self.training:
+ return self.head(neck_feats, self.inputs)
+ else:
+ head_outs = self.head(neck_feats)
+ bbox, bbox_num = self.head.post_process(
+ head_outs, self.inputs['im_shape'], self.inputs['scale_factor'])
+ return {'bbox': bbox, 'bbox_num': bbox_num}
+
+ def get_loss(self):
+ return self._forward()
+
+ def get_pred(self):
+ return self._forward()
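
RetinaNet.from_config follows the usual ppdet shape-chaining pattern: backbone.out_shape feeds the neck, and neck.out_shape feeds the head. A minimal sketch of that wiring with hypothetical stubs (shapes are illustrative):

class StubBackbone:
    out_shape = [('C3', 256), ('C4', 512), ('C5', 1024)]  # hypothetical

class StubNeck:
    def __init__(self, input_shape):
        # an FPN-like neck normalizes every level to 256 channels
        self.out_shape = [(name, 256) for name, _ in input_shape]

class StubHead:
    def __init__(self, input_shape):
        self.input_shape = input_shape

backbone = StubBackbone()
neck = StubNeck(input_shape=backbone.out_shape)
head = StubHead(input_shape=neck.out_shape)
print(head.input_shape)  # [('C3', 256), ('C4', 256), ('C5', 256)]
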
diff --git a/paddlers/models/ppdet/modeling/architectures/s2anet.py b/paddlers/models/ppdet/modeling/architectures/s2anet.py
index 5c412a3..0703f22 100644
--- a/paddlers/models/ppdet/modeling/architectures/s2anet.py
+++ b/paddlers/models/ppdet/modeling/architectures/s2anet.py
@@ -1,15 +1,15 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
@@ -26,26 +26,21 @@ __all__ = ['S2ANet']
@register
class S2ANet(BaseArch):
__category__ = 'architecture'
- __inject__ = [
- 's2anet_head',
- 's2anet_bbox_post_process',
- ]
+ __inject__ = ['head']
- def __init__(self, backbone, neck, s2anet_head, s2anet_bbox_post_process):
+ def __init__(self, backbone, neck, head):
"""
S2ANet, see https://arxiv.org/pdf/2008.09397.pdf
Args:
backbone (object): backbone instance
neck (object): `FPN` instance
- s2anet_head (object): `S2ANetHead` instance
- s2anet_bbox_post_process (object): `S2ANetBBoxPostProcess` instance
+ head (object): `Head` instance
"""
super(S2ANet, self).__init__()
self.backbone = backbone
self.neck = neck
- self.s2anet_head = s2anet_head
- self.s2anet_bbox_post_process = s2anet_bbox_post_process
+ self.s2anet_head = head
@classmethod
def from_config(cls, cfg, *args, **kwargs):
@@ -55,42 +50,28 @@ class S2ANet(BaseArch):
out_shape = neck and neck.out_shape or backbone.out_shape
kwargs = {'input_shape': out_shape}
- s2anet_head = create(cfg['s2anet_head'], **kwargs)
- s2anet_bbox_post_process = create(cfg['s2anet_bbox_post_process'],
- **kwargs)
+ head = create(cfg['head'], **kwargs)
- return {
- 'backbone': backbone,
- 'neck': neck,
- "s2anet_head": s2anet_head,
- "s2anet_bbox_post_process": s2anet_bbox_post_process,
- }
+ return {'backbone': backbone, 'neck': neck, "head": head}
def _forward(self):
body_feats = self.backbone(self.inputs)
if self.neck is not None:
body_feats = self.neck(body_feats)
- self.s2anet_head(body_feats)
if self.training:
- loss = self.s2anet_head.get_loss(self.inputs)
- total_loss = paddle.add_n(list(loss.values()))
- loss.update({'loss': total_loss})
+ loss = self.s2anet_head(body_feats, self.inputs)
return loss
else:
- im_shape = self.inputs['im_shape']
- scale_factor = self.inputs['scale_factor']
- nms_pre = self.s2anet_bbox_post_process.nms_pre
- pred_scores, pred_bboxes = self.s2anet_head.get_prediction(nms_pre)
-
+ head_outs = self.s2anet_head(body_feats)
# post_process
- pred_bboxes, bbox_num = self.s2anet_bbox_post_process(pred_scores,
- pred_bboxes)
+ bboxes, bbox_num = self.s2anet_head.get_bboxes(head_outs)
# rescale the prediction back to origin image
- pred_bboxes = self.s2anet_bbox_post_process.get_pred(
- pred_bboxes, bbox_num, im_shape, scale_factor)
-
+ im_shape = self.inputs['im_shape']
+ scale_factor = self.inputs['scale_factor']
+ bboxes = self.s2anet_head.get_pred(bboxes, bbox_num, im_shape,
+ scale_factor)
# output
- output = {'bbox': pred_bboxes, 'bbox_num': bbox_num}
+ output = {'bbox': bboxes, 'bbox_num': bbox_num}
return output
def get_loss(self, ):
diff --git a/paddlers/models/ppdet/modeling/architectures/ttfnet.py b/paddlers/models/ppdet/modeling/architectures/ttfnet.py
index ec0916e..7441606 100644
--- a/paddlers/models/ppdet/modeling/architectures/ttfnet.py
+++ b/paddlers/models/ppdet/modeling/architectures/ttfnet.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
diff --git a/paddlers/models/ppdet/modeling/architectures/yolo.py b/paddlers/models/ppdet/modeling/architectures/yolo.py
index 83e2bed..51af34c 100644
--- a/paddlers/models/ppdet/modeling/architectures/yolo.py
+++ b/paddlers/models/ppdet/modeling/architectures/yolo.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
@@ -109,10 +109,13 @@ class YOLOv3(BaseArch):
if self.return_idx:
_, bbox, bbox_num, _ = self.post_process(
yolo_head_outs, self.yolo_head.mask_anchors)
- else:
+ elif self.post_process is not None:
bbox, bbox_num = self.post_process(
yolo_head_outs, self.yolo_head.mask_anchors,
self.inputs['im_shape'], self.inputs['scale_factor'])
+ else:
+ bbox, bbox_num = self.yolo_head.post_process(
+ yolo_head_outs, self.inputs['scale_factor'])
output = {'bbox': bbox, 'bbox_num': bbox_num}
return output
diff --git a/paddlers/models/ppdet/modeling/architectures/yolox.py b/paddlers/models/ppdet/modeling/architectures/yolox.py
new file mode 100644
index 0000000..3c8db24
--- /dev/null
+++ b/paddlers/models/ppdet/modeling/architectures/yolox.py
@@ -0,0 +1,138 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from paddlers.models.ppdet.core.workspace import register, create
+from .meta_arch import BaseArch
+
+import random
+import paddle
+import paddle.nn.functional as F
+import paddle.distributed as dist
+
+__all__ = ['YOLOX']
+
+
+@register
+class YOLOX(BaseArch):
+ """
+ YOLOX network, see https://arxiv.org/abs/2107.08430
+
+ Args:
+ backbone (nn.Layer): backbone instance
+ neck (nn.Layer): neck instance
+ head (nn.Layer): head instance
+ for_mot (bool): whether used for MOT or not
+ input_size (list[int]): initial input size; reset by self._preprocess() during multi-scale training
+ size_stride (int): stride of the size range
+ size_range (list[int]): multi-scale range for training
+ random_interval (int): iteration interval at which self._input_size is re-sampled
+ """
+ __category__ = 'architecture'
+
+ def __init__(self,
+ backbone='CSPDarkNet',
+ neck='YOLOCSPPAN',
+ head='YOLOXHead',
+ for_mot=False,
+ input_size=[640, 640],
+ size_stride=32,
+ size_range=[15, 25],
+ random_interval=10):
+ super(YOLOX, self).__init__()
+ self.backbone = backbone
+ self.neck = neck
+ self.head = head
+ self.for_mot = for_mot
+
+ self.input_size = input_size
+ self._input_size = paddle.to_tensor(input_size)
+ self.size_stride = size_stride
+ self.size_range = size_range
+ self.random_interval = random_interval
+ self._step = 0
+
+ @classmethod
+ def from_config(cls, cfg, *args, **kwargs):
+ # backbone
+ backbone = create(cfg['backbone'])
+
+ # fpn
+ kwargs = {'input_shape': backbone.out_shape}
+ neck = create(cfg['neck'], **kwargs)
+
+ # head
+ kwargs = {'input_shape': neck.out_shape}
+ head = create(cfg['head'], **kwargs)
+
+ return {
+ 'backbone': backbone,
+ 'neck': neck,
+ "head": head,
+ }
+
+ def _forward(self):
+ if self.training:
+ self._preprocess()
+ body_feats = self.backbone(self.inputs)
+ neck_feats = self.neck(body_feats, self.for_mot)
+
+ if self.training:
+ yolox_losses = self.head(neck_feats, self.inputs)
+ yolox_losses.update({'size': self._input_size[0]})
+ return yolox_losses
+ else:
+ head_outs = self.head(neck_feats)
+ bbox, bbox_num = self.head.post_process(
+ head_outs, self.inputs['im_shape'], self.inputs['scale_factor'])
+ return {'bbox': bbox, 'bbox_num': bbox_num}
+
+ def get_loss(self):
+ return self._forward()
+
+ def get_pred(self):
+ return self._forward()
+
+ def _preprocess(self):
+ # YOLOX multi-scale training: resize the batch via interpolation before it enters the network.
+ self._get_size()
+ scale_y = self._input_size[0] / self.input_size[0]
+ scale_x = self._input_size[1] / self.input_size[1]
+ if scale_x != 1 or scale_y != 1:
+ self.inputs['image'] = F.interpolate(
+ self.inputs['image'],
+ size=self._input_size,
+ mode='bilinear',
+ align_corners=False)
+ gt_bboxes = self.inputs['gt_bbox']
+ for i in range(len(gt_bboxes)):
+ if len(gt_bboxes[i]) > 0:
+ gt_bboxes[i][:, 0::2] = gt_bboxes[i][:, 0::2] * scale_x
+ gt_bboxes[i][:, 1::2] = gt_bboxes[i][:, 1::2] * scale_y
+ self.inputs['gt_bbox'] = gt_bboxes
+
+ def _get_size(self):
+ # random_interval defaults to 10, i.e. self._input_size is re-sampled every 10 iterations
+ image_ratio = self.input_size[1] * 1.0 / self.input_size[0]
+ if self._step % self.random_interval == 0:
+ size_factor = random.randint(*self.size_range)
+ size = [
+ self.size_stride * size_factor,
+ self.size_stride * int(size_factor * image_ratio)
+ ]
+ self._input_size = paddle.to_tensor(size)
+ self._step += 1
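
_get_size re-samples the input size every random_interval steps: draw size_factor from size_range, multiply by size_stride, and keep the second dimension consistent with the original aspect ratio. With the defaults (stride 32, range [15, 25], 640x640 input) this yields square sizes from 480 to 800. A standalone sketch of the arithmetic:

import random

def sample_size(input_size=(640, 640), size_stride=32,
                size_range=(15, 25), seed=0):
    random.seed(seed)  # seeded only to make the example deterministic
    image_ratio = input_size[1] / input_size[0]
    size_factor = random.randint(*size_range)
    return [size_stride * size_factor,
            size_stride * int(size_factor * image_ratio)]

print(sample_size())  # two multiples of 32, each between 480 and 800
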
diff --git a/paddlers/models/ppdet/modeling/assigners/__init__.py b/paddlers/models/ppdet/modeling/assigners/__init__.py
index b9b351e..fa51ef1 100644
--- a/paddlers/models/ppdet/modeling/assigners/__init__.py
+++ b/paddlers/models/ppdet/modeling/assigners/__init__.py
@@ -16,8 +16,10 @@ from . import utils
from . import task_aligned_assigner
from . import atss_assigner
from . import simota_assigner
+from . import max_iou_assigner
from .utils import *
from .task_aligned_assigner import *
from .atss_assigner import *
from .simota_assigner import *
+from .max_iou_assigner import *
diff --git a/paddlers/models/ppdet/modeling/assigners/atss_assigner.py b/paddlers/models/ppdet/modeling/assigners/atss_assigner.py
index cbcee0c..57930d7 100644
--- a/paddlers/models/ppdet/modeling/assigners/atss_assigner.py
+++ b/paddlers/models/ppdet/modeling/assigners/atss_assigner.py
@@ -22,11 +22,13 @@ import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppdet.core.workspace import register
-from ..ops import iou_similarity
+from ..bbox_utils import iou_similarity, batch_iou_similarity
from ..bbox_utils import bbox_center
-from .utils import (pad_gt, check_points_inside_bboxes, compute_max_iou_anchor,
+from .utils import (check_points_inside_bboxes, compute_max_iou_anchor,
compute_max_iou_gt)
+__all__ = ['ATSSAssigner']
+
@register
class ATSSAssigner(nn.Layer):
@@ -48,7 +50,6 @@ class ATSSAssigner(nn.Layer):
def _gather_topk_pyramid(self, gt2anchor_distances, num_anchors_list,
pad_gt_mask):
- pad_gt_mask = pad_gt_mask.tile([1, 1, self.topk]).astype(paddle.bool)
gt2anchor_distances_list = paddle.split(
gt2anchor_distances, num_anchors_list, axis=-1)
num_anchors_index = np.cumsum(num_anchors_list).tolist()
@@ -58,15 +59,12 @@ class ATSSAssigner(nn.Layer):
for distances, anchors_index in zip(gt2anchor_distances_list,
num_anchors_index):
num_anchors = distances.shape[-1]
- topk_metrics, topk_idxs = paddle.topk(
+ _, topk_idxs = paddle.topk(
distances, self.topk, axis=-1, largest=False)
topk_idxs_list.append(topk_idxs + anchors_index)
- topk_idxs = paddle.where(pad_gt_mask, topk_idxs,
- paddle.zeros_like(topk_idxs))
- is_in_topk = F.one_hot(topk_idxs, num_anchors).sum(axis=-2)
- is_in_topk = paddle.where(is_in_topk > 1,
- paddle.zeros_like(is_in_topk), is_in_topk)
- is_in_topk_list.append(is_in_topk.astype(gt2anchor_distances.dtype))
+ is_in_topk = F.one_hot(topk_idxs, num_anchors).sum(
+ axis=-2).astype(gt2anchor_distances.dtype)
+ is_in_topk_list.append(is_in_topk * pad_gt_mask)
is_in_topk_list = paddle.concat(is_in_topk_list, axis=-1)
topk_idxs_list = paddle.concat(topk_idxs_list, axis=-1)
return is_in_topk_list, topk_idxs_list
@@ -77,8 +75,10 @@ class ATSSAssigner(nn.Layer):
num_anchors_list,
gt_labels,
gt_bboxes,
+ pad_gt_mask,
bg_index,
- gt_scores=None):
+ gt_scores=None,
+ pred_bboxes=None):
r"""This code is based on
https://github.com/fcjian/TOOD/blob/master/mmdet/core/bbox/assigners/atss_assigner.py
@@ -99,18 +99,18 @@ class ATSSAssigner(nn.Layer):
anchor_bboxes (Tensor, float32): pre-defined anchors, shape(L, 4),
"xmin, xmax, ymin, ymax" format
num_anchors_list (List): num of anchors in each level
- gt_labels (Tensor|List[Tensor], int64): Label of gt_bboxes, shape(B, n, 1)
- gt_bboxes (Tensor|List[Tensor], float32): Ground truth bboxes, shape(B, n, 4)
+ gt_labels (Tensor, int64|int32): Label of gt_bboxes, shape(B, n, 1)
+ gt_bboxes (Tensor, float32): Ground truth bboxes, shape(B, n, 4)
+ pad_gt_mask (Tensor, float32): 1 means bbox, 0 means no bbox, shape(B, n, 1)
bg_index (int): background index
- gt_scores (Tensor|List[Tensor]|None, float32) Score of gt_bboxes,
+ gt_scores (Tensor|None, float32): score of gt_bboxes,
shape(B, n, 1), if None, then it will initialize with one_hot label
+ pred_bboxes (Tensor, float32, optional): predicted bounding boxes, shape(B, L, 4)
Returns:
assigned_labels (Tensor): (B, L)
assigned_bboxes (Tensor): (B, L, 4)
- assigned_scores (Tensor): (B, L, C)
+ assigned_scores (Tensor): (B, L, C); if pred_bboxes is not None, the scores are the assigned IoUs
"""
- gt_labels, gt_bboxes, pad_gt_scores, pad_gt_mask = pad_gt(
- gt_labels, gt_bboxes, gt_scores)
assert gt_labels.ndim == gt_bboxes.ndim and \
gt_bboxes.ndim == 3
@@ -119,7 +119,8 @@ class ATSSAssigner(nn.Layer):
# negative batch
if num_max_boxes == 0:
- assigned_labels = paddle.full([batch_size, num_anchors], bg_index)
+ assigned_labels = paddle.full(
+ [batch_size, num_anchors], bg_index, dtype='int32')
assigned_bboxes = paddle.zeros([batch_size, num_anchors, 4])
assigned_scores = paddle.zeros(
[batch_size, num_anchors, self.num_classes])
@@ -149,9 +150,8 @@ class ATSSAssigner(nn.Layer):
iou_threshold = iou_threshold.reshape([batch_size, num_max_boxes, -1])
iou_threshold = iou_threshold.mean(axis=-1, keepdim=True) + \
iou_threshold.std(axis=-1, keepdim=True)
- is_in_topk = paddle.where(
- iou_candidates > iou_threshold.tile([1, 1, num_anchors]),
- is_in_topk, paddle.zeros_like(is_in_topk))
+ is_in_topk = paddle.where(iou_candidates > iou_threshold, is_in_topk,
+ paddle.zeros_like(is_in_topk))
# 6. check the positive sample's center in gt, [B, n, L]
is_in_gts = check_points_inside_bboxes(anchor_centers, gt_bboxes)
@@ -178,9 +178,6 @@ class ATSSAssigner(nn.Layer):
mask_positive)
mask_positive_sum = mask_positive.sum(axis=-2)
assigned_gt_index = mask_positive.argmax(axis=-2)
- assert mask_positive_sum.max() == 1, \
- ("one anchor just assign one gt, but received not equals 1. "
- "Received: %f" % mask_positive_sum.max().item())
# assigned target
batch_ind = paddle.arange(
@@ -197,10 +194,19 @@ class ATSSAssigner(nn.Layer):
gt_bboxes.reshape([-1, 4]), assigned_gt_index.flatten(), axis=0)
assigned_bboxes = assigned_bboxes.reshape([batch_size, num_anchors, 4])
- assigned_scores = F.one_hot(assigned_labels, self.num_classes)
- if gt_scores is not None:
+ assigned_scores = F.one_hot(assigned_labels, self.num_classes + 1)
+ ind = list(range(self.num_classes + 1))
+ ind.remove(bg_index)
+ assigned_scores = paddle.index_select(
+ assigned_scores, paddle.to_tensor(ind), axis=-1)
+ if pred_bboxes is not None:
+ # assigned iou
+ ious = batch_iou_similarity(gt_bboxes, pred_bboxes) * mask_positive
+ ious = ious.max(axis=-2).unsqueeze(-1)
+ assigned_scores *= ious
+ elif gt_scores is not None:
gather_scores = paddle.gather(
- pad_gt_scores.flatten(), assigned_gt_index.flatten(), axis=0)
+ gt_scores.flatten(), assigned_gt_index.flatten(), axis=0)
gather_scores = gather_scores.reshape([batch_size, num_anchors])
gather_scores = paddle.where(mask_positive_sum > 0, gather_scores,
paddle.zeros_like(gather_scores))
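
Both assigners now one-hot encode against num_classes + 1 and then drop the background column with index_select, so background anchors map to an all-zero score row rather than spilling into a real class. A small paddle sketch of that trick (assumes paddle is installed; labels are illustrative):

import paddle
import paddle.nn.functional as F

num_classes, bg_index = 3, 3
assigned_labels = paddle.to_tensor([[0, 2, 3]])       # 3 == background

scores = F.one_hot(assigned_labels, num_classes + 1)  # shape [1, 3, 4]
ind = [i for i in range(num_classes + 1) if i != bg_index]
scores = paddle.index_select(scores, paddle.to_tensor(ind), axis=-1)
print(scores.numpy())
# [[[1. 0. 0.]
#   [0. 0. 1.]
#   [0. 0. 0.]]]   <- background anchor becomes an all-zero row
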
diff --git a/paddlers/models/ppdet/modeling/assigners/max_iou_assigner.py b/paddlers/models/ppdet/modeling/assigners/max_iou_assigner.py
new file mode 100644
index 0000000..891b707
--- /dev/null
+++ b/paddlers/models/ppdet/modeling/assigners/max_iou_assigner.py
@@ -0,0 +1,54 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from paddlers.models.ppdet.core.workspace import register
+from paddlers.models.ppdet.modeling.proposal_generator.target import label_box
+
+__all__ = ['MaxIoUAssigner']
+
+
+@register
+class MaxIoUAssigner(object):
+ """a standard bbox assigner based on max IoU, use ppdet's label_box
+ as backend.
+ Args:
+ positive_overlap (float): threshold for defining positive samples
+ negative_overlap (float): threshold for denining negative samples
+ allow_low_quality (bool): whether to lower IoU thr if a GT poorly
+ overlaps with candidate bboxes
+ """
+
+ def __init__(self,
+ positive_overlap,
+ negative_overlap,
+ allow_low_quality=True):
+ self.positive_overlap = positive_overlap
+ self.negative_overlap = negative_overlap
+ self.allow_low_quality = allow_low_quality
+
+ def __call__(self, bboxes, gt_bboxes):
+ matches, match_labels = label_box(
+ bboxes,
+ gt_bboxes,
+ positive_overlap=self.positive_overlap,
+ negative_overlap=self.negative_overlap,
+ allow_low_quality=self.allow_low_quality,
+ ignore_thresh=-1,
+ is_crowd=None,
+ assign_on_cpu=False)
+ return matches, match_labels
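
A minimal usage sketch for the new assigner, assuming paddlers (and its vendored ppdet) is importable; the boxes and thresholds are illustrative:

import paddle
from paddlers.models.ppdet.modeling.assigners import MaxIoUAssigner

assigner = MaxIoUAssigner(positive_overlap=0.5, negative_overlap=0.4)
anchors = paddle.to_tensor([[0., 0., 10., 10.], [20., 20., 30., 30.]])
gt_bboxes = paddle.to_tensor([[1., 1., 9., 9.]])
matches, match_labels = assigner(anchors, gt_bboxes)
# per the usual ppdet label_box convention: 1 positive, 0 negative, -1 ignored
print(match_labels.numpy())
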
diff --git a/paddlers/models/ppdet/modeling/assigners/simota_assigner.py b/paddlers/models/ppdet/modeling/assigners/simota_assigner.py
index a0fe723..26618ac 100644
--- a/paddlers/models/ppdet/modeling/assigners/simota_assigner.py
+++ b/paddlers/models/ppdet/modeling/assigners/simota_assigner.py
@@ -115,7 +115,10 @@ class SimOTAAssigner(object):
def dynamic_k_matching(self, cost_matrix, pairwise_ious, num_gt):
match_matrix = np.zeros_like(cost_matrix.numpy())
# select candidate topk ious for dynamic-k calculation
- topk_ious, _ = paddle.topk(pairwise_ious, self.candidate_topk, axis=0)
+ topk_ious, _ = paddle.topk(
+ pairwise_ious,
+ min(self.candidate_topk, pairwise_ious.shape[0]),
+ axis=0)
# calculate dynamic k for each gt
dynamic_ks = paddle.clip(topk_ious.sum(0).cast('int'), min=1)
for gt_idx in range(num_gt):
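
The topk call is now clamped to the number of available candidates, since paddle.topk raises when k exceeds the axis length; with fewer than candidate_topk anchors the dynamic-k estimate simply uses all of them. A short sketch of the guarded call (shapes are hypothetical):

import paddle

candidate_topk = 10
pairwise_ious = paddle.rand([4, 6])   # 4 candidate anchors x 6 gt boxes
k = min(candidate_topk, pairwise_ious.shape[0])
topk_ious, _ = paddle.topk(pairwise_ious, k, axis=0)
dynamic_ks = paddle.clip(topk_ious.sum(0).cast('int'), min=1)
print(dynamic_ks.shape)  # [6], one dynamic k per gt
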
diff --git a/paddlers/models/ppdet/modeling/assigners/task_aligned_assigner.py b/paddlers/models/ppdet/modeling/assigners/task_aligned_assigner.py
index 6dae235..5c82b36 100644
--- a/paddlers/models/ppdet/modeling/assigners/task_aligned_assigner.py
+++ b/paddlers/models/ppdet/modeling/assigners/task_aligned_assigner.py
@@ -21,10 +21,12 @@ import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppdet.core.workspace import register
-from ..bbox_utils import iou_similarity
-from .utils import (pad_gt, gather_topk_anchors, check_points_inside_bboxes,
+from ..bbox_utils import batch_iou_similarity
+from .utils import (gather_topk_anchors, check_points_inside_bboxes,
compute_max_iou_anchor)
+__all__ = ['TaskAlignedAssigner']
+
@register
class TaskAlignedAssigner(nn.Layer):
@@ -43,8 +45,10 @@ class TaskAlignedAssigner(nn.Layer):
pred_scores,
pred_bboxes,
anchor_points,
+ num_anchors_list,
gt_labels,
gt_bboxes,
+ pad_gt_mask,
bg_index,
gt_scores=None):
r"""This code is based on
@@ -61,20 +65,18 @@ class TaskAlignedAssigner(nn.Layer):
pred_scores (Tensor, float32): predicted class probability, shape(B, L, C)
pred_bboxes (Tensor, float32): predicted bounding boxes, shape(B, L, 4)
anchor_points (Tensor, float32): pre-defined anchors, shape(L, 2), "cxcy" format
- gt_labels (Tensor|List[Tensor], int64): Label of gt_bboxes, shape(B, n, 1)
- gt_bboxes (Tensor|List[Tensor], float32): Ground truth bboxes, shape(B, n, 4)
+ num_anchors_list (List): num of anchors in each level, shape(L)
+ gt_labels (Tensor, int64|int32): Label of gt_bboxes, shape(B, n, 1)
+ gt_bboxes (Tensor, float32): Ground truth bboxes, shape(B, n, 4)
+ pad_gt_mask (Tensor, float32): 1 means bbox, 0 means no bbox, shape(B, n, 1)
bg_index (int): background index
- gt_scores (Tensor|List[Tensor]|None, float32) Score of gt_bboxes,
- shape(B, n, 1), if None, then it will initialize with one_hot label
+ gt_scores (Tensor|None, float32): score of gt_bboxes, shape(B, n, 1)
Returns:
assigned_labels (Tensor): (B, L)
assigned_bboxes (Tensor): (B, L, 4)
assigned_scores (Tensor): (B, L, C)
"""
assert pred_scores.ndim == pred_bboxes.ndim
-
- gt_labels, gt_bboxes, pad_gt_scores, pad_gt_mask = pad_gt(
- gt_labels, gt_bboxes, gt_scores)
assert gt_labels.ndim == gt_bboxes.ndim and \
gt_bboxes.ndim == 3
@@ -83,14 +85,15 @@ class TaskAlignedAssigner(nn.Layer):
# negative batch
if num_max_boxes == 0:
- assigned_labels = paddle.full([batch_size, num_anchors], bg_index)
+ assigned_labels = paddle.full(
+ [batch_size, num_anchors], bg_index, dtype='int32')
assigned_bboxes = paddle.zeros([batch_size, num_anchors, 4])
assigned_scores = paddle.zeros(
[batch_size, num_anchors, num_classes])
return assigned_labels, assigned_bboxes, assigned_scores
# compute iou between gt and pred bbox, [B, n, L]
- ious = iou_similarity(gt_bboxes, pred_bboxes)
+ ious = batch_iou_similarity(gt_bboxes, pred_bboxes)
# gather pred bboxes class score
pred_scores = pred_scores.transpose([0, 2, 1])
batch_ind = paddle.arange(
@@ -109,9 +112,7 @@ class TaskAlignedAssigner(nn.Layer):
# select topk largest alignment metrics pred bbox as candidates
# for each gt, [B, n, L]
is_in_topk = gather_topk_anchors(
- alignment_metrics * is_in_gts,
- self.topk,
- topk_mask=pad_gt_mask.tile([1, 1, self.topk]).astype(paddle.bool))
+ alignment_metrics * is_in_gts, self.topk, topk_mask=pad_gt_mask)
# select positive sample, [B, n, L]
mask_positive = is_in_topk * is_in_gts * pad_gt_mask
@@ -127,9 +128,6 @@ class TaskAlignedAssigner(nn.Layer):
mask_positive)
mask_positive_sum = mask_positive.sum(axis=-2)
assigned_gt_index = mask_positive.argmax(axis=-2)
- assert mask_positive_sum.max() == 1, \
- ("one anchor just assign one gt, but received not equals 1. "
- "Received: %f" % mask_positive_sum.max().item())
# assigned target
assigned_gt_index = assigned_gt_index + batch_ind * num_max_boxes
@@ -144,7 +142,11 @@ class TaskAlignedAssigner(nn.Layer):
gt_bboxes.reshape([-1, 4]), assigned_gt_index.flatten(), axis=0)
assigned_bboxes = assigned_bboxes.reshape([batch_size, num_anchors, 4])
- assigned_scores = F.one_hot(assigned_labels, num_classes)
+ assigned_scores = F.one_hot(assigned_labels, num_classes + 1)
+ ind = list(range(num_classes + 1))
+ ind.remove(bg_index)
+ assigned_scores = paddle.index_select(
+ assigned_scores, paddle.to_tensor(ind), axis=-1)
# rescale alignment metrics
alignment_metrics *= mask_positive
max_metrics_per_instance = alignment_metrics.max(axis=-1, keepdim=True)
diff --git a/paddlers/models/ppdet/modeling/assigners/utils.py b/paddlers/models/ppdet/modeling/assigners/utils.py
index 0b81b45..01feaba 100644
--- a/paddlers/models/ppdet/modeling/assigners/utils.py
+++ b/paddlers/models/ppdet/modeling/assigners/utils.py
@@ -88,7 +88,7 @@ def gather_topk_anchors(metrics, topk, largest=True, topk_mask=None, eps=1e-9):
largest (bool) : largest is a flag, if set to true,
algorithm will sort by descending order, otherwise sort by
ascending order. Default: True
- topk_mask (Tensor, bool|None): shape[B, n, topk], ignore bbox mask,
+ topk_mask (Tensor, float32): shape[B, n, 1], ignore bbox mask,
Default: None
eps (float): Default: 1e-9
Returns:
@@ -98,20 +98,22 @@ def gather_topk_anchors(metrics, topk, largest=True, topk_mask=None, eps=1e-9):
topk_metrics, topk_idxs = paddle.topk(
metrics, topk, axis=-1, largest=largest)
if topk_mask is None:
- topk_mask = (topk_metrics.max(axis=-1, keepdim=True) > eps).tile(
- [1, 1, topk])
- topk_idxs = paddle.where(topk_mask, topk_idxs, paddle.zeros_like(topk_idxs))
- is_in_topk = F.one_hot(topk_idxs, num_anchors).sum(axis=-2)
- is_in_topk = paddle.where(is_in_topk > 1,
- paddle.zeros_like(is_in_topk), is_in_topk)
- return is_in_topk.astype(metrics.dtype)
+ topk_mask = (
+ topk_metrics.max(axis=-1, keepdim=True) > eps).astype(metrics.dtype)
+ is_in_topk = F.one_hot(topk_idxs, num_anchors).sum(
+ axis=-2).astype(metrics.dtype)
+ return is_in_topk * topk_mask
-def check_points_inside_bboxes(points, bboxes, eps=1e-9):
+def check_points_inside_bboxes(points,
+ bboxes,
+ center_radius_tensor=None,
+ eps=1e-9):
r"""
Args:
points (Tensor, float32): shape[L, 2], "xy" format, L: num_anchors
bboxes (Tensor, float32): shape[B, n, 4], "xmin, ymin, xmax, ymax" format
+ center_radius_tensor (Tensor, float32): shape [L, 1]. Default: None.
eps (float): Default: 1e-9
Returns:
is_in_bboxes (Tensor, float32): shape[B, n, L], value=1. means selected
@@ -119,12 +121,28 @@ def check_points_inside_bboxes(points, bboxes, eps=1e-9):
points = points.unsqueeze([0, 1])
x, y = points.chunk(2, axis=-1)
xmin, ymin, xmax, ymax = bboxes.unsqueeze(2).chunk(4, axis=-1)
+ # check whether `points` is in `bboxes`
l = x - xmin
t = y - ymin
r = xmax - x
b = ymax - y
- bbox_ltrb = paddle.concat([l, t, r, b], axis=-1)
- return (bbox_ltrb.min(axis=-1) > eps).astype(bboxes.dtype)
+ delta_ltrb = paddle.concat([l, t, r, b], axis=-1)
+ is_in_bboxes = (delta_ltrb.min(axis=-1) > eps)
+ if center_radius_tensor is not None:
+ # check whether `points` is in `center_radius`
+ center_radius_tensor = center_radius_tensor.unsqueeze([0, 1])
+ cx = (xmin + xmax) * 0.5
+ cy = (ymin + ymax) * 0.5
+ l = x - (cx - center_radius_tensor)
+ t = y - (cy - center_radius_tensor)
+ r = (cx + center_radius_tensor) - x
+ b = (cy + center_radius_tensor) - y
+ delta_ltrb_c = paddle.concat([l, t, r, b], axis=-1)
+ is_in_center = (delta_ltrb_c.min(axis=-1) > eps)
+ return (paddle.logical_and(is_in_bboxes, is_in_center),
+ paddle.logical_or(is_in_bboxes, is_in_center))
+
+ return is_in_bboxes.astype(bboxes.dtype)
def compute_max_iou_anchor(ious):
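With center_radius_tensor supplied, check_points_inside_bboxes now returns a tuple of two boolean masks (inside-box AND inside-center, inside-box OR inside-center); the single float mask of the old signature is returned only when the radius is None. A minimal sketch of both call patterns, with hypothetical toy data and the function imported from this module:

import paddle
from paddlers.models.ppdet.modeling.assigners.utils import check_points_inside_bboxes

points = paddle.to_tensor([[8., 8.], [40., 40.]])    # [L=2, 2], "xy" format
bboxes = paddle.to_tensor([[[0., 0., 16., 16.]]])    # [B=1, n=1, 4]

# old behaviour: one float32 mask of shape [B, n, L]
mask = check_points_inside_bboxes(points, bboxes)

# new behaviour: two bool masks of shape [B, n, L]
radius = paddle.full([2, 1], 2.5)                    # [L, 1]
is_in_and, is_in_or = check_points_inside_bboxes(
    points, bboxes, center_radius_tensor=radius)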
@@ -158,7 +176,8 @@ def compute_max_iou_gt(ious):
def generate_anchors_for_grid_cell(feats,
fpn_strides,
grid_cell_size=5.0,
- grid_cell_offset=0.5):
+ grid_cell_offset=0.5,
+ dtype='float32'):
r"""
Like ATSS, generate anchors based on grid size.
Args:
@@ -167,14 +186,16 @@ def generate_anchors_for_grid_cell(feats,
grid_cell_size (float): anchor size
grid_cell_offset (float): The range is between 0 and 1.
Returns:
- anchors (List[Tensor]): shape[s, (l, 4)]
- num_anchors_list (List[int]): shape[s]
- stride_tensor_list (List[Tensor]): shape[s, (l, 1)]
+ anchors (Tensor): shape[l, 4], "xmin, ymin, xmax, ymax" format.
+ anchor_points (Tensor): shape[l, 2], "x, y" format.
+ num_anchors_list (List[int]): shape[s], contains [s_1, s_2, ...].
+ stride_tensor (Tensor): shape[l, 1], contains the stride for each scale.
"""
assert len(feats) == len(fpn_strides)
anchors = []
+ anchor_points = []
num_anchors_list = []
- stride_tensor_list = []
+ stride_tensor = []
for feat, stride in zip(feats, fpn_strides):
_, _, h, w = feat.shape
cell_half_size = grid_cell_size * stride * 0.5
@@ -186,9 +207,19 @@ def generate_anchors_for_grid_cell(feats,
shift_x - cell_half_size, shift_y - cell_half_size,
shift_x + cell_half_size, shift_y + cell_half_size
],
- axis=-1).astype(feat.dtype)
+ axis=-1).astype(dtype)
+ anchor_point = paddle.stack([shift_x, shift_y], axis=-1).astype(dtype)
+
anchors.append(anchor.reshape([-1, 4]))
+ anchor_points.append(anchor_point.reshape([-1, 2]))
num_anchors_list.append(len(anchors[-1]))
- stride_tensor_list.append(
- paddle.full([num_anchors_list[-1], 1], stride))
- return anchors, num_anchors_list, stride_tensor_list
+ stride_tensor.append(
+ paddle.full(
+ [num_anchors_list[-1], 1], stride, dtype=dtype))
+ anchors = paddle.concat(anchors)
+ anchors.stop_gradient = True
+ anchor_points = paddle.concat(anchor_points)
+ anchor_points.stop_gradient = True
+ stride_tensor = paddle.concat(stride_tensor)
+ stride_tensor.stop_gradient = True
+ return anchors, anchor_points, num_anchors_list, stride_tensor
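After this change the per-level lists are concatenated once inside the function, so downstream heads work with single tensors, and anchor center points are returned as well. A minimal smoke test, a sketch assuming two FPN levels with strides 8 and 16:

import paddle
from paddlers.models.ppdet.modeling.assigners.utils import generate_anchors_for_grid_cell

feats = [paddle.rand([1, 256, 80, 80]), paddle.rand([1, 256, 40, 40])]
anchors, anchor_points, num_anchors_list, stride_tensor = \
    generate_anchors_for_grid_cell(feats, fpn_strides=[8, 16])

print(anchors.shape)        # [8000, 4]  (80*80 + 40*40 anchors)
print(anchor_points.shape)  # [8000, 2]
print(num_anchors_list)     # [6400, 1600]
print(stride_tensor.shape)  # [8000, 1]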
diff --git a/paddlers/models/ppdet/modeling/backbones/__init__.py b/paddlers/models/ppdet/modeling/backbones/__init__.py
index 869955f..3447b7d 100644
--- a/paddlers/models/ppdet/modeling/backbones/__init__.py
+++ b/paddlers/models/ppdet/modeling/backbones/__init__.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
from . import vgg
@@ -29,6 +29,11 @@ from . import swin_transformer
from . import lcnet
from . import hardnet
from . import esnet
+from . import cspresnet
+from . import csp_darknet
+from . import convnext
+from . import vision_transformer
+from . import mobileone
from .vgg import *
from .resnet import *
@@ -47,3 +52,8 @@ from .swin_transformer import *
from .lcnet import *
from .hardnet import *
from .esnet import *
+from .cspresnet import *
+from .csp_darknet import *
+from .convnext import *
+from .vision_transformer import *
+from .mobileone import *
diff --git a/paddlers/models/ppdet/modeling/backbones/blazenet.py b/paddlers/models/ppdet/modeling/backbones/blazenet.py
index 89a0c62..277b5fe 100644
--- a/paddlers/models/ppdet/modeling/backbones/blazenet.py
+++ b/paddlers/models/ppdet/modeling/backbones/blazenet.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
diff --git a/paddlers/models/ppdet/modeling/backbones/convnext.py b/paddlers/models/ppdet/modeling/backbones/convnext.py
new file mode 100644
index 0000000..b7e4dff
--- /dev/null
+++ b/paddlers/models/ppdet/modeling/backbones/convnext.py
@@ -0,0 +1,245 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+'''
+Modified from https://github.com/facebookresearch/ConvNeXt
+Copyright (c) Meta Platforms, Inc. and affiliates.
+All rights reserved.
+This source code is licensed under the license found in the
+LICENSE file in the root directory of this source tree.
+'''
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle import ParamAttr
+from paddle.nn.initializer import Constant
+
+import numpy as np
+
+from paddlers.models.ppdet.core.workspace import register, serializable
+from ..shape_spec import ShapeSpec
+from .transformer_utils import DropPath, trunc_normal_, zeros_
+
+__all__ = ['ConvNeXt']
+
+
+class Block(nn.Layer):
+ r""" ConvNeXt Block. There are two equivalent implementations:
+ (1) DwConv -> LayerNorm (channels_first) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, H, W)
+ (2) DwConv -> Permute to (N, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back
+    We use (2) as we find it slightly faster in PaddlePaddle
+
+ Args:
+ dim (int): Number of input channels.
+ drop_path (float): Stochastic depth rate. Default: 0.0
+ layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
+ """
+
+ def __init__(self, dim, drop_path=0., layer_scale_init_value=1e-6):
+ super().__init__()
+ self.dwconv = nn.Conv2D(
+ dim, dim, kernel_size=7, padding=3, groups=dim) # depthwise conv
+ self.norm = LayerNorm(dim, eps=1e-6)
+ self.pwconv1 = nn.Linear(
+ dim, 4 * dim) # pointwise/1x1 convs, implemented with linear layers
+ self.act = nn.GELU()
+ self.pwconv2 = nn.Linear(4 * dim, dim)
+
+ if layer_scale_init_value > 0:
+ self.gamma = self.create_parameter(
+ shape=(dim, ),
+ attr=ParamAttr(initializer=Constant(layer_scale_init_value)))
+ else:
+ self.gamma = None
+
+ self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity(
+ )
+
+ def forward(self, x):
+ input = x
+ x = self.dwconv(x)
+ x = x.transpose([0, 2, 3, 1])
+ x = self.norm(x)
+ x = self.pwconv1(x)
+ x = self.act(x)
+ x = self.pwconv2(x)
+ if self.gamma is not None:
+ x = self.gamma * x
+ x = x.transpose([0, 3, 1, 2])
+ x = input + self.drop_path(x)
+ return x
+
+
+class LayerNorm(nn.Layer):
+ r""" LayerNorm that supports two data formats: channels_last (default) or channels_first.
+ The ordering of the dimensions in the inputs. channels_last corresponds to inputs with
+ shape (batch_size, height, width, channels) while channels_first corresponds to inputs
+ with shape (batch_size, channels, height, width).
+ """
+
+ def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"):
+ super().__init__()
+
+ self.weight = self.create_parameter(
+ shape=(normalized_shape, ),
+ attr=ParamAttr(initializer=Constant(1.)))
+ self.bias = self.create_parameter(
+ shape=(normalized_shape, ),
+ attr=ParamAttr(initializer=Constant(0.)))
+
+ self.eps = eps
+ self.data_format = data_format
+ if self.data_format not in ["channels_last", "channels_first"]:
+ raise NotImplementedError
+ self.normalized_shape = (normalized_shape, )
+
+ def forward(self, x):
+ if self.data_format == "channels_last":
+ return F.layer_norm(x, self.normalized_shape, self.weight,
+ self.bias, self.eps)
+ elif self.data_format == "channels_first":
+ u = x.mean(1, keepdim=True)
+ s = (x - u).pow(2).mean(1, keepdim=True)
+ x = (x - u) / paddle.sqrt(s + self.eps)
+ x = self.weight[:, None, None] * x + self.bias[:, None, None]
+ return x
+
+
+@register
+@serializable
+class ConvNeXt(nn.Layer):
+ r""" ConvNeXt
+    A PaddlePaddle implementation of `A ConvNet for the 2020s` -
+ https://arxiv.org/pdf/2201.03545.pdf
+
+ Args:
+ in_chans (int): Number of input image channels. Default: 3
+ depths (tuple(int)): Number of blocks at each stage. Default: [3, 3, 9, 3]
+        dims (tuple(int)): Feature dimension at each stage. Default: [96, 192, 384, 768]
+ drop_path_rate (float): Stochastic depth rate. Default: 0.
+ layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
+ """
+
+ arch_settings = {
+ 'tiny': {
+ 'depths': [3, 3, 9, 3],
+ 'dims': [96, 192, 384, 768]
+ },
+ 'small': {
+ 'depths': [3, 3, 27, 3],
+ 'dims': [96, 192, 384, 768]
+ },
+ 'base': {
+ 'depths': [3, 3, 27, 3],
+ 'dims': [128, 256, 512, 1024]
+ },
+ 'large': {
+ 'depths': [3, 3, 27, 3],
+ 'dims': [192, 384, 768, 1536]
+ },
+ 'xlarge': {
+ 'depths': [3, 3, 27, 3],
+ 'dims': [256, 512, 1024, 2048]
+ },
+ }
+
+ def __init__(
+ self,
+ arch='tiny',
+ in_chans=3,
+ drop_path_rate=0.,
+ layer_scale_init_value=1e-6,
+ return_idx=[1, 2, 3],
+ norm_output=True,
+ pretrained=None, ):
+ super().__init__()
+ depths = self.arch_settings[arch]['depths']
+ dims = self.arch_settings[arch]['dims']
+ self.downsample_layers = nn.LayerList(
+ ) # stem and 3 intermediate downsampling conv layers
+ stem = nn.Sequential(
+ nn.Conv2D(
+ in_chans, dims[0], kernel_size=4, stride=4),
+ LayerNorm(
+ dims[0], eps=1e-6, data_format="channels_first"))
+ self.downsample_layers.append(stem)
+ for i in range(3):
+ downsample_layer = nn.Sequential(
+ LayerNorm(
+ dims[i], eps=1e-6, data_format="channels_first"),
+ nn.Conv2D(
+ dims[i], dims[i + 1], kernel_size=2, stride=2), )
+ self.downsample_layers.append(downsample_layer)
+
+ self.stages = nn.LayerList(
+ ) # 4 feature resolution stages, each consisting of multiple residual blocks
+ dp_rates = [x for x in np.linspace(0, drop_path_rate, sum(depths))]
+ cur = 0
+ for i in range(4):
+ stage = nn.Sequential(*[
+ Block(
+ dim=dims[i],
+ drop_path=dp_rates[cur + j],
+ layer_scale_init_value=layer_scale_init_value)
+ for j in range(depths[i])
+ ])
+ self.stages.append(stage)
+ cur += depths[i]
+
+ self.return_idx = return_idx
+ self.dims = [dims[i] for i in return_idx] # [::-1]
+
+ self.norm_output = norm_output
+ if norm_output:
+ self.norms = nn.LayerList([
+ LayerNorm(
+ c, eps=1e-6, data_format="channels_first")
+ for c in self.dims
+ ])
+
+ self.apply(self._init_weights)
+
+ if pretrained is not None:
+ if 'http' in pretrained: #URL
+ path = paddle.utils.download.get_weights_path_from_url(
+ pretrained)
+ else: #model in local path
+ path = pretrained
+ self.set_state_dict(paddle.load(path))
+
+ def _init_weights(self, m):
+ if isinstance(m, (nn.Conv2D, nn.Linear)):
+ trunc_normal_(m.weight)
+ zeros_(m.bias)
+
+ def forward_features(self, x):
+ output = []
+ for i in range(4):
+ x = self.downsample_layers[i](x)
+ x = self.stages[i](x)
+ output.append(x)
+
+ outputs = [output[i] for i in self.return_idx]
+ if self.norm_output:
+ outputs = [self.norms[i](out) for i, out in enumerate(outputs)]
+
+ return outputs
+
+ def forward(self, x):
+ x = self.forward_features(x['image'])
+ return x
+
+ @property
+ def out_shape(self):
+ return [ShapeSpec(channels=c) for c in self.dims]
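For orientation, a minimal smoke test of the backbone above; a sketch, assuming this vendored module path and the 'tiny' setting (dims [96, 192, 384, 768], strides 4/8/16/32):

import paddle
from paddlers.models.ppdet.modeling.backbones.convnext import ConvNeXt

model = ConvNeXt(arch='tiny', return_idx=[1, 2, 3])
outs = model({'image': paddle.rand([1, 3, 224, 224])})
for o, spec in zip(outs, model.out_shape):
    print(o.shape, spec.channels)
# expected: [1, 192, 28, 28], [1, 384, 14, 14], [1, 768, 7, 7]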
diff --git a/paddlers/models/ppdet/modeling/backbones/csp_darknet.py b/paddlers/models/ppdet/modeling/backbones/csp_darknet.py
new file mode 100644
index 0000000..f350f85
--- /dev/null
+++ b/paddlers/models/ppdet/modeling/backbones/csp_darknet.py
@@ -0,0 +1,404 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle import ParamAttr
+from paddle.regularizer import L2Decay
+from paddlers.models.ppdet.core.workspace import register, serializable
+from paddlers.models.ppdet.modeling.initializer import conv_init_
+from ..shape_spec import ShapeSpec
+
+__all__ = [
+ 'CSPDarkNet', 'BaseConv', 'DWConv', 'BottleNeck', 'SPPLayer', 'SPPFLayer'
+]
+
+
+class BaseConv(nn.Layer):
+ def __init__(self,
+ in_channels,
+ out_channels,
+ ksize,
+ stride,
+ groups=1,
+ bias=False,
+ act="silu"):
+ super(BaseConv, self).__init__()
+ self.conv = nn.Conv2D(
+ in_channels,
+ out_channels,
+ kernel_size=ksize,
+ stride=stride,
+ padding=(ksize - 1) // 2,
+ groups=groups,
+ bias_attr=bias)
+ self.bn = nn.BatchNorm2D(
+ out_channels,
+ weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
+ bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
+
+ self._init_weights()
+
+ def _init_weights(self):
+ conv_init_(self.conv)
+
+ def forward(self, x):
+        # use 'x * F.sigmoid(x)' in place of a dedicated 'silu' op
+ x = self.bn(self.conv(x))
+ y = x * F.sigmoid(x)
+ return y
+
+
+class DWConv(nn.Layer):
+ """Depthwise Conv"""
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ ksize,
+ stride=1,
+ bias=False,
+ act="silu"):
+ super(DWConv, self).__init__()
+ self.dw_conv = BaseConv(
+ in_channels,
+ in_channels,
+ ksize=ksize,
+ stride=stride,
+ groups=in_channels,
+ bias=bias,
+ act=act)
+ self.pw_conv = BaseConv(
+ in_channels,
+ out_channels,
+ ksize=1,
+ stride=1,
+ groups=1,
+ bias=bias,
+ act=act)
+
+ def forward(self, x):
+ return self.pw_conv(self.dw_conv(x))
+
+
+class Focus(nn.Layer):
+ """Focus width and height information into channel space, used in YOLOX."""
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ ksize=3,
+ stride=1,
+ bias=False,
+ act="silu"):
+ super(Focus, self).__init__()
+ self.conv = BaseConv(
+ in_channels * 4,
+ out_channels,
+ ksize=ksize,
+ stride=stride,
+ bias=bias,
+ act=act)
+
+ def forward(self, inputs):
+        # inputs [bs, C, H, W] -> outputs [bs, 4C, H/2, W/2]
+ top_left = inputs[:, :, 0::2, 0::2]
+ top_right = inputs[:, :, 0::2, 1::2]
+ bottom_left = inputs[:, :, 1::2, 0::2]
+ bottom_right = inputs[:, :, 1::2, 1::2]
+ outputs = paddle.concat(
+ [top_left, bottom_left, top_right, bottom_right], 1)
+ return self.conv(outputs)
+
+
+class BottleNeck(nn.Layer):
+ def __init__(self,
+ in_channels,
+ out_channels,
+ shortcut=True,
+ expansion=0.5,
+ depthwise=False,
+ bias=False,
+ act="silu"):
+ super(BottleNeck, self).__init__()
+ hidden_channels = int(out_channels * expansion)
+ Conv = DWConv if depthwise else BaseConv
+ self.conv1 = BaseConv(
+ in_channels, hidden_channels, ksize=1, stride=1, bias=bias, act=act)
+ self.conv2 = Conv(
+ hidden_channels,
+ out_channels,
+ ksize=3,
+ stride=1,
+ bias=bias,
+ act=act)
+ self.add_shortcut = shortcut and in_channels == out_channels
+
+ def forward(self, x):
+ y = self.conv2(self.conv1(x))
+ if self.add_shortcut:
+ y = y + x
+ return y
+
+
+class SPPLayer(nn.Layer):
+ """Spatial Pyramid Pooling (SPP) layer used in YOLOv3-SPP and YOLOX"""
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ kernel_sizes=(5, 9, 13),
+ bias=False,
+ act="silu"):
+ super(SPPLayer, self).__init__()
+ hidden_channels = in_channels // 2
+ self.conv1 = BaseConv(
+ in_channels, hidden_channels, ksize=1, stride=1, bias=bias, act=act)
+ self.maxpoolings = nn.LayerList([
+ nn.MaxPool2D(
+ kernel_size=ks, stride=1, padding=ks // 2)
+ for ks in kernel_sizes
+ ])
+ conv2_channels = hidden_channels * (len(kernel_sizes) + 1)
+ self.conv2 = BaseConv(
+ conv2_channels, out_channels, ksize=1, stride=1, bias=bias, act=act)
+
+ def forward(self, x):
+ x = self.conv1(x)
+ x = paddle.concat([x] + [mp(x) for mp in self.maxpoolings], axis=1)
+ x = self.conv2(x)
+ return x
+
+
+class SPPFLayer(nn.Layer):
+ """ Spatial Pyramid Pooling - Fast (SPPF) layer used in YOLOv5 by Glenn Jocher,
+ equivalent to SPP(k=(5, 9, 13))
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ ksize=5,
+ bias=False,
+ act='silu'):
+ super(SPPFLayer, self).__init__()
+ hidden_channels = in_channels // 2
+ self.conv1 = BaseConv(
+ in_channels, hidden_channels, ksize=1, stride=1, bias=bias, act=act)
+ self.maxpooling = nn.MaxPool2D(
+ kernel_size=ksize, stride=1, padding=ksize // 2)
+ conv2_channels = hidden_channels * 4
+ self.conv2 = BaseConv(
+ conv2_channels, out_channels, ksize=1, stride=1, bias=bias, act=act)
+
+ def forward(self, x):
+ x = self.conv1(x)
+ y1 = self.maxpooling(x)
+ y2 = self.maxpooling(y1)
+ y3 = self.maxpooling(y2)
+ concats = paddle.concat([x, y1, y2, y3], axis=1)
+ out = self.conv2(concats)
+ return out
+
+
+class CSPLayer(nn.Layer):
+ """CSP (Cross Stage Partial) layer with 3 convs, named C3 in YOLOv5"""
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ num_blocks=1,
+ shortcut=True,
+ expansion=0.5,
+ depthwise=False,
+ bias=False,
+ act="silu"):
+ super(CSPLayer, self).__init__()
+ hidden_channels = int(out_channels * expansion)
+ self.conv1 = BaseConv(
+ in_channels, hidden_channels, ksize=1, stride=1, bias=bias, act=act)
+ self.conv2 = BaseConv(
+ in_channels, hidden_channels, ksize=1, stride=1, bias=bias, act=act)
+ self.bottlenecks = nn.Sequential(*[
+ BottleNeck(
+ hidden_channels,
+ hidden_channels,
+ shortcut=shortcut,
+ expansion=1.0,
+ depthwise=depthwise,
+ bias=bias,
+ act=act) for _ in range(num_blocks)
+ ])
+ self.conv3 = BaseConv(
+ hidden_channels * 2,
+ out_channels,
+ ksize=1,
+ stride=1,
+ bias=bias,
+ act=act)
+
+ def forward(self, x):
+ x_1 = self.conv1(x)
+ x_1 = self.bottlenecks(x_1)
+ x_2 = self.conv2(x)
+ x = paddle.concat([x_1, x_2], axis=1)
+ x = self.conv3(x)
+ return x
+
+
+@register
+@serializable
+class CSPDarkNet(nn.Layer):
+ """
+ CSPDarkNet backbone.
+ Args:
+ arch (str): Architecture of CSPDarkNet, from {P5, P6, X}, default as X,
+ and 'X' means used in YOLOX, 'P5/P6' means used in YOLOv5.
+        depth_mult (float): Depth multiplier, multiplies the number of blocks
+            in each CSPLayer, default as 1.0.
+        width_mult (float): Width multiplier, multiplies the number of channels
+            in each layer, default as 1.0.
+ depthwise (bool): Whether to use depth-wise conv layer.
+ act (str): Activation function type, default as 'silu'.
+ return_idx (list): Index of stages whose feature maps are returned.
+ """
+
+ __shared__ = ['depth_mult', 'width_mult', 'act', 'trt']
+
+ # in_channels, out_channels, num_blocks, add_shortcut, use_spp(use_sppf)
+ # 'X' means setting used in YOLOX, 'P5/P6' means setting used in YOLOv5.
+ arch_settings = {
+ 'X': [[64, 128, 3, True, False], [128, 256, 9, True, False],
+ [256, 512, 9, True, False], [512, 1024, 3, False, True]],
+ 'P5': [[64, 128, 3, True, False], [128, 256, 6, True, False],
+ [256, 512, 9, True, False], [512, 1024, 3, True, True]],
+ 'P6': [[64, 128, 3, True, False], [128, 256, 6, True, False],
+ [256, 512, 9, True, False], [512, 768, 3, True, False],
+ [768, 1024, 3, True, True]],
+ }
+
+ def __init__(self,
+ arch='X',
+ depth_mult=1.0,
+ width_mult=1.0,
+ depthwise=False,
+ act='silu',
+ trt=False,
+ return_idx=[2, 3, 4]):
+ super(CSPDarkNet, self).__init__()
+ self.arch = arch
+ self.return_idx = return_idx
+ Conv = DWConv if depthwise else BaseConv
+ arch_setting = self.arch_settings[arch]
+ base_channels = int(arch_setting[0][0] * width_mult)
+
+ # Note: differences between the latest YOLOv5 and the original YOLOX
+        # 1. self.stem, use Conv (in YOLOv5) or Focus (in YOLOX)
+        # 2. use SPPF (in YOLOv5) or SPP (in YOLOX)
+        # 3. put SPPF before (YOLOv5) or SPP after (YOLOX) the last cspdark block's CSPLayer
+        # 4. whether the CSPLayer wrapping SPPF(SPP) adds a shortcut: True in YOLOv5, False in YOLOX
+ if arch in ['P5', 'P6']:
+            # in the latest YOLOv5, use a Conv stem, and SPPF (fast, with a single kernel size)
+ self.stem = Conv(
+ 3, base_channels, ksize=6, stride=2, bias=False, act=act)
+            spp_kernel_sizes = 5
+ elif arch in ['X']:
+            # in the original YOLOX, use a Focus stem, and SPP (three kernel sizes)
+ self.stem = Focus(
+ 3, base_channels, ksize=3, stride=1, bias=False, act=act)
+            spp_kernel_sizes = (5, 9, 13)
+ else:
+ raise AttributeError("Unsupported arch type: {}".format(arch))
+
+ _out_channels = [base_channels]
+ layers_num = 1
+ self.csp_dark_blocks = []
+
+ for i, (in_channels, out_channels, num_blocks, shortcut,
+ use_spp) in enumerate(arch_setting):
+ in_channels = int(in_channels * width_mult)
+ out_channels = int(out_channels * width_mult)
+ _out_channels.append(out_channels)
+ num_blocks = max(round(num_blocks * depth_mult), 1)
+ stage = []
+
+ conv_layer = self.add_sublayer(
+ 'layers{}.stage{}.conv_layer'.format(layers_num, i + 1),
+ Conv(
+ in_channels, out_channels, 3, 2, bias=False, act=act))
+ stage.append(conv_layer)
+ layers_num += 1
+
+ if use_spp and arch in ['X']:
+ # in YOLOX use SPPLayer
+ spp_layer = self.add_sublayer(
+ 'layers{}.stage{}.spp_layer'.format(layers_num, i + 1),
+ SPPLayer(
+ out_channels,
+ out_channels,
+                        kernel_sizes=spp_kernel_sizes,
+ bias=False,
+ act=act))
+ stage.append(spp_layer)
+ layers_num += 1
+
+ csp_layer = self.add_sublayer(
+ 'layers{}.stage{}.csp_layer'.format(layers_num, i + 1),
+ CSPLayer(
+ out_channels,
+ out_channels,
+ num_blocks=num_blocks,
+ shortcut=shortcut,
+ depthwise=depthwise,
+ bias=False,
+ act=act))
+ stage.append(csp_layer)
+ layers_num += 1
+
+ if use_spp and arch in ['P5', 'P6']:
+ # in latest YOLOv5 use SPPFLayer instead of SPPLayer
+ sppf_layer = self.add_sublayer(
+ 'layers{}.stage{}.sppf_layer'.format(layers_num, i + 1),
+ SPPFLayer(
+ out_channels,
+ out_channels,
+ ksize=5,
+ bias=False,
+ act=act))
+ stage.append(sppf_layer)
+ layers_num += 1
+
+ self.csp_dark_blocks.append(nn.Sequential(*stage))
+
+ self._out_channels = [_out_channels[i] for i in self.return_idx]
+ self.strides = [[2, 4, 8, 16, 32, 64][i] for i in self.return_idx]
+
+ def forward(self, inputs):
+ x = inputs['image']
+ outputs = []
+ x = self.stem(x)
+ for i, layer in enumerate(self.csp_dark_blocks):
+ x = layer(x)
+ if i + 1 in self.return_idx:
+ outputs.append(x)
+ return outputs
+
+ @property
+ def out_shape(self):
+ return [
+ ShapeSpec(
+ channels=c, stride=s)
+ for c, s in zip(self._out_channels, self.strides)
+ ]
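The SPPF docstring above claims equivalence to SPP with kernels (5, 9, 13): chaining a 5x5 max-pool (stride 1, padding 2) three times yields effective receptive fields of 5, 9, and 13 while reusing intermediate results for speed. A quick numerical check of that claim on the pooling stage alone, as a sketch:

import paddle
import paddle.nn as nn

x = paddle.rand([1, 8, 32, 32])
mp5 = nn.MaxPool2D(5, stride=1, padding=2)

y1 = mp5(x)        # receptive field 5
y2 = mp5(y1)       # receptive field 5 + 5 - 1 = 9
y3 = mp5(y2)       # receptive field 9 + 5 - 1 = 13

mp9 = nn.MaxPool2D(9, stride=1, padding=4)
mp13 = nn.MaxPool2D(13, stride=1, padding=6)
print(paddle.allclose(y2, mp9(x)).item())    # True
print(paddle.allclose(y3, mp13(x)).item())   # True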
diff --git a/paddlers/models/ppdet/modeling/backbones/cspresnet.py b/paddlers/models/ppdet/modeling/backbones/cspresnet.py
new file mode 100644
index 0000000..f286c6d
--- /dev/null
+++ b/paddlers/models/ppdet/modeling/backbones/cspresnet.py
@@ -0,0 +1,321 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle import ParamAttr
+from paddle.regularizer import L2Decay
+from paddle.nn.initializer import Constant
+
+from paddlers.models.ppdet.modeling.ops import get_act_fn
+from paddlers.models.ppdet.core.workspace import register, serializable
+from ..shape_spec import ShapeSpec
+
+__all__ = ['CSPResNet', 'BasicBlock', 'EffectiveSELayer', 'ConvBNLayer']
+
+
+class ConvBNLayer(nn.Layer):
+ def __init__(self,
+ ch_in,
+ ch_out,
+ filter_size=3,
+ stride=1,
+ groups=1,
+ padding=0,
+ act=None):
+ super(ConvBNLayer, self).__init__()
+
+ self.conv = nn.Conv2D(
+ in_channels=ch_in,
+ out_channels=ch_out,
+ kernel_size=filter_size,
+ stride=stride,
+ padding=padding,
+ groups=groups,
+ bias_attr=False)
+
+ self.bn = nn.BatchNorm2D(
+ ch_out,
+ weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
+ bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
+ self.act = get_act_fn(act) if act is None or isinstance(act, (
+ str, dict)) else act
+
+ def forward(self, x):
+ x = self.conv(x)
+ x = self.bn(x)
+ x = self.act(x)
+
+ return x
+
+
+class RepVggBlock(nn.Layer):
+ def __init__(self, ch_in, ch_out, act='relu', alpha=False):
+ super(RepVggBlock, self).__init__()
+ self.ch_in = ch_in
+ self.ch_out = ch_out
+ self.conv1 = ConvBNLayer(
+ ch_in, ch_out, 3, stride=1, padding=1, act=None)
+ self.conv2 = ConvBNLayer(
+ ch_in, ch_out, 1, stride=1, padding=0, act=None)
+ self.act = get_act_fn(act) if act is None or isinstance(act, (
+ str, dict)) else act
+ if alpha:
+ self.alpha = self.create_parameter(
+ shape=[1],
+ attr=ParamAttr(initializer=Constant(value=1.)),
+ dtype="float32")
+ else:
+ self.alpha = None
+
+ def forward(self, x):
+ if hasattr(self, 'conv'):
+ y = self.conv(x)
+ else:
+            if self.alpha is not None:
+ y = self.conv1(x) + self.alpha * self.conv2(x)
+ else:
+ y = self.conv1(x) + self.conv2(x)
+ y = self.act(y)
+ return y
+
+ def convert_to_deploy(self):
+ if not hasattr(self, 'conv'):
+ self.conv = nn.Conv2D(
+ in_channels=self.ch_in,
+ out_channels=self.ch_out,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ groups=1)
+ kernel, bias = self.get_equivalent_kernel_bias()
+ self.conv.weight.set_value(kernel)
+ self.conv.bias.set_value(bias)
+ self.__delattr__('conv1')
+ self.__delattr__('conv2')
+
+ def get_equivalent_kernel_bias(self):
+ kernel3x3, bias3x3 = self._fuse_bn_tensor(self.conv1)
+ kernel1x1, bias1x1 = self._fuse_bn_tensor(self.conv2)
+        if self.alpha is not None:
+ return kernel3x3 + self.alpha * self._pad_1x1_to_3x3_tensor(
+ kernel1x1), bias3x3 + self.alpha * bias1x1
+ else:
+ return kernel3x3 + self._pad_1x1_to_3x3_tensor(
+ kernel1x1), bias3x3 + bias1x1
+
+ def _pad_1x1_to_3x3_tensor(self, kernel1x1):
+ if kernel1x1 is None:
+ return 0
+ else:
+ return nn.functional.pad(kernel1x1, [1, 1, 1, 1])
+
+ def _fuse_bn_tensor(self, branch):
+ if branch is None:
+ return 0, 0
+ kernel = branch.conv.weight
+ running_mean = branch.bn._mean
+ running_var = branch.bn._variance
+ gamma = branch.bn.weight
+ beta = branch.bn.bias
+ eps = branch.bn._epsilon
+ std = (running_var + eps).sqrt()
+ t = (gamma / std).reshape((-1, 1, 1, 1))
+ return kernel * t, beta - running_mean * gamma / std
+
+
+class BasicBlock(nn.Layer):
+ def __init__(self,
+ ch_in,
+ ch_out,
+ act='relu',
+ shortcut=True,
+ use_alpha=False):
+ super(BasicBlock, self).__init__()
+ assert ch_in == ch_out
+ self.conv1 = ConvBNLayer(ch_in, ch_out, 3, stride=1, padding=1, act=act)
+ self.conv2 = RepVggBlock(ch_out, ch_out, act=act, alpha=use_alpha)
+ self.shortcut = shortcut
+
+ def forward(self, x):
+ y = self.conv1(x)
+ y = self.conv2(y)
+ if self.shortcut:
+ return paddle.add(x, y)
+ else:
+ return y
+
+
+class EffectiveSELayer(nn.Layer):
+ """ Effective Squeeze-Excitation
+ From `CenterMask : Real-Time Anchor-Free Instance Segmentation` - https://arxiv.org/abs/1911.06667
+ """
+
+ def __init__(self, channels, act='hardsigmoid'):
+ super(EffectiveSELayer, self).__init__()
+ self.fc = nn.Conv2D(channels, channels, kernel_size=1, padding=0)
+ self.act = get_act_fn(act) if act is None or isinstance(act, (
+ str, dict)) else act
+
+ def forward(self, x):
+ x_se = x.mean((2, 3), keepdim=True)
+ x_se = self.fc(x_se)
+ return x * self.act(x_se)
+
+
+class CSPResStage(nn.Layer):
+ def __init__(self,
+ block_fn,
+ ch_in,
+ ch_out,
+ n,
+ stride,
+ act='relu',
+ attn='eca',
+ use_alpha=False):
+ super(CSPResStage, self).__init__()
+
+ ch_mid = (ch_in + ch_out) // 2
+ if stride == 2:
+ self.conv_down = ConvBNLayer(
+ ch_in, ch_mid, 3, stride=2, padding=1, act=act)
+ else:
+ self.conv_down = None
+ self.conv1 = ConvBNLayer(ch_mid, ch_mid // 2, 1, act=act)
+ self.conv2 = ConvBNLayer(ch_mid, ch_mid // 2, 1, act=act)
+ self.blocks = nn.Sequential(*[
+ block_fn(
+ ch_mid // 2,
+ ch_mid // 2,
+ act=act,
+ shortcut=True,
+ use_alpha=use_alpha) for i in range(n)
+ ])
+ if attn:
+ self.attn = EffectiveSELayer(ch_mid, act='hardsigmoid')
+ else:
+ self.attn = None
+
+ self.conv3 = ConvBNLayer(ch_mid, ch_out, 1, act=act)
+
+ def forward(self, x):
+ if self.conv_down is not None:
+ x = self.conv_down(x)
+ y1 = self.conv1(x)
+ y2 = self.blocks(self.conv2(x))
+ y = paddle.concat([y1, y2], axis=1)
+ if self.attn is not None:
+ y = self.attn(y)
+ y = self.conv3(y)
+ return y
+
+
+@register
+@serializable
+class CSPResNet(nn.Layer):
+ __shared__ = ['width_mult', 'depth_mult', 'trt']
+
+ def __init__(self,
+ layers=[3, 6, 6, 3],
+ channels=[64, 128, 256, 512, 1024],
+ act='swish',
+ return_idx=[1, 2, 3],
+ depth_wise=False,
+ use_large_stem=False,
+ width_mult=1.0,
+ depth_mult=1.0,
+ trt=False,
+ use_checkpoint=False,
+ use_alpha=False,
+ **args):
+ super(CSPResNet, self).__init__()
+ self.use_checkpoint = use_checkpoint
+ channels = [max(round(c * width_mult), 1) for c in channels]
+ layers = [max(round(l * depth_mult), 1) for l in layers]
+ act = get_act_fn(
+ act, trt=trt) if act is None or isinstance(act,
+ (str, dict)) else act
+
+ if use_large_stem:
+ self.stem = nn.Sequential(
+ ('conv1', ConvBNLayer(
+ 3, channels[0] // 2, 3, stride=2, padding=1, act=act)),
+ ('conv2', ConvBNLayer(
+ channels[0] // 2,
+ channels[0] // 2,
+ 3,
+ stride=1,
+ padding=1,
+ act=act)), ('conv3', ConvBNLayer(
+ channels[0] // 2,
+ channels[0],
+ 3,
+ stride=1,
+ padding=1,
+ act=act)))
+ else:
+ self.stem = nn.Sequential(
+ ('conv1', ConvBNLayer(
+ 3, channels[0] // 2, 3, stride=2, padding=1, act=act)),
+ ('conv2', ConvBNLayer(
+ channels[0] // 2,
+ channels[0],
+ 3,
+ stride=1,
+ padding=1,
+ act=act)))
+
+ n = len(channels) - 1
+ self.stages = nn.Sequential(*[(str(i), CSPResStage(
+ BasicBlock,
+ channels[i],
+ channels[i + 1],
+ layers[i],
+ 2,
+ act=act,
+ use_alpha=use_alpha)) for i in range(n)])
+
+ self._out_channels = channels[1:]
+ self._out_strides = [4 * 2**i for i in range(n)]
+ self.return_idx = return_idx
+ if use_checkpoint:
+ paddle.seed(0)
+
+ def forward(self, inputs):
+ x = inputs['image']
+ x = self.stem(x)
+ outs = []
+ for idx, stage in enumerate(self.stages):
+ if self.use_checkpoint and self.training:
+ x = paddle.distributed.fleet.utils.recompute(
+ stage, x, **{"preserve_rng_state": True})
+ else:
+ x = stage(x)
+ if idx in self.return_idx:
+ outs.append(x)
+
+ return outs
+
+ @property
+ def out_shape(self):
+ return [
+ ShapeSpec(
+ channels=self._out_channels[i], stride=self._out_strides[i])
+ for i in self.return_idx
+ ]
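RepVggBlock above trains with parallel 3x3 and 1x1 conv+BN branches and folds them into a single 3x3 conv for deployment; convert_to_deploy should leave outputs unchanged up to float error. A sketch of an equivalence check, assuming eval mode so BN uses its running statistics:

import paddle
from paddlers.models.ppdet.modeling.backbones.cspresnet import RepVggBlock

blk = RepVggBlock(ch_in=16, ch_out=16, act='relu')
blk.eval()                       # fusion assumes running statistics

x = paddle.rand([1, 16, 32, 32])
y_multi_branch = blk(x)

blk.convert_to_deploy()          # folds conv1 + conv2 into one 3x3 conv
y_single_branch = blk(x)
print(paddle.allclose(y_multi_branch, y_single_branch, atol=1e-5).item())  # True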
diff --git a/paddlers/models/ppdet/modeling/backbones/darknet.py b/paddlers/models/ppdet/modeling/backbones/darknet.py
old mode 100644
new mode 100755
index 160395e..f730e05
--- a/paddlers/models/ppdet/modeling/backbones/darknet.py
+++ b/paddlers/models/ppdet/modeling/backbones/darknet.py
@@ -77,8 +77,8 @@ class ConvBNLayer(nn.Layer):
out = self.batch_norm(out)
if self.act == 'leaky':
out = F.leaky_relu(out, 0.1)
- elif self.act == 'mish':
- out = mish(out)
+ else:
+ out = getattr(F, self.act)(out)
return out
@@ -149,9 +149,14 @@ class BasicBlock(nn.Layer):
super(BasicBlock, self).__init__()
+ assert ch_in == ch_out and (ch_in % 2) == 0, \
+ f"ch_in and ch_out should be the same even int, but the input \'ch_in is {ch_in}, \'ch_out is {ch_out}"
+ # example:
+ # --------------{conv1} --> {conv2}
+ # channel route: 10-->5 --> 5-->10
self.conv1 = ConvBNLayer(
ch_in=ch_in,
- ch_out=ch_out,
+ ch_out=int(ch_out / 2),
filter_size=1,
stride=1,
padding=0,
@@ -160,8 +165,8 @@ class BasicBlock(nn.Layer):
freeze_norm=freeze_norm,
data_format=data_format)
self.conv2 = ConvBNLayer(
- ch_in=ch_out,
- ch_out=ch_out * 2,
+ ch_in=int(ch_out / 2),
+ ch_out=ch_out,
filter_size=3,
stride=1,
padding=1,
@@ -215,7 +220,7 @@ class Blocks(nn.Layer):
res_out = self.add_sublayer(
block_name,
BasicBlock(
- ch_out * 2,
+ ch_out,
ch_out,
norm_type=norm_type,
norm_decay=norm_decay,
@@ -296,7 +301,7 @@ class DarkNet(nn.Layer):
name,
Blocks(
int(ch_in[i]),
- 32 * (2**i),
+ int(ch_in[i]),
stage,
norm_type=norm_type,
norm_decay=norm_decay,
@@ -305,14 +310,14 @@ class DarkNet(nn.Layer):
name=name))
self.darknet_conv_block_list.append(conv_block)
if i in return_idx:
- self._out_channels.append(64 * (2**i))
+ self._out_channels.append(int(ch_in[i]))
for i in range(num_stages - 1):
down_name = 'stage.{}.downsample'.format(i)
downsample = self.add_sublayer(
down_name,
DownSample(
- ch_in=32 * (2**(i + 1)),
- ch_out=32 * (2**(i + 2)),
+ ch_in=int(ch_in[i]),
+ ch_out=int(ch_in[i + 1]),
norm_type=norm_type,
norm_decay=norm_decay,
freeze_norm=freeze_norm,
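With this rework the residual block squeezes to half the channels in the 1x1 conv and restores them in the 3x3 conv (the 10 -> 5 -> 5 -> 10 route in the comment above), so ch_in must equal ch_out and be even. A minimal sketch with hypothetical sizes, assuming the usual BasicBlock keyword arguments of this file:

import paddle
from paddlers.models.ppdet.modeling.backbones.darknet import BasicBlock

block = BasicBlock(ch_in=64, ch_out=64, norm_type='bn')  # route: 64 -> 32 -> 32 -> 64
x = paddle.rand([1, 64, 52, 52])
print(block(x).shape)  # [1, 64, 52, 52] -- the residual add keeps the shape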
diff --git a/paddlers/models/ppdet/modeling/backbones/dla.py b/paddlers/models/ppdet/modeling/backbones/dla.py
old mode 100644
new mode 100755
index 17966fa..70c52a8
--- a/paddlers/models/ppdet/modeling/backbones/dla.py
+++ b/paddlers/models/ppdet/modeling/backbones/dla.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
diff --git a/paddlers/models/ppdet/modeling/backbones/esnet.py b/paddlers/models/ppdet/modeling/backbones/esnet.py
index 9a18d9b..fb13330 100644
--- a/paddlers/models/ppdet/modeling/backbones/esnet.py
+++ b/paddlers/models/ppdet/modeling/backbones/esnet.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -20,7 +20,7 @@ import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
-from paddle.nn import Conv2D, MaxPool2D, AdaptiveAvgPool2D
+from paddle.nn import Conv2D, MaxPool2D, AdaptiveAvgPool2D, BatchNorm
from paddle.nn.initializer import KaimingNormal
from paddle.regularizer import L2Decay
diff --git a/paddlers/models/ppdet/modeling/backbones/ghostnet.py b/paddlers/models/ppdet/modeling/backbones/ghostnet.py
index 4236f04..ef4ac8a 100644
--- a/paddlers/models/ppdet/modeling/backbones/ghostnet.py
+++ b/paddlers/models/ppdet/modeling/backbones/ghostnet.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -299,17 +299,17 @@ class GhostBottleneck(nn.Layer):
class GhostNet(nn.Layer):
__shared__ = ['norm_type']
- def __init__(self,
- scale=1.3,
- feature_maps=[6, 12, 15],
- with_extra_blocks=False,
- extra_block_filters=[[256, 512], [128, 256], [128, 256],
- [64, 128]],
- lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0],
- conv_decay=0.,
- norm_type='bn',
- norm_decay=0.0,
- freeze_norm=False):
+ def __init__(
+ self,
+ scale=1.3,
+ feature_maps=[6, 12, 15],
+ with_extra_blocks=False,
+ extra_block_filters=[[256, 512], [128, 256], [128, 256], [64, 128]],
+ lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0],
+ conv_decay=0.,
+ norm_type='bn',
+ norm_decay=0.0,
+ freeze_norm=False):
super(GhostNet, self).__init__()
if isinstance(feature_maps, Integral):
feature_maps = [feature_maps]
diff --git a/paddlers/models/ppdet/modeling/backbones/hardnet.py b/paddlers/models/ppdet/modeling/backbones/hardnet.py
index 71c5a09..87a2c51 100644
--- a/paddlers/models/ppdet/modeling/backbones/hardnet.py
+++ b/paddlers/models/ppdet/modeling/backbones/hardnet.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -146,7 +146,7 @@ class HarDBlock(nn.Layer):
class HarDNet(nn.Layer):
def __init__(self, depth_wise=False, return_idx=[1, 3, 8, 13], arch=85):
super(HarDNet, self).__init__()
- assert arch in [39, 68, 85], "HarDNet-{} not support.".format(arch)
+ assert arch in [68, 85], "HarDNet-{} is not supported.".format(arch)
if arch == 85:
first_ch = [48, 96]
second_kernel = 3
@@ -161,6 +161,8 @@ class HarDNet(nn.Layer):
grmul = 1.7
gr = [14, 16, 20, 40]
n_layers = [8, 16, 16, 16]
+ else:
+ raise ValueError("HarDNet-{} is not supported.".format(arch))
self.return_idx = return_idx
self._out_channels = [96, 214, 458, 784]
diff --git a/paddlers/models/ppdet/modeling/backbones/lcnet.py b/paddlers/models/ppdet/modeling/backbones/lcnet.py
index 3ac51ae..90bbcc8 100644
--- a/paddlers/models/ppdet/modeling/backbones/lcnet.py
+++ b/paddlers/models/ppdet/modeling/backbones/lcnet.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -68,7 +68,8 @@ class ConvBNLayer(nn.Layer):
filter_size,
num_filters,
stride,
- num_groups=1):
+ num_groups=1,
+ act='hard_swish'):
super().__init__()
self.conv = Conv2D(
@@ -85,12 +86,15 @@ class ConvBNLayer(nn.Layer):
num_filters,
weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
- self.hardswish = nn.Hardswish()
+ if act == 'hard_swish':
+ self.act = nn.Hardswish()
+ elif act == 'relu6':
+ self.act = nn.ReLU6()
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
- x = self.hardswish(x)
+ x = self.act(x)
return x
@@ -100,7 +104,8 @@ class DepthwiseSeparable(nn.Layer):
num_filters,
stride,
dw_size=3,
- use_se=False):
+ use_se=False,
+ act='hard_swish'):
super().__init__()
self.use_se = use_se
self.dw_conv = ConvBNLayer(
@@ -108,14 +113,16 @@ class DepthwiseSeparable(nn.Layer):
num_filters=num_channels,
filter_size=dw_size,
stride=stride,
- num_groups=num_channels)
+ num_groups=num_channels,
+ act=act)
if use_se:
self.se = SEModule(num_channels)
self.pw_conv = ConvBNLayer(
num_channels=num_channels,
filter_size=1,
num_filters=num_filters,
- stride=1)
+ stride=1,
+ act=act)
def forward(self, x):
x = self.dw_conv(x)
@@ -158,7 +165,7 @@ class SEModule(nn.Layer):
@register
@serializable
class LCNet(nn.Layer):
- def __init__(self, scale=1.0, feature_maps=[3, 4, 5]):
+ def __init__(self, scale=1.0, feature_maps=[3, 4, 5], act='hard_swish'):
super().__init__()
self.scale = scale
self.feature_maps = feature_maps
@@ -169,7 +176,8 @@ class LCNet(nn.Layer):
num_channels=3,
filter_size=3,
num_filters=make_divisible(16 * scale),
- stride=2)
+ stride=2,
+ act=act)
self.blocks2 = nn.Sequential(*[
DepthwiseSeparable(
@@ -177,7 +185,8 @@ class LCNet(nn.Layer):
num_filters=make_divisible(out_c * scale),
dw_size=k,
stride=s,
- use_se=se)
+ use_se=se,
+ act=act)
for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks2"])
])
@@ -187,7 +196,8 @@ class LCNet(nn.Layer):
num_filters=make_divisible(out_c * scale),
dw_size=k,
stride=s,
- use_se=se)
+ use_se=se,
+ act=act)
for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks3"])
])
@@ -200,7 +210,8 @@ class LCNet(nn.Layer):
num_filters=make_divisible(out_c * scale),
dw_size=k,
stride=s,
- use_se=se)
+ use_se=se,
+ act=act)
for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks4"])
])
@@ -213,7 +224,8 @@ class LCNet(nn.Layer):
num_filters=make_divisible(out_c * scale),
dw_size=k,
stride=s,
- use_se=se)
+ use_se=se,
+ act=act)
for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks5"])
])
@@ -226,7 +238,8 @@ class LCNet(nn.Layer):
num_filters=make_divisible(out_c * scale),
dw_size=k,
stride=s,
- use_se=se)
+ use_se=se,
+ act=act)
for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks6"])
])
diff --git a/paddlers/models/ppdet/modeling/backbones/mobilenet_v1.py b/paddlers/models/ppdet/modeling/backbones/mobilenet_v1.py
index 0e8b5e1..a839efe 100644
--- a/paddlers/models/ppdet/modeling/backbones/mobilenet_v1.py
+++ b/paddlers/models/ppdet/modeling/backbones/mobilenet_v1.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
diff --git a/paddlers/models/ppdet/modeling/backbones/mobilenet_v3.py b/paddlers/models/ppdet/modeling/backbones/mobilenet_v3.py
index c7e75bb..be4e7e9 100644
--- a/paddlers/models/ppdet/modeling/backbones/mobilenet_v3.py
+++ b/paddlers/models/ppdet/modeling/backbones/mobilenet_v3.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -282,19 +282,19 @@ class ExtraBlockDW(nn.Layer):
class MobileNetV3(nn.Layer):
__shared__ = ['norm_type']
- def __init__(self,
- scale=1.0,
- model_name="large",
- feature_maps=[6, 12, 15],
- with_extra_blocks=False,
- extra_block_filters=[[256, 512], [128, 256], [128, 256],
- [64, 128]],
- lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0],
- conv_decay=0.0,
- multiplier=1.0,
- norm_type='bn',
- norm_decay=0.0,
- freeze_norm=False):
+ def __init__(
+ self,
+ scale=1.0,
+ model_name="large",
+ feature_maps=[6, 12, 15],
+ with_extra_blocks=False,
+ extra_block_filters=[[256, 512], [128, 256], [128, 256], [64, 128]],
+ lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0],
+ conv_decay=0.0,
+ multiplier=1.0,
+ norm_type='bn',
+ norm_decay=0.0,
+ freeze_norm=False):
super(MobileNetV3, self).__init__()
if isinstance(feature_maps, Integral):
feature_maps = [feature_maps]
diff --git a/paddlers/models/ppdet/modeling/backbones/mobileone.py b/paddlers/models/ppdet/modeling/backbones/mobileone.py
new file mode 100644
index 0000000..fe09e45
--- /dev/null
+++ b/paddlers/models/ppdet/modeling/backbones/mobileone.py
@@ -0,0 +1,266 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+This code is a PaddlePaddle implementation of the MobileOne block, see: https://arxiv.org/pdf/2206.04040.pdf.
+Some code is based on https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py
+The copyright of DingXiaoH/RepVGG is as follows:
+MIT License [see LICENSE for details]
+"""
+
+import paddle
+import paddle.nn as nn
+from paddle import ParamAttr
+from paddle.regularizer import L2Decay
+from paddle.nn.initializer import Normal, Constant
+
+from paddlers.models.ppdet.modeling.ops import get_act_fn
+from paddlers.models.ppdet.modeling.layers import ConvNormLayer
+
+
+class MobileOneBlock(nn.Layer):
+ def __init__(
+ self,
+ ch_in,
+ ch_out,
+ stride,
+ kernel_size,
+ conv_num=1,
+ norm_type='bn',
+ norm_decay=0.,
+ norm_groups=32,
+ bias_on=False,
+ lr_scale=1.,
+ freeze_norm=False,
+ initializer=Normal(
+ mean=0., std=0.01),
+ skip_quant=False,
+ act='relu', ):
+ super(MobileOneBlock, self).__init__()
+
+ self.ch_in = ch_in
+ self.ch_out = ch_out
+ self.kernel_size = kernel_size
+ self.stride = stride
+ self.padding = (kernel_size - 1) // 2
+ self.k = conv_num
+
+ self.depth_conv = nn.LayerList()
+ self.point_conv = nn.LayerList()
+ for _ in range(self.k):
+ self.depth_conv.append(
+ ConvNormLayer(
+ ch_in,
+ ch_in,
+ kernel_size,
+ stride=stride,
+ groups=ch_in,
+ norm_type=norm_type,
+ norm_decay=norm_decay,
+ norm_groups=norm_groups,
+ bias_on=bias_on,
+ lr_scale=lr_scale,
+ freeze_norm=freeze_norm,
+ initializer=initializer,
+ skip_quant=skip_quant))
+ self.point_conv.append(
+ ConvNormLayer(
+ ch_in,
+ ch_out,
+ 1,
+ stride=1,
+ groups=1,
+ norm_type=norm_type,
+ norm_decay=norm_decay,
+ norm_groups=norm_groups,
+ bias_on=bias_on,
+ lr_scale=lr_scale,
+ freeze_norm=freeze_norm,
+ initializer=initializer,
+ skip_quant=skip_quant))
+ self.rbr_1x1 = ConvNormLayer(
+ ch_in,
+ ch_in,
+ 1,
+ stride=self.stride,
+ groups=ch_in,
+ norm_type=norm_type,
+ norm_decay=norm_decay,
+ norm_groups=norm_groups,
+ bias_on=bias_on,
+ lr_scale=lr_scale,
+ freeze_norm=freeze_norm,
+ initializer=initializer,
+ skip_quant=skip_quant)
+ self.rbr_identity_st1 = nn.BatchNorm2D(
+ num_features=ch_in,
+ weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
+ bias_attr=ParamAttr(regularizer=L2Decay(
+ 0.0))) if ch_in == ch_out and self.stride == 1 else None
+ self.rbr_identity_st2 = nn.BatchNorm2D(
+ num_features=ch_out,
+ weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
+ bias_attr=ParamAttr(regularizer=L2Decay(
+ 0.0))) if ch_in == ch_out and self.stride == 1 else None
+ self.act = get_act_fn(act) if act is None or isinstance(act, (
+ str, dict)) else act
+
+ def forward(self, x):
+ if hasattr(self, "conv1") and hasattr(self, "conv2"):
+ y = self.act(self.conv2(self.act(self.conv1(x))))
+ else:
+ if self.rbr_identity_st1 is None:
+ id_out_st1 = 0
+ else:
+ id_out_st1 = self.rbr_identity_st1(x)
+
+ x1_1 = 0
+ for i in range(self.k):
+ x1_1 += self.depth_conv[i](x)
+
+ x1_2 = self.rbr_1x1(x)
+ x1 = self.act(x1_1 + x1_2 + id_out_st1)
+
+ if self.rbr_identity_st2 is None:
+ id_out_st2 = 0
+ else:
+ id_out_st2 = self.rbr_identity_st2(x1)
+
+ x2_1 = 0
+ for i in range(self.k):
+ x2_1 += self.point_conv[i](x1)
+ y = self.act(x2_1 + id_out_st2)
+
+ return y
+
+ def convert_to_deploy(self):
+ if not hasattr(self, 'conv1'):
+ self.conv1 = nn.Conv2D(
+ in_channels=self.ch_in,
+ out_channels=self.ch_in,
+ kernel_size=self.kernel_size,
+ stride=self.stride,
+ padding=self.padding,
+ groups=self.ch_in,
+ bias_attr=ParamAttr(
+ initializer=Constant(value=0.), learning_rate=1.))
+ if not hasattr(self, 'conv2'):
+ self.conv2 = nn.Conv2D(
+ in_channels=self.ch_in,
+ out_channels=self.ch_out,
+ kernel_size=1,
+ stride=1,
+ padding='SAME',
+ groups=1,
+ bias_attr=ParamAttr(
+ initializer=Constant(value=0.), learning_rate=1.))
+
+ conv1_kernel, conv1_bias, conv2_kernel, conv2_bias = self.get_equivalent_kernel_bias(
+ )
+ self.conv1.weight.set_value(conv1_kernel)
+ self.conv1.bias.set_value(conv1_bias)
+ self.conv2.weight.set_value(conv2_kernel)
+ self.conv2.bias.set_value(conv2_bias)
+ self.__delattr__('depth_conv')
+ self.__delattr__('point_conv')
+ self.__delattr__('rbr_1x1')
+ if hasattr(self, 'rbr_identity_st1'):
+ self.__delattr__('rbr_identity_st1')
+ if hasattr(self, 'rbr_identity_st2'):
+ self.__delattr__('rbr_identity_st2')
+
+ def get_equivalent_kernel_bias(self):
+ st1_kernel3x3, st1_bias3x3 = self._fuse_bn_tensor(self.depth_conv)
+ st1_kernel1x1, st1_bias1x1 = self._fuse_bn_tensor(self.rbr_1x1)
+ st1_kernelid, st1_biasid = self._fuse_bn_tensor(
+ self.rbr_identity_st1, kernel_size=self.kernel_size)
+
+ st2_kernel1x1, st2_bias1x1 = self._fuse_bn_tensor(self.point_conv)
+ st2_kernelid, st2_biasid = self._fuse_bn_tensor(
+ self.rbr_identity_st2, kernel_size=1)
+
+ conv1_kernel = st1_kernel3x3 + self._pad_1x1_to_3x3_tensor(
+ st1_kernel1x1) + st1_kernelid
+
+ conv1_bias = st1_bias3x3 + st1_bias1x1 + st1_biasid
+
+ conv2_kernel = st2_kernel1x1 + st2_kernelid
+ conv2_bias = st2_bias1x1 + st2_biasid
+
+ return conv1_kernel, conv1_bias, conv2_kernel, conv2_bias
+
+ def _pad_1x1_to_3x3_tensor(self, kernel1x1):
+ if kernel1x1 is None:
+ return 0
+ else:
+ padding_size = (self.kernel_size - 1) // 2
+ return nn.functional.pad(
+ kernel1x1,
+ [padding_size, padding_size, padding_size, padding_size])
+
+ def _fuse_bn_tensor(self, branch, kernel_size=3):
+ if branch is None:
+ return 0, 0
+
+ if isinstance(branch, nn.LayerList):
+ fused_kernels = []
+ fused_bias = []
+ for block in branch:
+ kernel = block.conv.weight
+ running_mean = block.norm._mean
+ running_var = block.norm._variance
+ gamma = block.norm.weight
+ beta = block.norm.bias
+ eps = block.norm._epsilon
+
+ std = (running_var + eps).sqrt()
+ t = (gamma / std).reshape((-1, 1, 1, 1))
+
+ fused_kernels.append(kernel * t)
+ fused_bias.append(beta - running_mean * gamma / std)
+
+ return sum(fused_kernels), sum(fused_bias)
+
+ elif isinstance(branch, ConvNormLayer):
+ kernel = branch.conv.weight
+ running_mean = branch.norm._mean
+ running_var = branch.norm._variance
+ gamma = branch.norm.weight
+ beta = branch.norm.bias
+ eps = branch.norm._epsilon
+ else:
+ assert isinstance(branch, nn.BatchNorm2D)
+ input_dim = self.ch_in if kernel_size == 1 else 1
+ kernel_value = paddle.zeros(
+ shape=[self.ch_in, input_dim, kernel_size, kernel_size],
+ dtype='float32')
+ if kernel_size > 1:
+ for i in range(self.ch_in):
+ kernel_value[i, i % input_dim, (kernel_size - 1) // 2, (
+ kernel_size - 1) // 2] = 1
+ elif kernel_size == 1:
+ for i in range(self.ch_in):
+ kernel_value[i, i % input_dim, 0, 0] = 1
+ else:
+ raise ValueError("Invalid kernel size recieved!")
+ kernel = paddle.to_tensor(kernel_value, place=branch.weight.place)
+ running_mean = branch._mean
+ running_var = branch._variance
+ gamma = branch.weight
+ beta = branch.bias
+ eps = branch._epsilon
+
+ std = (running_var + eps).sqrt()
+ t = (gamma / std).reshape((-1, 1, 1, 1))
+
+ return kernel * t, beta - running_mean * gamma / std
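_fuse_bn_tensor above implements the standard conv+BN folding: W' = W * gamma/std and b' = beta - mean * gamma/std, with std = sqrt(var + eps). A standalone numerical check of that identity, as a sketch independent of MobileOne itself:

import paddle
import paddle.nn as nn
import paddle.nn.functional as F

conv = nn.Conv2D(8, 8, 3, padding=1, bias_attr=False)
bn = nn.BatchNorm2D(8)
bn.eval()                        # folding assumes running statistics

x = paddle.rand([1, 8, 16, 16])
ref = bn(conv(x))

std = (bn._variance + bn._epsilon).sqrt()
t = (bn.weight / std).reshape([-1, 1, 1, 1])
out = F.conv2d(x, conv.weight * t, bn.bias - bn._mean * bn.weight / std, padding=1)
print(paddle.allclose(ref, out, atol=1e-5).item())  # True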
diff --git a/paddlers/models/ppdet/modeling/backbones/resnet.py b/paddlers/models/ppdet/modeling/backbones/resnet.py
old mode 100644
new mode 100755
index 9287cd4..d46a664
--- a/paddlers/models/ppdet/modeling/backbones/resnet.py
+++ b/paddlers/models/ppdet/modeling/backbones/resnet.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
import math
@@ -446,13 +446,13 @@ class ResNet(nn.Layer):
std_senet=False):
"""
Residual Network, see https://arxiv.org/abs/1512.03385
-
+
Args:
depth (int): ResNet depth, should be 18, 34, 50, 101, 152.
ch_in (int): output channel of first stage, default 64
variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently
lr_mult_list (list): learning rate ratio of different resnet stages(2,3,4,5),
- lower learning rate ratio is need for pretrained model
+            lower learning rate ratio is needed for pretrained model
got using distillation(default as [1.0, 1.0, 1.0, 1.0]).
groups (int): group convolution cardinality
base_width (int): base width of each group convolution
diff --git a/paddlers/models/ppdet/modeling/backbones/senet.py b/paddlers/models/ppdet/modeling/backbones/senet.py
index d83dc42..de61e85 100644
--- a/paddlers/models/ppdet/modeling/backbones/senet.py
+++ b/paddlers/models/ppdet/modeling/backbones/senet.py
@@ -1,21 +1,23 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.nn as nn
from paddlers.models.ppdet.core.workspace import register, serializable
from .resnet import ResNet, Blocks, BasicBlock, BottleNeck
+from ..shape_spec import ShapeSpec
+from .name_adapter import NameAdapter
__all__ = ['SENet', 'SERes5Head']
@@ -41,12 +43,12 @@ class SENet(ResNet):
num_stages=4):
"""
Squeeze-and-Excitation Networks, see https://arxiv.org/abs/1709.01507
-
+
Args:
depth (int): SENet depth, should be 50, 101, 152
variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently
lr_mult_list (list): learning rate ratio of different resnet stages(2,3,4,5),
- lower learning rate ratio is need for pretrained model
+            lower learning rate ratio is needed for pretrained model
got using distillation(default as [1.0, 1.0, 1.0, 1.0]).
groups (int): group convolution cardinality
base_width (int): base width of each group convolution
@@ -103,7 +105,7 @@ class SERes5Head(nn.Layer):
norm_decay (float): weight decay for normalization layer weights
dcn_v2_stages (list): index of stages who select deformable conv v2
std_senet (bool): whether use senet, default True
-
+
"""
super(SERes5Head, self).__init__()
ch_out = 512
diff --git a/paddlers/models/ppdet/modeling/backbones/shufflenet_v2.py b/paddlers/models/ppdet/modeling/backbones/shufflenet_v2.py
index be3b86f..ce9a82d 100644
--- a/paddlers/models/ppdet/modeling/backbones/shufflenet_v2.py
+++ b/paddlers/models/ppdet/modeling/backbones/shufflenet_v2.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -188,11 +188,10 @@ class ShuffleNetV2(nn.Layer):
elif scale == 1.5:
stage_out_channels = [-1, 24, 176, 352, 704, 1024]
elif scale == 2.0:
- stage_out_channels = [-1, 24, 224, 488, 976, 2048]
+ stage_out_channels = [-1, 24, 244, 488, 976, 2048]
else:
raise NotImplementedError("This scale size:[" + str(scale) +
"] is not implemented!")
-
self._out_channels = []
self._feature_idx = 0
# 1. conv1
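A note on the hunk above: 244 (not 224) matches the stage-2 width reported for the 2.0x model in the ShuffleNetV2 paper, so the one-character change is a genuine fix rather than a typo. For reference, the two scale configurations visible in this hunk:

```python
# Stage output channels per scale, as they read after this fix; the leading
# -1 pads index 0 so stage indices stay 1-based in the surrounding code.
stage_out_channels = {
    1.5: [-1, 24, 176, 352, 704, 1024],
    2.0: [-1, 24, 244, 488, 976, 2048],  # 244 is the documented 2.0x width
}
```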
diff --git a/paddlers/models/ppdet/modeling/backbones/swin_transformer.py b/paddlers/models/ppdet/modeling/backbones/swin_transformer.py
index b35fe71..616964f 100644
--- a/paddlers/models/ppdet/modeling/backbones/swin_transformer.py
+++ b/paddlers/models/ppdet/modeling/backbones/swin_transformer.py
@@ -20,62 +20,13 @@ MIT License [see LICENSE for details]
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
-from paddle.nn.initializer import TruncatedNormal, Constant, Assign
from paddlers.models.ppdet.modeling.shape_spec import ShapeSpec
from paddlers.models.ppdet.core.workspace import register, serializable
import numpy as np
-# Common initializations
-ones_ = Constant(value=1.)
-zeros_ = Constant(value=0.)
-trunc_normal_ = TruncatedNormal(std=.02)
-
-
-# Common Functions
-def to_2tuple(x):
- return tuple([x] * 2)
-
-
-def add_parameter(layer, datas, name=None):
- parameter = layer.create_parameter(
- shape=(datas.shape), default_initializer=Assign(datas))
- if name:
- layer.add_parameter(name, parameter)
- return parameter
-
-
-# Common Layers
-def drop_path(x, drop_prob=0., training=False):
- """
- Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
- the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
- See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ...
- """
- if drop_prob == 0. or not training:
- return x
- keep_prob = paddle.to_tensor(1 - drop_prob)
- shape = (paddle.shape(x)[0], ) + (1, ) * (x.ndim - 1)
- random_tensor = keep_prob + paddle.rand(shape, dtype=x.dtype)
- random_tensor = paddle.floor(random_tensor) # binarize
- output = x.divide(keep_prob) * random_tensor
- return output
-
-
-class DropPath(nn.Layer):
- def __init__(self, drop_prob=None):
- super(DropPath, self).__init__()
- self.drop_prob = drop_prob
-
- def forward(self, x):
- return drop_path(x, self.drop_prob, self.training)
-
-
-class Identity(nn.Layer):
- def __init__(self):
- super(Identity, self).__init__()
-
- def forward(self, input):
- return input
+from .transformer_utils import DropPath, Identity
+from .transformer_utils import add_parameter, to_2tuple
+from .transformer_utils import ones_, zeros_, trunc_normal_
class Mlp(nn.Layer):
@@ -112,7 +63,7 @@ def window_partition(x, window_size):
"""
B, H, W, C = x.shape
x = x.reshape(
- [B, H // window_size, window_size, W // window_size, window_size, C])
+ [-1, H // window_size, window_size, W // window_size, window_size, C])
windows = x.transpose([0, 1, 3, 2, 4, 5]).reshape(
[-1, window_size, window_size, C])
return windows
@@ -128,10 +79,11 @@ def window_reverse(windows, window_size, H, W):
Returns:
x: (B, H, W, C)
"""
+ _, _, _, C = windows.shape
B = int(windows.shape[0] / (H * W / window_size / window_size))
x = windows.reshape(
- [B, H // window_size, W // window_size, window_size, window_size, -1])
- x = x.transpose([0, 1, 3, 2, 4, 5]).reshape([B, H, W, -1])
+ [-1, H // window_size, W // window_size, window_size, window_size, C])
+ x = x.transpose([0, 1, 3, 2, 4, 5]).reshape([-1, H, W, C])
return x
@@ -206,14 +158,14 @@ class WindowAttention(nn.Layer):
"""
B_, N, C = x.shape
qkv = self.qkv(x).reshape(
- [B_, N, 3, self.num_heads, C // self.num_heads]).transpose(
+ [-1, N, 3, self.num_heads, C // self.num_heads]).transpose(
[2, 0, 3, 1, 4])
q, k, v = qkv[0], qkv[1], qkv[2]
q = q * self.scale
attn = paddle.mm(q, k.transpose([0, 1, 3, 2]))
- index = self.relative_position_index.reshape([-1])
+ index = self.relative_position_index.flatten()
relative_position_bias = paddle.index_select(
self.relative_position_bias_table, index)
@@ -227,7 +179,7 @@ class WindowAttention(nn.Layer):
if mask is not None:
nW = mask.shape[0]
- attn = attn.reshape([B_ // nW, nW, self.num_heads, N, N
+ attn = attn.reshape([-1, nW, self.num_heads, N, N
]) + mask.unsqueeze(1).unsqueeze(0)
attn = attn.reshape([-1, self.num_heads, N, N])
attn = self.softmax(attn)
@@ -237,7 +189,7 @@ class WindowAttention(nn.Layer):
attn = self.attn_drop(attn)
# x = (attn @ v).transpose(1, 2).reshape([B_, N, C])
- x = paddle.mm(attn, v).transpose([0, 2, 1, 3]).reshape([B_, N, C])
+ x = paddle.mm(attn, v).transpose([0, 2, 1, 3]).reshape([-1, N, C])
x = self.proj(x)
x = self.proj_drop(x)
return x
@@ -315,7 +267,7 @@ class SwinTransformerBlock(nn.Layer):
shortcut = x
x = self.norm1(x)
- x = x.reshape([B, H, W, C])
+ x = x.reshape([-1, H, W, C])
# pad feature maps to multiples of window size
pad_l = pad_t = 0
@@ -337,7 +289,7 @@ class SwinTransformerBlock(nn.Layer):
x_windows = window_partition(
shifted_x, self.window_size) # nW*B, window_size, window_size, C
x_windows = x_windows.reshape(
- [-1, self.window_size * self.window_size,
+ [x_windows.shape[0], self.window_size * self.window_size,
C]) # nW*B, window_size*window_size, C
# W-MSA/SW-MSA
@@ -346,7 +298,7 @@ class SwinTransformerBlock(nn.Layer):
# merge windows
attn_windows = attn_windows.reshape(
- [-1, self.window_size, self.window_size, C])
+ [x_windows.shape[0], self.window_size, self.window_size, C])
shifted_x = window_reverse(attn_windows, self.window_size, Hp,
Wp) # B H' W' C
@@ -362,7 +314,7 @@ class SwinTransformerBlock(nn.Layer):
if pad_r > 0 or pad_b > 0:
x = x[:, :H, :W, :]
- x = x.reshape([B, H * W, C])
+ x = x.reshape([-1, H * W, C])
# FFN
x = shortcut + self.drop_path(x)
@@ -393,7 +345,7 @@ class PatchMerging(nn.Layer):
B, L, C = x.shape
assert L == H * W, "input feature has wrong size"
- x = x.reshape([B, H, W, C])
+ x = x.reshape([-1, H, W, C])
# padding
pad_input = (H % 2 == 1) or (W % 2 == 1)
@@ -405,7 +357,7 @@ class PatchMerging(nn.Layer):
x2 = x[:, 0::2, 1::2, :] # B H/2 W/2 C
x3 = x[:, 1::2, 1::2, :] # B H/2 W/2 C
x = paddle.concat([x0, x1, x2, x3], -1) # B H/2 W/2 4*C
- x = x.reshape([B, H * W // 4, 4 * C]) # B H/2*W/2 4*C
+ x = x.reshape([-1, H * W // 4, 4 * C]) # B H/2*W/2 4*C
x = self.norm(x)
x = self.reduction(x)
@@ -482,8 +434,7 @@ class BasicLayer(nn.Layer):
# calculate attention mask for SW-MSA
Hp = int(np.ceil(H / self.window_size)) * self.window_size
Wp = int(np.ceil(W / self.window_size)) * self.window_size
- img_mask = paddle.fluid.layers.zeros(
- [1, Hp, Wp, 1], dtype='float32') # 1 Hp Wp 1
+ img_mask = paddle.zeros([1, Hp, Wp, 1], dtype='float32') # 1 Hp Wp 1
h_slices = (slice(0, -self.window_size),
slice(-self.window_size, -self.shift_size),
slice(-self.shift_size, None))
@@ -688,10 +639,10 @@ class SwinTransformer(nn.Layer):
if self.frozen_stages >= 0:
self.patch_embed.eval()
for param in self.patch_embed.parameters():
- param.requires_grad = False
+ param.stop_gradient = True
if self.frozen_stages >= 1 and self.ape:
- self.absolute_pos_embed.requires_grad = False
+ self.absolute_pos_embed.stop_gradient = True
if self.frozen_stages >= 2:
self.pos_drop.eval()
@@ -699,7 +650,7 @@ class SwinTransformer(nn.Layer):
m = self.layers[i]
m.eval()
for param in m.parameters():
- param.requires_grad = False
+ param.stop_gradient = True
def _init_weights(self, m):
if isinstance(m, nn.Linear):
@@ -713,7 +664,7 @@ class SwinTransformer(nn.Layer):
def forward(self, x):
"""Forward function."""
x = self.patch_embed(x['image'])
- _, _, Wh, Ww = x.shape
+ B, _, Wh, Ww = x.shape
if self.ape:
# interpolate the position embedding to the corresponding size
absolute_pos_embed = F.interpolate(
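A pattern worth calling out across the swin_transformer.py hunks: every reshape that used to hard-code the batch size `B` now passes `-1` for that axis. Under dynamic batch sizes and `paddle.jit.to_static` export, `x.shape[0]` can be symbolic, and baking it into `reshape` freezes the graph to one batch size, while `-1` lets the axis be inferred at run time. A minimal sketch of the idiom, independent of this backbone:

```python
import paddle

x = paddle.rand([4, 7, 7, 96])  # N, H, W, C; the batch axis may be dynamic
H, W, C = x.shape[1], x.shape[2], x.shape[3]

# Hard-coding x.shape[0] would freeze the batch size under static-graph
# export; -1 asks reshape to infer that axis instead.
y = x.reshape([-1, H * W, C])
print(y.shape)  # [4, 49, 96]
```

The same portability motive drives the `param.stop_gradient = True` changes: `stop_gradient` is the Paddle-native freeze flag, whereas assigning `requires_grad` is a PyTorch idiom that Paddle does not interpret.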
diff --git a/paddlers/models/ppdet/modeling/backbones/transformer_utils.py b/paddlers/models/ppdet/modeling/backbones/transformer_utils.py
new file mode 100644
index 0000000..bc10652
--- /dev/null
+++ b/paddlers/models/ppdet/modeling/backbones/transformer_utils.py
@@ -0,0 +1,74 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.nn as nn
+
+from paddle.nn.initializer import TruncatedNormal, Constant, Assign
+
+# Common initializations
+ones_ = Constant(value=1.)
+zeros_ = Constant(value=0.)
+trunc_normal_ = TruncatedNormal(std=.02)
+
+
+# Common Layers
+def drop_path(x, drop_prob=0., training=False):
+ """
+ Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
+ the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
+ See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ...
+ """
+ if drop_prob == 0. or not training:
+ return x
+ keep_prob = paddle.to_tensor(1 - drop_prob)
+ shape = (paddle.shape(x)[0], ) + (1, ) * (x.ndim - 1)
+ random_tensor = keep_prob + paddle.rand(shape, dtype=x.dtype)
+ random_tensor = paddle.floor(random_tensor) # binarize
+ output = x.divide(keep_prob) * random_tensor
+ return output
+
+
+class DropPath(nn.Layer):
+ def __init__(self, drop_prob=None):
+ super(DropPath, self).__init__()
+ self.drop_prob = drop_prob
+
+ def forward(self, x):
+ return drop_path(x, self.drop_prob, self.training)
+
+
+class Identity(nn.Layer):
+ def __init__(self):
+ super(Identity, self).__init__()
+
+ def forward(self, input):
+ return input
+
+
+# common funcs
+
+
+def to_2tuple(x):
+ if isinstance(x, (list, tuple)):
+ return x
+ return tuple([x] * 2)
+
+
+def add_parameter(layer, datas, name=None):
+ parameter = layer.create_parameter(
+ shape=(datas.shape), default_initializer=Assign(datas))
+ if name:
+ layer.add_parameter(name, parameter)
+ return parameter
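The new transformer_utils.py consolidates helpers that swin_transformer.py previously defined inline, so vision_transformer.py below can share them. `drop_path` implements stochastic depth: during training, each sample's residual branch is zeroed with probability `drop_prob` and the survivors are rescaled by `1/keep_prob` so the expected output is unchanged. A minimal usage sketch (the `ResidualBlock` wrapper is illustrative, not part of this PR):

```python
import paddle
import paddle.nn as nn

from paddlers.models.ppdet.modeling.backbones.transformer_utils import DropPath

class ResidualBlock(nn.Layer):
    """Toy residual wrapper; `body` is any sub-layer, 0.1 is an arbitrary rate."""

    def __init__(self, body, drop_prob=0.1):
        super().__init__()
        self.body = body
        self.drop_path = DropPath(drop_prob)

    def forward(self, x):
        # Whole residual branches are dropped per sample, not single units.
        return x + self.drop_path(self.body(x))

block = ResidualBlock(nn.Linear(16, 16))
out = block(paddle.rand([8, 16]))
```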
diff --git a/paddlers/models/ppdet/modeling/backbones/vgg.py b/paddlers/models/ppdet/modeling/backbones/vgg.py
old mode 100644
new mode 100755
index 1b9e19a..10ddd7b
--- a/paddlers/models/ppdet/modeling/backbones/vgg.py
+++ b/paddlers/models/ppdet/modeling/backbones/vgg.py
@@ -168,9 +168,9 @@ class VGG(nn.Layer):
self.norms = []
for i, n in enumerate(self.normalizations):
if n != -1:
- norm = self.add_sublayer(
- "norm{}".format(i),
- L2NormScale(self.extra_block_filters[i][1], n))
+ norm = self.add_sublayer("norm{}".format(i),
+ L2NormScale(
+ self.extra_block_filters[i][1], n))
else:
norm = None
self.norms.append(norm)
diff --git a/paddlers/models/ppdet/modeling/backbones/vision_transformer.py b/paddlers/models/ppdet/modeling/backbones/vision_transformer.py
new file mode 100644
index 0000000..586b6f2
--- /dev/null
+++ b/paddlers/models/ppdet/modeling/backbones/vision_transformer.py
@@ -0,0 +1,634 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+import numpy as np
+from paddle.nn.initializer import Constant
+
+from paddlers.models.ppdet.modeling.shape_spec import ShapeSpec
+from paddlers.models.ppdet.core.workspace import register, serializable
+
+from .transformer_utils import zeros_, DropPath, Identity
+
+
+class Mlp(nn.Layer):
+ def __init__(self,
+ in_features,
+ hidden_features=None,
+ out_features=None,
+ act_layer=nn.GELU,
+ drop=0.):
+ super().__init__()
+ out_features = out_features or in_features
+ hidden_features = hidden_features or in_features
+ self.fc1 = nn.Linear(in_features, hidden_features)
+ self.act = act_layer()
+ self.fc2 = nn.Linear(hidden_features, out_features)
+ self.drop = nn.Dropout(drop)
+
+ def forward(self, x):
+ x = self.fc1(x)
+ x = self.act(x)
+ x = self.drop(x)
+ x = self.fc2(x)
+ x = self.drop(x)
+ return x
+
+
+class Attention(nn.Layer):
+ def __init__(self,
+ dim,
+ num_heads=8,
+ qkv_bias=False,
+ qk_scale=None,
+ attn_drop=0.,
+ proj_drop=0.,
+ window_size=None):
+ super().__init__()
+ self.num_heads = num_heads
+ head_dim = dim // num_heads
+ self.scale = qk_scale or head_dim**-0.5
+
+ self.qkv = nn.Linear(dim, dim * 3, bias_attr=False)
+
+ if qkv_bias:
+ self.q_bias = self.create_parameter(
+ shape=([dim]), default_initializer=zeros_)
+ self.v_bias = self.create_parameter(
+ shape=([dim]), default_initializer=zeros_)
+ else:
+ self.q_bias = None
+ self.v_bias = None
+ if window_size:
+ self.window_size = window_size
+ self.num_relative_distance = (2 * window_size[0] - 1) * (
+ 2 * window_size[1] - 1) + 3
+ self.relative_position_bias_table = self.create_parameter(
+ shape=(self.num_relative_distance, num_heads),
+ default_initializer=zeros_) # 2*Wh-1 * 2*Ww-1, nH
+            # cls to token & token to cls & cls to cls
+
+ # get pair-wise relative position index for each token inside the window
+ coords_h = paddle.arange(window_size[0])
+ coords_w = paddle.arange(window_size[1])
+ coords = paddle.stack(paddle.meshgrid(
+ [coords_h, coords_w])) # 2, Wh, Ww
+ coords_flatten = paddle.flatten(coords, 1) # 2, Wh*Ww
+ coords_flatten_1 = paddle.unsqueeze(coords_flatten, 2)
+ coords_flatten_2 = paddle.unsqueeze(coords_flatten, 1)
+ relative_coords = coords_flatten_1.clone() - coords_flatten_2.clone(
+ )
+
+ #relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :] # 2, Wh*Ww, Wh*Wh
+ relative_coords = relative_coords.transpose(
+ (1, 2, 0)) #.contiguous() # Wh*Ww, Wh*Ww, 2
+ relative_coords[:, :, 0] += window_size[
+ 0] - 1 # shift to start from 0
+ relative_coords[:, :, 1] += window_size[1] - 1
+ relative_coords[:, :, 0] *= 2 * window_size[1] - 1
+ relative_position_index = \
+ paddle.zeros(shape=(window_size[0] * window_size[1] + 1, ) * 2, dtype=relative_coords.dtype)
+ relative_position_index[1:, 1:] = relative_coords.sum(
+ -1) # Wh*Ww, Wh*Ww
+ relative_position_index[0, 0:] = self.num_relative_distance - 3
+ relative_position_index[0:, 0] = self.num_relative_distance - 2
+ relative_position_index[0, 0] = self.num_relative_distance - 1
+
+ self.register_buffer("relative_position_index",
+ relative_position_index)
+ # trunc_normal_(self.relative_position_bias_table, std=.0)
+ else:
+ self.window_size = None
+ self.relative_position_bias_table = None
+ self.relative_position_index = None
+
+ self.attn_drop = nn.Dropout(attn_drop)
+ self.proj = nn.Linear(dim, dim)
+ self.proj_drop = nn.Dropout(proj_drop)
+
+ def forward(self, x, rel_pos_bias=None):
+ x_shape = paddle.shape(x)
+ N, C = x_shape[1], x_shape[2]
+
+ qkv_bias = None
+ if self.q_bias is not None:
+ qkv_bias = paddle.concat(
+ (self.q_bias, paddle.zeros_like(self.v_bias), self.v_bias))
+ qkv = F.linear(x, weight=self.qkv.weight, bias=qkv_bias)
+
+ qkv = qkv.reshape((-1, N, 3, self.num_heads,
+ C // self.num_heads)).transpose((2, 0, 3, 1, 4))
+ q, k, v = qkv[0], qkv[1], qkv[2]
+ attn = (q.matmul(k.transpose((0, 1, 3, 2)))) * self.scale
+
+ if self.relative_position_bias_table is not None:
+ relative_position_bias = self.relative_position_bias_table[
+ self.relative_position_index.reshape([-1])].reshape([
+ self.window_size[0] * self.window_size[1] + 1,
+ self.window_size[0] * self.window_size[1] + 1, -1
+ ]) # Wh*Ww,Wh*Ww,nH
+ relative_position_bias = relative_position_bias.transpose(
+ (2, 0, 1)) #.contiguous() # nH, Wh*Ww, Wh*Ww
+ attn = attn + relative_position_bias.unsqueeze(0)
+ if rel_pos_bias is not None:
+ attn = attn + rel_pos_bias
+
+ attn = nn.functional.softmax(attn, axis=-1)
+ attn = self.attn_drop(attn)
+
+ x = (attn.matmul(v)).transpose((0, 2, 1, 3)).reshape((-1, N, C))
+ x = self.proj(x)
+ x = self.proj_drop(x)
+ return x
+
+
+class Block(nn.Layer):
+ def __init__(self,
+ dim,
+ num_heads,
+ mlp_ratio=4.,
+ qkv_bias=False,
+ qk_scale=None,
+ drop=0.,
+ attn_drop=0.,
+ drop_path=0.,
+ window_size=None,
+ init_values=None,
+ act_layer=nn.GELU,
+ norm_layer='nn.LayerNorm',
+ epsilon=1e-5):
+ super().__init__()
+ self.norm1 = nn.LayerNorm(dim, epsilon=1e-6)
+ self.attn = Attention(
+ dim,
+ num_heads=num_heads,
+ qkv_bias=qkv_bias,
+ qk_scale=qk_scale,
+ attn_drop=attn_drop,
+ proj_drop=drop,
+ window_size=window_size)
+ # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
+ self.drop_path = DropPath(drop_path) if drop_path > 0. else Identity()
+ self.norm2 = eval(norm_layer)(dim, epsilon=epsilon)
+ mlp_hidden_dim = int(dim * mlp_ratio)
+ self.mlp = Mlp(in_features=dim,
+ hidden_features=mlp_hidden_dim,
+ act_layer=act_layer,
+ drop=drop)
+ if init_values is not None:
+ self.gamma_1 = self.create_parameter(
+ shape=([dim]), default_initializer=Constant(value=init_values))
+ self.gamma_2 = self.create_parameter(
+ shape=([dim]), default_initializer=Constant(value=init_values))
+ else:
+ self.gamma_1, self.gamma_2 = None, None
+
+ def forward(self, x, rel_pos_bias=None):
+
+ if self.gamma_1 is None:
+ x = x + self.drop_path(
+ self.attn(
+ self.norm1(x), rel_pos_bias=rel_pos_bias))
+ x = x + self.drop_path(self.mlp(self.norm2(x)))
+ else:
+ x = x + self.drop_path(self.gamma_1 * self.attn(
+ self.norm1(x), rel_pos_bias=rel_pos_bias))
+ x = x + self.drop_path(self.gamma_2 * self.mlp(self.norm2(x)))
+ return x
+
+
+class PatchEmbed(nn.Layer):
+ """ Image to Patch Embedding
+ """
+
+ def __init__(self,
+ img_size=[224, 224],
+ patch_size=16,
+ in_chans=3,
+ embed_dim=768):
+ super().__init__()
+ self.num_patches_w = img_size[0] // patch_size
+ self.num_patches_h = img_size[1] // patch_size
+
+ num_patches = self.num_patches_w * self.num_patches_h
+ self.patch_shape = (img_size[0] // patch_size,
+ img_size[1] // patch_size)
+ self.img_size = img_size
+ self.patch_size = patch_size
+ self.num_patches = num_patches
+
+ self.proj = nn.Conv2D(
+ in_chans, embed_dim, kernel_size=patch_size, stride=patch_size)
+
+ @property
+ def num_patches_in_h(self):
+ return self.img_size[1] // self.patch_size
+
+ @property
+ def num_patches_in_w(self):
+ return self.img_size[0] // self.patch_size
+
+ def forward(self, x, mask=None):
+ B, C, H, W = x.shape
+ return self.proj(x)
+
+
+class RelativePositionBias(nn.Layer):
+ def __init__(self, window_size, num_heads):
+ super().__init__()
+ self.window_size = window_size
+ self.num_relative_distance = (2 * window_size[0] - 1) * (
+ 2 * window_size[1] - 1) + 3
+ self.relative_position_bias_table = self.create_parameter(
+ shape=(self.num_relative_distance, num_heads),
+            default_initializer=zeros_)
+        # cls to token & token to cls & cls to cls
+
+ # get pair-wise relative position index for each token inside the window
+ coords_h = paddle.arange(window_size[0])
+ coords_w = paddle.arange(window_size[1])
+ coords = paddle.stack(paddle.meshgrid(
+ [coords_h, coords_w])) # 2, Wh, Ww
+ coords_flatten = coords.flatten(1) # 2, Wh*Ww
+
+ relative_coords = coords_flatten[:, :,
+ None] - coords_flatten[:,
+ None, :] # 2, Wh*Ww, Wh*Ww
+        relative_coords = relative_coords.transpose(
+ (1, 2, 0)) # Wh*Ww, Wh*Ww, 2
+ relative_coords[:, :, 0] += window_size[0] - 1 # shift to start from 0
+ relative_coords[:, :, 1] += window_size[1] - 1
+ relative_coords[:, :, 0] *= 2 * window_size[1] - 1
+ relative_position_index = \
+            paddle.zeros(shape=(window_size[0] * window_size[1] + 1,) * 2, dtype=relative_coords.dtype)
+ relative_position_index[1:, 1:] = relative_coords.sum(
+ -1) # Wh*Ww, Wh*Ww
+ relative_position_index[0, 0:] = self.num_relative_distance - 3
+ relative_position_index[0:, 0] = self.num_relative_distance - 2
+ relative_position_index[0, 0] = self.num_relative_distance - 1
+ self.register_buffer("relative_position_index", relative_position_index)
+
+ def forward(self):
+ relative_position_bias = \
+            self.relative_position_bias_table[self.relative_position_index.reshape([-1])].reshape([
+                self.window_size[0] * self.window_size[1] + 1,
+                self.window_size[0] * self.window_size[1] + 1, -1])  # Wh*Ww,Wh*Ww,nH
+ return relative_position_bias.transpose((2, 0, 1)) # nH, Wh*Ww, Wh*Ww
+
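The index arithmetic above is dense, so here is a small numpy re-derivation of the same table for a toy 2x2 window (cls-token rows and columns omitted). Each pair of token positions maps to one of `(2*Wh-1)*(2*Ww-1)` relative offsets; the three extra ids, `num_relative_distance` minus 3, 2 and 1, are reserved for cls-to-token, token-to-cls and cls-to-cls:

```python
import numpy as np

Wh, Ww = 2, 2  # toy window
coords = np.stack(np.meshgrid(np.arange(Wh), np.arange(Ww), indexing="ij"))
flat = coords.reshape(2, -1)               # 2, Wh*Ww
rel = flat[:, :, None] - flat[:, None, :]  # 2, Wh*Ww, Wh*Ww
rel = rel.transpose(1, 2, 0)
rel[:, :, 0] += Wh - 1                     # shift offsets to start at 0
rel[:, :, 1] += Ww - 1
rel[:, :, 0] *= 2 * Ww - 1                 # row-major flattening
index = rel.sum(-1)                        # Wh*Ww x Wh*Ww index table
assert index.max() == (2 * Wh - 1) * (2 * Ww - 1) - 1
```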
+
+def get_sinusoid_encoding_table(n_position, d_hid, token=False):
+ ''' Sinusoid position encoding table '''
+
+ def get_position_angle_vec(position):
+ return [
+ position / np.power(10000, 2 * (hid_j // 2) / d_hid)
+ for hid_j in range(d_hid)
+ ]
+
+ sinusoid_table = np.array(
+ [get_position_angle_vec(pos_i) for pos_i in range(n_position)])
+ sinusoid_table[:, 0::2] = np.sin(sinusoid_table[:, 0::2]) # dim 2i
+ sinusoid_table[:, 1::2] = np.cos(sinusoid_table[:, 1::2]) # dim 2i+1
+ if token:
+ sinusoid_table = np.concatenate(
+            [sinusoid_table, np.zeros([1, d_hid])], axis=0)
+
+ return paddle.to_tensor(sinusoid_table, dtype=paddle.float32).unsqueeze(0)
+
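`get_sinusoid_encoding_table` is the classic Transformer positional encoding, `PE(p, 2i) = sin(p / 10000^(2i/d))` and `PE(p, 2i+1) = cos(p / 10000^(2i/d))`. An equivalent vectorized restatement, useful as a cross-check of the loop above:

```python
import numpy as np

def sinusoid(n_position, d_hid):
    pos = np.arange(n_position)[:, None]
    i = np.arange(d_hid)[None, :]
    angle = pos / np.power(10000, 2 * (i // 2) / d_hid)
    # even channels take sin, odd channels take cos
    return np.where(i % 2 == 0, np.sin(angle), np.cos(angle))

print(sinusoid(4, 8).shape)  # (4, 8)
```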
+
+@register
+@serializable
+class VisionTransformer(nn.Layer):
+ """ Vision Transformer with support for patch input
+ """
+
+ def __init__(self,
+ img_size=[672, 1092],
+ patch_size=16,
+ in_chans=3,
+ embed_dim=768,
+ depth=12,
+ num_heads=12,
+ mlp_ratio=4,
+ qkv_bias=False,
+ qk_scale=None,
+ drop_rate=0.,
+ attn_drop_rate=0.,
+ drop_path_rate=0.,
+ norm_layer='nn.LayerNorm',
+ init_values=None,
+ use_rel_pos_bias=False,
+ use_shared_rel_pos_bias=False,
+ epsilon=1e-5,
+ final_norm=False,
+ pretrained=None,
+ out_indices=[3, 5, 7, 11],
+ use_abs_pos_emb=False,
+ use_sincos_pos_emb=True,
+ with_fpn=True,
+ use_checkpoint=False,
+ **args):
+ super().__init__()
+ self.img_size = img_size
+ self.embed_dim = embed_dim
+ self.with_fpn = with_fpn
+ self.use_checkpoint = use_checkpoint
+ self.use_sincos_pos_emb = use_sincos_pos_emb
+ self.use_rel_pos_bias = use_rel_pos_bias
+ self.final_norm = final_norm
+
+ if use_checkpoint:
+ paddle.seed(0)
+
+ self.patch_embed = PatchEmbed(
+ img_size=img_size,
+ patch_size=patch_size,
+ in_chans=in_chans,
+ embed_dim=embed_dim)
+
+ self.pos_w = self.patch_embed.num_patches_in_w
+ self.pos_h = self.patch_embed.num_patches_in_h
+
+ self.cls_token = self.create_parameter(
+ shape=(1, 1, embed_dim),
+ default_initializer=paddle.nn.initializer.Constant(value=0.))
+
+ if use_abs_pos_emb:
+ self.pos_embed = self.create_parameter(
+ shape=(1, self.pos_w * self.pos_h + 1, embed_dim),
+ default_initializer=paddle.nn.initializer.TruncatedNormal(
+ std=.02))
+ elif use_sincos_pos_emb:
+ pos_embed = self.build_2d_sincos_position_embedding(embed_dim)
+
+            self.pos_embed = self.create_parameter(shape=pos_embed.shape)
+ self.pos_embed.set_value(pos_embed.numpy())
+ self.pos_embed.stop_gradient = True
+
+ else:
+ self.pos_embed = None
+
+ self.pos_drop = nn.Dropout(p=drop_rate)
+
+ if use_shared_rel_pos_bias:
+ self.rel_pos_bias = RelativePositionBias(
+ window_size=self.patch_embed.patch_shape, num_heads=num_heads)
+ else:
+ self.rel_pos_bias = None
+
+ dpr = np.linspace(0, drop_path_rate, depth)
+
+ self.blocks = nn.LayerList([
+ Block(
+ dim=embed_dim,
+ num_heads=num_heads,
+ mlp_ratio=mlp_ratio,
+ qkv_bias=qkv_bias,
+ qk_scale=qk_scale,
+ drop=drop_rate,
+ attn_drop=attn_drop_rate,
+ drop_path=dpr[i],
+ norm_layer=norm_layer,
+ init_values=init_values,
+ window_size=self.patch_embed.patch_shape
+ if use_rel_pos_bias else None,
+ epsilon=epsilon) for i in range(depth)
+ ])
+
+ self.pretrained = pretrained
+ self.init_weight()
+
+        assert len(out_indices) <= 4, 'out_indices supports at most 4 stages'
+ self.out_indices = out_indices
+ self.out_channels = [embed_dim for _ in range(len(out_indices))]
+ self.out_strides = [4, 8, 16, 32][-len(out_indices):] if with_fpn else [
+ 8 for _ in range(len(out_indices))
+ ]
+
+ self.norm = Identity()
+
+ if self.with_fpn:
+ self.init_fpn(
+ embed_dim=embed_dim,
+ patch_size=patch_size, )
+
+ def init_weight(self):
+ pretrained = self.pretrained
+
+ if pretrained:
+            if 'http' in pretrained:  # URL
+                path = paddle.utils.download.get_weights_path_from_url(
+                    pretrained)
+            else:  # model in a local path
+ path = pretrained
+
+ load_state_dict = paddle.load(path)
+ model_state_dict = self.state_dict()
+ pos_embed_name = "pos_embed"
+
+ if pos_embed_name in load_state_dict.keys():
+ load_pos_embed = paddle.to_tensor(
+ load_state_dict[pos_embed_name], dtype="float32")
+ if self.pos_embed.shape != load_pos_embed.shape:
+ pos_size = int(math.sqrt(load_pos_embed.shape[1] - 1))
+ model_state_dict[pos_embed_name] = self.resize_pos_embed(
+ load_pos_embed, (pos_size, pos_size),
+ (self.pos_h, self.pos_w))
+
+ # self.set_state_dict(model_state_dict)
+ load_state_dict[pos_embed_name] = model_state_dict[
+ pos_embed_name]
+
+                print("Loaded pos_embed and resized it from {} to {}.".format(
+ load_pos_embed.shape, self.pos_embed.shape))
+
+ self.set_state_dict(load_state_dict)
+        print("Loaded state_dict.")
+
+ def init_fpn(self, embed_dim=768, patch_size=16, out_with_norm=False):
+ if patch_size == 16:
+ self.fpn1 = nn.Sequential(
+ nn.Conv2DTranspose(
+ embed_dim, embed_dim, kernel_size=2, stride=2),
+ nn.BatchNorm2D(embed_dim),
+ nn.GELU(),
+ nn.Conv2DTranspose(
+ embed_dim, embed_dim, kernel_size=2, stride=2), )
+
+ self.fpn2 = nn.Sequential(
+ nn.Conv2DTranspose(
+ embed_dim, embed_dim, kernel_size=2, stride=2), )
+
+ self.fpn3 = Identity()
+
+ self.fpn4 = nn.MaxPool2D(kernel_size=2, stride=2)
+ elif patch_size == 8:
+ self.fpn1 = nn.Sequential(
+ nn.Conv2DTranspose(
+ embed_dim, embed_dim, kernel_size=2, stride=2), )
+
+ self.fpn2 = Identity()
+
+ self.fpn3 = nn.Sequential(nn.MaxPool2D(kernel_size=2, stride=2), )
+
+ self.fpn4 = nn.Sequential(nn.MaxPool2D(kernel_size=4, stride=4), )
+
+ if not out_with_norm:
+ self.norm = Identity()
+ else:
+ self.norm = nn.LayerNorm(embed_dim, epsilon=1e-6)
+
+ def interpolate_pos_encoding(self, x, w, h):
+ npatch = x.shape[1] - 1
+ N = self.pos_embed.shape[1] - 1
+ w0 = w // self.patch_embed.patch_size
+ h0 = h // self.patch_embed.patch_size
+ if npatch == N and w0 == self.patch_embed.num_patches_w and h0 == self.patch_embed.num_patches_h:
+ return self.pos_embed
+ class_pos_embed = self.pos_embed[:, 0]
+ patch_pos_embed = self.pos_embed[:, 1:]
+ dim = x.shape[-1]
+ # we add a small number to avoid floating point error in the interpolation
+ # see discussion at https://github.com/facebookresearch/dino/issues/8
+ w0, h0 = w0 + 0.1, h0 + 0.1
+
+ patch_pos_embed = nn.functional.interpolate(
+ patch_pos_embed.reshape([
+ 1, self.patch_embed.num_patches_w,
+ self.patch_embed.num_patches_h, dim
+ ]).transpose((0, 3, 1, 2)),
+ scale_factor=(w0 / self.patch_embed.num_patches_w,
+ h0 / self.patch_embed.num_patches_h),
+ mode='bicubic', )
+ assert int(w0) == patch_pos_embed.shape[-2] and int(
+ h0) == patch_pos_embed.shape[-1]
+ patch_pos_embed = patch_pos_embed.transpose(
+ (0, 2, 3, 1)).reshape([1, -1, dim])
+ return paddle.concat(
+ (class_pos_embed.unsqueeze(0), patch_pos_embed), axis=1)
+
+ def resize_pos_embed(self, pos_embed, old_hw, new_hw):
+ """
+ Resize pos_embed weight.
+ Args:
+ pos_embed (Tensor): the pos_embed weight
+ old_hw (list[int]): the height and width of old pos_embed
+ new_hw (list[int]): the height and width of new pos_embed
+ Returns:
+ Tensor: the resized pos_embed weight
+ """
+ cls_pos_embed = pos_embed[:, :1, :]
+ pos_embed = pos_embed[:, 1:, :]
+
+ pos_embed = pos_embed.transpose([0, 2, 1])
+ pos_embed = pos_embed.reshape([1, -1, old_hw[0], old_hw[1]])
+ pos_embed = F.interpolate(
+ pos_embed, new_hw, mode='bicubic', align_corners=False)
+ pos_embed = pos_embed.flatten(2).transpose([0, 2, 1])
+ pos_embed = paddle.concat([cls_pos_embed, pos_embed], axis=1)
+
+ return pos_embed
+
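`resize_pos_embed` is what lets `init_weight` above adapt a checkpoint trained at one resolution to a different patch grid: the cls slot is split off, the grid tokens are reshaped to the old (h, w) layout, bicubically resized, and re-flattened. The same steps with toy shapes (the 14x14-to-42x68 numbers are illustrative only):

```python
import paddle
import paddle.nn.functional as F

old_hw, new_hw, dim = (14, 14), (42, 68), 768
pos_embed = paddle.rand([1, old_hw[0] * old_hw[1] + 1, dim])  # +1 cls slot

cls_tok, grid = pos_embed[:, :1, :], pos_embed[:, 1:, :]
grid = grid.transpose([0, 2, 1]).reshape([1, dim, old_hw[0], old_hw[1]])
grid = F.interpolate(grid, new_hw, mode='bicubic', align_corners=False)
grid = grid.flatten(2).transpose([0, 2, 1])
out = paddle.concat([cls_tok, grid], axis=1)
print(out.shape)  # [1, 42*68 + 1, 768]
```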
+ def build_2d_sincos_position_embedding(
+ self,
+ embed_dim=768,
+ temperature=10000., ):
+ h, w = self.patch_embed.patch_shape
+ grid_w = paddle.arange(w, dtype=paddle.float32)
+ grid_h = paddle.arange(h, dtype=paddle.float32)
+ grid_w, grid_h = paddle.meshgrid(grid_w, grid_h)
+ assert embed_dim % 4 == 0, 'Embed dimension must be divisible by 4 for 2D sin-cos position embedding'
+ pos_dim = embed_dim // 4
+ omega = paddle.arange(pos_dim, dtype=paddle.float32) / pos_dim
+ omega = 1. / (temperature**omega)
+
+        out_w = grid_w.flatten()[..., None] @ omega[None]
+        out_h = grid_h.flatten()[..., None] @ omega[None]
+
+ pos_emb = paddle.concat(
+ [
+ paddle.sin(out_w), paddle.cos(out_w), paddle.sin(out_h),
+ paddle.cos(out_h)
+ ],
+ axis=1)[None, :, :]
+
+ pe_token = paddle.zeros([1, 1, embed_dim], dtype=paddle.float32)
+ pos_embed = paddle.concat([pe_token, pos_emb], axis=1)
+ # pos_embed.stop_gradient = True
+
+ return pos_embed
+
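`build_2d_sincos_position_embedding` splits the channel budget four ways, sin and cos of the w coordinate plus sin and cos of the h coordinate, which is exactly why `embed_dim % 4 == 0` is asserted. A toy-sized trace of the same construction (3x5 grid, 8 channels):

```python
import paddle

h, w, dim, temp = 3, 5, 8, 10000.
gw, gh = paddle.meshgrid(paddle.arange(w, dtype='float32'),
                         paddle.arange(h, dtype='float32'))
pos_dim = dim // 4
omega = 1. / temp ** (paddle.arange(pos_dim, dtype='float32') / pos_dim)

out_w = gw.flatten()[..., None] * omega[None]  # outer product, [h*w, dim/4]
out_h = gh.flatten()[..., None] * omega[None]
emb = paddle.concat([paddle.sin(out_w), paddle.cos(out_w),
                     paddle.sin(out_h), paddle.cos(out_h)], axis=1)
print(emb.shape)  # [15, 8] -> one row per grid position
```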
+ def forward(self, x):
+ x = x['image'] if isinstance(x, dict) else x
+ _, _, h, w = x.shape
+
+ x = self.patch_embed(x)
+
+ B, D, Hp, Wp = x.shape # b * c * h * w
+
+ cls_tokens = self.cls_token.expand(
+ (B, self.cls_token.shape[-2], self.cls_token.shape[-1]))
+ x = x.flatten(2).transpose([0, 2, 1]) # b * hw * c
+ x = paddle.concat([cls_tokens, x], axis=1)
+
+ if self.pos_embed is not None:
+ # x = x + self.interpolate_pos_encoding(x, w, h)
+ x = x + self.interpolate_pos_encoding(x, h, w)
+
+ x = self.pos_drop(x)
+
+ rel_pos_bias = self.rel_pos_bias(
+ ) if self.rel_pos_bias is not None else None
+
+ feats = []
+ for idx, blk in enumerate(self.blocks):
+ if self.use_checkpoint and self.training:
+ x = paddle.distributed.fleet.utils.recompute(
+ blk, x, rel_pos_bias, **{"preserve_rng_state": True})
+ else:
+ x = blk(x, rel_pos_bias)
+
+ if idx in self.out_indices:
+ xp = paddle.reshape(
+ paddle.transpose(
+ self.norm(x[:, 1:, :]), perm=[0, 2, 1]),
+ shape=[B, D, Hp, Wp])
+ feats.append(xp)
+
+ if self.with_fpn:
+ fpns = [self.fpn1, self.fpn2, self.fpn3, self.fpn4]
+ for i in range(len(feats)):
+ feats[i] = fpns[i](feats[i])
+
+ return feats
+
+ @property
+ def num_layers(self):
+ return len(self.blocks)
+
+ @property
+ def no_weight_decay(self):
+ return {'pos_embed', 'cls_token'}
+
+ @property
+ def out_shape(self):
+ return [
+ ShapeSpec(
+ channels=c, stride=s)
+ for c, s in zip(self.out_channels, self.out_strides)
+ ]
diff --git a/paddlers/models/ppdet/modeling/bbox_utils.py b/paddlers/models/ppdet/modeling/bbox_utils.py
index d5d376c..a656c35 100644
--- a/paddlers/models/ppdet/modeling/bbox_utils.py
+++ b/paddlers/models/ppdet/modeling/bbox_utils.py
@@ -278,8 +278,8 @@ def decode_yolo(box, anchor, downsample_ratio):
return [x1, y1, w1, h1]
-def iou_similarity(box1, box2, eps=1e-9):
- """Calculate iou of box1 and box2
+def batch_iou_similarity(box1, box2, eps=1e-9):
+ """Calculate iou of box1 and box2 in batch
Args:
box1 (Tensor): box with the shape [N, M1, 4]
@@ -359,295 +359,6 @@ def bbox_iou(box1, box2, giou=False, diou=False, ciou=False, eps=1e-9):
return iou
-def rect2rbox(bboxes):
- """
- :param bboxes: shape (n, 4) (xmin, ymin, xmax, ymax)
- :return: dbboxes: shape (n, 5) (x_ctr, y_ctr, w, h, angle)
- """
- bboxes = bboxes.reshape(-1, 4)
- num_boxes = bboxes.shape[0]
-
- x_ctr = (bboxes[:, 2] + bboxes[:, 0]) / 2.0
- y_ctr = (bboxes[:, 3] + bboxes[:, 1]) / 2.0
- edges1 = np.abs(bboxes[:, 2] - bboxes[:, 0])
- edges2 = np.abs(bboxes[:, 3] - bboxes[:, 1])
- angles = np.zeros([num_boxes], dtype=bboxes.dtype)
-
- inds = edges1 < edges2
-
- rboxes = np.stack((x_ctr, y_ctr, edges1, edges2, angles), axis=1)
- rboxes[inds, 2] = edges2[inds]
- rboxes[inds, 3] = edges1[inds]
- rboxes[inds, 4] = np.pi / 2.0
- return rboxes
-
-
-def delta2rbox(rrois,
- deltas,
- means=[0, 0, 0, 0, 0],
- stds=[1, 1, 1, 1, 1],
- wh_ratio_clip=1e-6):
- """
- :param rrois: (cx, cy, w, h, theta)
- :param deltas: (dx, dy, dw, dh, dtheta)
- :param means:
- :param stds:
- :param wh_ratio_clip:
- :return:
- """
- means = paddle.to_tensor(means)
- stds = paddle.to_tensor(stds)
- deltas = paddle.reshape(deltas, [-1, deltas.shape[-1]])
- denorm_deltas = deltas * stds + means
-
- dx = denorm_deltas[:, 0]
- dy = denorm_deltas[:, 1]
- dw = denorm_deltas[:, 2]
- dh = denorm_deltas[:, 3]
- dangle = denorm_deltas[:, 4]
-
- max_ratio = np.abs(np.log(wh_ratio_clip))
- dw = paddle.clip(dw, min=-max_ratio, max=max_ratio)
- dh = paddle.clip(dh, min=-max_ratio, max=max_ratio)
-
- rroi_x = rrois[:, 0]
- rroi_y = rrois[:, 1]
- rroi_w = rrois[:, 2]
- rroi_h = rrois[:, 3]
- rroi_angle = rrois[:, 4]
-
- gx = dx * rroi_w * paddle.cos(rroi_angle) - dy * rroi_h * paddle.sin(
- rroi_angle) + rroi_x
- gy = dx * rroi_w * paddle.sin(rroi_angle) + dy * rroi_h * paddle.cos(
- rroi_angle) + rroi_y
- gw = rroi_w * dw.exp()
- gh = rroi_h * dh.exp()
- ga = np.pi * dangle + rroi_angle
- ga = (ga + np.pi / 4) % np.pi - np.pi / 4
- ga = paddle.to_tensor(ga)
-
- gw = paddle.to_tensor(gw, dtype='float32')
- gh = paddle.to_tensor(gh, dtype='float32')
- bboxes = paddle.stack([gx, gy, gw, gh, ga], axis=-1)
- return bboxes
-
-
-def rbox2delta(proposals, gt, means=[0, 0, 0, 0, 0], stds=[1, 1, 1, 1, 1]):
- """
-
- Args:
- proposals:
- gt:
- means: 1x5
- stds: 1x5
-
- Returns:
-
- """
- proposals = proposals.astype(np.float64)
-
- PI = np.pi
-
- gt_widths = gt[..., 2]
- gt_heights = gt[..., 3]
- gt_angle = gt[..., 4]
-
- proposals_widths = proposals[..., 2]
- proposals_heights = proposals[..., 3]
- proposals_angle = proposals[..., 4]
-
- coord = gt[..., 0:2] - proposals[..., 0:2]
- dx = (np.cos(proposals[..., 4]) * coord[..., 0] + np.sin(proposals[..., 4])
- * coord[..., 1]) / proposals_widths
- dy = (-np.sin(proposals[..., 4]) * coord[..., 0] + np.cos(proposals[..., 4])
- * coord[..., 1]) / proposals_heights
- dw = np.log(gt_widths / proposals_widths)
- dh = np.log(gt_heights / proposals_heights)
- da = (gt_angle - proposals_angle)
-
- da = (da + PI / 4) % PI - PI / 4
- da /= PI
-
- deltas = np.stack([dx, dy, dw, dh, da], axis=-1)
- means = np.array(means, dtype=deltas.dtype)
- stds = np.array(stds, dtype=deltas.dtype)
- deltas = (deltas - means) / stds
- deltas = deltas.astype(np.float32)
- return deltas
-
-
-def bbox_decode(bbox_preds,
- anchors,
- means=[0, 0, 0, 0, 0],
- stds=[1, 1, 1, 1, 1]):
- """decode bbox from deltas
- Args:
- bbox_preds: [N,H,W,5]
- anchors: [H*W,5]
- return:
- bboxes: [N,H,W,5]
- """
- means = paddle.to_tensor(means)
- stds = paddle.to_tensor(stds)
- num_imgs, H, W, _ = bbox_preds.shape
- bboxes_list = []
- for img_id in range(num_imgs):
- bbox_pred = bbox_preds[img_id]
- # bbox_pred.shape=[5,H,W]
- bbox_delta = bbox_pred
- anchors = paddle.to_tensor(anchors)
- bboxes = delta2rbox(
- anchors, bbox_delta, means, stds, wh_ratio_clip=1e-6)
- bboxes = paddle.reshape(bboxes, [H, W, 5])
- bboxes_list.append(bboxes)
- return paddle.stack(bboxes_list, axis=0)
-
-
-def poly2rbox(polys):
- """
- poly:[x0,y0,x1,y1,x2,y2,x3,y3]
- to
- rotated_boxes:[x_ctr,y_ctr,w,h,angle]
- """
- rotated_boxes = []
- for poly in polys:
- poly = np.array(poly[:8], dtype=np.float32)
-
- pt1 = (poly[0], poly[1])
- pt2 = (poly[2], poly[3])
- pt3 = (poly[4], poly[5])
- pt4 = (poly[6], poly[7])
-
- edge1 = np.sqrt((pt1[0] - pt2[0]) * (pt1[0] - pt2[0]) + (pt1[1] - pt2[
- 1]) * (pt1[1] - pt2[1]))
- edge2 = np.sqrt((pt2[0] - pt3[0]) * (pt2[0] - pt3[0]) + (pt2[1] - pt3[
- 1]) * (pt2[1] - pt3[1]))
-
- width = max(edge1, edge2)
- height = min(edge1, edge2)
-
- rbox_angle = 0
- if edge1 > edge2:
- rbox_angle = np.arctan2(
- float(pt2[1] - pt1[1]), float(pt2[0] - pt1[0]))
- elif edge2 >= edge1:
- rbox_angle = np.arctan2(
- float(pt4[1] - pt1[1]), float(pt4[0] - pt1[0]))
-
- def norm_angle(angle, range=[-np.pi / 4, np.pi]):
- return (angle - range[0]) % range[1] + range[0]
-
- rbox_angle = norm_angle(rbox_angle)
-
- x_ctr = float(pt1[0] + pt3[0]) / 2
- y_ctr = float(pt1[1] + pt3[1]) / 2
- rotated_box = np.array([x_ctr, y_ctr, width, height, rbox_angle])
- rotated_boxes.append(rotated_box)
- ret_rotated_boxes = np.array(rotated_boxes)
- assert ret_rotated_boxes.shape[1] == 5
- return ret_rotated_boxes
-
-
-def cal_line_length(point1, point2):
- import math
- return math.sqrt(
- math.pow(point1[0] - point2[0], 2) + math.pow(point1[1] - point2[1], 2))
-
-
-def get_best_begin_point_single(coordinate):
- x1, y1, x2, y2, x3, y3, x4, y4 = coordinate
- xmin = min(x1, x2, x3, x4)
- ymin = min(y1, y2, y3, y4)
- xmax = max(x1, x2, x3, x4)
- ymax = max(y1, y2, y3, y4)
- combinate = [[[x1, y1], [x2, y2], [x3, y3], [x4, y4]],
- [[x4, y4], [x1, y1], [x2, y2], [x3, y3]],
- [[x3, y3], [x4, y4], [x1, y1], [x2, y2]],
- [[x2, y2], [x3, y3], [x4, y4], [x1, y1]]]
- dst_coordinate = [[xmin, ymin], [xmax, ymin], [xmax, ymax], [xmin, ymax]]
- force = 100000000.0
- force_flag = 0
- for i in range(4):
- temp_force = cal_line_length(combinate[i][0], dst_coordinate[0]) \
- + cal_line_length(combinate[i][1], dst_coordinate[1]) \
- + cal_line_length(combinate[i][2], dst_coordinate[2]) \
- + cal_line_length(combinate[i][3], dst_coordinate[3])
- if temp_force < force:
- force = temp_force
- force_flag = i
- if force_flag != 0:
- pass
- return np.array(combinate[force_flag]).reshape(8)
-
-
-def rbox2poly_np(rrects):
- """
- rrect:[x_ctr,y_ctr,w,h,angle]
- to
- poly:[x0,y0,x1,y1,x2,y2,x3,y3]
- """
- polys = []
- for i in range(rrects.shape[0]):
- rrect = rrects[i]
- # x_ctr, y_ctr, width, height, angle = rrect[:5]
- x_ctr = rrect[0]
- y_ctr = rrect[1]
- width = rrect[2]
- height = rrect[3]
- angle = rrect[4]
- tl_x, tl_y, br_x, br_y = -width / 2, -height / 2, width / 2, height / 2
- rect = np.array([[tl_x, br_x, br_x, tl_x], [tl_y, tl_y, br_y, br_y]])
- R = np.array([[np.cos(angle), -np.sin(angle)],
- [np.sin(angle), np.cos(angle)]])
- poly = R.dot(rect)
- x0, x1, x2, x3 = poly[0, :4] + x_ctr
- y0, y1, y2, y3 = poly[1, :4] + y_ctr
- poly = np.array([x0, y0, x1, y1, x2, y2, x3, y3], dtype=np.float32)
- poly = get_best_begin_point_single(poly)
- polys.append(poly)
- polys = np.array(polys)
- return polys
-
-
-def rbox2poly(rrects):
- """
- rrect:[x_ctr,y_ctr,w,h,angle]
- to
- poly:[x0,y0,x1,y1,x2,y2,x3,y3]
- """
- N = paddle.shape(rrects)[0]
-
- x_ctr = rrects[:, 0]
- y_ctr = rrects[:, 1]
- width = rrects[:, 2]
- height = rrects[:, 3]
- angle = rrects[:, 4]
-
- tl_x, tl_y, br_x, br_y = -width * 0.5, -height * 0.5, width * 0.5, height * 0.5
-
- normal_rects = paddle.stack(
- [tl_x, br_x, br_x, tl_x, tl_y, tl_y, br_y, br_y], axis=0)
- normal_rects = paddle.reshape(normal_rects, [2, 4, N])
- normal_rects = paddle.transpose(normal_rects, [2, 0, 1])
-
- sin, cos = paddle.sin(angle), paddle.cos(angle)
- # M.shape=[N,2,2]
- M = paddle.stack([cos, -sin, sin, cos], axis=0)
- M = paddle.reshape(M, [2, 2, N])
- M = paddle.transpose(M, [2, 0, 1])
-
- # polys:[N,8]
- polys = paddle.matmul(M, normal_rects)
- polys = paddle.transpose(polys, [2, 1, 0])
- polys = paddle.reshape(polys, [-1, N])
- polys = paddle.transpose(polys, [1, 0])
-
- tmp = paddle.stack(
- [x_ctr, y_ctr, x_ctr, y_ctr, x_ctr, y_ctr, x_ctr, y_ctr], axis=1)
- polys = polys + tmp
- return polys
-
-
def bbox_iou_np_expand(box1, box2, x1y1x2y2=True, eps=1e-16):
"""
Calculate the iou of box1 and box2 with numpy.
@@ -744,9 +455,9 @@ def distance2bbox(points, distance, max_shape=None):
def bbox_center(boxes):
"""Get bbox centers from boxes.
Args:
- boxes (Tensor): boxes with shape (N, 4), "xmin, ymin, xmax, ymax" format.
+ boxes (Tensor): boxes with shape (..., 4), "xmin, ymin, xmax, ymax" format.
Returns:
- Tensor: boxes centers with shape (N, 2), "cx, cy" format.
+ Tensor: boxes centers with shape (..., 2), "cx, cy" format.
"""
boxes_cx = (boxes[..., 0] + boxes[..., 2]) / 2
boxes_cy = (boxes[..., 1] + boxes[..., 3]) / 2
@@ -756,20 +467,136 @@ def bbox_center(boxes):
def batch_distance2bbox(points, distance, max_shapes=None):
"""Decode distance prediction to bounding box for batch.
Args:
- points (Tensor): [B, ..., 2]
- distance (Tensor): [B, ..., 4]
- max_shapes (tuple): [B, 2], "h,w" format, Shape of the image.
+ points (Tensor): [B, ..., 2], "xy" format
+ distance (Tensor): [B, ..., 4], "ltrb" format
+ max_shapes (Tensor): [B, 2], "h,w" format, Shape of the image.
Returns:
- Tensor: Decoded bboxes.
+ Tensor: Decoded bboxes, "x1y1x2y2" format.
"""
- x1 = points[..., 0] - distance[..., 0]
- y1 = points[..., 1] - distance[..., 1]
- x2 = points[..., 0] + distance[..., 2]
- y2 = points[..., 1] + distance[..., 3]
+ lt, rb = paddle.split(distance, 2, -1)
+    # when adding a tensor and a parameter, the parameter should be the second operand
+ x1y1 = -lt + points
+ x2y2 = rb + points
+ out_bbox = paddle.concat([x1y1, x2y2], -1)
if max_shapes is not None:
- for i, max_shape in enumerate(max_shapes):
- x1[i] = x1[i].clip(min=0, max=max_shape[1])
- y1[i] = y1[i].clip(min=0, max=max_shape[0])
- x2[i] = x2[i].clip(min=0, max=max_shape[1])
- y2[i] = y2[i].clip(min=0, max=max_shape[0])
- return paddle.stack([x1, y1, x2, y2], -1)
+ max_shapes = max_shapes.flip(-1).tile([1, 2])
+ delta_dim = out_bbox.ndim - max_shapes.ndim
+ for _ in range(delta_dim):
+ max_shapes.unsqueeze_(1)
+ out_bbox = paddle.where(out_bbox < max_shapes, out_bbox, max_shapes)
+ out_bbox = paddle.where(out_bbox > 0, out_bbox,
+ paddle.zeros_like(out_bbox))
+ return out_bbox
+
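The rewritten `batch_distance2bbox` is fully vectorized: the "ltrb" distances are split into left-top and right-bottom halves, offset from the anchor points, and then clipped against per-image shapes by broadcasting rather than a Python loop over the batch. A quick check of the decode-and-clip behavior:

```python
import paddle
from paddlers.models.ppdet.modeling.bbox_utils import batch_distance2bbox

points = paddle.to_tensor([[[10., 10.]]])          # [B, ..., 2], "xy"
distance = paddle.to_tensor([[[3., 2., 5., 4.]]])  # [B, ..., 4], "ltrb"
max_shapes = paddle.to_tensor([[12., 12.]])        # [B, 2], "hw"

box = batch_distance2bbox(points, distance, max_shapes)
print(box.numpy())  # [[[ 7.  8. 12. 12.]]] -> x2/y2 clipped to the image
```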
+
+def delta2bbox_v2(rois,
+ deltas,
+ means=(0.0, 0.0, 0.0, 0.0),
+ stds=(1.0, 1.0, 1.0, 1.0),
+ max_shape=None,
+ wh_ratio_clip=16.0 / 1000.0,
+ ctr_clip=None):
+ """Transform network output(delta) to bboxes.
+ Based on https://github.com/open-mmlab/mmdetection/blob/master/mmdet/core/
+ bbox/coder/delta_xywh_bbox_coder.py
+ Args:
+ rois (Tensor): shape [..., 4], base bboxes, typical examples include
+ anchor and rois
+ deltas (Tensor): shape [..., 4], offset relative to base bboxes
+ means (list[float]): the mean that was used to normalize deltas,
+ must be of size 4
+ stds (list[float]): the std that was used to normalize deltas,
+ must be of size 4
+ max_shape (list[float] or None): height and width of image, will be
+ used to clip bboxes if not None
+ wh_ratio_clip (float): to clip delta wh of decoded bboxes
+ ctr_clip (float or None): whether to clip delta xy of decoded bboxes
+ """
+ if rois.size == 0:
+ return paddle.empty_like(rois)
+ means = paddle.to_tensor(means)
+ stds = paddle.to_tensor(stds)
+ deltas = deltas * stds + means
+
+ dxy = deltas[..., :2]
+ dwh = deltas[..., 2:]
+
+ pxy = (rois[..., :2] + rois[..., 2:]) * 0.5
+ pwh = rois[..., 2:] - rois[..., :2]
+ dxy_wh = pwh * dxy
+
+ max_ratio = np.abs(np.log(wh_ratio_clip))
+ if ctr_clip is not None:
+ dxy_wh = paddle.clip(dxy_wh, max=ctr_clip, min=-ctr_clip)
+ dwh = paddle.clip(dwh, max=max_ratio)
+ else:
+ dwh = dwh.clip(min=-max_ratio, max=max_ratio)
+
+ gxy = pxy + dxy_wh
+ gwh = pwh * dwh.exp()
+ x1y1 = gxy - (gwh * 0.5)
+ x2y2 = gxy + (gwh * 0.5)
+ bboxes = paddle.concat([x1y1, x2y2], axis=-1)
+ if max_shape is not None:
+ bboxes[..., 0::2] = bboxes[..., 0::2].clip(min=0, max=max_shape[1])
+ bboxes[..., 1::2] = bboxes[..., 1::2].clip(min=0, max=max_shape[0])
+ return bboxes
+
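`delta2bbox_v2` applies the usual (dx, dy, dw, dh) parameterization: centers shift by the delta scaled by the anchor's width and height, sizes scale by exp(dw) and exp(dh), with clipping against `wh_ratio_clip` to keep the exp from overflowing. Zero deltas must therefore return the anchors unchanged, which makes a cheap sanity check:

```python
import paddle
from paddlers.models.ppdet.modeling.bbox_utils import delta2bbox_v2

rois = paddle.to_tensor([[0., 0., 10., 20.]])
deltas = paddle.zeros([1, 4])
# With zero deltas and unit stds, decoding reproduces the input box.
print(delta2bbox_v2(rois, deltas).numpy())  # [[ 0.  0. 10. 20.]]
```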
+
+def bbox2delta_v2(src_boxes,
+ tgt_boxes,
+ means=(0.0, 0.0, 0.0, 0.0),
+ stds=(1.0, 1.0, 1.0, 1.0)):
+ """Encode bboxes to deltas.
+ Modified from paddlers.models.ppdet.modeling.bbox_utils.bbox2delta.
+ Args:
+ src_boxes (Tensor[..., 4]): base bboxes
+ tgt_boxes (Tensor[..., 4]): target bboxes
+ means (list[float]): the mean that will be used to normalize delta
+ stds (list[float]): the std that will be used to normalize delta
+ """
+ if src_boxes.size == 0:
+ return paddle.empty_like(src_boxes)
+ src_w = src_boxes[..., 2] - src_boxes[..., 0]
+ src_h = src_boxes[..., 3] - src_boxes[..., 1]
+ src_ctr_x = src_boxes[..., 0] + 0.5 * src_w
+ src_ctr_y = src_boxes[..., 1] + 0.5 * src_h
+
+ tgt_w = tgt_boxes[..., 2] - tgt_boxes[..., 0]
+ tgt_h = tgt_boxes[..., 3] - tgt_boxes[..., 1]
+ tgt_ctr_x = tgt_boxes[..., 0] + 0.5 * tgt_w
+ tgt_ctr_y = tgt_boxes[..., 1] + 0.5 * tgt_h
+
+ dx = (tgt_ctr_x - src_ctr_x) / src_w
+ dy = (tgt_ctr_y - src_ctr_y) / src_h
+ dw = paddle.log(tgt_w / src_w)
+ dh = paddle.log(tgt_h / src_h)
+
+ deltas = paddle.stack((dx, dy, dw, dh), axis=1) # [n, 4]
+ means = paddle.to_tensor(means, place=src_boxes.place)
+ stds = paddle.to_tensor(stds, place=src_boxes.place)
+ deltas = (deltas - means) / stds
+ return deltas
+
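`bbox2delta_v2` is the matching encoder; note it stacks the result on axis=1, so this variant effectively expects rank-2 `[n, 4]` boxes. Composing it with `delta2bbox_v2` should be the identity up to float error, a useful regression test for both:

```python
import paddle
from paddlers.models.ppdet.modeling.bbox_utils import (bbox2delta_v2,
                                                       delta2bbox_v2)

src = paddle.to_tensor([[0., 0., 10., 10.]])
tgt = paddle.to_tensor([[2., 2., 12., 14.]])

deltas = bbox2delta_v2(src, tgt)   # encode tgt relative to src
back = delta2bbox_v2(src, deltas)  # decoding should recover tgt
print(bool(paddle.allclose(back, tgt)))  # True
```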
+
+def iou_similarity(box1, box2, eps=1e-10):
+ """Calculate iou of box1 and box2
+
+ Args:
+ box1 (Tensor): box with the shape [M1, 4]
+ box2 (Tensor): box with the shape [M2, 4]
+
+ Return:
+ iou (Tensor): iou between box1 and box2 with the shape [M1, M2]
+ """
+ box1 = box1.unsqueeze(1) # [M1, 4] -> [M1, 1, 4]
+ box2 = box2.unsqueeze(0) # [M2, 4] -> [1, M2, 4]
+ px1y1, px2y2 = box1[:, :, 0:2], box1[:, :, 2:4]
+ gx1y1, gx2y2 = box2[:, :, 0:2], box2[:, :, 2:4]
+ x1y1 = paddle.maximum(px1y1, gx1y1)
+ x2y2 = paddle.minimum(px2y2, gx2y2)
+ overlap = (x2y2 - x1y1).clip(0).prod(-1)
+ area1 = (px2y2 - px1y1).clip(0).prod(-1)
+ area2 = (gx2y2 - gx1y1).clip(0).prod(-1)
+ union = area1 + area2 - overlap + eps
+ return overlap / union
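The re-added `iou_similarity` (the batched form was renamed `batch_iou_similarity` above) builds the full pairwise IoU matrix by broadcasting box1 to [M1, 1, 4] against box2 as [1, M2, 4]. For example:

```python
import paddle
from paddlers.models.ppdet.modeling.bbox_utils import iou_similarity

a = paddle.to_tensor([[0., 0., 2., 2.]])                    # one 2x2 box
b = paddle.to_tensor([[1., 1., 3., 3.], [4., 4., 5., 5.]])  # two boxes
print(iou_similarity(a, b).numpy())
# [[0.1429 0.    ]] -> overlap 1 / union 7, then a disjoint pair
```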
diff --git a/paddlers/models/ppdet/modeling/cls_utils.py b/paddlers/models/ppdet/modeling/cls_utils.py
new file mode 100644
index 0000000..3ae8d11
--- /dev/null
+++ b/paddlers/models/ppdet/modeling/cls_utils.py
@@ -0,0 +1,40 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def _get_class_default_kwargs(cls, *args, **kwargs):
+ """
+ Get default arguments of a class in dict format, if args and
+ kwargs is specified, it will replace default arguments
+ """
+ varnames = cls.__init__.__code__.co_varnames
+ argcount = cls.__init__.__code__.co_argcount
+ keys = varnames[:argcount]
+ assert keys[0] == 'self'
+ keys = keys[1:]
+
+ values = list(cls.__init__.__defaults__)
+ assert len(values) == len(keys)
+
+ if len(args) > 0:
+ for i, arg in enumerate(args):
+ values[i] = arg
+
+ default_kwargs = dict(zip(keys, values))
+
+ if len(kwargs) > 0:
+ for k, v in kwargs.items():
+ default_kwargs[k] = v
+
+ return default_kwargs
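`_get_class_default_kwargs` exists to replace the `RoIAlign().__dict__` default-argument idiom seen in the head constructors below: it reads the defaults off `__init__` via introspection instead of instantiating a layer just to inspect its config. It assumes every `__init__` argument after `self` has a default. A small demo (`RoIAlignLike` is a hypothetical stand-in):

```python
from paddlers.models.ppdet.modeling.cls_utils import _get_class_default_kwargs

class RoIAlignLike:
    def __init__(self, resolution=14, spatial_scale=0.0625, sampling_ratio=0):
        self.resolution = resolution

print(_get_class_default_kwargs(RoIAlignLike))
# {'resolution': 14, 'spatial_scale': 0.0625, 'sampling_ratio': 0}
print(_get_class_default_kwargs(RoIAlignLike, 7))  # positional override
# {'resolution': 7, 'spatial_scale': 0.0625, 'sampling_ratio': 0}
```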
diff --git a/paddlers/models/ppdet/modeling/heads/__init__.py b/paddlers/models/ppdet/modeling/heads/__init__.py
index 46caf7a..ec2e227 100644
--- a/paddlers/models/ppdet/modeling/heads/__init__.py
+++ b/paddlers/models/ppdet/modeling/heads/__init__.py
@@ -31,6 +31,8 @@ from . import pico_head
from . import detr_head
from . import sparsercnn_head
from . import tood_head
+from . import retina_head
+from . import ppyoloe_head
from .bbox_head import *
from .mask_head import *
@@ -51,3 +53,5 @@ from .pico_head import *
from .detr_head import *
from .sparsercnn_head import *
from .tood_head import *
+from .retina_head import *
+from .ppyoloe_head import *
diff --git a/paddlers/models/ppdet/modeling/heads/bbox_head.py b/paddlers/models/ppdet/modeling/heads/bbox_head.py
index 2654dc9..fbb7f05 100644
--- a/paddlers/models/ppdet/modeling/heads/bbox_head.py
+++ b/paddlers/models/ppdet/modeling/heads/bbox_head.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
@@ -24,6 +24,7 @@ from paddlers.models.ppdet.core.workspace import register, create
from .roi_extractor import RoIAlign
from ..shape_spec import ShapeSpec
from ..bbox_utils import bbox2delta
+from ..cls_utils import _get_class_default_kwargs
from paddlers.models.ppdet.modeling.layers import ConvNormLayer
__all__ = ['TwoFCHead', 'XConvNormHead', 'BBoxHead']
@@ -89,7 +90,7 @@ class XConvNormHead(nn.Layer):
conv_dim (int): The number of channels for the conv layers
out_channel (int): Output channels
resolution (int): Resolution of input feature map
- norm_type (string): Norm type, bn, gn, sync_bn are available,
+ norm_type (string): Norm type, bn, gn, sync_bn are available,
default `gn`
freeze_norm (bool): Whether to freeze the norm
stage_name (string): Prefix name for conv layer, '' by default
@@ -168,22 +169,23 @@ class BBoxHead(nn.Layer):
head (nn.Layer): Extract feature in bbox head
in_channel (int): Input channel after RoI extractor
roi_extractor (object): The module of RoI Extractor
- bbox_assigner (object): The module of Box Assigner, label and sample the
+ bbox_assigner (object): The module of Box Assigner, label and sample the
box.
with_pool (bool): Whether to use pooling for the RoI feature.
num_classes (int): The number of classes
- bbox_weight (List[float]): The weight to get the decode box
+ bbox_weight (List[float]): The weight to get the decode box
"""
def __init__(self,
head,
in_channel,
- roi_extractor=RoIAlign().__dict__,
+ roi_extractor=_get_class_default_kwargs(RoIAlign),
bbox_assigner='BboxAssigner',
with_pool=False,
num_classes=80,
bbox_weight=[10., 10., 5., 5.],
- bbox_loss=None):
+ bbox_loss=None,
+ loss_normalize_pos=False):
super(BBoxHead, self).__init__()
self.head = head
self.roi_extractor = roi_extractor
@@ -195,6 +197,7 @@ class BBoxHead(nn.Layer):
self.num_classes = num_classes
self.bbox_weight = bbox_weight
self.bbox_loss = bbox_loss
+ self.loss_normalize_pos = loss_normalize_pos
self.bbox_score = nn.Linear(
in_channel,
@@ -249,14 +252,25 @@ class BBoxHead(nn.Layer):
deltas = self.bbox_delta(feat)
if self.training:
- loss = self.get_loss(scores, deltas, targets, rois,
- self.bbox_weight)
+ loss = self.get_loss(
+ scores,
+ deltas,
+ targets,
+ rois,
+ self.bbox_weight,
+ loss_normalize_pos=self.loss_normalize_pos)
return loss, bbox_feat
else:
pred = self.get_prediction(scores, deltas)
return pred, self.head
- def get_loss(self, scores, deltas, targets, rois, bbox_weight):
+ def get_loss(self,
+ scores,
+ deltas,
+ targets,
+ rois,
+ bbox_weight,
+ loss_normalize_pos=False):
"""
scores (Tensor): scores from bbox head outputs
deltas (Tensor): deltas from bbox head outputs
@@ -279,8 +293,15 @@ class BBoxHead(nn.Layer):
else:
tgt_labels = tgt_labels.cast('int64')
tgt_labels.stop_gradient = True
- loss_bbox_cls = F.cross_entropy(
- input=scores, label=tgt_labels, reduction='mean')
+
+ if not loss_normalize_pos:
+ loss_bbox_cls = F.cross_entropy(
+ input=scores, label=tgt_labels, reduction='mean')
+ else:
+ loss_bbox_cls = F.cross_entropy(
+ input=scores, label=tgt_labels,
+ reduction='none').sum() / (tgt_labels.shape[0] + 1e-7)
+
loss_bbox[cls_name] = loss_bbox_cls
# bbox reg
@@ -321,9 +342,16 @@ class BBoxHead(nn.Layer):
if self.bbox_loss is not None:
reg_delta = self.bbox_transform(reg_delta)
reg_target = self.bbox_transform(reg_target)
- loss_bbox_reg = self.bbox_loss(
- reg_delta, reg_target).sum() / tgt_labels.shape[0]
- loss_bbox_reg *= self.num_classes
+
+ if not loss_normalize_pos:
+ loss_bbox_reg = self.bbox_loss(
+ reg_delta, reg_target).sum() / tgt_labels.shape[0]
+ loss_bbox_reg *= self.num_classes
+
+ else:
+ loss_bbox_reg = self.bbox_loss(
+ reg_delta, reg_target).sum() / (tgt_labels.shape[0] + 1e-7)
+
else:
loss_bbox_reg = paddle.abs(reg_delta - reg_target).sum(
) / tgt_labels.shape[0]
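The new `loss_normalize_pos` switch changes only the normalizer: instead of `F.cross_entropy`'s built-in mean, both the classification and regression terms are summed and divided by the same `tgt_labels.shape[0] + 1e-7` (and the `num_classes` rescaling of the regression term is dropped), so the two branches share one denominator and an empty batch cannot divide by zero. A sketch of the two classification variants side by side:

```python
import paddle
import paddle.nn.functional as F

scores = paddle.rand([8, 81])  # 8 sampled RoIs, 80 classes + background
labels = paddle.randint(0, 81, [8])

mean_ce = F.cross_entropy(scores, labels, reduction='mean')
norm_ce = F.cross_entropy(scores, labels,
                          reduction='none').sum() / (labels.shape[0] + 1e-7)
# Numerically identical here; the explicit form exposes the denominator so
# the regression branch can reuse it.
print(float(mean_ce), float(norm_ce))
```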
diff --git a/paddlers/models/ppdet/modeling/heads/cascade_head.py b/paddlers/models/ppdet/modeling/heads/cascade_head.py
index a9ad574..d2acf4a 100644
--- a/paddlers/models/ppdet/modeling/heads/cascade_head.py
+++ b/paddlers/models/ppdet/modeling/heads/cascade_head.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
@@ -22,6 +22,7 @@ from .bbox_head import BBoxHead, TwoFCHead, XConvNormHead
from .roi_extractor import RoIAlign
from ..shape_spec import ShapeSpec
from ..bbox_utils import delta2bbox, clip_bbox, nonempty_bbox
+from ..cls_utils import _get_class_default_kwargs
__all__ = ['CascadeTwoFCHead', 'CascadeXConvNormHead', 'CascadeHead']
@@ -82,7 +83,7 @@ class CascadeXConvNormHead(nn.Layer):
conv_dim (int): The number of channels for the conv layers
out_channel (int): Output channels
resolution (int): Resolution of input feature map
- norm_type (string): Norm type, bn, gn, sync_bn are available,
+ norm_type (string): Norm type, bn, gn, sync_bn are available,
default `gn`
freeze_norm (bool): Whether to freeze the norm
num_cascade_stage (int): The number of cascade stage, default 3
@@ -142,10 +143,10 @@ class CascadeHead(BBoxHead):
head (nn.Layer): Extract feature in bbox head
in_channel (int): Input channel after RoI extractor
roi_extractor (object): The module of RoI Extractor
- bbox_assigner (object): The module of Box Assigner, label and sample the
+ bbox_assigner (object): The module of Box Assigner, label and sample the
box.
num_classes (int): The number of classes
- bbox_weight (List[List[float]]): The weight to get the decode box and the
+ bbox_weight (List[List[float]]): The weight to get the decode box and the
length of weight is the number of cascade stage
num_cascade_stages (int): THe number of stage to refine the box
"""
@@ -153,13 +154,18 @@ class CascadeHead(BBoxHead):
def __init__(self,
head,
in_channel,
- roi_extractor=RoIAlign().__dict__,
+ roi_extractor=_get_class_default_kwargs(RoIAlign),
bbox_assigner='BboxAssigner',
num_classes=80,
bbox_weight=[[10., 10., 5., 5.], [20.0, 20.0, 10.0, 10.0],
[30.0, 30.0, 15.0, 15.0]],
num_cascade_stages=3,
- bbox_loss=None):
+ bbox_loss=None,
+ reg_class_agnostic=True,
+ stage_loss_weights=None,
+ loss_normalize_pos=False,
+ add_gt_as_proposals=[True, False, False]):
+
nn.Layer.__init__(self, )
self.head = head
self.roi_extractor = roi_extractor
@@ -171,6 +177,18 @@ class CascadeHead(BBoxHead):
self.bbox_weight = bbox_weight
self.num_cascade_stages = num_cascade_stages
self.bbox_loss = bbox_loss
+ self.stage_loss_weights = [
+ 1. / num_cascade_stages for _ in range(num_cascade_stages)
+ ] if stage_loss_weights is None else stage_loss_weights
+ self.add_gt_as_proposals = add_gt_as_proposals
+
+ assert len(
+ self.stage_loss_weights
+        ) == num_cascade_stages, f'stage_loss_weights({len(self.stage_loss_weights)}) does not match num_cascade_stages({num_cascade_stages})'
+
+ self.reg_class_agnostic = reg_class_agnostic
+ num_bbox_delta = 4 if reg_class_agnostic else 4 * num_classes
+ self.loss_normalize_pos = loss_normalize_pos
self.bbox_score_list = []
self.bbox_delta_list = []
@@ -189,7 +207,7 @@ class CascadeHead(BBoxHead):
delta_name,
nn.Linear(
in_channel,
- 4,
+ num_bbox_delta,
weight_attr=paddle.ParamAttr(initializer=Normal(
mean=0.0, std=0.001))))
self.bbox_score_list.append(bbox_score)
@@ -206,7 +224,11 @@ class CascadeHead(BBoxHead):
"""
targets = []
if self.training:
- rois, rois_num, targets = self.bbox_assigner(rois, rois_num, inputs)
+ rois, rois_num, targets = self.bbox_assigner(
+ rois,
+ rois_num,
+ inputs,
+ add_gt_as_proposals=self.add_gt_as_proposals[0])
targets_list = [targets]
self.assigned_rois = (rois, rois_num)
self.assigned_targets = targets
@@ -219,13 +241,32 @@ class CascadeHead(BBoxHead):
inputs['im_shape'])
if self.training:
rois, rois_num, targets = self.bbox_assigner(
- rois, rois_num, inputs, i, is_cascade=True)
+ rois,
+ rois_num,
+ inputs,
+ i,
+ is_cascade=True,
+ add_gt_as_proposals=self.add_gt_as_proposals[i])
targets_list.append(targets)
rois_feat = self.roi_extractor(body_feats, rois, rois_num)
bbox_feat = self.head(rois_feat, i)
scores = self.bbox_score_list[i](bbox_feat)
deltas = self.bbox_delta_list[i](bbox_feat)
+
+ # TODO (lyuwenyu): Is this correct when there is only one class?
+ if not self.reg_class_agnostic and i < self.num_cascade_stages - 1:
+ deltas = deltas.reshape([deltas.shape[0], self.num_classes, 4])
+ labels = scores[:, :-1].argmax(axis=-1)
+
+ if self.training:
+ deltas = deltas[paddle.arange(deltas.shape[0]), labels]
+ else:
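+ # inference-time per-row gather: shifting deltas by a large constant
+ # keeps the entries picked by the one-hot mask non-zero (assuming
+ # |deltas| < 10000), so nonzero() recovers exactly one box per RoI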
+ deltas = deltas[((deltas + 10000) * F.one_hot(
+ labels, num_classes=self.num_classes).unsqueeze(-1) != 0
+ ).nonzero(as_tuple=True)].reshape(
+ [deltas.shape[0], 4])
+
head_out_list.append([scores, deltas, rois])
pred_bbox = self._get_pred_bbox(deltas, rois, self.bbox_weight[i])
@@ -233,11 +274,16 @@ class CascadeHead(BBoxHead):
loss = {}
for stage, value in enumerate(zip(head_out_list, targets_list)):
(scores, deltas, rois), targets = value
- loss_stage = self.get_loss(scores, deltas, targets, rois,
- self.bbox_weight[stage])
+ loss_stage = self.get_loss(
+ scores,
+ deltas,
+ targets,
+ rois,
+ self.bbox_weight[stage],
+ loss_normalize_pos=self.loss_normalize_pos)
for k, v in loss_stage.items():
loss[k + "_stage{}".format(
- stage)] = v / self.num_cascade_stages
+ stage)] = v * self.stage_loss_weights[stage]
return loss, bbox_feat
else:
@@ -266,6 +312,14 @@ class CascadeHead(BBoxHead):
num_prop = []
for p in proposals:
num_prop.append(p.shape[0])
+
+ # NOTE(dev): num_prop will be tagged as a LoDTensorArray because it
+ # depends on batch_size under @to_static. However, the argument
+ # num_or_sections of paddle.split does not support LoDTensorArray,
+ # so we replace it with [-1] when num_prop is not a list. This
+ # modification ensures correctness in both dynamic and static graphs.
+ if not isinstance(num_prop, list):
+ num_prop = [-1]
return pred_bbox.split(num_prop)
def get_prediction(self, head_out_list):
diff --git a/paddlers/models/ppdet/modeling/heads/centernet_head.py b/paddlers/models/ppdet/modeling/heads/centernet_head.py
old mode 100644
new mode 100755
diff --git a/paddlers/models/ppdet/modeling/heads/face_head.py b/paddlers/models/ppdet/modeling/heads/face_head.py
index 02dc13b..fa4b96c 100644
--- a/paddlers/models/ppdet/modeling/heads/face_head.py
+++ b/paddlers/models/ppdet/modeling/heads/face_head.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
@@ -17,6 +17,7 @@ import paddle.nn as nn
from paddlers.models.ppdet.core.workspace import register
from ..layers import AnchorGeneratorSSD
+from ..cls_utils import _get_class_default_kwargs
@register
@@ -39,7 +40,7 @@ class FaceHead(nn.Layer):
def __init__(self,
num_classes=80,
in_channels=[96, 96],
- anchor_generator=AnchorGeneratorSSD().__dict__,
+ anchor_generator=_get_class_default_kwargs(AnchorGeneratorSSD),
kernel_size=3,
padding=1,
conv_decay=0.,
diff --git a/paddlers/models/ppdet/modeling/heads/fcos_head.py b/paddlers/models/ppdet/modeling/heads/fcos_head.py
index 079d751..568053b 100644
--- a/paddlers/models/ppdet/modeling/heads/fcos_head.py
+++ b/paddlers/models/ppdet/modeling/heads/fcos_head.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
@@ -64,6 +64,8 @@ class FCOSFeat(nn.Layer):
norm_type='bn',
use_dcn=False):
super(FCOSFeat, self).__init__()
+ self.feat_in = feat_in
+ self.feat_out = feat_out
self.num_convs = num_convs
self.norm_type = norm_type
self.cls_subnet_convs = []
diff --git a/paddlers/models/ppdet/modeling/heads/gfl_head.py b/paddlers/models/ppdet/modeling/heads/gfl_head.py
index 15bbddd..5331b3b 100644
--- a/paddlers/models/ppdet/modeling/heads/gfl_head.py
+++ b/paddlers/models/ppdet/modeling/heads/gfl_head.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
# The code is based on:
@@ -29,7 +29,7 @@ from paddle.nn.initializer import Normal, Constant
from paddlers.models.ppdet.core.workspace import register
from paddlers.models.ppdet.modeling.layers import ConvNormLayer
-from paddlers.models.ppdet.modeling.bbox_utils import distance2bbox, bbox2distance
+from paddlers.models.ppdet.modeling.bbox_utils import distance2bbox, bbox2distance, batch_distance2bbox
from paddlers.models.ppdet.data.transform.atss_assigner import bbox_overlaps
@@ -79,7 +79,9 @@ class Integral(nn.Layer):
offsets from the box center in four directions, shape (N, 4).
"""
x = F.softmax(x.reshape([-1, self.reg_max + 1]), axis=1)
- x = F.linear(x, self.project).reshape([-1, 4])
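+ # project the softmax distribution onto the bin indices 0..reg_max to
+ # get the expected offset; at eval time the level-wise shape is kept
+ # so boxes can be decoded per feature map in batch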
+ x = F.linear(x, self.project)
+ if self.training:
+ x = x.reshape([-1, 4])
return x
@@ -241,18 +243,34 @@ class GFLHead(nn.Layer):
), "The size of fpn_feats is not equal to size of fpn_stride"
cls_logits_list = []
bboxes_reg_list = []
- for scale_reg, fpn_feat in zip(self.scales_regs, fpn_feats):
+ for stride, scale_reg, fpn_feat in zip(self.fpn_stride,
+ self.scales_regs, fpn_feats):
conv_cls_feat, conv_reg_feat = self.conv_feat(fpn_feat)
- cls_logits = self.gfl_head_cls(conv_cls_feat)
- bbox_reg = scale_reg(self.gfl_head_reg(conv_reg_feat))
+ cls_score = self.gfl_head_cls(conv_cls_feat)
+ bbox_pred = scale_reg(self.gfl_head_reg(conv_reg_feat))
if self.dgqp_module:
- quality_score = self.dgqp_module(bbox_reg)
- cls_logits = F.sigmoid(cls_logits) * quality_score
+ quality_score = self.dgqp_module(bbox_pred)
+ cls_score = F.sigmoid(cls_score) * quality_score
if not self.training:
- cls_logits = F.sigmoid(cls_logits.transpose([0, 2, 3, 1]))
- bbox_reg = bbox_reg.transpose([0, 2, 3, 1])
- cls_logits_list.append(cls_logits)
- bboxes_reg_list.append(bbox_reg)
+ cls_score = F.sigmoid(cls_score.transpose([0, 2, 3, 1]))
+ bbox_pred = bbox_pred.transpose([0, 2, 3, 1])
+ b, cell_h, cell_w, _ = paddle.shape(cls_score)
+ y, x = self.get_single_level_center_point(
+ [cell_h, cell_w], stride, cell_offset=self.cell_offset)
+ center_points = paddle.stack([x, y], axis=-1)
+ cls_score = cls_score.reshape([b, -1, self.cls_out_channels])
+ bbox_pred = self.distribution_project(bbox_pred) * stride
+ bbox_pred = bbox_pred.reshape([-1, cell_h * cell_w, 4])
+
+ # NOTE: when keep_ratio=False and the image shape is a multiple
+ # of 32, max_shapes need not be set in batch_distance2bbox, which
+ # speeds up prediction. If max_shapes is needed, pass
+ # inputs['im_shape'].
+ bbox_pred = batch_distance2bbox(
+ center_points, bbox_pred, max_shapes=None)
+
+ cls_logits_list.append(cls_score)
+ bboxes_reg_list.append(bbox_pred)
return (cls_logits_list, bboxes_reg_list)
@@ -370,7 +388,7 @@ class GFLHead(nn.Layer):
avg_factor = sum(avg_factor)
try:
- avg_factor = paddle.distributed.all_reduce(avg_factor.clone())
+ paddle.distributed.all_reduce(avg_factor)
avg_factor = paddle.clip(
avg_factor / paddle.distributed.get_world_size(), min=1)
except:
@@ -410,71 +428,13 @@ class GFLHead(nn.Layer):
x = x.flatten()
return y, x
- def get_bboxes_single(self,
- cls_scores,
- bbox_preds,
- img_shape,
- scale_factor,
- rescale=True,
- cell_offset=0):
- assert len(cls_scores) == len(bbox_preds)
- mlvl_bboxes = []
- mlvl_scores = []
- for stride, cls_score, bbox_pred in zip(self.fpn_stride, cls_scores,
- bbox_preds):
- featmap_size = [
- paddle.shape(cls_score)[0], paddle.shape(cls_score)[1]
- ]
- y, x = self.get_single_level_center_point(
- featmap_size, stride, cell_offset=cell_offset)
- center_points = paddle.stack([x, y], axis=-1)
- scores = cls_score.reshape([-1, self.cls_out_channels])
- bbox_pred = self.distribution_project(bbox_pred) * stride
-
- if scores.shape[0] > self.nms_pre:
- max_scores = scores.max(axis=1)
- _, topk_inds = max_scores.topk(self.nms_pre)
- center_points = center_points.gather(topk_inds)
- bbox_pred = bbox_pred.gather(topk_inds)
- scores = scores.gather(topk_inds)
-
- bboxes = distance2bbox(
- center_points, bbox_pred, max_shape=img_shape)
- mlvl_bboxes.append(bboxes)
- mlvl_scores.append(scores)
- mlvl_bboxes = paddle.concat(mlvl_bboxes)
- if rescale:
- # [h_scale, w_scale] to [w_scale, h_scale, w_scale, h_scale]
- im_scale = paddle.concat([scale_factor[::-1], scale_factor[::-1]])
- mlvl_bboxes /= im_scale
- mlvl_scores = paddle.concat(mlvl_scores)
- mlvl_scores = mlvl_scores.transpose([1, 0])
- return mlvl_bboxes, mlvl_scores
-
- def decode(self, cls_scores, bbox_preds, im_shape, scale_factor,
- cell_offset):
- batch_bboxes = []
- batch_scores = []
- for img_id in range(cls_scores[0].shape[0]):
- num_levels = len(cls_scores)
- cls_score_list = [cls_scores[i][img_id] for i in range(num_levels)]
- bbox_pred_list = [bbox_preds[i][img_id] for i in range(num_levels)]
- bboxes, scores = self.get_bboxes_single(
- cls_score_list,
- bbox_pred_list,
- im_shape[img_id],
- scale_factor[img_id],
- cell_offset=cell_offset)
- batch_bboxes.append(bboxes)
- batch_scores.append(scores)
- batch_bboxes = paddle.stack(batch_bboxes, axis=0)
- batch_scores = paddle.stack(batch_scores, axis=0)
-
- return batch_bboxes, batch_scores
-
def post_process(self, gfl_head_outs, im_shape, scale_factor):
cls_scores, bboxes_reg = gfl_head_outs
- bboxes, score = self.decode(cls_scores, bboxes_reg, im_shape,
- scale_factor, self.cell_offset)
- bbox_pred, bbox_num, _ = self.nms(bboxes, score)
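+ # boxes are already decoded per level in forward, so post-processing
+ # only concatenates levels, rescales to the origin image and runs NMS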
+ bboxes = paddle.concat(bboxes_reg, axis=1)
+ # rescale: [h_scale, w_scale] -> [w_scale, h_scale, w_scale, h_scale]
+ im_scale = scale_factor.flip([1]).tile([1, 2]).unsqueeze(1)
+ bboxes /= im_scale
+ mlvl_scores = paddle.concat(cls_scores, axis=1)
+ mlvl_scores = mlvl_scores.transpose([0, 2, 1])
+ bbox_pred, bbox_num, _ = self.nms(bboxes, mlvl_scores)
return bbox_pred, bbox_num
diff --git a/paddlers/models/ppdet/modeling/heads/keypoint_hrhrnet_head.py b/paddlers/models/ppdet/modeling/heads/keypoint_hrhrnet_head.py
index 4c47474..fc9e2f5 100644
--- a/paddlers/models/ppdet/modeling/heads/keypoint_hrhrnet_head.py
+++ b/paddlers/models/ppdet/modeling/heads/keypoint_hrhrnet_head.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
diff --git a/paddlers/models/ppdet/modeling/heads/mask_head.py b/paddlers/models/ppdet/modeling/heads/mask_head.py
index 01fbc39..63f8d91 100644
--- a/paddlers/models/ppdet/modeling/heads/mask_head.py
+++ b/paddlers/models/ppdet/modeling/heads/mask_head.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
@@ -20,6 +20,7 @@ from paddle.nn.initializer import KaimingNormal
from paddlers.models.ppdet.core.workspace import register, create
from paddlers.models.ppdet.modeling.layers import ConvNormLayer
from .roi_extractor import RoIAlign
+from ..cls_utils import _get_class_default_kwargs
@register
@@ -103,7 +104,7 @@ class MaskFeat(nn.Layer):
@register
class MaskHead(nn.Layer):
- __shared__ = ['num_classes']
+ __shared__ = ['num_classes', 'export_onnx']
__inject__ = ['mask_assigner']
"""
RCNN mask head
@@ -111,7 +112,7 @@ class MaskHead(nn.Layer):
Args:
head (nn.Layer): Extract feature in mask head
roi_extractor (object): The module of RoI Extractor
- mask_assigner (object): The module of Mask Assigner,
+ mask_assigner (object): The module of Mask Assigner,
label and sample the mask
num_classes (int): The number of classes
share_bbox_feat (bool): Whether to share the feature from bbox head,
@@ -120,12 +121,14 @@ class MaskHead(nn.Layer):
def __init__(self,
head,
- roi_extractor=RoIAlign().__dict__,
+ roi_extractor=_get_class_default_kwargs(RoIAlign),
mask_assigner='MaskAssigner',
num_classes=80,
- share_bbox_feat=False):
+ share_bbox_feat=False,
+ export_onnx=False):
super(MaskHead, self).__init__()
self.num_classes = num_classes
+ self.export_onnx = export_onnx
self.roi_extractor = roi_extractor
if isinstance(roi_extractor, dict):
@@ -206,8 +209,8 @@ class MaskHead(nn.Layer):
rois_num (Tensor): The number of prediction for each batch
scale_factor (Tensor): The scale factor from origin size to input size
"""
- if rois.shape[0] == 0:
- mask_out = paddle.full([1, 1, 1, 1], -1)
+ if not self.export_onnx and rois.shape[0] == 0:
+ mask_out = paddle.full([1, 1, 1], -1)
else:
bbox = [rois[:, 2:]]
labels = rois[:, 0].cast('int32')
@@ -218,19 +221,17 @@ class MaskHead(nn.Layer):
mask_feat = self.head(rois_feat)
mask_logit = self.mask_fcn_logits(mask_feat)
- mask_num_class = mask_logit.shape[1]
- if mask_num_class == 1:
- mask_out = F.sigmoid(mask_logit)
+ if self.num_classes == 1:
+ mask_out = F.sigmoid(mask_logit)[:, 0, :, :]
else:
- num_masks = mask_logit.shape[0]
- mask_out = []
- # TODO: need to optimize gather
- for i in range(mask_logit.shape[0]):
- pred_masks = paddle.unsqueeze(
- mask_logit[i, :, :, :], axis=0)
- mask = paddle.gather(pred_masks, labels[i], axis=1)
- mask_out.append(mask)
- mask_out = F.sigmoid(paddle.concat(mask_out))
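+ # vectorized gather: select each RoI's mask channel by its predicted
+ # label instead of looping over RoIs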
+ num_masks = paddle.shape(mask_logit)[0]
+ index = paddle.arange(num_masks).cast('int32')
+ mask_out = mask_logit[index, labels]
+ mask_out_shape = paddle.shape(mask_out)
+ mask_out = paddle.reshape(mask_out, [
+ paddle.shape(index), mask_out_shape[-2], mask_out_shape[-1]
+ ])
+ mask_out = F.sigmoid(mask_out)
return mask_out
def forward(self,
diff --git a/paddlers/models/ppdet/modeling/heads/pico_head.py b/paddlers/models/ppdet/modeling/heads/pico_head.py
index a6915f4..9f25051 100644
--- a/paddlers/models/ppdet/modeling/heads/pico_head.py
+++ b/paddlers/models/ppdet/modeling/heads/pico_head.py
@@ -24,9 +24,36 @@ import paddle.nn.functional as F
from paddle import ParamAttr
from paddle.nn.initializer import Normal, Constant
+from paddlers.models.ppdet.modeling.ops import get_static_shape
+from ..initializer import normal_
+from ..assigners.utils import generate_anchors_for_grid_cell
+from ..bbox_utils import bbox_center, batch_distance2bbox, bbox2distance
from paddlers.models.ppdet.core.workspace import register
from paddlers.models.ppdet.modeling.layers import ConvNormLayer
from .simota_head import OTAVFLHead
+from .gfl_head import Integral, GFLHead
+from paddlers.models.ppdet.modeling.necks.csp_pan import DPModule
+
+eps = 1e-9
+
+__all__ = ['PicoHead', 'PicoHeadV2', 'PicoFeat']
+
+
+class PicoSE(nn.Layer):
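+ """Squeeze-and-excitation block used in PicoFeat: a 1x1 conv over the
+ pooled feature produces channel-wise gates for the input feature."""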
+ def __init__(self, feat_channels):
+ super(PicoSE, self).__init__()
+ self.fc = nn.Conv2D(feat_channels, feat_channels, 1)
+ self.conv = ConvNormLayer(feat_channels, feat_channels, 1, 1)
+
+ self._init_weights()
+
+ def _init_weights(self):
+ normal_(self.fc.weight, std=0.001)
+
+ def forward(self, feat, avg_feat):
+ weight = F.sigmoid(self.fc(avg_feat))
+ out = self.conv(feat * weight)
+ return out
@register
@@ -39,6 +66,9 @@ class PicoFeat(nn.Layer):
feat_out (int): The channel number of output Tensor.
num_convs (int): The number of conv layers in the LiteGFLFeat.
norm_type (str): Normalization type, 'bn'/'sync_bn'/'gn'.
+ share_cls_reg (bool): Whether to share the cls and reg branches.
+ act (str): The activation function used in each conv layer.
+ use_se (bool): Whether to use an SE module.
"""
def __init__(self,
@@ -48,14 +78,20 @@ class PicoFeat(nn.Layer):
num_convs=2,
norm_type='bn',
share_cls_reg=False,
- act='hard_swish'):
+ act='hard_swish',
+ use_se=False):
super(PicoFeat, self).__init__()
self.num_convs = num_convs
self.norm_type = norm_type
self.share_cls_reg = share_cls_reg
self.act = act
+ self.use_se = use_se
self.cls_convs = []
self.reg_convs = []
+ if use_se:
+ assert share_cls_reg, \
+ 'share_cls_reg must be True when use_se is enabled'
+ self.se = nn.LayerList()
for stage_idx in range(num_fpn_stride):
cls_subnet_convs = []
reg_subnet_convs = []
@@ -111,12 +147,16 @@ class PicoFeat(nn.Layer):
reg_subnet_convs.append(reg_conv_pw)
self.cls_convs.append(cls_subnet_convs)
self.reg_convs.append(reg_subnet_convs)
+ if use_se:
+ self.se.append(PicoSE(feat_out))
def act_func(self, x):
if self.act == "leaky_relu":
x = F.leaky_relu(x)
elif self.act == "hard_swish":
x = F.hardswish(x)
+ elif self.act == "relu6":
+ x = F.relu6(x)
return x
def forward(self, fpn_feat, stage_idx):
@@ -125,8 +165,13 @@ class PicoFeat(nn.Layer):
reg_feat = fpn_feat
for i in range(len(self.cls_convs[stage_idx])):
cls_feat = self.act_func(self.cls_convs[stage_idx][i](cls_feat))
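+ # when cls and reg share the branch, the reg path reuses the cls feature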
+ reg_feat = cls_feat
if not self.share_cls_reg:
reg_feat = self.act_func(self.reg_convs[stage_idx][i](reg_feat))
+ if self.use_se:
+ avg_feat = F.adaptive_avg_pool2d(cls_feat, (1, 1))
+ se_feat = self.act_func(self.se[stage_idx](cls_feat, avg_feat))
+ return cls_feat, se_feat
return cls_feat, reg_feat
@@ -150,7 +195,7 @@ class PicoHead(OTAVFLHead):
'conv_feat', 'dgqp_module', 'loss_class', 'loss_dfl', 'loss_bbox',
'assigner', 'nms'
]
- __shared__ = ['num_classes']
+ __shared__ = ['num_classes', 'eval_size']
def __init__(self,
conv_feat='PicoFeat',
@@ -166,7 +211,8 @@ class PicoHead(OTAVFLHead):
feat_in_chan=96,
nms=None,
nms_pre=1000,
- cell_offset=0):
+ cell_offset=0,
+ eval_size=None):
super(PicoHead, self).__init__(
conv_feat=conv_feat,
dgqp_module=dgqp_module,
@@ -195,6 +241,7 @@ class PicoHead(OTAVFLHead):
self.nms = nms
self.nms_pre = nms_pre
self.cell_offset = cell_offset
+ self.eval_size = eval_size
self.use_sigmoid = self.loss_vfl.use_sigmoid
if self.use_sigmoid:
@@ -238,12 +285,50 @@ class PicoHead(OTAVFLHead):
bias_attr=ParamAttr(initializer=Constant(value=0))))
self.head_reg_list.append(head_reg)
- def forward(self, fpn_feats, deploy=False):
+ # initialize the anchor points
+ if self.eval_size:
+ self.anchor_points, self.stride_tensor = self._generate_anchors()
+
+ def forward(self, fpn_feats, export_post_process=True):
assert len(fpn_feats) == len(
self.fpn_stride
), "The size of fpn_feats is not equal to size of fpn_stride"
- cls_logits_list = []
- bboxes_reg_list = []
+
+ if self.training:
+ return self.forward_train(fpn_feats)
+ else:
+ return self.forward_eval(
+ fpn_feats, export_post_process=export_post_process)
+
+ def forward_train(self, fpn_feats):
+ cls_logits_list, bboxes_reg_list = [], []
+ for i, fpn_feat in enumerate(fpn_feats):
+ conv_cls_feat, conv_reg_feat = self.conv_feat(fpn_feat, i)
+ if self.conv_feat.share_cls_reg:
+ cls_logits = self.head_cls_list[i](conv_cls_feat)
+ cls_score, bbox_pred = paddle.split(
+ cls_logits,
+ [self.cls_out_channels, 4 * (self.reg_max + 1)],
+ axis=1)
+ else:
+ cls_score = self.head_cls_list[i](conv_cls_feat)
+ bbox_pred = self.head_reg_list[i](conv_reg_feat)
+
+ if self.dgqp_module:
+ quality_score = self.dgqp_module(bbox_pred)
+ cls_score = F.sigmoid(cls_score) * quality_score
+
+ cls_logits_list.append(cls_score)
+ bboxes_reg_list.append(bbox_pred)
+
+ return (cls_logits_list, bboxes_reg_list)
+
+ def forward_eval(self, fpn_feats, export_post_process=True):
+ if self.eval_size:
+ anchor_points, stride_tensor = self.anchor_points, self.stride_tensor
+ else:
+ anchor_points, stride_tensor = self._generate_anchors(fpn_feats)
+ cls_logits_list, bboxes_reg_list = [], []
for i, fpn_feat in enumerate(fpn_feats):
conv_cls_feat, conv_reg_feat = self.conv_feat(fpn_feat, i)
if self.conv_feat.share_cls_reg:
@@ -260,18 +345,439 @@ class PicoHead(OTAVFLHead):
quality_score = self.dgqp_module(bbox_pred)
cls_score = F.sigmoid(cls_score) * quality_score
- if deploy:
+ if not export_post_process:
# Now only supports batch size = 1 in deploy
# TODO(ygh): support batch size > 1
- cls_score = F.sigmoid(cls_score).reshape(
+ cls_score_out = F.sigmoid(cls_score).reshape(
[1, self.cls_out_channels, -1]).transpose([0, 2, 1])
bbox_pred = bbox_pred.reshape([1, (self.reg_max + 1) * 4,
-1]).transpose([0, 2, 1])
- elif not self.training:
- cls_score = F.sigmoid(cls_score.transpose([0, 2, 3, 1]))
+ else:
+ _, _, h, w = fpn_feat.shape
+ l = h * w
+ cls_score_out = F.sigmoid(
+ cls_score.reshape([-1, self.cls_out_channels, l]))
bbox_pred = bbox_pred.transpose([0, 2, 3, 1])
+ bbox_pred = self.distribution_project(bbox_pred)
+ bbox_pred = bbox_pred.reshape([-1, l, 4])
- cls_logits_list.append(cls_score)
+ cls_logits_list.append(cls_score_out)
bboxes_reg_list.append(bbox_pred)
+ if export_post_process:
+ cls_logits_list = paddle.concat(cls_logits_list, axis=-1)
+ bboxes_reg_list = paddle.concat(bboxes_reg_list, axis=1)
+ bboxes_reg_list = batch_distance2bbox(anchor_points,
+ bboxes_reg_list)
+ bboxes_reg_list *= stride_tensor
+
return (cls_logits_list, bboxes_reg_list)
+
+ def _generate_anchors(self, feats=None):
+ # only used at eval time: pre-compute per-level cell-center anchor points and strides
+ anchor_points = []
+ stride_tensor = []
+ for i, stride in enumerate(self.fpn_stride):
+ if feats is not None:
+ _, _, h, w = feats[i].shape
+ else:
+ h = math.ceil(self.eval_size[0] / stride)
+ w = math.ceil(self.eval_size[1] / stride)
+ shift_x = paddle.arange(end=w) + self.cell_offset
+ shift_y = paddle.arange(end=h) + self.cell_offset
+ shift_y, shift_x = paddle.meshgrid(shift_y, shift_x)
+ anchor_point = paddle.cast(
+ paddle.stack(
+ [shift_x, shift_y], axis=-1), dtype='float32')
+ anchor_points.append(anchor_point.reshape([-1, 2]))
+ stride_tensor.append(
+ paddle.full(
+ [h * w, 1], stride, dtype='float32'))
+ anchor_points = paddle.concat(anchor_points)
+ stride_tensor = paddle.concat(stride_tensor)
+ return anchor_points, stride_tensor
+
+ def post_process(self, head_outs, scale_factor, export_nms=True):
+ pred_scores, pred_bboxes = head_outs
+ if not export_nms:
+ return pred_bboxes, pred_scores
+ else:
+ # rescale: [h_scale, w_scale] -> [w_scale, h_scale, w_scale, h_scale]
+ scale_y, scale_x = paddle.split(scale_factor, 2, axis=-1)
+ scale_factor = paddle.concat(
+ [scale_x, scale_y, scale_x, scale_y],
+ axis=-1).reshape([-1, 1, 4])
+ # scale bbox to origin image size.
+ pred_bboxes /= scale_factor
+ bbox_pred, bbox_num, _ = self.nms(pred_bboxes, pred_scores)
+ return bbox_pred, bbox_num
+
+
+@register
+class PicoHeadV2(GFLHead):
+ """
+ PicoHeadV2
+ Args:
+ conv_feat (object): Instance of 'PicoFeat'
+ num_classes (int): Number of classes
+ fpn_stride (list): The stride of each FPN Layer
+ prior_prob (float): Used to set the bias init for the class prediction layer
+ loss_class (object): Instance of VariFocalLoss.
+ loss_dfl (object): Instance of DistributionFocalLoss.
+ loss_bbox (object): Instance of bbox loss.
+ assigner (object): Instance of label assigner.
+ reg_max (int): Max value of the integral set :math:`{0, ..., reg_max}`
+ in QFL setting. Default: 16.
+ """
+ __inject__ = [
+ 'conv_feat', 'dgqp_module', 'loss_class', 'loss_dfl', 'loss_bbox',
+ 'static_assigner', 'assigner', 'nms'
+ ]
+ __shared__ = ['num_classes', 'eval_size']
+
+ def __init__(self,
+ conv_feat='PicoFeatV2',
+ dgqp_module=None,
+ num_classes=80,
+ fpn_stride=[8, 16, 32],
+ prior_prob=0.01,
+ use_align_head=True,
+ loss_class='VariFocalLoss',
+ loss_dfl='DistributionFocalLoss',
+ loss_bbox='GIoULoss',
+ static_assigner_epoch=60,
+ static_assigner='ATSSAssigner',
+ assigner='TaskAlignedAssigner',
+ reg_max=16,
+ feat_in_chan=96,
+ nms=None,
+ nms_pre=1000,
+ cell_offset=0,
+ act='hard_swish',
+ grid_cell_scale=5.0,
+ eval_size=None):
+ super(PicoHeadV2, self).__init__(
+ conv_feat=conv_feat,
+ dgqp_module=dgqp_module,
+ num_classes=num_classes,
+ fpn_stride=fpn_stride,
+ prior_prob=prior_prob,
+ loss_class=loss_class,
+ loss_dfl=loss_dfl,
+ loss_bbox=loss_bbox,
+ reg_max=reg_max,
+ feat_in_chan=feat_in_chan,
+ nms=nms,
+ nms_pre=nms_pre,
+ cell_offset=cell_offset, )
+ self.conv_feat = conv_feat
+ self.num_classes = num_classes
+ self.fpn_stride = fpn_stride
+ self.prior_prob = prior_prob
+ self.loss_vfl = loss_class
+ self.loss_dfl = loss_dfl
+ self.loss_bbox = loss_bbox
+
+ self.static_assigner_epoch = static_assigner_epoch
+ self.static_assigner = static_assigner
+ self.assigner = assigner
+
+ self.reg_max = reg_max
+ self.feat_in_chan = feat_in_chan
+ self.nms = nms
+ self.nms_pre = nms_pre
+ self.cell_offset = cell_offset
+ self.act = act
+ self.grid_cell_scale = grid_cell_scale
+ self.use_align_head = use_align_head
+ self.cls_out_channels = self.num_classes
+ self.eval_size = eval_size
+
+ bias_init_value = -math.log((1 - self.prior_prob) / self.prior_prob)
+ # Clear the super class initialization
+ self.gfl_head_cls = None
+ self.gfl_head_reg = None
+ self.scales_regs = None
+
+ self.head_cls_list = []
+ self.head_reg_list = []
+ self.cls_align = nn.LayerList()
+
+ for i in range(len(fpn_stride)):
+ head_cls = self.add_sublayer(
+ "head_cls" + str(i),
+ nn.Conv2D(
+ in_channels=self.feat_in_chan,
+ out_channels=self.cls_out_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ weight_attr=ParamAttr(initializer=Normal(
+ mean=0., std=0.01)),
+ bias_attr=ParamAttr(
+ initializer=Constant(value=bias_init_value))))
+ self.head_cls_list.append(head_cls)
+ head_reg = self.add_sublayer(
+ "head_reg" + str(i),
+ nn.Conv2D(
+ in_channels=self.feat_in_chan,
+ out_channels=4 * (self.reg_max + 1),
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ weight_attr=ParamAttr(initializer=Normal(
+ mean=0., std=0.01)),
+ bias_attr=ParamAttr(initializer=Constant(value=0))))
+ self.head_reg_list.append(head_reg)
+ if self.use_align_head:
+ self.cls_align.append(
+ DPModule(
+ self.feat_in_chan,
+ 1,
+ 5,
+ act=self.act,
+ use_act_in_out=False))
+
+ # initialize the anchor points
+ if self.eval_size:
+ self.anchor_points, self.stride_tensor = self._generate_anchors()
+
+ def forward(self, fpn_feats, export_post_process=True):
+ assert len(fpn_feats) == len(
+ self.fpn_stride
+ ), "The size of fpn_feats is not equal to size of fpn_stride"
+
+ if self.training:
+ return self.forward_train(fpn_feats)
+ else:
+ return self.forward_eval(
+ fpn_feats, export_post_process=export_post_process)
+
+ def forward_train(self, fpn_feats):
+ cls_score_list, reg_list, box_list = [], [], []
+ for i, (fpn_feat, stride) in enumerate(zip(fpn_feats, self.fpn_stride)):
+ b, _, h, w = get_static_shape(fpn_feat)
+ # task decomposition
+ conv_cls_feat, se_feat = self.conv_feat(fpn_feat, i)
+ cls_logit = self.head_cls_list[i](se_feat)
+ reg_pred = self.head_reg_list[i](se_feat)
+
+ # cls prediction and alignment
+ if self.use_align_head:
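+ # align classification with localization quality: take the geometric
+ # mean of the cls probability and the alignment probability (eps
+ # guards the sqrt)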
+ cls_prob = F.sigmoid(self.cls_align[i](conv_cls_feat))
+ cls_score = (F.sigmoid(cls_logit) * cls_prob + eps).sqrt()
+ else:
+ cls_score = F.sigmoid(cls_logit)
+
+ cls_score_out = cls_score.transpose([0, 2, 3, 1])
+ bbox_pred = reg_pred.transpose([0, 2, 3, 1])
+ b, cell_h, cell_w, _ = paddle.shape(cls_score_out)
+ y, x = self.get_single_level_center_point(
+ [cell_h, cell_w], stride, cell_offset=self.cell_offset)
+ center_points = paddle.stack([x, y], axis=-1)
+ cls_score_out = cls_score_out.reshape(
+ [b, -1, self.cls_out_channels])
+ bbox_pred = self.distribution_project(bbox_pred) * stride
+ bbox_pred = bbox_pred.reshape([b, cell_h * cell_w, 4])
+ bbox_pred = batch_distance2bbox(
+ center_points, bbox_pred, max_shapes=None)
+ cls_score_list.append(cls_score.flatten(2).transpose([0, 2, 1]))
+ reg_list.append(reg_pred.flatten(2).transpose([0, 2, 1]))
+ box_list.append(bbox_pred / stride)
+
+ cls_score_list = paddle.concat(cls_score_list, axis=1)
+ box_list = paddle.concat(box_list, axis=1)
+ reg_list = paddle.concat(reg_list, axis=1)
+ return cls_score_list, reg_list, box_list, fpn_feats
+
+ def forward_eval(self, fpn_feats, export_post_process=True):
+ if self.eval_size:
+ anchor_points, stride_tensor = self.anchor_points, self.stride_tensor
+ else:
+ anchor_points, stride_tensor = self._generate_anchors(fpn_feats)
+ cls_score_list, box_list = [], []
+ for i, (fpn_feat, stride) in enumerate(zip(fpn_feats, self.fpn_stride)):
+ _, _, h, w = fpn_feat.shape
+ # task decomposition
+ conv_cls_feat, se_feat = self.conv_feat(fpn_feat, i)
+ cls_logit = self.head_cls_list[i](se_feat)
+ reg_pred = self.head_reg_list[i](se_feat)
+
+ # cls prediction and alignment
+ if self.use_align_head:
+ cls_prob = F.sigmoid(self.cls_align[i](conv_cls_feat))
+ cls_score = (F.sigmoid(cls_logit) * cls_prob + eps).sqrt()
+ else:
+ cls_score = F.sigmoid(cls_logit)
+
+ if not export_post_process:
+ # Now only supports batch size = 1 in deploy
+ cls_score_list.append(
+ cls_score.reshape([1, self.cls_out_channels, -1]).transpose(
+ [0, 2, 1]))
+ box_list.append(
+ reg_pred.reshape([1, (self.reg_max + 1) * 4, -1]).transpose(
+ [0, 2, 1]))
+ else:
+ l = h * w
+ cls_score_out = cls_score.reshape(
+ [-1, self.cls_out_channels, l])
+ bbox_pred = reg_pred.transpose([0, 2, 3, 1])
+ bbox_pred = self.distribution_project(bbox_pred)
+ bbox_pred = bbox_pred.reshape([-1, l, 4])
+ cls_score_list.append(cls_score_out)
+ box_list.append(bbox_pred)
+
+ if export_post_process:
+ cls_score_list = paddle.concat(cls_score_list, axis=-1)
+ box_list = paddle.concat(box_list, axis=1)
+ box_list = batch_distance2bbox(anchor_points, box_list)
+ box_list *= stride_tensor
+
+ return cls_score_list, box_list
+
+ def get_loss(self, head_outs, gt_meta):
+ pred_scores, pred_regs, pred_bboxes, fpn_feats = head_outs
+ gt_labels = gt_meta['gt_class']
+ gt_bboxes = gt_meta['gt_bbox']
+ gt_scores = gt_meta['gt_score'] if 'gt_score' in gt_meta else None
+ num_imgs = gt_meta['im_id'].shape[0]
+ pad_gt_mask = gt_meta['pad_gt_mask']
+
+ anchors, _, num_anchors_list, stride_tensor_list = generate_anchors_for_grid_cell(
+ fpn_feats, self.fpn_stride, self.grid_cell_scale, self.cell_offset)
+
+ centers = bbox_center(anchors)
+
+ # label assignment
+ if gt_meta['epoch_id'] < self.static_assigner_epoch:
+ assigned_labels, assigned_bboxes, assigned_scores = self.static_assigner(
+ anchors,
+ num_anchors_list,
+ gt_labels,
+ gt_bboxes,
+ pad_gt_mask,
+ bg_index=self.num_classes,
+ gt_scores=gt_scores,
+ pred_bboxes=pred_bboxes.detach() * stride_tensor_list)
+
+ else:
+ assigned_labels, assigned_bboxes, assigned_scores = self.assigner(
+ pred_scores.detach(),
+ pred_bboxes.detach() * stride_tensor_list,
+ centers,
+ num_anchors_list,
+ gt_labels,
+ gt_bboxes,
+ pad_gt_mask,
+ bg_index=self.num_classes,
+ gt_scores=gt_scores)
+
+ assigned_bboxes /= stride_tensor_list
+
+ centers_shape = centers.shape
+ flatten_centers = centers.expand(
+ [num_imgs, centers_shape[0], centers_shape[1]]).reshape([-1, 2])
+ flatten_strides = stride_tensor_list.expand(
+ [num_imgs, centers_shape[0], 1]).reshape([-1, 1])
+ flatten_cls_preds = pred_scores.reshape([-1, self.num_classes])
+ flatten_regs = pred_regs.reshape([-1, 4 * (self.reg_max + 1)])
+ flatten_bboxes = pred_bboxes.reshape([-1, 4])
+ flatten_bbox_targets = assigned_bboxes.reshape([-1, 4])
+ flatten_labels = assigned_labels.reshape([-1])
+ flatten_assigned_scores = assigned_scores.reshape(
+ [-1, self.num_classes])
+
+ pos_inds = paddle.nonzero(
+ paddle.logical_and((flatten_labels >= 0),
+ (flatten_labels < self.num_classes)),
+ as_tuple=False).squeeze(1)
+
+ num_total_pos = len(pos_inds)
+
+ if num_total_pos > 0:
+ pos_bbox_targets = paddle.gather(
+ flatten_bbox_targets, pos_inds, axis=0)
+ pos_decode_bbox_pred = paddle.gather(
+ flatten_bboxes, pos_inds, axis=0)
+ pos_reg = paddle.gather(flatten_regs, pos_inds, axis=0)
+ pos_strides = paddle.gather(flatten_strides, pos_inds, axis=0)
+ pos_centers = paddle.gather(
+ flatten_centers, pos_inds, axis=0) / pos_strides
+
+ weight_targets = flatten_assigned_scores.detach()
+ weight_targets = paddle.gather(
+ weight_targets.max(axis=1, keepdim=True), pos_inds, axis=0)
+
+ pred_corners = pos_reg.reshape([-1, self.reg_max + 1])
+ target_corners = bbox2distance(pos_centers, pos_bbox_targets,
+ self.reg_max).reshape([-1])
+ # regression loss
+ loss_bbox = paddle.sum(
+ self.loss_bbox(pos_decode_bbox_pred,
+ pos_bbox_targets) * weight_targets)
+
+ # dfl loss
+ loss_dfl = self.loss_dfl(
+ pred_corners,
+ target_corners,
+ weight=weight_targets.expand([-1, 4]).reshape([-1]),
+ avg_factor=4.0)
+ else:
+ loss_bbox = paddle.zeros([1])
+ loss_dfl = paddle.zeros([1])
+
+ avg_factor = flatten_assigned_scores.sum()
+ if paddle.distributed.get_world_size() > 1:
+ paddle.distributed.all_reduce(avg_factor)
+ avg_factor = paddle.clip(
+ avg_factor / paddle.distributed.get_world_size(), min=1)
+ loss_vfl = self.loss_vfl(
+ flatten_cls_preds, flatten_assigned_scores, avg_factor=avg_factor)
+
+ loss_bbox = loss_bbox / avg_factor
+ loss_dfl = loss_dfl / avg_factor
+
+ loss_states = dict(
+ loss_vfl=loss_vfl, loss_bbox=loss_bbox, loss_dfl=loss_dfl)
+
+ return loss_states
+
+ def _generate_anchors(self, feats=None):
+ # only used at eval time
+ anchor_points = []
+ stride_tensor = []
+ for i, stride in enumerate(self.fpn_stride):
+ if feats is not None:
+ _, _, h, w = feats[i].shape
+ else:
+ h = math.ceil(self.eval_size[0] / stride)
+ w = math.ceil(self.eval_size[1] / stride)
+ shift_x = paddle.arange(end=w) + self.cell_offset
+ shift_y = paddle.arange(end=h) + self.cell_offset
+ shift_y, shift_x = paddle.meshgrid(shift_y, shift_x)
+ anchor_point = paddle.cast(
+ paddle.stack(
+ [shift_x, shift_y], axis=-1), dtype='float32')
+ anchor_points.append(anchor_point.reshape([-1, 2]))
+ stride_tensor.append(
+ paddle.full(
+ [h * w, 1], stride, dtype='float32'))
+ anchor_points = paddle.concat(anchor_points)
+ stride_tensor = paddle.concat(stride_tensor)
+ return anchor_points, stride_tensor
+
+ def post_process(self, head_outs, scale_factor, export_nms=True):
+ pred_scores, pred_bboxes = head_outs
+ if not export_nms:
+ return pred_bboxes, pred_scores
+ else:
+ # rescale: [h_scale, w_scale] -> [w_scale, h_scale, w_scale, h_scale]
+ scale_y, scale_x = paddle.split(scale_factor, 2, axis=-1)
+ scale_factor = paddle.concat(
+ [scale_x, scale_y, scale_x, scale_y],
+ axis=-1).reshape([-1, 1, 4])
+ # scale bbox to origin image size.
+ pred_bboxes /= scale_factor
+ bbox_pred, bbox_num, _ = self.nms(pred_bboxes, pred_scores)
+ return bbox_pred, bbox_num
diff --git a/paddlers/models/ppdet/modeling/heads/ppyoloe_head.py b/paddlers/models/ppdet/modeling/heads/ppyoloe_head.py
new file mode 100644
index 0000000..48a2af7
--- /dev/null
+++ b/paddlers/models/ppdet/modeling/heads/ppyoloe_head.py
@@ -0,0 +1,388 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddlers.models.ppdet.core.workspace import register
+
+from ..bbox_utils import batch_distance2bbox
+from ..losses import GIoULoss
+from ..initializer import bias_init_with_prob, constant_, normal_
+from ..assigners.utils import generate_anchors_for_grid_cell
+from paddlers.models.ppdet.modeling.backbones.cspresnet import ConvBNLayer
+from paddlers.models.ppdet.modeling.ops import get_static_shape, get_act_fn
+from paddlers.models.ppdet.modeling.layers import MultiClassNMS
+
+__all__ = ['PPYOLOEHead']
+
+
+class ESEAttn(nn.Layer):
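+ """Effective squeeze-and-excitation attention: channel gates computed
+ from the pooled feature reweight the input feature before a ConvBN."""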
+ def __init__(self, feat_channels, act='swish'):
+ super(ESEAttn, self).__init__()
+ self.fc = nn.Conv2D(feat_channels, feat_channels, 1)
+ self.conv = ConvBNLayer(feat_channels, feat_channels, 1, act=act)
+
+ self._init_weights()
+
+ def _init_weights(self):
+ normal_(self.fc.weight, std=0.001)
+
+ def forward(self, feat, avg_feat):
+ weight = F.sigmoid(self.fc(avg_feat))
+ return self.conv(feat * weight)
+
+
+@register
+class PPYOLOEHead(nn.Layer):
+ __shared__ = [
+ 'num_classes', 'eval_size', 'trt', 'exclude_nms', 'exclude_post_process'
+ ]
+ __inject__ = ['static_assigner', 'assigner', 'nms']
+
+ def __init__(self,
+ in_channels=[1024, 512, 256],
+ num_classes=80,
+ act='swish',
+ fpn_strides=(32, 16, 8),
+ grid_cell_scale=5.0,
+ grid_cell_offset=0.5,
+ reg_max=16,
+ static_assigner_epoch=4,
+ use_varifocal_loss=True,
+ static_assigner='ATSSAssigner',
+ assigner='TaskAlignedAssigner',
+ nms='MultiClassNMS',
+ eval_size=None,
+ loss_weight={
+ 'class': 1.0,
+ 'iou': 2.5,
+ 'dfl': 0.5,
+ },
+ trt=False,
+ exclude_nms=False,
+ exclude_post_process=False):
+ super(PPYOLOEHead, self).__init__()
+ assert len(in_channels) > 0, "len(in_channels) should > 0"
+ self.in_channels = in_channels
+ self.num_classes = num_classes
+ self.fpn_strides = fpn_strides
+ self.grid_cell_scale = grid_cell_scale
+ self.grid_cell_offset = grid_cell_offset
+ self.reg_max = reg_max
+ self.iou_loss = GIoULoss()
+ self.loss_weight = loss_weight
+ self.use_varifocal_loss = use_varifocal_loss
+ self.eval_size = eval_size
+
+ self.static_assigner_epoch = static_assigner_epoch
+ self.static_assigner = static_assigner
+ self.assigner = assigner
+ self.nms = nms
+ if isinstance(self.nms, MultiClassNMS) and trt:
+ self.nms.trt = trt
+ self.exclude_nms = exclude_nms
+ self.exclude_post_process = exclude_post_process
+ # stem
+ self.stem_cls = nn.LayerList()
+ self.stem_reg = nn.LayerList()
+ act = get_act_fn(
+ act, trt=trt) if act is None or isinstance(act,
+ (str, dict)) else act
+ for in_c in self.in_channels:
+ self.stem_cls.append(ESEAttn(in_c, act=act))
+ self.stem_reg.append(ESEAttn(in_c, act=act))
+ # pred head
+ self.pred_cls = nn.LayerList()
+ self.pred_reg = nn.LayerList()
+ for in_c in self.in_channels:
+ self.pred_cls.append(
+ nn.Conv2D(
+ in_c, self.num_classes, 3, padding=1))
+ self.pred_reg.append(
+ nn.Conv2D(
+ in_c, 4 * (self.reg_max + 1), 3, padding=1))
+ # projection conv
+ self.proj_conv = nn.Conv2D(self.reg_max + 1, 1, 1, bias_attr=False)
+ self.proj_conv.skip_quant = True
+ self._init_weights()
+
+ @classmethod
+ def from_config(cls, cfg, input_shape):
+ return {'in_channels': [i.channels for i in input_shape], }
+
+ def _init_weights(self):
+ bias_cls = bias_init_with_prob(0.01)
+ for cls_, reg_ in zip(self.pred_cls, self.pred_reg):
+ constant_(cls_.weight)
+ constant_(cls_.bias, bias_cls)
+ constant_(reg_.weight)
+ constant_(reg_.bias, 1.0)
+
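+ # fixed, non-trainable projection implementing the DFL expectation:
+ # output = sum_i softmax(logits)_i * i over the reg_max + 1 bins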
+ proj = paddle.linspace(0, self.reg_max, self.reg_max + 1).reshape(
+ [1, self.reg_max + 1, 1, 1])
+ self.proj_conv.weight.set_value(proj)
+ self.proj_conv.weight.stop_gradient = True
+ if self.eval_size:
+ anchor_points, stride_tensor = self._generate_anchors()
+ self.anchor_points = anchor_points
+ self.stride_tensor = stride_tensor
+
+ def forward_train(self, feats, targets):
+ anchors, anchor_points, num_anchors_list, stride_tensor = \
+ generate_anchors_for_grid_cell(
+ feats, self.fpn_strides, self.grid_cell_scale,
+ self.grid_cell_offset)
+
+ cls_score_list, reg_distri_list = [], []
+ for i, feat in enumerate(feats):
+ avg_feat = F.adaptive_avg_pool2d(feat, (1, 1))
+ cls_logit = self.pred_cls[i](self.stem_cls[i](feat, avg_feat) +
+ feat)
+ reg_distri = self.pred_reg[i](self.stem_reg[i](feat, avg_feat))
+ # cls and reg
+ cls_score = F.sigmoid(cls_logit)
+ cls_score_list.append(cls_score.flatten(2).transpose([0, 2, 1]))
+ reg_distri_list.append(reg_distri.flatten(2).transpose([0, 2, 1]))
+ cls_score_list = paddle.concat(cls_score_list, axis=1)
+ reg_distri_list = paddle.concat(reg_distri_list, axis=1)
+
+ return self.get_loss([
+ cls_score_list, reg_distri_list, anchors, anchor_points,
+ num_anchors_list, stride_tensor
+ ], targets)
+
+ def _generate_anchors(self, feats=None, dtype='float32'):
+ # only used at eval time
+ anchor_points = []
+ stride_tensor = []
+ for i, stride in enumerate(self.fpn_strides):
+ if feats is not None:
+ _, _, h, w = feats[i].shape
+ else:
+ h = int(self.eval_size[0] / stride)
+ w = int(self.eval_size[1] / stride)
+ shift_x = paddle.arange(end=w) + self.grid_cell_offset
+ shift_y = paddle.arange(end=h) + self.grid_cell_offset
+ shift_y, shift_x = paddle.meshgrid(shift_y, shift_x)
+ anchor_point = paddle.cast(
+ paddle.stack(
+ [shift_x, shift_y], axis=-1), dtype=dtype)
+ anchor_points.append(anchor_point.reshape([-1, 2]))
+ stride_tensor.append(paddle.full([h * w, 1], stride, dtype=dtype))
+ anchor_points = paddle.concat(anchor_points)
+ stride_tensor = paddle.concat(stride_tensor)
+ return anchor_points, stride_tensor
+
+ def forward_eval(self, feats):
+ if self.eval_size:
+ anchor_points, stride_tensor = self.anchor_points, self.stride_tensor
+ else:
+ anchor_points, stride_tensor = self._generate_anchors(feats)
+ cls_score_list, reg_dist_list = [], []
+ for i, feat in enumerate(feats):
+ _, _, h, w = feat.shape
+ l = h * w
+ avg_feat = F.adaptive_avg_pool2d(feat, (1, 1))
+ cls_logit = self.pred_cls[i](self.stem_cls[i](feat, avg_feat) +
+ feat)
+ reg_dist = self.pred_reg[i](self.stem_reg[i](feat, avg_feat))
+ reg_dist = reg_dist.reshape([-1, 4, self.reg_max + 1, l]).transpose(
+ [0, 2, 3, 1])
+ reg_dist = self.proj_conv(F.softmax(reg_dist, axis=1)).squeeze(1)
+ # cls and reg
+ cls_score = F.sigmoid(cls_logit)
+ cls_score_list.append(cls_score.reshape([-1, self.num_classes, l]))
+ reg_dist_list.append(reg_dist)
+
+ cls_score_list = paddle.concat(cls_score_list, axis=-1)
+ reg_dist_list = paddle.concat(reg_dist_list, axis=1)
+
+ return cls_score_list, reg_dist_list, anchor_points, stride_tensor
+
+ def forward(self, feats, targets=None):
+ assert len(feats) == len(self.fpn_strides), \
+ "The size of feats is not equal to size of fpn_strides"
+
+ if self.training:
+ return self.forward_train(feats, targets)
+ else:
+ return self.forward_eval(feats)
+
+ @staticmethod
+ def _focal_loss(score, label, alpha=0.25, gamma=2.0):
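+ # focal modulation: examples with a large |score - label| gap receive
+ # a larger weight (weight = (score - label)^gamma, gamma = 2 squares it)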
+ weight = (score - label).pow(gamma)
+ if alpha > 0:
+ alpha_t = alpha * label + (1 - alpha) * (1 - label)
+ weight *= alpha_t
+ loss = F.binary_cross_entropy(
+ score, label, weight=weight, reduction='sum')
+ return loss
+
+ @staticmethod
+ def _varifocal_loss(pred_score, gt_score, label, alpha=0.75, gamma=2.0):
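+ # varifocal weighting: negatives are down-weighted by alpha * p^gamma,
+ # positives are weighted by the IoU-aware target score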
+ weight = alpha * pred_score.pow(gamma) * (1 - label) + gt_score * label
+ loss = F.binary_cross_entropy(
+ pred_score, gt_score, weight=weight, reduction='sum')
+ return loss
+
+ def _bbox_decode(self, anchor_points, pred_dist):
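+ # softmax over the reg_max + 1 bins, then the fixed projection conv
+ # takes the expectation; distances (l, t, r, b) become boxes around
+ # the anchor points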
+ _, l, _ = get_static_shape(pred_dist)
+ pred_dist = F.softmax(pred_dist.reshape([-1, l, 4, self.reg_max + 1]))
+ pred_dist = self.proj_conv(pred_dist.transpose([0, 3, 1, 2])).squeeze(1)
+ return batch_distance2bbox(anchor_points, pred_dist)
+
+ def _bbox2distance(self, points, bbox):
+ x1y1, x2y2 = paddle.split(bbox, 2, -1)
+ lt = points - x1y1
+ rb = x2y2 - points
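+ # clip just below reg_max so that target_right = floor(target) + 1 in
+ # _df_loss is still a valid bin index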
+ return paddle.concat([lt, rb], -1).clip(0, self.reg_max - 0.01)
+
+ def _df_loss(self, pred_dist, target):
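+ # Distribution Focal Loss: split the continuous target between its two
+ # neighbouring integer bins, e.g. target 2.3 -> 0.7 * CE(bin 2) +
+ # 0.3 * CE(bin 3)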
+ target_left = paddle.cast(target, 'int64')
+ target_right = target_left + 1
+ weight_left = target_right.astype('float32') - target
+ weight_right = 1 - weight_left
+ loss_left = F.cross_entropy(
+ pred_dist, target_left, reduction='none') * weight_left
+ loss_right = F.cross_entropy(
+ pred_dist, target_right, reduction='none') * weight_right
+ return (loss_left + loss_right).mean(-1, keepdim=True)
+
+ def _bbox_loss(self, pred_dist, pred_bboxes, anchor_points, assigned_labels,
+ assigned_bboxes, assigned_scores, assigned_scores_sum):
+ # select positive samples mask
+ mask_positive = (assigned_labels != self.num_classes)
+ num_pos = mask_positive.sum()
+ # pos/neg loss
+ if num_pos > 0:
+ # l1 + iou
+ bbox_mask = mask_positive.unsqueeze(-1).tile([1, 1, 4])
+ pred_bboxes_pos = paddle.masked_select(pred_bboxes,
+ bbox_mask).reshape([-1, 4])
+ assigned_bboxes_pos = paddle.masked_select(
+ assigned_bboxes, bbox_mask).reshape([-1, 4])
+ bbox_weight = paddle.masked_select(
+ assigned_scores.sum(-1), mask_positive).unsqueeze(-1)
+
+ loss_l1 = F.l1_loss(pred_bboxes_pos, assigned_bboxes_pos)
+
+ loss_iou = self.iou_loss(pred_bboxes_pos,
+ assigned_bboxes_pos) * bbox_weight
+ loss_iou = loss_iou.sum() / assigned_scores_sum
+
+ dist_mask = mask_positive.unsqueeze(-1).tile(
+ [1, 1, (self.reg_max + 1) * 4])
+ pred_dist_pos = paddle.masked_select(
+ pred_dist, dist_mask).reshape([-1, 4, self.reg_max + 1])
+ assigned_ltrb = self._bbox2distance(anchor_points, assigned_bboxes)
+ assigned_ltrb_pos = paddle.masked_select(
+ assigned_ltrb, bbox_mask).reshape([-1, 4])
+ loss_dfl = self._df_loss(pred_dist_pos,
+ assigned_ltrb_pos) * bbox_weight
+ loss_dfl = loss_dfl.sum() / assigned_scores_sum
+ else:
+ loss_l1 = paddle.zeros([1])
+ loss_iou = paddle.zeros([1])
+ loss_dfl = pred_dist.sum() * 0.
+ return loss_l1, loss_iou, loss_dfl
+
+ def get_loss(self, head_outs, gt_meta):
+ pred_scores, pred_distri, anchors,\
+ anchor_points, num_anchors_list, stride_tensor = head_outs
+
+ anchor_points_s = anchor_points / stride_tensor
+ pred_bboxes = self._bbox_decode(anchor_points_s, pred_distri)
+
+ gt_labels = gt_meta['gt_class']
+ gt_bboxes = gt_meta['gt_bbox']
+ pad_gt_mask = gt_meta['pad_gt_mask']
+ # label assignment
+ if gt_meta['epoch_id'] < self.static_assigner_epoch:
+ assigned_labels, assigned_bboxes, assigned_scores = \
+ self.static_assigner(
+ anchors,
+ num_anchors_list,
+ gt_labels,
+ gt_bboxes,
+ pad_gt_mask,
+ bg_index=self.num_classes,
+ pred_bboxes=pred_bboxes.detach() * stride_tensor)
+ alpha_l = 0.25
+ else:
+ assigned_labels, assigned_bboxes, assigned_scores = \
+ self.assigner(
+ pred_scores.detach(),
+ pred_bboxes.detach() * stride_tensor,
+ anchor_points,
+ num_anchors_list,
+ gt_labels,
+ gt_bboxes,
+ pad_gt_mask,
+ bg_index=self.num_classes)
+ alpha_l = -1
+ # rescale bbox
+ assigned_bboxes /= stride_tensor
+ # cls loss
+ if self.use_varifocal_loss:
+ one_hot_label = F.one_hot(assigned_labels,
+ self.num_classes + 1)[..., :-1]
+ loss_cls = self._varifocal_loss(pred_scores, assigned_scores,
+ one_hot_label)
+ else:
+ loss_cls = self._focal_loss(pred_scores, assigned_scores, alpha_l)
+
+ assigned_scores_sum = assigned_scores.sum()
+ if paddle.distributed.get_world_size() > 1:
+ paddle.distributed.all_reduce(assigned_scores_sum)
+ assigned_scores_sum /= paddle.distributed.get_world_size()
+ assigned_scores_sum = paddle.clip(assigned_scores_sum, min=1.)
+ loss_cls /= assigned_scores_sum
+
+ loss_l1, loss_iou, loss_dfl = \
+ self._bbox_loss(pred_distri, pred_bboxes, anchor_points_s,
+ assigned_labels, assigned_bboxes, assigned_scores,
+ assigned_scores_sum)
+ loss = self.loss_weight['class'] * loss_cls + \
+ self.loss_weight['iou'] * loss_iou + \
+ self.loss_weight['dfl'] * loss_dfl
+ out_dict = {
+ 'loss': loss,
+ 'loss_cls': loss_cls,
+ 'loss_iou': loss_iou,
+ 'loss_dfl': loss_dfl,
+ 'loss_l1': loss_l1,
+ }
+ return out_dict
+
+ def post_process(self, head_outs, scale_factor):
+ pred_scores, pred_dist, anchor_points, stride_tensor = head_outs
+ pred_bboxes = batch_distance2bbox(anchor_points, pred_dist)
+ pred_bboxes *= stride_tensor
+ if self.exclude_post_process:
+ return paddle.concat(
+ [pred_bboxes, pred_scores.transpose([0, 2, 1])], axis=-1), None
+ else:
+ # scale bbox to origin
+ scale_y, scale_x = paddle.split(scale_factor, 2, axis=-1)
+ scale_factor = paddle.concat(
+ [scale_x, scale_y, scale_x, scale_y],
+ axis=-1).reshape([-1, 1, 4])
+ pred_bboxes /= scale_factor
+ if self.exclude_nms:
+ # `exclude_nms=True` just use in benchmark
+ return pred_bboxes, pred_scores
+ else:
+ bbox_pred, bbox_num, _ = self.nms(pred_bboxes, pred_scores)
+ return bbox_pred, bbox_num
diff --git a/paddlers/models/ppdet/modeling/heads/retina_head.py b/paddlers/models/ppdet/modeling/heads/retina_head.py
new file mode 100644
index 0000000..b9939f4
--- /dev/null
+++ b/paddlers/models/ppdet/modeling/heads/retina_head.py
@@ -0,0 +1,249 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle import ParamAttr
+from paddle.nn.initializer import Normal, Constant
+from paddlers.models.ppdet.modeling.bbox_utils import bbox2delta, delta2bbox
+from paddlers.models.ppdet.modeling.heads.fcos_head import FCOSFeat
+
+from paddlers.models.ppdet.core.workspace import register
+
+__all__ = ['RetinaHead']
+
+
+@register
+class RetinaFeat(FCOSFeat):
+ """We use FCOSFeat to construct conv layers in RetinaNet.
+ We rename FCOSFeat to RetinaFeat to avoid confusion.
+ """
+ pass
+
+
+@register
+class RetinaHead(nn.Layer):
+ """Used in RetinaNet proposed in paper https://arxiv.org/pdf/1708.02002.pdf
+ """
+ __shared__ = ['num_classes']
+ __inject__ = [
+ 'conv_feat', 'anchor_generator', 'bbox_assigner', 'loss_class',
+ 'loss_bbox', 'nms'
+ ]
+
+ def __init__(self,
+ num_classes=80,
+ conv_feat='RetinaFeat',
+ anchor_generator='RetinaAnchorGenerator',
+ bbox_assigner='MaxIoUAssigner',
+ loss_class='FocalLoss',
+ loss_bbox='SmoothL1Loss',
+ nms='MultiClassNMS',
+ prior_prob=0.01,
+ nms_pre=1000,
+ weights=[1., 1., 1., 1.]):
+ super(RetinaHead, self).__init__()
+ self.num_classes = num_classes
+ self.conv_feat = conv_feat
+ self.anchor_generator = anchor_generator
+ self.bbox_assigner = bbox_assigner
+ self.loss_class = loss_class
+ self.loss_bbox = loss_bbox
+ self.nms = nms
+ self.nms_pre = nms_pre
+ self.weights = weights
+
+ bias_init_value = -math.log((1 - prior_prob) / prior_prob)
+ num_anchors = self.anchor_generator.num_anchors
+ self.retina_cls = nn.Conv2D(
+ in_channels=self.conv_feat.feat_out,
+ out_channels=self.num_classes * num_anchors,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ weight_attr=ParamAttr(initializer=Normal(
+ mean=0.0, std=0.01)),
+ bias_attr=ParamAttr(initializer=Constant(value=bias_init_value)))
+ self.retina_reg = nn.Conv2D(
+ in_channels=self.conv_feat.feat_out,
+ out_channels=4 * num_anchors,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ weight_attr=ParamAttr(initializer=Normal(
+ mean=0.0, std=0.01)),
+ bias_attr=ParamAttr(initializer=Constant(value=0)))
+
+ def forward(self, neck_feats, targets=None):
+ cls_logits_list = []
+ bboxes_reg_list = []
+ for neck_feat in neck_feats:
+ conv_cls_feat, conv_reg_feat = self.conv_feat(neck_feat)
+ cls_logits = self.retina_cls(conv_cls_feat)
+ bbox_reg = self.retina_reg(conv_reg_feat)
+ cls_logits_list.append(cls_logits)
+ bboxes_reg_list.append(bbox_reg)
+
+ if self.training:
+ return self.get_loss([cls_logits_list, bboxes_reg_list], targets)
+ else:
+ return [cls_logits_list, bboxes_reg_list]
+
+ def get_loss(self, head_outputs, targets):
+ """Here we calculate loss for a batch of images.
+ We assign anchors to gts in each image and gather all the assigned
+ positive and negative samples. The loss is then calculated on the
+ gathered samples.
+ """
+ cls_logits_list, bboxes_reg_list = head_outputs
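+ # Generate anchors for every FPN level and flatten them into a single
+ # [num_total_anchors, 4] tensor.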
+ anchors = self.anchor_generator(cls_logits_list)
+ anchors = paddle.concat(anchors)
+
+ # matches: contains the matched gt index for each anchor
+ # match_labels: -1(ignore), 0(neg) or 1(pos)
+ matches_list, match_labels_list = [], []
+ # assign anchors to gts, no sampling is involved
+ for gt_bbox in targets['gt_bbox']:
+ matches, match_labels = self.bbox_assigner(anchors, gt_bbox)
+ matches_list.append(matches)
+ match_labels_list.append(match_labels)
+
+ # reshape network outputs
+ cls_logits = [
+ _.transpose([0, 2, 3, 1]).reshape([0, -1, self.num_classes])
+ for _ in cls_logits_list
+ ]
+ bboxes_reg = [
+ _.transpose([0, 2, 3, 1]).reshape([0, -1, 4])
+ for _ in bboxes_reg_list
+ ]
+ cls_logits = paddle.concat(cls_logits, axis=1)
+ bboxes_reg = paddle.concat(bboxes_reg, axis=1)
+
+ cls_pred_list, cls_tar_list = [], []
+ reg_pred_list, reg_tar_list = [], []
+ # find and gather preds and targets in each image
+ for matches, match_labels, cls_logit, bbox_reg, gt_bbox, gt_class in \
+ zip(matches_list, match_labels_list, cls_logits, bboxes_reg,
+ targets['gt_bbox'], targets['gt_class']):
+ pos_mask = (match_labels == 1)
+ neg_mask = (match_labels == 0)
+ chosen_mask = paddle.logical_or(pos_mask, neg_mask)
+
+ gt_class = gt_class.reshape([-1])
+ bg_class = paddle.to_tensor(
+ [self.num_classes], dtype=gt_class.dtype)
+ # a trick to assign num_classes to negative targets
+ gt_class = paddle.concat([gt_class, bg_class], axis=-1)
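+ # Point negative anchors at the appended background entry, so the gather
+ # below yields num_classes for them.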
+ matches = paddle.where(neg_mask,
+ paddle.full_like(matches, gt_class.size - 1),
+ matches)
+
+ cls_pred = cls_logit[chosen_mask]
+ cls_tar = gt_class[matches[chosen_mask]]
+ reg_pred = bbox_reg[pos_mask].reshape([-1, 4])
+ reg_tar = gt_bbox[matches[pos_mask]].reshape([-1, 4])
+ reg_tar = bbox2delta(anchors[pos_mask], reg_tar, self.weights)
+ cls_pred_list.append(cls_pred)
+ cls_tar_list.append(cls_tar)
+ reg_pred_list.append(reg_pred)
+ reg_tar_list.append(reg_tar)
+ cls_pred = paddle.concat(cls_pred_list)
+ cls_tar = paddle.concat(cls_tar_list)
+ reg_pred = paddle.concat(reg_pred_list)
+ reg_tar = paddle.concat(reg_tar_list)
+
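+ # Normalize both losses by the number of positive samples (at least 1).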
+ avg_factor = max(1.0, reg_pred.shape[0])
+ cls_loss = self.loss_class(
+ cls_pred, cls_tar, reduction='sum') / avg_factor
+
+ if reg_pred.shape[0] == 0:
+ reg_loss = paddle.zeros([1])
+ reg_loss.stop_gradient = False
+ else:
+ reg_loss = self.loss_bbox(
+ reg_pred, reg_tar, reduction='sum') / avg_factor
+
+ loss = cls_loss + reg_loss
+ out_dict = {
+ 'loss_cls': cls_loss,
+ 'loss_reg': reg_loss,
+ 'loss': loss,
+ }
+ return out_dict
+
+ def get_bboxes_single(self,
+ anchors,
+ cls_scores_list,
+ bbox_preds_list,
+ im_shape,
+ scale_factor,
+ rescale=True):
+ assert len(cls_scores_list) == len(bbox_preds_list)
+ mlvl_bboxes = []
+ mlvl_scores = []
+ for anchor, cls_score, bbox_pred in zip(anchors, cls_scores_list,
+ bbox_preds_list):
+ cls_score = cls_score.reshape([-1, self.num_classes])
+ bbox_pred = bbox_pred.reshape([-1, 4])
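+ # Keep only the nms_pre highest-scoring candidates per level to bound the
+ # cost of the final NMS.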
+ if self.nms_pre is not None and cls_score.shape[0] > self.nms_pre:
+ max_score = cls_score.max(axis=1)
+ _, topk_inds = max_score.topk(self.nms_pre)
+ bbox_pred = bbox_pred.gather(topk_inds)
+ anchor = anchor.gather(topk_inds)
+ cls_score = cls_score.gather(topk_inds)
+ bbox_pred = delta2bbox(bbox_pred, anchor, self.weights).squeeze()
+ mlvl_bboxes.append(bbox_pred)
+ mlvl_scores.append(F.sigmoid(cls_score))
+ mlvl_bboxes = paddle.concat(mlvl_bboxes)
+ mlvl_bboxes = paddle.squeeze(mlvl_bboxes)
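+ # scale_factor holds [scale_y, scale_x]; reverse and tile it so each of
+ # [x1, y1, x2, y2] is divided by the matching scale.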
+ if rescale:
+ mlvl_bboxes = mlvl_bboxes / paddle.concat(
+ [scale_factor[::-1], scale_factor[::-1]])
+ mlvl_scores = paddle.concat(mlvl_scores)
+ mlvl_scores = mlvl_scores.transpose([1, 0])
+ return mlvl_bboxes, mlvl_scores
+
+ def decode(self, anchors, cls_logits, bboxes_reg, im_shape, scale_factor):
+ batch_bboxes = []
+ batch_scores = []
+ for img_id in range(cls_logits[0].shape[0]):
+ num_lvls = len(cls_logits)
+ cls_scores_list = [cls_logits[i][img_id] for i in range(num_lvls)]
+ bbox_preds_list = [bboxes_reg[i][img_id] for i in range(num_lvls)]
+ bboxes, scores = self.get_bboxes_single(
+ anchors, cls_scores_list, bbox_preds_list, im_shape[img_id],
+ scale_factor[img_id])
+ batch_bboxes.append(bboxes)
+ batch_scores.append(scores)
+ batch_bboxes = paddle.stack(batch_bboxes, axis=0)
+ batch_scores = paddle.stack(batch_scores, axis=0)
+ return batch_bboxes, batch_scores
+
+ def post_process(self, head_outputs, im_shape, scale_factor):
+ cls_logits_list, bboxes_reg_list = head_outputs
+ anchors = self.anchor_generator(cls_logits_list)
+ cls_logits = [_.transpose([0, 2, 3, 1]) for _ in cls_logits_list]
+ bboxes_reg = [_.transpose([0, 2, 3, 1]) for _ in bboxes_reg_list]
+ bboxes, scores = self.decode(anchors, cls_logits, bboxes_reg, im_shape,
+ scale_factor)
+
+ bbox_pred, bbox_num, _ = self.nms(bboxes, scores)
+ return bbox_pred, bbox_num
diff --git a/paddlers/models/ppdet/modeling/heads/roi_extractor.py b/paddlers/models/ppdet/modeling/heads/roi_extractor.py
index 78646e6..7c6991b 100644
--- a/paddlers/models/ppdet/modeling/heads/roi_extractor.py
+++ b/paddlers/models/ppdet/modeling/heads/roi_extractor.py
@@ -29,7 +29,7 @@ class RoIAlign(object):
RoI Align module
For more details, please refer to the document of roi_align in
- in ppdet/modeing/ops.py
+ in https://github.com/PaddlePaddle/Paddle/blob/release/2.5/python/paddle/vision/ops.py
Args:
resolution (int): The output size, default 14
@@ -38,9 +38,9 @@ class RoIAlign(object):
default 0.0625
sampling_ratio (int): The number of sampling points in the interpolation
grid, default 0
- canconical_level (int): The referring level of FPN layer with
+ canconical_level (int): The referring level of FPN layer with
specified level. default 4
- canonical_size (int): The referring scale of FPN layer with
+ canonical_size (int): The referring scale of FPN layer with
specified scale. default 224
start_level (int): The start level of FPN layer to extract RoI feature,
default 0
@@ -76,33 +76,43 @@ class RoIAlign(object):
def __call__(self, feats, roi, rois_num):
roi = paddle.concat(roi) if len(roi) > 1 else roi[0]
if len(feats) == 1:
- rois_feat = ops.roi_align(
- feats[self.start_level],
- roi,
- self.resolution,
- self.spatial_scale[0],
- rois_num=rois_num,
+ rois_feat = paddle.vision.ops.roi_align(
+ x=feats[self.start_level],
+ boxes=roi,
+ boxes_num=rois_num,
+ output_size=self.resolution,
+ spatial_scale=self.spatial_scale[0],
aligned=self.aligned)
else:
offset = 2
k_min = self.start_level + offset
k_max = self.end_level + offset
- rois_dist, restore_index, rois_num_dist = ops.distribute_fpn_proposals(
- roi,
- k_min,
- k_max,
- self.canconical_level,
- self.canonical_size,
- rois_num=rois_num)
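+ # Prefer the public paddle.vision.ops API when available; fall back to the
+ # legacy ppdet op on older Paddle versions.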
+ if hasattr(paddle.vision.ops, "distribute_fpn_proposals"):
+ rois_dist, restore_index, rois_num_dist = paddle.vision.ops.distribute_fpn_proposals(
+ roi,
+ k_min,
+ k_max,
+ self.canconical_level,
+ self.canonical_size,
+ rois_num=rois_num)
+ else:
+ rois_dist, restore_index, rois_num_dist = ops.distribute_fpn_proposals(
+ roi,
+ k_min,
+ k_max,
+ self.canconical_level,
+ self.canonical_size,
+ rois_num=rois_num)
+
rois_feat_list = []
for lvl in range(self.start_level, self.end_level + 1):
- roi_feat = ops.roi_align(
- feats[lvl],
- rois_dist[lvl],
- self.resolution,
- self.spatial_scale[lvl],
+ roi_feat = paddle.vision.ops.roi_align(
+ x=feats[lvl],
+ boxes=rois_dist[lvl],
+ boxes_num=rois_num_dist[lvl],
+ output_size=self.resolution,
+ spatial_scale=self.spatial_scale[lvl],
sampling_ratio=self.sampling_ratio,
- rois_num=rois_num_dist[lvl],
aligned=self.aligned)
rois_feat_list.append(roi_feat)
rois_feat_shuffle = paddle.concat(rois_feat_list)
diff --git a/paddlers/models/ppdet/modeling/heads/s2anet_head.py b/paddlers/models/ppdet/modeling/heads/s2anet_head.py
index e2e745d..f13af64 100644
--- a/paddlers/models/ppdet/modeling/heads/s2anet_head.py
+++ b/paddlers/models/ppdet/modeling/heads/s2anet_head.py
@@ -20,181 +20,13 @@ import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn.initializer import Normal, Constant
from paddlers.models.ppdet.core.workspace import register
-from paddlers.models.ppdet.modeling import ops
-from paddlers.models.ppdet.modeling import bbox_utils
from paddlers.models.ppdet.modeling.proposal_generator.target_layer import RBoxAssigner
+from paddlers.models.ppdet.modeling.proposal_generator.anchor_generator import S2ANetAnchorGenerator
+from paddlers.models.ppdet.modeling.layers import AlignConv
+from ..cls_utils import _get_class_default_kwargs
import numpy as np
-class S2ANetAnchorGenerator(nn.Layer):
- """
- AnchorGenerator by paddle
- """
-
- def __init__(self, base_size, scales, ratios, scale_major=True, ctr=None):
- super(S2ANetAnchorGenerator, self).__init__()
- self.base_size = base_size
- self.scales = paddle.to_tensor(scales)
- self.ratios = paddle.to_tensor(ratios)
- self.scale_major = scale_major
- self.ctr = ctr
- self.base_anchors = self.gen_base_anchors()
-
- @property
- def num_base_anchors(self):
- return self.base_anchors.shape[0]
-
- def gen_base_anchors(self):
- w = self.base_size
- h = self.base_size
- if self.ctr is None:
- x_ctr = 0.5 * (w - 1)
- y_ctr = 0.5 * (h - 1)
- else:
- x_ctr, y_ctr = self.ctr
-
- h_ratios = paddle.sqrt(self.ratios)
- w_ratios = 1 / h_ratios
- if self.scale_major:
- ws = (w * w_ratios[:] * self.scales[:]).reshape([-1])
- hs = (h * h_ratios[:] * self.scales[:]).reshape([-1])
- else:
- ws = (w * self.scales[:] * w_ratios[:]).reshape([-1])
- hs = (h * self.scales[:] * h_ratios[:]).reshape([-1])
-
- base_anchors = paddle.stack(
- [
- x_ctr - 0.5 * (ws - 1), y_ctr - 0.5 * (hs - 1),
- x_ctr + 0.5 * (ws - 1), y_ctr + 0.5 * (hs - 1)
- ],
- axis=-1)
- base_anchors = paddle.round(base_anchors)
- return base_anchors
-
- def _meshgrid(self, x, y, row_major=True):
- yy, xx = paddle.meshgrid(y, x)
- yy = yy.reshape([-1])
- xx = xx.reshape([-1])
- if row_major:
- return xx, yy
- else:
- return yy, xx
-
- def forward(self, featmap_size, stride=16):
- # featmap_size*stride project it to original area
-
- feat_h = featmap_size[0]
- feat_w = featmap_size[1]
- shift_x = paddle.arange(0, feat_w, 1, 'int32') * stride
- shift_y = paddle.arange(0, feat_h, 1, 'int32') * stride
- shift_xx, shift_yy = self._meshgrid(shift_x, shift_y)
- shifts = paddle.stack([shift_xx, shift_yy, shift_xx, shift_yy], axis=-1)
-
- all_anchors = self.base_anchors[:, :] + shifts[:, :]
- all_anchors = all_anchors.reshape([feat_h * feat_w, 4])
- return all_anchors
-
- def valid_flags(self, featmap_size, valid_size):
- feat_h, feat_w = featmap_size
- valid_h, valid_w = valid_size
- assert valid_h <= feat_h and valid_w <= feat_w
- valid_x = paddle.zeros([feat_w], dtype='int32')
- valid_y = paddle.zeros([feat_h], dtype='int32')
- valid_x[:valid_w] = 1
- valid_y[:valid_h] = 1
- valid_xx, valid_yy = self._meshgrid(valid_x, valid_y)
- valid = valid_xx & valid_yy
- valid = paddle.reshape(valid, [-1, 1])
- valid = paddle.expand(valid, [-1, self.num_base_anchors]).reshape([-1])
- return valid
-
-
-class AlignConv(nn.Layer):
- def __init__(self, in_channels, out_channels, kernel_size=3, groups=1):
- super(AlignConv, self).__init__()
- self.kernel_size = kernel_size
- self.align_conv = paddle.vision.ops.DeformConv2D(
- in_channels,
- out_channels,
- kernel_size=self.kernel_size,
- padding=(self.kernel_size - 1) // 2,
- groups=groups,
- weight_attr=ParamAttr(initializer=Normal(0, 0.01)),
- bias_attr=None)
-
- @paddle.no_grad()
- def get_offset(self, anchors, featmap_size, stride):
- """
- Args:
- anchors: [M,5] xc,yc,w,h,angle
- featmap_size: (feat_h, feat_w)
- stride: 8
- Returns:
-
- """
- anchors = paddle.reshape(anchors, [-1, 5]) # (NA,5)
- dtype = anchors.dtype
- feat_h = featmap_size[0]
- feat_w = featmap_size[1]
- pad = (self.kernel_size - 1) // 2
- idx = paddle.arange(-pad, pad + 1, dtype=dtype)
-
- yy, xx = paddle.meshgrid(idx, idx)
- xx = paddle.reshape(xx, [-1])
- yy = paddle.reshape(yy, [-1])
-
- # get sampling locations of default conv
- xc = paddle.arange(0, feat_w, dtype=dtype)
- yc = paddle.arange(0, feat_h, dtype=dtype)
- yc, xc = paddle.meshgrid(yc, xc)
-
- xc = paddle.reshape(xc, [-1, 1])
- yc = paddle.reshape(yc, [-1, 1])
- x_conv = xc + xx
- y_conv = yc + yy
-
- # get sampling locations of anchors
- # x_ctr, y_ctr, w, h, a = np.unbind(anchors, dim=1)
- x_ctr = anchors[:, 0]
- y_ctr = anchors[:, 1]
- w = anchors[:, 2]
- h = anchors[:, 3]
- a = anchors[:, 4]
-
- x_ctr = paddle.reshape(x_ctr, [-1, 1])
- y_ctr = paddle.reshape(y_ctr, [-1, 1])
- w = paddle.reshape(w, [-1, 1])
- h = paddle.reshape(h, [-1, 1])
- a = paddle.reshape(a, [-1, 1])
-
- x_ctr = x_ctr / stride
- y_ctr = y_ctr / stride
- w_s = w / stride
- h_s = h / stride
- cos, sin = paddle.cos(a), paddle.sin(a)
- dw, dh = w_s / self.kernel_size, h_s / self.kernel_size
- x, y = dw * xx, dh * yy
- xr = cos * x - sin * y
- yr = sin * x + cos * y
- x_anchor, y_anchor = xr + x_ctr, yr + y_ctr
- # get offset filed
- offset_x = x_anchor - x_conv
- offset_y = y_anchor - y_conv
- offset = paddle.stack([offset_y, offset_x], axis=-1)
- offset = paddle.reshape(
- offset, [feat_h * feat_w, self.kernel_size * self.kernel_size * 2])
- offset = paddle.transpose(offset, [1, 0])
- offset = paddle.reshape(
- offset,
- [1, self.kernel_size * self.kernel_size * 2, feat_h, feat_w])
- return offset
-
- def forward(self, x, refine_anchors, featmap_size, stride):
- offset = self.get_offset(refine_anchors, featmap_size, stride)
- x = F.relu(self.align_conv(x, offset))
- return x
-
-
@register
class S2ANetHead(nn.Layer):
"""
@@ -215,7 +47,7 @@ class S2ANetHead(nn.Layer):
reg_loss_weight (list): loss weight for regression
"""
__shared__ = ['num_classes']
- __inject__ = ['anchor_assign']
+ __inject__ = ['anchor_assign', 'nms']
def __init__(self,
stacked_convs=2,
@@ -230,10 +62,12 @@ class S2ANetHead(nn.Layer):
align_conv_type='AlignConv',
align_conv_size=3,
use_sigmoid_cls=True,
- anchor_assign=RBoxAssigner().__dict__,
+ anchor_assign=_get_class_default_kwargs(RBoxAssigner),
reg_loss_weight=[1.0, 1.0, 1.0, 1.0, 1.1],
cls_loss_weight=[1.1, 1.05],
- reg_loss_type='l1'):
+ reg_loss_type='l1',
+ nms_pre=2000,
+ nms='MultiClassNMS'):
super(S2ANetHead, self).__init__()
self.stacked_convs = stacked_convs
self.feat_in = feat_in
@@ -251,7 +85,7 @@ class S2ANetHead(nn.Layer):
self.align_conv_size = align_conv_size
self.use_sigmoid_cls = use_sigmoid_cls
- self.cls_out_channels = num_classes if self.use_sigmoid_cls else 1
+ self.cls_out_channels = num_classes if self.use_sigmoid_cls else num_classes + 1
self.sampling = False
self.anchor_assign = anchor_assign
self.reg_loss_weight = reg_loss_weight
@@ -259,7 +93,13 @@ class S2ANetHead(nn.Layer):
self.alpha = 1.0
self.beta = 1.0
self.reg_loss_type = reg_loss_type
- self.s2anet_head_out = None
+ self.nms_pre = nms_pre
+ self.nms = nms
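+ # Placeholder outputs returned when NMS keeps nothing, so downstream code
+ # always receives a non-empty result with a fixed layout.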
+ self.fake_bbox = paddle.to_tensor(
+ np.array(
+ [[-1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]],
+ dtype='float32'))
+ self.fake_bbox_num = paddle.to_tensor(np.array([1], dtype='int32'))
# anchor
self.anchor_generators = []
@@ -402,64 +242,49 @@ class S2ANetHead(nn.Layer):
weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
bias_attr=ParamAttr(initializer=Constant(0)))
- self.featmap_sizes = []
- self.base_anchors_list = []
- self.refine_anchor_list = []
+ def forward(self, feats, targets=None):
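+ # FAM predicts coarse rotated boxes used to refine the anchors; ODM then
+ # classifies and regresses on features aligned to those refined anchors.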
+ fam_reg_list, fam_cls_list = [], []
+ odm_reg_list, odm_cls_list = [], []
+ num_anchors_list, base_anchors_list, refine_anchors_list = [], [], []
- def forward(self, feats):
- fam_reg_branch_list = []
- fam_cls_branch_list = []
+ for i, feat in enumerate(feats):
+ # get shape
+ B = feat.shape[0]
+ H, W = paddle.shape(feat)[2], paddle.shape(feat)[3]
- odm_reg_branch_list = []
- odm_cls_branch_list = []
+ NA = H * W
+ num_anchors_list.append(NA)
- self.featmap_sizes_list = []
- self.base_anchors_list = []
- self.refine_anchor_list = []
-
- for feat_idx in range(len(feats)):
- feat = feats[feat_idx]
fam_cls_feat = self.fam_cls_convs(feat)
-
fam_cls = self.fam_cls(fam_cls_feat)
# [N, CLS, H, W] --> [N, H, W, CLS]
- fam_cls = fam_cls.transpose([0, 2, 3, 1])
- fam_cls_reshape = paddle.reshape(
- fam_cls, [fam_cls.shape[0], -1, self.cls_out_channels])
- fam_cls_branch_list.append(fam_cls_reshape)
+ fam_cls = fam_cls.transpose([0, 2, 3, 1]).reshape(
+ [B, NA, self.cls_out_channels])
+ fam_cls_list.append(fam_cls)
fam_reg_feat = self.fam_reg_convs(feat)
-
fam_reg = self.fam_reg(fam_reg_feat)
# [N, 5, H, W] --> [N, H, W, 5]
- fam_reg = fam_reg.transpose([0, 2, 3, 1])
- fam_reg_reshape = paddle.reshape(fam_reg, [fam_reg.shape[0], -1, 5])
- fam_reg_branch_list.append(fam_reg_reshape)
+ fam_reg = fam_reg.transpose([0, 2, 3, 1]).reshape([B, NA, 5])
+ fam_reg_list.append(fam_reg)
# prepare anchor
- featmap_size = (paddle.shape(feat)[2], paddle.shape(feat)[3])
- self.featmap_sizes_list.append(featmap_size)
- init_anchors = self.anchor_generators[feat_idx](
- featmap_size, self.anchor_strides[feat_idx])
-
- init_anchors = paddle.to_tensor(init_anchors, dtype='float32')
- NA = featmap_size[0] * featmap_size[1]
- init_anchors = paddle.reshape(init_anchors, [NA, 4])
- init_anchors = self.rect2rbox(init_anchors)
- self.base_anchors_list.append(init_anchors)
+ init_anchors = self.anchor_generators[i]((H, W),
+ self.anchor_strides[i])
+ init_anchors = init_anchors.reshape([1, NA, 5])
+ base_anchors_list.append(init_anchors.squeeze(0))
if self.training:
refine_anchor = self.bbox_decode(fam_reg.detach(), init_anchors)
else:
refine_anchor = self.bbox_decode(fam_reg, init_anchors)
- self.refine_anchor_list.append(refine_anchor)
+ refine_anchors_list.append(refine_anchor)
if self.align_conv_type == 'AlignConv':
align_feat = self.align_conv(feat,
- refine_anchor.clone(),
- featmap_size,
- self.anchor_strides[feat_idx])
+ refine_anchor.clone(), (H, W),
+ self.anchor_strides[i])
elif self.align_conv_type == 'DCN':
align_offset = self.align_conv_offset(feat)
align_feat = self.align_conv(feat, align_offset)
@@ -473,39 +298,140 @@ class S2ANetHead(nn.Layer):
odm_reg_feat = self.odm_reg_convs(odm_reg_feat)
odm_cls_feat = self.odm_cls_convs(odm_cls_feat)
- odm_cls_score = self.odm_cls(odm_cls_feat)
+ odm_cls = self.odm_cls(odm_cls_feat)
# [N, CLS, H, W] --> [N, H, W, CLS]
- odm_cls_score = odm_cls_score.transpose([0, 2, 3, 1])
- odm_cls_score_shape = odm_cls_score.shape
- odm_cls_score_reshape = paddle.reshape(odm_cls_score, [
- odm_cls_score_shape[0], odm_cls_score_shape[1] *
- odm_cls_score_shape[2], self.cls_out_channels
+ odm_cls = odm_cls.transpose([0, 2, 3, 1]).reshape(
+ [B, NA, self.cls_out_channels])
+ odm_cls_list.append(odm_cls)
+
+ odm_reg = self.odm_reg(odm_reg_feat)
+ # [N, 5, H, W] --> [N, H, W, 5]
+ odm_reg = odm_reg.transpose([0, 2, 3, 1]).reshape([B, NA, 5])
+ odm_reg_list.append(odm_reg)
+
+ if self.training:
+ return self.get_loss([
+ fam_cls_list, fam_reg_list, odm_cls_list, odm_reg_list,
+ num_anchors_list, base_anchors_list, refine_anchors_list
+ ], targets)
+ else:
+ odm_bboxes_list = []
+ for odm_reg, refine_anchor in zip(odm_reg_list,
+ refine_anchors_list):
+ odm_bboxes = self.bbox_decode(odm_reg, refine_anchor)
+ odm_bboxes_list.append(odm_bboxes)
+ return [odm_bboxes_list, odm_cls_list]
+
+ def get_bboxes(self, head_outs):
+ pred_bboxes_list, pred_scores_list = head_outs
+ batch = paddle.shape(pred_scores_list[0])[0]
+ bboxes, bbox_num = [], []
+ for i in range(batch):
+ pred_scores_per_image = [t[i] for t in pred_scores_list]
+ pred_bboxes_per_image = [t[i] for t in pred_bboxes_list]
+ bbox_per_image, bbox_num_per_image = self.get_bboxes_single(
+ pred_scores_per_image, pred_bboxes_per_image)
+ bboxes.append(bbox_per_image)
+ bbox_num.append(bbox_num_per_image)
+
+ bboxes = paddle.concat(bboxes)
+ bbox_num = paddle.concat(bbox_num)
+ return bboxes, bbox_num
+
+ def get_pred(self, bboxes, bbox_num, im_shape, scale_factor):
+ """
+ Rescale, clip and filter the bboxes output by NMS to get the
+ final prediction.
+ Args:
+ bboxes(Tensor): bboxes [N, 10]
+ bbox_num(Tensor): bbox_num
+ im_shape(Tensor): [1, 2]
+ scale_factor(Tensor): [1, 2]
+ Returns:
+ bbox_pred(Tensor): The prediction with shape [N, 10], including
+ labels, scores and bboxes, where the bbox coordinates
+ correspond to the original image size.
+ """
+ origin_shape = paddle.floor(im_shape / scale_factor + 0.5)
+
+ origin_shape_list = []
+ scale_factor_list = []
+ # scale_factor: scale_y, scale_x
+ for i in range(bbox_num.shape[0]):
+ expand_shape = paddle.expand(origin_shape[i:i + 1, :],
+ [bbox_num[i], 2])
+ scale_y, scale_x = scale_factor[i][0], scale_factor[i][1]
+ scale = paddle.concat([
+ scale_x, scale_y, scale_x, scale_y, scale_x, scale_y, scale_x,
+ scale_y
])
+ expand_scale = paddle.expand(scale, [bbox_num[i], 8])
+ origin_shape_list.append(expand_shape)
+ scale_factor_list.append(expand_scale)
+
+ origin_shape_list = paddle.concat(origin_shape_list)
+ scale_factor_list = paddle.concat(scale_factor_list)
+
+ # bboxes: [N, 10], label, score, bbox
+ pred_label_score = bboxes[:, 0:2]
+ pred_bbox = bboxes[:, 2:]
+
+ # rescale bbox to original image
+ pred_bbox = pred_bbox.reshape([-1, 8])
+ scaled_bbox = pred_bbox / scale_factor_list
+ origin_h = origin_shape_list[:, 0]
+ origin_w = origin_shape_list[:, 1]
+
+ bboxes = scaled_bbox
+ zeros = paddle.zeros_like(origin_h)
+ x1 = paddle.maximum(paddle.minimum(bboxes[:, 0], origin_w - 1), zeros)
+ y1 = paddle.maximum(paddle.minimum(bboxes[:, 1], origin_h - 1), zeros)
+ x2 = paddle.maximum(paddle.minimum(bboxes[:, 2], origin_w - 1), zeros)
+ y2 = paddle.maximum(paddle.minimum(bboxes[:, 3], origin_h - 1), zeros)
+ x3 = paddle.maximum(paddle.minimum(bboxes[:, 4], origin_w - 1), zeros)
+ y3 = paddle.maximum(paddle.minimum(bboxes[:, 5], origin_h - 1), zeros)
+ x4 = paddle.maximum(paddle.minimum(bboxes[:, 6], origin_w - 1), zeros)
+ y4 = paddle.maximum(paddle.minimum(bboxes[:, 7], origin_h - 1), zeros)
+ pred_bbox = paddle.stack([x1, y1, x2, y2, x3, y3, x4, y4], axis=-1)
+ pred_result = paddle.concat([pred_label_score, pred_bbox], axis=1)
+ return pred_result
+
+ def get_bboxes_single(self, cls_score_list, bbox_pred_list):
+ mlvl_bboxes = []
+ mlvl_scores = []
- odm_cls_branch_list.append(odm_cls_score_reshape)
+ for cls_score, bbox_pred in zip(cls_score_list, bbox_pred_list):
+ if self.use_sigmoid_cls:
+ scores = F.sigmoid(cls_score)
+ else:
+ scores = F.softmax(cls_score, axis=-1)
- odm_bbox_pred = self.odm_reg(odm_reg_feat)
- # [N, 5, H, W] --> [N, H, W, 5]
- odm_bbox_pred = odm_bbox_pred.transpose([0, 2, 3, 1])
- odm_bbox_pred_reshape = paddle.reshape(odm_bbox_pred, [-1, 5])
- odm_bbox_pred_reshape = paddle.unsqueeze(
- odm_bbox_pred_reshape, axis=0)
- odm_reg_branch_list.append(odm_bbox_pred_reshape)
-
- self.s2anet_head_out = (fam_cls_branch_list, fam_reg_branch_list,
- odm_cls_branch_list, odm_reg_branch_list)
- return self.s2anet_head_out
-
- def get_prediction(self, nms_pre=2000):
- refine_anchors = self.refine_anchor_list
- fam_cls_branch_list = self.s2anet_head_out[0]
- fam_reg_branch_list = self.s2anet_head_out[1]
- odm_cls_branch_list = self.s2anet_head_out[2]
- odm_reg_branch_list = self.s2anet_head_out[3]
- pred_scores, pred_bboxes = self.get_bboxes(
- odm_cls_branch_list, odm_reg_branch_list, refine_anchors, nms_pre,
- self.cls_out_channels, self.use_sigmoid_cls)
- return pred_scores, pred_bboxes
+ if scores.shape[0] > self.nms_pre:
+ # Get maximum scores for foreground classes.
+ if self.use_sigmoid_cls:
+ max_scores = paddle.max(scores, axis=1)
+ else:
+ max_scores = paddle.max(scores[:, :-1], axis=1)
+
+ topk_val, topk_inds = paddle.topk(max_scores, self.nms_pre)
+ bbox_pred = paddle.gather(bbox_pred, topk_inds)
+ scores = paddle.gather(scores, topk_inds)
+
+ mlvl_bboxes.append(bbox_pred)
+ mlvl_scores.append(scores)
+
+ mlvl_bboxes = paddle.concat(mlvl_bboxes)
+ mlvl_scores = paddle.concat(mlvl_scores)
+
+ mlvl_polys = self.rbox2poly(mlvl_bboxes).unsqueeze(0)
+ mlvl_scores = paddle.transpose(mlvl_scores, [1, 0]).unsqueeze(0)
+
+ bbox, bbox_num, _ = self.nms(mlvl_polys, mlvl_scores)
+ if bbox.shape[0] <= 0:
+ bbox = self.fake_bbox
+ bbox_num = self.fake_bbox_num
+
+ return bbox, bbox_num
def smooth_l1_loss(self, pred, label, delta=1.0 / 9.0):
"""
@@ -522,10 +448,10 @@ class S2ANetHead(nn.Layer):
diff - 0.5 * delta)
return loss
- def get_fam_loss(self, fam_target, s2anet_head_out, reg_loss_type='gwd'):
+ def get_fam_loss(self, fam_target, s2anet_head_out, reg_loss_type='l1'):
(labels, label_weights, bbox_targets, bbox_weights, bbox_gt_bboxes,
pos_inds, neg_inds) = fam_target
- fam_cls_branch_list, fam_reg_branch_list, odm_cls_branch_list, odm_reg_branch_list = s2anet_head_out
+ fam_cls_branch_list, fam_reg_branch_list, odm_cls_branch_list, odm_reg_branch_list, num_anchors_list = s2anet_head_out
fam_cls_losses = []
fam_bbox_losses = []
@@ -534,9 +460,7 @@ class S2ANetHead(nn.Layer):
neg_inds) if self.sampling else len(pos_inds)
num_total_samples = max(1, num_total_samples)
- for idx, feat_size in enumerate(self.featmap_sizes_list):
- feat_anchor_num = feat_size[0] * feat_size[1]
-
+ for idx, feat_anchor_num in enumerate(num_anchors_list):
# step1: get data
feat_labels = labels[st_idx:st_idx + feat_anchor_num]
feat_label_weights = label_weights[st_idx:st_idx + feat_anchor_num]
@@ -593,39 +517,8 @@ class S2ANetHead(nn.Layer):
feat_bbox_weights = paddle.to_tensor(
feat_bbox_weights, stop_gradient=True)
- if reg_loss_type == 'l1':
- fam_bbox = fam_bbox * feat_bbox_weights
- fam_bbox_total = paddle.sum(fam_bbox) / num_total_samples
- elif reg_loss_type == 'iou' or reg_loss_type == 'gwd':
- fam_bbox = paddle.sum(fam_bbox, axis=-1)
- feat_bbox_weights = paddle.sum(feat_bbox_weights, axis=-1)
- try:
- from rbox_iou_ops import rbox_iou
- except Exception as e:
- print("import custom_ops error, try install rbox_iou_ops " \
- "following ppdet/ext_op/README.md", e)
- sys.stdout.flush()
- sys.exit(-1)
- # calc iou
- fam_bbox_decode = self.delta2rbox(self.base_anchors_list[idx],
- fam_bbox_pred)
- bbox_gt_bboxes = paddle.to_tensor(
- bbox_gt_bboxes,
- dtype=fam_bbox_decode.dtype,
- place=fam_bbox_decode.place)
- bbox_gt_bboxes.stop_gradient = True
- iou = rbox_iou(fam_bbox_decode, bbox_gt_bboxes)
- iou = paddle.diag(iou)
-
- if reg_loss_type == 'gwd':
- bbox_gt_bboxes_level = bbox_gt_bboxes[st_idx:st_idx +
- feat_anchor_num, :]
- fam_bbox_total = self.gwd_loss(fam_bbox_decode,
- bbox_gt_bboxes_level)
- fam_bbox_total = fam_bbox_total * feat_bbox_weights
- fam_bbox_total = paddle.sum(
- fam_bbox_total) / num_total_samples
-
+ fam_bbox = fam_bbox * feat_bbox_weights
+ fam_bbox_total = paddle.sum(fam_bbox) / num_total_samples
fam_bbox_losses.append(fam_bbox_total)
st_idx += feat_anchor_num
@@ -636,10 +529,10 @@ class S2ANetHead(nn.Layer):
fam_reg_loss = paddle.add_n(fam_bbox_losses)
return fam_cls_loss, fam_reg_loss
- def get_odm_loss(self, odm_target, s2anet_head_out, reg_loss_type='gwd'):
+ def get_odm_loss(self, odm_target, s2anet_head_out, reg_loss_type='l1'):
(labels, label_weights, bbox_targets, bbox_weights, bbox_gt_bboxes,
pos_inds, neg_inds) = odm_target
- fam_cls_branch_list, fam_reg_branch_list, odm_cls_branch_list, odm_reg_branch_list = s2anet_head_out
+ fam_cls_branch_list, fam_reg_branch_list, odm_cls_branch_list, odm_reg_branch_list, num_anchors_list = s2anet_head_out
odm_cls_losses = []
odm_bbox_losses = []
@@ -648,9 +541,7 @@ class S2ANetHead(nn.Layer):
neg_inds) if self.sampling else len(pos_inds)
num_total_samples = max(1, num_total_samples)
- for idx, feat_size in enumerate(self.featmap_sizes_list):
- feat_anchor_num = feat_size[0] * feat_size[1]
-
+ for idx, feat_anchor_num in enumerate(num_anchors_list):
# step1: get data
feat_labels = labels[st_idx:st_idx + feat_anchor_num]
feat_label_weights = label_weights[st_idx:st_idx + feat_anchor_num]
@@ -708,38 +599,8 @@ class S2ANetHead(nn.Layer):
feat_bbox_weights = paddle.to_tensor(
feat_bbox_weights, stop_gradient=True)
- if reg_loss_type == 'l1':
- odm_bbox = odm_bbox * feat_bbox_weights
- odm_bbox_total = paddle.sum(odm_bbox) / num_total_samples
- elif reg_loss_type == 'iou' or reg_loss_type == 'gwd':
- odm_bbox = paddle.sum(odm_bbox, axis=-1)
- feat_bbox_weights = paddle.sum(feat_bbox_weights, axis=-1)
- try:
- from rbox_iou_ops import rbox_iou
- except Exception as e:
- print("import custom_ops error, try install rbox_iou_ops " \
- "following ppdet/ext_op/README.md", e)
- sys.stdout.flush()
- sys.exit(-1)
- # calc iou
- odm_bbox_decode = self.delta2rbox(self.refine_anchor_list[idx],
- odm_bbox_pred)
- bbox_gt_bboxes = paddle.to_tensor(
- bbox_gt_bboxes,
- dtype=odm_bbox_decode.dtype,
- place=odm_bbox_decode.place)
- bbox_gt_bboxes.stop_gradient = True
- iou = rbox_iou(odm_bbox_decode, bbox_gt_bboxes)
- iou = paddle.diag(iou)
-
- if reg_loss_type == 'gwd':
- bbox_gt_bboxes_level = bbox_gt_bboxes[st_idx:st_idx +
- feat_anchor_num, :]
- odm_bbox_total = self.gwd_loss(odm_bbox_decode,
- bbox_gt_bboxes_level)
- odm_bbox_total = odm_bbox_total * feat_bbox_weights
- odm_bbox_total = paddle.sum(
- odm_bbox_total) / num_total_samples
+ odm_bbox = odm_bbox * feat_bbox_weights
+ odm_bbox_total = paddle.sum(odm_bbox) / num_total_samples
odm_bbox_losses.append(odm_bbox_total)
st_idx += feat_anchor_num
@@ -751,8 +612,9 @@ class S2ANetHead(nn.Layer):
odm_reg_loss = paddle.add_n(odm_bbox_losses)
return odm_cls_loss, odm_reg_loss
- def get_loss(self, inputs):
- # inputs: im_id image im_shape scale_factor gt_bbox gt_class is_crowd
+ def get_loss(self, head_outs, inputs):
+ fam_cls_list, fam_reg_list, odm_cls_list, odm_reg_list, \
+ num_anchors_list, base_anchors_list, refine_anchors_list = head_outs
# compute loss
fam_cls_loss_lst = []
@@ -760,29 +622,27 @@ class S2ANetHead(nn.Layer):
odm_cls_loss_lst = []
odm_reg_loss_lst = []
- im_shape = inputs['im_shape']
- for im_id in range(im_shape.shape[0]):
- np_im_shape = inputs['im_shape'][im_id].numpy()
- np_scale_factor = inputs['scale_factor'][im_id].numpy()
+ batch = len(inputs['gt_rbox'])
+ for i in range(batch):
# data_format: (xc, yc, w, h, theta)
- gt_bboxes = inputs['gt_rbox'][im_id].numpy()
- gt_labels = inputs['gt_class'][im_id].numpy()
- is_crowd = inputs['is_crowd'][im_id].numpy()
+ gt_mask = inputs['pad_gt_mask'][i, :, 0]
+ gt_idx = paddle.nonzero(gt_mask).squeeze(-1)
+ gt_bboxes = paddle.gather(inputs['gt_rbox'][i], gt_idx).numpy()
+ gt_labels = paddle.gather(inputs['gt_class'][i], gt_idx).numpy()
+ is_crowd = paddle.gather(inputs['is_crowd'][i], gt_idx).numpy()
gt_labels = gt_labels + 1
- # featmap_sizes
- anchors_list_all = np.concatenate(self.base_anchors_list)
-
- # get im_feat
- fam_cls_feats_list = [e[im_id] for e in self.s2anet_head_out[0]]
- fam_reg_feats_list = [e[im_id] for e in self.s2anet_head_out[1]]
- odm_cls_feats_list = [e[im_id] for e in self.s2anet_head_out[2]]
- odm_reg_feats_list = [e[im_id] for e in self.s2anet_head_out[3]]
- im_s2anet_head_out = (fam_cls_feats_list, fam_reg_feats_list,
- odm_cls_feats_list, odm_reg_feats_list)
+ anchors_per_image = np.concatenate(base_anchors_list)
+ fam_cls_per_image = [t[i] for t in fam_cls_list]
+ fam_reg_per_image = [t[i] for t in fam_reg_list]
+ odm_cls_per_image = [t[i] for t in odm_cls_list]
+ odm_reg_per_image = [t[i] for t in odm_reg_list]
+ im_s2anet_head_out = (fam_cls_per_image, fam_reg_per_image,
+ odm_cls_per_image, odm_reg_per_image,
+ num_anchors_list)
# FAM
- im_fam_target = self.anchor_assign(anchors_list_all, gt_bboxes,
+ im_fam_target = self.anchor_assign(anchors_per_image, gt_bboxes,
gt_labels, is_crowd)
if im_fam_target is not None:
im_fam_cls_loss, im_fam_reg_loss = self.get_fam_loss(
@@ -791,11 +651,10 @@ class S2ANetHead(nn.Layer):
fam_reg_loss_lst.append(im_fam_reg_loss)
# ODM
- np_refine_anchors_list = paddle.concat(
- self.refine_anchor_list).numpy()
- np_refine_anchors_list = np.concatenate(np_refine_anchors_list)
- np_refine_anchors_list = np_refine_anchors_list.reshape(-1, 5)
- im_odm_target = self.anchor_assign(np_refine_anchors_list,
+ refine_anchors_per_image = [t[i] for t in refine_anchors_list]
+ refine_anchors_per_image = paddle.concat(
+ refine_anchors_per_image).numpy()
+ im_odm_target = self.anchor_assign(refine_anchors_per_image,
gt_bboxes, gt_labels, is_crowd)
if im_odm_target is not None:
@@ -803,116 +662,38 @@ class S2ANetHead(nn.Layer):
im_odm_target, im_s2anet_head_out, self.reg_loss_type)
odm_cls_loss_lst.append(im_odm_cls_loss)
odm_reg_loss_lst.append(im_odm_reg_loss)
- fam_cls_loss = paddle.add_n(fam_cls_loss_lst)
- fam_reg_loss = paddle.add_n(fam_reg_loss_lst)
- odm_cls_loss = paddle.add_n(odm_cls_loss_lst)
- odm_reg_loss = paddle.add_n(odm_reg_loss_lst)
+
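+ # Losses are summed per image above; average them over the batch here.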
+ fam_cls_loss = paddle.add_n(fam_cls_loss_lst) / batch
+ fam_reg_loss = paddle.add_n(fam_reg_loss_lst) / batch
+ odm_cls_loss = paddle.add_n(odm_cls_loss_lst) / batch
+ odm_reg_loss = paddle.add_n(odm_reg_loss_lst) / batch
+ loss = fam_cls_loss + fam_reg_loss + odm_cls_loss + odm_reg_loss
+
return {
+ 'loss': loss,
'fam_cls_loss': fam_cls_loss,
'fam_reg_loss': fam_reg_loss,
'odm_cls_loss': odm_cls_loss,
'odm_reg_loss': odm_reg_loss
}
- def get_bboxes(self, cls_score_list, bbox_pred_list, mlvl_anchors, nms_pre,
- cls_out_channels, use_sigmoid_cls):
- assert len(cls_score_list) == len(bbox_pred_list) == len(mlvl_anchors)
-
- mlvl_bboxes = []
- mlvl_scores = []
-
- idx = 0
- for cls_score, bbox_pred, anchors in zip(cls_score_list, bbox_pred_list,
- mlvl_anchors):
- cls_score = paddle.reshape(cls_score, [-1, cls_out_channels])
- if use_sigmoid_cls:
- scores = F.sigmoid(cls_score)
- else:
- scores = F.softmax(cls_score, axis=-1)
-
- # bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 5)
- bbox_pred = paddle.transpose(bbox_pred, [1, 2, 0])
- bbox_pred = paddle.reshape(bbox_pred, [-1, 5])
- anchors = paddle.reshape(anchors, [-1, 5])
-
- if scores.shape[0] > nms_pre:
- # Get maximum scores for foreground classes.
- if use_sigmoid_cls:
- max_scores = paddle.max(scores, axis=1)
- else:
- max_scores = paddle.max(scores[:, 1:], axis=1)
-
- topk_val, topk_inds = paddle.topk(max_scores, nms_pre)
- anchors = paddle.gather(anchors, topk_inds)
- bbox_pred = paddle.gather(bbox_pred, topk_inds)
- scores = paddle.gather(scores, topk_inds)
-
- bbox_delta = paddle.reshape(bbox_pred, [-1, 5])
- bboxes = self.delta2rbox(anchors, bbox_delta)
- mlvl_bboxes.append(bboxes)
- mlvl_scores.append(scores)
-
- idx += 1
-
- mlvl_bboxes = paddle.concat(mlvl_bboxes, axis=0)
- mlvl_scores = paddle.concat(mlvl_scores)
-
- return mlvl_scores, mlvl_bboxes
-
- def rect2rbox(self, bboxes):
- """
- :param bboxes: shape (n, 4) (xmin, ymin, xmax, ymax)
- :return: dbboxes: shape (n, 5) (x_ctr, y_ctr, w, h, angle)
- """
- bboxes = paddle.reshape(bboxes, [-1, 4])
- num_boxes = paddle.shape(bboxes)[0]
- x_ctr = (bboxes[:, 2] + bboxes[:, 0]) / 2.0
- y_ctr = (bboxes[:, 3] + bboxes[:, 1]) / 2.0
- edges1 = paddle.abs(bboxes[:, 2] - bboxes[:, 0])
- edges2 = paddle.abs(bboxes[:, 3] - bboxes[:, 1])
-
- rbox_w = paddle.maximum(edges1, edges2)
- rbox_h = paddle.minimum(edges1, edges2)
-
- # set angle
- inds = edges1 < edges2
- inds = paddle.cast(inds, 'int32')
- rboxes_angle = inds * np.pi / 2.0
-
- rboxes = paddle.stack(
- (x_ctr, y_ctr, rbox_w, rbox_h, rboxes_angle), axis=1)
- return rboxes
-
- # deltas to rbox
- def delta2rbox(self, rrois, deltas, wh_ratio_clip=1e-6):
- """
- :param rrois: (cx, cy, w, h, theta)
- :param deltas: (dx, dy, dw, dh, dtheta)
- :param means: means of anchor
- :param stds: stds of anchor
- :param wh_ratio_clip: clip threshold of wh_ratio
- :return:
+ def bbox_decode(self, preds, anchors, wh_ratio_clip=1e-6):
+ """decode bbox from deltas
+ Args:
+ preds: [B, L, 5]
+ anchors: [1, L, 5]
+ Returns:
+ bboxes: [B, L, 5]
"""
- deltas = paddle.reshape(deltas, [-1, 5])
- rrois = paddle.reshape(rrois, [-1, 5])
- # fix dy2st bug denorm_deltas = deltas * self.stds + self.means
- denorm_deltas = paddle.add(
- paddle.multiply(deltas, self.stds), self.means)
-
- dx = denorm_deltas[:, 0]
- dy = denorm_deltas[:, 1]
- dw = denorm_deltas[:, 2]
- dh = denorm_deltas[:, 3]
- dangle = denorm_deltas[:, 4]
+ preds = paddle.add(paddle.multiply(preds, self.stds), self.means)
+
+ dx, dy, dw, dh, dangle = paddle.split(preds, 5, axis=-1)
max_ratio = np.abs(np.log(wh_ratio_clip))
dw = paddle.clip(dw, min=-max_ratio, max=max_ratio)
dh = paddle.clip(dh, min=-max_ratio, max=max_ratio)
- rroi_x = rrois[:, 0]
- rroi_y = rrois[:, 1]
- rroi_w = rrois[:, 2]
- rroi_h = rrois[:, 3]
- rroi_angle = rrois[:, 4]
+ rroi_x, rroi_y, rroi_w, rroi_h, rroi_angle = paddle.split(
+ anchors, 5, axis=-1)
gx = dx * rroi_w * paddle.cos(rroi_angle) - dy * rroi_h * paddle.sin(
rroi_angle) + rroi_x
@@ -922,127 +703,43 @@ class S2ANetHead(nn.Layer):
gh = rroi_h * dh.exp()
ga = np.pi * dangle + rroi_angle
ga = (ga + np.pi / 4) % np.pi - np.pi / 4
- ga = paddle.to_tensor(ga)
- gw = paddle.to_tensor(gw, dtype='float32')
- gh = paddle.to_tensor(gh, dtype='float32')
- bboxes = paddle.stack([gx, gy, gw, gh, ga], axis=-1)
+ bboxes = paddle.concat([gx, gy, gw, gh, ga], axis=-1)
return bboxes
- def bbox_decode(self, bbox_preds, anchors):
- """decode bbox from deltas
- Args:
- bbox_preds: [N,H,W,5]
- anchors: [H*W,5]
- return:
- bboxes: [N,H,W,5]
+ def rbox2poly(self, rboxes):
"""
- num_imgs, H, W, _ = bbox_preds.shape
- bbox_delta = paddle.reshape(bbox_preds, [-1, 5])
- bboxes = self.delta2rbox(anchors, bbox_delta)
- return bboxes
-
- def trace(self, A):
- tr = paddle.diagonal(A, axis1=-2, axis2=-1)
- tr = paddle.sum(tr, axis=-1)
- return tr
-
- def sqrt_newton_schulz_autograd(self, A, numIters):
- A_shape = A.shape
- batchSize = A_shape[0]
- dim = A_shape[1]
-
- normA = A * A
- normA = paddle.sum(normA, axis=1)
- normA = paddle.sum(normA, axis=1)
- normA = paddle.sqrt(normA)
- normA1 = normA.reshape([batchSize, 1, 1])
- Y = paddle.divide(A, paddle.expand_as(normA1, A))
- I = paddle.eye(dim, dim).reshape([1, dim, dim])
- l0 = []
- for i in range(batchSize):
- l0.append(I)
- I = paddle.concat(l0, axis=0)
- I.stop_gradient = False
- Z = paddle.eye(dim, dim).reshape([1, dim, dim])
- l1 = []
- for i in range(batchSize):
- l1.append(Z)
- Z = paddle.concat(l1, axis=0)
- Z.stop_gradient = False
-
- for i in range(numIters):
- T = 0.5 * (3.0 * I - Z.bmm(Y))
- Y = Y.bmm(T)
- Z = T.bmm(Z)
- sA = Y * paddle.sqrt(normA1).reshape([batchSize, 1, 1])
- sA = paddle.expand_as(sA, A)
- return sA
-
- def wasserstein_distance_sigma(sigma1, sigma2):
- wasserstein_distance_item2 = paddle.matmul(
- sigma1, sigma1) + paddle.matmul(
- sigma2, sigma2) - 2 * self.sqrt_newton_schulz_autograd(
- paddle.matmul(
- paddle.matmul(sigma1, paddle.matmul(sigma2, sigma2)),
- sigma1), 10)
- wasserstein_distance_item2 = self.trace(wasserstein_distance_item2)
-
- return wasserstein_distance_item2
-
- def xywhr2xyrs(self, xywhr):
- xywhr = paddle.reshape(xywhr, [-1, 5])
- xy = xywhr[:, :2]
- wh = paddle.clip(xywhr[:, 2:4], min=1e-7, max=1e7)
- r = xywhr[:, 4]
- cos_r = paddle.cos(r)
- sin_r = paddle.sin(r)
- R = paddle.stack(
- (cos_r, -sin_r, sin_r, cos_r), axis=-1).reshape([-1, 2, 2])
- S = 0.5 * paddle.nn.functional.diag_embed(wh)
- return xy, R, S
-
- def gwd_loss(self,
- pred,
- target,
- fun='log',
- tau=1.0,
- alpha=1.0,
- normalize=False):
-
- xy_p, R_p, S_p = self.xywhr2xyrs(pred)
- xy_t, R_t, S_t = self.xywhr2xyrs(target)
-
- xy_distance = (xy_p - xy_t).square().sum(axis=-1)
-
- Sigma_p = R_p.matmul(S_p.square()).matmul(R_p.transpose([0, 2, 1]))
- Sigma_t = R_t.matmul(S_t.square()).matmul(R_t.transpose([0, 2, 1]))
-
- whr_distance = paddle.diagonal(
- S_p, axis1=-2, axis2=-1).square().sum(axis=-1)
-
- whr_distance = whr_distance + paddle.diagonal(
- S_t, axis1=-2, axis2=-1).square().sum(axis=-1)
- _t = Sigma_p.matmul(Sigma_t)
-
- _t_tr = paddle.diagonal(_t, axis1=-2, axis2=-1).sum(axis=-1)
- _t_det_sqrt = paddle.diagonal(S_p, axis1=-2, axis2=-1).prod(axis=-1)
- _t_det_sqrt = _t_det_sqrt * paddle.diagonal(
- S_t, axis1=-2, axis2=-1).prod(axis=-1)
- whr_distance = whr_distance + (-2) * (
- (_t_tr + 2 * _t_det_sqrt).clip(0).sqrt())
-
- distance = (xy_distance + alpha * alpha * whr_distance).clip(0)
-
- if normalize:
- wh_p = pred[..., 2:4].clip(min=1e-7, max=1e7)
- wh_t = target[..., 2:4].clip(min=1e-7, max=1e7)
- scale = ((wh_p.log() + wh_t.log()).sum(dim=-1) / 4).exp()
- distance = distance / scale
-
- if fun == 'log':
- distance = paddle.log1p(distance)
-
- if tau >= 1.0:
- return 1 - 1 / (tau + distance)
-
- return distance
+ Convert rboxes in [x_ctr, y_ctr, w, h, angle] format to
+ polys in [x0, y0, x1, y1, x2, y2, x3, y3] format.
+ """
+ N = paddle.shape(rboxes)[0]
+
+ x_ctr = rboxes[:, 0]
+ y_ctr = rboxes[:, 1]
+ width = rboxes[:, 2]
+ height = rboxes[:, 3]
+ angle = rboxes[:, 4]
+
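+ # Corner offsets of an axis-aligned box centered at the origin; they are
+ # rotated by angle below and then shifted to (x_ctr, y_ctr).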
+ tl_x, tl_y, br_x, br_y = -width * 0.5, -height * 0.5, width * 0.5, height * 0.5
+
+ normal_rects = paddle.stack(
+ [tl_x, br_x, br_x, tl_x, tl_y, tl_y, br_y, br_y], axis=0)
+ normal_rects = paddle.reshape(normal_rects, [2, 4, N])
+ normal_rects = paddle.transpose(normal_rects, [2, 0, 1])
+
+ sin, cos = paddle.sin(angle), paddle.cos(angle)
+ # M: [N,2,2]
+ M = paddle.stack([cos, -sin, sin, cos], axis=0)
+ M = paddle.reshape(M, [2, 2, N])
+ M = paddle.transpose(M, [2, 0, 1])
+
+ # polys: [N,8]
+ polys = paddle.matmul(M, normal_rects)
+ polys = paddle.transpose(polys, [2, 1, 0])
+ polys = paddle.reshape(polys, [-1, N])
+ polys = paddle.transpose(polys, [1, 0])
+
+ tmp = paddle.stack(
+ [x_ctr, y_ctr, x_ctr, y_ctr, x_ctr, y_ctr, x_ctr, y_ctr], axis=1)
+ polys = polys + tmp
+ return polys
diff --git a/paddlers/models/ppdet/modeling/heads/simota_head.py b/paddlers/models/ppdet/modeling/heads/simota_head.py
index 62be2b6..7467747 100644
--- a/paddlers/models/ppdet/modeling/heads/simota_head.py
+++ b/paddlers/models/ppdet/modeling/heads/simota_head.py
@@ -132,8 +132,8 @@ class OTAHead(GFLHead):
yy, xx = self.get_single_level_center_point(featmap_size, stride,
self.cell_offset)
- center_and_stride = paddle.stack([xx, yy, stride, stride],
- -1).tile([num_imgs, 1, 1])
+ center_and_stride = paddle.stack([xx, yy, stride, stride], -1).tile(
+ [num_imgs, 1, 1])
center_and_strides.append(center_and_stride)
center_in_feature = center_and_stride.reshape(
[-1, 4])[:, :-2] / stride
@@ -179,8 +179,8 @@ class OTAHead(GFLHead):
num_level_anchors)
num_total_pos = sum(pos_num_l)
try:
- num_total_pos = paddle.distributed.all_reduce(num_total_pos.clone(
- )) / paddle.distributed.get_world_size()
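+ # all_reduce now works in place; average the summed positive count across
+ # ranks afterwards.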
+ paddle.distributed.all_reduce(num_total_pos)
+ num_total_pos = num_total_pos / paddle.distributed.get_world_size()
except:
num_total_pos = max(num_total_pos, 1)
@@ -255,7 +255,7 @@ class OTAHead(GFLHead):
avg_factor = sum(avg_factor)
try:
- avg_factor = paddle.distributed.all_reduce(avg_factor.clone())
+ paddle.distributed.all_reduce(avg_factor)
avg_factor = paddle.clip(
avg_factor / paddle.distributed.get_world_size(), min=1)
except:
@@ -396,8 +396,8 @@ class OTAVFLHead(OTAHead):
num_level_anchors)
num_total_pos = sum(pos_num_l)
try:
- num_total_pos = paddle.distributed.all_reduce(num_total_pos.clone(
- )) / paddle.distributed.get_world_size()
+ paddle.distributed.all_reduce(num_total_pos)
+ num_total_pos = num_total_pos / paddle.distributed.get_world_size()
except:
num_total_pos = max(num_total_pos, 1)
@@ -475,7 +475,7 @@ class OTAVFLHead(OTAHead):
avg_factor = sum(avg_factor)
try:
- avg_factor = paddle.distributed.all_reduce(avg_factor.clone())
+ paddle.distributed.all_reduce(avg_factor)
avg_factor = paddle.clip(
avg_factor / paddle.distributed.get_world_size(), min=1)
except:
diff --git a/paddlers/models/ppdet/modeling/heads/ssd_head.py b/paddlers/models/ppdet/modeling/heads/ssd_head.py
index 060e4c3..9f4b50f 100644
--- a/paddlers/models/ppdet/modeling/heads/ssd_head.py
+++ b/paddlers/models/ppdet/modeling/heads/ssd_head.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
@@ -20,6 +20,7 @@ from paddle.regularizer import L2Decay
from paddle import ParamAttr
from ..layers import AnchorGeneratorSSD
+from ..cls_utils import _get_class_default_kwargs
class SepConvLayer(nn.Layer):
@@ -113,7 +114,7 @@ class SSDHead(nn.Layer):
def __init__(self,
num_classes=80,
in_channels=(512, 1024, 512, 256, 256, 256),
- anchor_generator=AnchorGeneratorSSD().__dict__,
+ anchor_generator=_get_class_default_kwargs(AnchorGeneratorSSD),
kernel_size=3,
padding=1,
use_sepconv=False,
diff --git a/paddlers/models/ppdet/modeling/heads/tood_head.py b/paddlers/models/ppdet/modeling/heads/tood_head.py
index b479ba7..0a49cd8 100644
--- a/paddlers/models/ppdet/modeling/heads/tood_head.py
+++ b/paddlers/models/ppdet/modeling/heads/tood_head.py
@@ -218,13 +218,17 @@ class TOODHead(nn.Layer):
assert len(feats) == len(self.fpn_strides), \
"The size of feats is not equal to size of fpn_strides"
- anchors, num_anchors_list, stride_tensor_list = generate_anchors_for_grid_cell(
+ anchors, anchor_points, num_anchors_list, stride_tensor =\
+ generate_anchors_for_grid_cell(
feats, self.fpn_strides, self.grid_cell_scale,
self.grid_cell_offset)
+ anchor_centers_split = paddle.split(anchor_points / stride_tensor,
+ num_anchors_list)
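+ # Anchor centers are pre-normalized by stride and split back per FPN level.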
cls_score_list, bbox_pred_list = [], []
- for feat, scale_reg, anchor, stride in zip(feats, self.scales_regs,
- anchors, self.fpn_strides):
+ for feat, scale_reg, anchor_centers, stride in zip(
+ feats, self.scales_regs, anchor_centers_split,
+ self.fpn_strides):
b, _, h, w = get_static_shape(feat)
inter_feats = []
for inter_conv in self.inter_convs:
@@ -250,8 +254,8 @@ class TOODHead(nn.Layer):
# reg prediction and alignment
reg_dist = scale_reg(self.tood_reg(reg_feat).exp())
reg_dist = reg_dist.flatten(2).transpose([0, 2, 1])
- anchor_centers = bbox_center(anchor).unsqueeze(0) / stride
- reg_bbox = batch_distance2bbox(anchor_centers, reg_dist)
+ reg_bbox = batch_distance2bbox(
+ anchor_centers.unsqueeze(0), reg_dist)
if self.use_align_head:
reg_offset = F.relu(self.reg_offset_conv1(feat))
reg_offset = self.reg_offset_conv2(reg_offset)
@@ -268,12 +272,8 @@ class TOODHead(nn.Layer):
bbox_pred_list.append(bbox_pred)
cls_score_list = paddle.concat(cls_score_list, axis=1)
bbox_pred_list = paddle.concat(bbox_pred_list, axis=1)
- anchors = paddle.concat(anchors)
- anchors.stop_gradient = True
- stride_tensor_list = paddle.concat(stride_tensor_list).unsqueeze(0)
- stride_tensor_list.stop_gradient = True
- return cls_score_list, bbox_pred_list, anchors, num_anchors_list, stride_tensor_list
+ return cls_score_list, bbox_pred_list, anchors, num_anchors_list, stride_tensor
@staticmethod
def _focal_loss(score, label, alpha=0.25, gamma=2.0):
@@ -286,9 +286,11 @@ class TOODHead(nn.Layer):
return loss
def get_loss(self, head_outs, gt_meta):
- pred_scores, pred_bboxes, anchors, num_anchors_list, stride_tensor_list = head_outs
+ pred_scores, pred_bboxes, anchors, \
+ num_anchors_list, stride_tensor = head_outs
gt_labels = gt_meta['gt_class']
gt_bboxes = gt_meta['gt_bbox']
+ pad_gt_mask = gt_meta['pad_gt_mask']
# label assignment
if gt_meta['epoch_id'] < self.static_assigner_epoch:
assigned_labels, assigned_bboxes, assigned_scores = self.static_assigner(
@@ -296,20 +298,23 @@ class TOODHead(nn.Layer):
num_anchors_list,
gt_labels,
gt_bboxes,
+ pad_gt_mask,
bg_index=self.num_classes)
alpha_l = 0.25
else:
assigned_labels, assigned_bboxes, assigned_scores = self.assigner(
pred_scores.detach(),
- pred_bboxes.detach() * stride_tensor_list,
+ pred_bboxes.detach() * stride_tensor,
bbox_center(anchors),
+ num_anchors_list,
gt_labels,
gt_bboxes,
+ pad_gt_mask,
bg_index=self.num_classes)
alpha_l = -1
# rescale bbox
- assigned_bboxes /= stride_tensor_list
+ assigned_bboxes /= stride_tensor
# classification loss
loss_cls = self._focal_loss(pred_scores, assigned_scores, alpha=alpha_l)
# select positive samples mask
diff --git a/paddlers/models/ppdet/modeling/heads/ttf_head.py b/paddlers/models/ppdet/modeling/heads/ttf_head.py
index 9b3fac2..928374c 100644
--- a/paddlers/models/ppdet/modeling/heads/ttf_head.py
+++ b/paddlers/models/ppdet/modeling/heads/ttf_head.py
@@ -31,7 +31,7 @@ class HMHead(nn.Layer):
ch_out (int): The channel number of output Tensor.
num_classes (int): Number of classes.
conv_num (int): The convolution number of hm_feat.
- dcn_head(bool): whether use dcn in head. False by default.
+ dcn_head(bool): whether use dcn in head. False by default.
lite_head(bool): whether use lite version. False by default.
norm_type (string): norm type, 'sync_bn', 'bn', 'gn' are optional.
bn by default
diff --git a/paddlers/models/ppdet/modeling/heads/yolo_head.py b/paddlers/models/ppdet/modeling/heads/yolo_head.py
index 1aa4892..dc884a3 100644
--- a/paddlers/models/ppdet/modeling/heads/yolo_head.py
+++ b/paddlers/models/ppdet/modeling/heads/yolo_head.py
@@ -1,3 +1,17 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
@@ -5,6 +19,17 @@ from paddle import ParamAttr
from paddle.regularizer import L2Decay
from paddlers.models.ppdet.core.workspace import register
+import math
+import numpy as np
+from ..initializer import bias_init_with_prob, constant_
+from ..backbones.csp_darknet import BaseConv, DWConv
+from ..losses import IouLoss
+from paddlers.models.ppdet.modeling.assigners.simota_assigner import SimOTAAssigner
+from paddlers.models.ppdet.modeling.bbox_utils import bbox_overlaps
+from paddlers.models.ppdet.modeling.layers import MultiClassNMS
+
+__all__ = ['YOLOv3Head', 'YOLOXHead']
+
def _de_sigmoid(x, eps=1e-7):
x = paddle.clip(x, eps, 1. / eps)
@@ -122,3 +147,270 @@ class YOLOv3Head(nn.Layer):
@classmethod
def from_config(cls, cfg, input_shape):
return {'in_channels': [i.channels for i in input_shape], }
+
+
+@register
+class YOLOXHead(nn.Layer):
+ __shared__ = ['num_classes', 'width_mult', 'act', 'trt', 'exclude_nms']
+ __inject__ = ['assigner', 'nms']
+
+ def __init__(self,
+ num_classes=80,
+ width_mult=1.0,
+ depthwise=False,
+ in_channels=[256, 512, 1024],
+ feat_channels=256,
+ fpn_strides=(8, 16, 32),
+ l1_epoch=285,
+ act='silu',
+ assigner=SimOTAAssigner(use_vfl=False),
+ nms='MultiClassNMS',
+ loss_weight={
+ 'cls': 1.0,
+ 'obj': 1.0,
+ 'iou': 5.0,
+ 'l1': 1.0,
+ },
+ trt=False,
+ exclude_nms=False):
+ super(YOLOXHead, self).__init__()
+ self._dtype = paddle.framework.get_default_dtype()
+ self.num_classes = num_classes
+ assert len(in_channels) > 0, "in_channels length should be > 0"
+ self.in_channels = in_channels
+ feat_channels = int(feat_channels * width_mult)
+ self.fpn_strides = fpn_strides
+ self.l1_epoch = l1_epoch
+ self.assigner = assigner
+ self.nms = nms
+ if isinstance(self.nms, MultiClassNMS) and trt:
+ self.nms.trt = trt
+ self.exclude_nms = exclude_nms
+ self.loss_weight = loss_weight
+ self.iou_loss = IouLoss(loss_weight=1.0) # default loss_weight 2.5
+
+ ConvBlock = DWConv if depthwise else BaseConv
+
+ self.stem_conv = nn.LayerList()
+ self.conv_cls = nn.LayerList()
+ self.conv_reg = nn.LayerList() # reg [x,y,w,h] + obj
+ for in_c in self.in_channels:
+ self.stem_conv.append(BaseConv(in_c, feat_channels, 1, 1, act=act))
+
+ self.conv_cls.append(
+ nn.Sequential(*[
+ ConvBlock(
+ feat_channels, feat_channels, 3, 1, act=act), ConvBlock(
+ feat_channels, feat_channels, 3, 1, act=act),
+ nn.Conv2D(
+ feat_channels,
+ self.num_classes,
+ 1,
+ bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
+ ]))
+
+ self.conv_reg.append(
+ nn.Sequential(*[
+ ConvBlock(
+ feat_channels, feat_channels, 3, 1, act=act),
+ ConvBlock(
+ feat_channels, feat_channels, 3, 1, act=act),
+ nn.Conv2D(
+ feat_channels,
+ 4 + 1, # reg [x,y,w,h] + obj
+ 1,
+ bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
+ ]))
+
+ self._init_weights()
+
+ @classmethod
+ def from_config(cls, cfg, input_shape):
+ return {'in_channels': [i.channels for i in input_shape], }
+
+ def _init_weights(self):
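+ # cls bias uses the focal-loss prior; reg bias starts x/y at 0, w/h at
+ # log(5), and the trailing objectness channel reuses the cls prior.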
+ bias_cls = bias_init_with_prob(0.01)
+ bias_reg = paddle.full([5], math.log(5.), dtype=self._dtype)
+ bias_reg[:2] = 0.
+ bias_reg[-1] = bias_cls
+ for cls_, reg_ in zip(self.conv_cls, self.conv_reg):
+ constant_(cls_[-1].weight)
+ constant_(cls_[-1].bias, bias_cls)
+ constant_(reg_[-1].weight)
+ reg_[-1].bias.set_value(bias_reg)
+
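+ # Build flattened per-level grid center points plus matching stride
+ # tensors, used both for decoding predictions and for label assignment.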
+ def _generate_anchor_point(self, feat_sizes, strides, offset=0.):
+ anchor_points, stride_tensor = [], []
+ num_anchors_list = []
+ for feat_size, stride in zip(feat_sizes, strides):
+ h, w = feat_size
+ x = (paddle.arange(w) + offset) * stride
+ y = (paddle.arange(h) + offset) * stride
+ y, x = paddle.meshgrid(y, x)
+ anchor_points.append(paddle.stack([x, y], axis=-1).reshape([-1, 2]))
+ stride_tensor.append(
+ paddle.full(
+ [len(anchor_points[-1]), 1], stride, dtype=self._dtype))
+ num_anchors_list.append(len(anchor_points[-1]))
+ anchor_points = paddle.concat(anchor_points).astype(self._dtype)
+ anchor_points.stop_gradient = True
+ stride_tensor = paddle.concat(stride_tensor)
+ stride_tensor.stop_gradient = True
+ return anchor_points, stride_tensor, num_anchors_list
+
+ def forward(self, feats, targets=None):
+ assert len(feats) == len(self.fpn_strides), \
+ "The size of feats is not equal to size of fpn_strides"
+
+ feat_sizes = [[f.shape[-2], f.shape[-1]] for f in feats]
+ cls_score_list, reg_pred_list = [], []
+ obj_score_list = []
+ for i, feat in enumerate(feats):
+ feat = self.stem_conv[i](feat)
+ cls_logit = self.conv_cls[i](feat)
+ reg_pred = self.conv_reg[i](feat)
+ # cls prediction
+ cls_score = F.sigmoid(cls_logit)
+ cls_score_list.append(cls_score.flatten(2).transpose([0, 2, 1]))
+ # reg prediction
+ reg_xywh, obj_logit = paddle.split(reg_pred, [4, 1], axis=1)
+ reg_xywh = reg_xywh.flatten(2).transpose([0, 2, 1])
+ reg_pred_list.append(reg_xywh)
+ # obj prediction
+ obj_score = F.sigmoid(obj_logit)
+ obj_score_list.append(obj_score.flatten(2).transpose([0, 2, 1]))
+
+ cls_score_list = paddle.concat(cls_score_list, axis=1)
+ reg_pred_list = paddle.concat(reg_pred_list, axis=1)
+ obj_score_list = paddle.concat(obj_score_list, axis=1)
+
+ # bbox decode
+ anchor_points, stride_tensor, _ =\
+ self._generate_anchor_point(feat_sizes, self.fpn_strides)
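+ # Decode: xy offsets are relative to grid cells, wh is exponentiated as a
+ # half extent, then the box is converted to corner (x1, y1, x2, y2) form.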
+ reg_xy, reg_wh = paddle.split(reg_pred_list, 2, axis=-1)
+ reg_xy += (anchor_points / stride_tensor)
+ reg_wh = paddle.exp(reg_wh) * 0.5
+ bbox_pred_list = paddle.concat(
+ [reg_xy - reg_wh, reg_xy + reg_wh], axis=-1)
+
+ if self.training:
+ anchor_points, stride_tensor, num_anchors_list =\
+ self._generate_anchor_point(feat_sizes, self.fpn_strides, 0.5)
+ yolox_losses = self.get_loss([
+ cls_score_list, bbox_pred_list, obj_score_list, anchor_points,
+ stride_tensor, num_anchors_list
+ ], targets)
+ return yolox_losses
+ else:
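+ # The inference score is the geometric mean of class probability and
+ # objectness.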
+ pred_scores = (cls_score_list * obj_score_list).sqrt()
+ return pred_scores, bbox_pred_list, stride_tensor
+
+ def get_loss(self, head_outs, targets):
+ pred_cls, pred_bboxes, pred_obj,\
+ anchor_points, stride_tensor, num_anchors_list = head_outs
+ gt_labels = targets['gt_class']
+ gt_bboxes = targets['gt_bbox']
+ pred_scores = (pred_cls * pred_obj).sqrt()
+ # label assignment
+ center_and_strides = paddle.concat(
+ [anchor_points, stride_tensor, stride_tensor], axis=-1)
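+ # Each location is described as (cx, cy, stride, stride) for the SimOTA
+ # assigner.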
+ pos_num_list, label_list, bbox_target_list = [], [], []
+ for pred_score, pred_bbox, gt_box, gt_label in zip(
+ pred_scores.detach(),
+ pred_bboxes.detach() * stride_tensor, gt_bboxes, gt_labels):
+ pos_num, label, _, bbox_target = self.assigner(
+ pred_score, center_and_strides, pred_bbox, gt_box, gt_label)
+ pos_num_list.append(pos_num)
+ label_list.append(label)
+ bbox_target_list.append(bbox_target)
+ labels = paddle.to_tensor(np.stack(label_list, axis=0))
+ bbox_targets = paddle.to_tensor(np.stack(bbox_target_list, axis=0))
+ bbox_targets /= stride_tensor # rescale bbox
+
+ # 1. obj score loss
+ mask_positive = (labels != self.num_classes)
+ loss_obj = F.binary_cross_entropy(
+ pred_obj,
+ mask_positive.astype(pred_obj.dtype).unsqueeze(-1),
+ reduction='sum')
+
+ num_pos = sum(pos_num_list)
+
+ if num_pos > 0:
+ num_pos = paddle.to_tensor(num_pos, dtype=self._dtype).clip(min=1)
+ loss_obj /= num_pos
+
+ # 2. iou loss
+ bbox_mask = mask_positive.unsqueeze(-1).tile([1, 1, 4])
+ pred_bboxes_pos = paddle.masked_select(pred_bboxes,
+ bbox_mask).reshape([-1, 4])
+ assigned_bboxes_pos = paddle.masked_select(
+ bbox_targets, bbox_mask).reshape([-1, 4])
+ bbox_iou = bbox_overlaps(pred_bboxes_pos, assigned_bboxes_pos)
+ bbox_iou = paddle.diag(bbox_iou)
+
+ loss_iou = self.iou_loss(
+ pred_bboxes_pos.split(
+ 4, axis=-1),
+ assigned_bboxes_pos.split(
+ 4, axis=-1))
+ loss_iou = loss_iou.sum() / num_pos
+
+ # 3. cls loss
+ cls_mask = mask_positive.unsqueeze(-1).tile(
+ [1, 1, self.num_classes])
+ pred_cls_pos = paddle.masked_select(
+ pred_cls, cls_mask).reshape([-1, self.num_classes])
+ assigned_cls_pos = paddle.masked_select(labels, mask_positive)
+ assigned_cls_pos = F.one_hot(assigned_cls_pos,
+ self.num_classes + 1)[..., :-1]
+ assigned_cls_pos *= bbox_iou.unsqueeze(-1)
+ loss_cls = F.binary_cross_entropy(
+ pred_cls_pos, assigned_cls_pos, reduction='sum')
+ loss_cls /= num_pos
+
+ # 4. l1 loss
+ if targets['epoch_id'] >= self.l1_epoch:
+ loss_l1 = F.l1_loss(
+ pred_bboxes_pos, assigned_bboxes_pos, reduction='sum')
+ loss_l1 /= num_pos
+ else:
+ loss_l1 = paddle.zeros([1])
+ loss_l1.stop_gradient = False
+ else:
+ loss_cls = paddle.zeros([1])
+ loss_iou = paddle.zeros([1])
+ loss_l1 = paddle.zeros([1])
+ loss_cls.stop_gradient = False
+ loss_iou.stop_gradient = False
+ loss_l1.stop_gradient = False
+
+ loss = self.loss_weight['obj'] * loss_obj + \
+ self.loss_weight['cls'] * loss_cls + \
+ self.loss_weight['iou'] * loss_iou
+
+ if targets['epoch_id'] >= self.l1_epoch:
+ loss += (self.loss_weight['l1'] * loss_l1)
+
+ yolox_losses = {
+ 'loss': loss,
+ 'loss_cls': loss_cls,
+ 'loss_obj': loss_obj,
+ 'loss_iou': loss_iou,
+ 'loss_l1': loss_l1,
+ }
+ return yolox_losses
+
+ def post_process(self, head_outs, img_shape, scale_factor):
+ pred_scores, pred_bboxes, stride_tensor = head_outs
+ pred_scores = pred_scores.transpose([0, 2, 1])
+ pred_bboxes *= stride_tensor
+ # scale bbox to origin image
+ scale_factor = scale_factor.flip(-1).tile([1, 2]).unsqueeze(1)
+ pred_bboxes /= scale_factor
+ if self.exclude_nms:
+            # `exclude_nms=True` is only used for benchmarking
+ return pred_bboxes.sum(), pred_scores.sum()
+ else:
+ bbox_pred, bbox_num, _ = self.nms(pred_bboxes, pred_scores)
+ return bbox_pred, bbox_num
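
A minimal numpy sketch of the decode step above (toy values, not the registered head): anchor points are `(arange(w) + offset) * stride`, `reg_xy` is an offset from the anchor point in stride units, and `reg_wh` is an exponential half-extent, so the concatenation yields xyxy boxes that are rescaled by the stride before loss computation or NMS.

```python
import numpy as np

# two anchor points of an 8-stride level (toy values)
anchor_points = np.array([[0., 0.], [8., 0.]])
stride = 8.
reg_xy = np.array([[0.3, 0.4], [0.1, -0.2]])               # predicted center offsets
reg_wh = np.exp(np.array([[0.0, 0.0], [0.5, 0.5]])) * 0.5  # half extents

xy = reg_xy + anchor_points / stride                       # centers, stride units
box = np.concatenate([xy - reg_wh, xy + reg_wh], axis=-1)  # xyxy, stride units
print(box * stride)                                        # back to pixels
```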
diff --git a/paddlers/models/ppdet/modeling/initializer.py b/paddlers/models/ppdet/modeling/initializer.py
index 1a1ce84..5c0f8d5 100644
--- a/paddlers/models/ppdet/modeling/initializer.py
+++ b/paddlers/models/ppdet/modeling/initializer.py
@@ -273,7 +273,8 @@ def linear_init_(module):
def conv_init_(module):
bound = 1 / np.sqrt(np.prod(module.weight.shape[1:]))
uniform_(module.weight, -bound, bound)
- uniform_(module.bias, -bound, bound)
+ if module.bias is not None:
+ uniform_(module.bias, -bound, bound)
def bias_init_with_prob(prior_prob=0.01):
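
The added guard matters because a layer built without a bias has `bias` set to `None` in Paddle; a quick sketch:

```python
import paddle.nn as nn

# a conv created with bias_attr=False carries no bias parameter at all
conv = nn.Conv2D(3, 8, kernel_size=3, bias_attr=False)
print(conv.bias)  # None -> the old unconditional uniform_(module.bias, ...) crashed here
```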
diff --git a/paddlers/models/ppdet/modeling/layers.py b/paddlers/models/ppdet/modeling/layers.py
index 6a0cca1..5ee134f 100644
--- a/paddlers/models/ppdet/modeling/layers.py
+++ b/paddlers/models/ppdet/modeling/layers.py
@@ -39,6 +39,81 @@ def _to_list(l):
return [l]
+class AlignConv(nn.Layer):
+ def __init__(self, in_channels, out_channels, kernel_size=3, groups=1):
+ super(AlignConv, self).__init__()
+ self.kernel_size = kernel_size
+ self.align_conv = paddle.vision.ops.DeformConv2D(
+ in_channels,
+ out_channels,
+ kernel_size=self.kernel_size,
+ padding=(self.kernel_size - 1) // 2,
+ groups=groups,
+ weight_attr=ParamAttr(initializer=Normal(0, 0.01)),
+ bias_attr=None)
+
+ @paddle.no_grad()
+ def get_offset(self, anchors, featmap_size, stride):
+ """
+ Args:
+ anchors: [B, L, 5] xc,yc,w,h,angle
+ featmap_size: (feat_h, feat_w)
+            stride: the feature map stride, e.g. 8
+        Returns:
+            offset (Tensor): offset field for the deformable conv, with shape
+                [B, kernel_size * kernel_size * 2, feat_h, feat_w]
+ """
+ batch = anchors.shape[0]
+ dtype = anchors.dtype
+ feat_h, feat_w = featmap_size
+ pad = (self.kernel_size - 1) // 2
+ idx = paddle.arange(-pad, pad + 1, dtype=dtype)
+
+ yy, xx = paddle.meshgrid(idx, idx)
+ xx = paddle.reshape(xx, [-1])
+ yy = paddle.reshape(yy, [-1])
+
+ # get sampling locations of default conv
+ xc = paddle.arange(0, feat_w, dtype=dtype)
+ yc = paddle.arange(0, feat_h, dtype=dtype)
+ yc, xc = paddle.meshgrid(yc, xc)
+
+ xc = paddle.reshape(xc, [-1, 1])
+ yc = paddle.reshape(yc, [-1, 1])
+ x_conv = xc + xx
+ y_conv = yc + yy
+
+ # get sampling locations of anchors
+ x_ctr, y_ctr, w, h, a = paddle.split(anchors, 5, axis=-1)
+ x_ctr = x_ctr / stride
+ y_ctr = y_ctr / stride
+ w_s = w / stride
+ h_s = h / stride
+ cos, sin = paddle.cos(a), paddle.sin(a)
+ dw, dh = w_s / self.kernel_size, h_s / self.kernel_size
+ x, y = dw * xx, dh * yy
+ xr = cos * x - sin * y
+ yr = sin * x + cos * y
+ x_anchor, y_anchor = xr + x_ctr, yr + y_ctr
+        # get offset field
+ offset_x = x_anchor - x_conv
+ offset_y = y_anchor - y_conv
+ offset = paddle.stack([offset_y, offset_x], axis=-1)
+ offset = offset.reshape(
+ [batch, feat_h, feat_w, self.kernel_size * self.kernel_size * 2])
+ offset = offset.transpose([0, 3, 1, 2])
+
+ return offset
+
+ def forward(self, x, refine_anchors, featmap_size, stride):
+ batch = paddle.shape(x)[0].numpy()
+ offset = self.get_offset(refine_anchors, featmap_size, stride)
+ if self.training:
+ x = F.relu(self.align_conv(x, offset.detach()))
+ else:
+ x = F.relu(self.align_conv(x, offset))
+ return x
+
+
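
A toy numpy illustration of the offset field that `get_offset` builds (hypothetical values, one 3x3 kernel at a single feature-map location): the default sampling grid is mapped onto the rotated, stride-normalized anchor, and the offset is simply the difference, stored y-first.

```python
import numpy as np

k, stride = 3, 8
idx = np.arange(-(k // 2), k // 2 + 1)
yy, xx = np.meshgrid(idx, idx, indexing="ij")
xx, yy = xx.ravel(), yy.ravel()                 # default unit grid

xc, yc = 4.0, 6.0                               # conv location on the feature map
x_conv, y_conv = xc + xx, yc + yy               # default sampling points

x_ctr, y_ctr, w, h, a = 36., 50., 48., 24., np.pi / 6  # anchor, image coords
dw, dh = (w / stride) / k, (h / stride) / k
xr = np.cos(a) * (dw * xx) - np.sin(a) * (dh * yy)
yr = np.sin(a) * (dw * xx) + np.cos(a) * (dh * yy)
x_anc, y_anc = xr + x_ctr / stride, yr + y_ctr / stride  # anchor-aligned points

offset = np.stack([y_anc - y_conv, x_anc - x_conv], axis=-1)
print(offset.shape)  # (9, 2) -> flattened to 18 channels per location
```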
class DeformableConvV2(nn.Layer):
def __init__(self,
in_channels,
@@ -128,7 +203,7 @@ class ConvNormLayer(nn.Layer):
dcn_lr_scale=2.,
dcn_regularizer=L2Decay(0.)):
super(ConvNormLayer, self).__init__()
- assert norm_type in ['bn', 'sync_bn', 'gn']
+ assert norm_type in ['bn', 'sync_bn', 'gn', None]
if bias_on:
bias_attr = ParamAttr(
@@ -183,10 +258,13 @@ class ConvNormLayer(nn.Layer):
num_channels=ch_out,
weight_attr=param_attr,
bias_attr=bias_attr)
+ else:
+ self.norm = None
def forward(self, inputs):
out = self.conv(inputs)
- out = self.norm(out)
+ if self.norm is not None:
+ out = self.norm(out)
return out
@@ -248,7 +326,7 @@ class LiteConv(nn.Layer):
class DropBlock(nn.Layer):
- def __init__(self, block_size, keep_prob, name, data_format='NCHW'):
+ def __init__(self, block_size, keep_prob, name=None, data_format='NCHW'):
"""
DropBlock layer, see https://arxiv.org/abs/1810.12890
@@ -360,18 +438,20 @@ class AnchorGeneratorSSD(object):
@register
@serializable
class RCNNBox(object):
- __shared__ = ['num_classes']
+ __shared__ = ['num_classes', 'export_onnx']
def __init__(self,
prior_box_var=[10., 10., 5., 5.],
code_type="decode_center_size",
box_normalized=False,
- num_classes=80):
+ num_classes=80,
+ export_onnx=False):
super(RCNNBox, self).__init__()
self.prior_box_var = prior_box_var
self.code_type = code_type
self.box_normalized = box_normalized
self.num_classes = num_classes
+ self.export_onnx = export_onnx
def __call__(self, bbox_head_out, rois, im_shape, scale_factor):
bbox_pred = bbox_head_out[0]
@@ -379,39 +459,38 @@ class RCNNBox(object):
roi = rois[0]
rois_num = rois[1]
- origin_shape = paddle.floor(im_shape / scale_factor + 0.5)
- scale_list = []
- origin_shape_list = []
+ if self.export_onnx:
+ onnx_rois_num_per_im = rois_num[0]
+ origin_shape = paddle.expand(im_shape[0, :],
+ [onnx_rois_num_per_im, 2])
- batch_size = 1
- if isinstance(roi, list):
- batch_size = len(roi)
else:
- batch_size = paddle.slice(paddle.shape(im_shape), [0], [0], [1])
- # bbox_pred.shape: [N, C*4]
- for idx in range(batch_size):
- roi_per_im = roi[idx]
- rois_num_per_im = rois_num[idx]
- expand_im_shape = paddle.expand(im_shape[idx, :],
- [rois_num_per_im, 2])
- origin_shape_list.append(expand_im_shape)
+ origin_shape_list = []
+ if isinstance(roi, list):
+ batch_size = len(roi)
+ else:
+ batch_size = paddle.slice(paddle.shape(im_shape), [0], [0], [1])
+
+ # bbox_pred.shape: [N, C*4]
+ for idx in range(batch_size):
+ rois_num_per_im = rois_num[idx]
+ expand_im_shape = paddle.expand(im_shape[idx, :],
+ [rois_num_per_im, 2])
+ origin_shape_list.append(expand_im_shape)
- origin_shape = paddle.concat(origin_shape_list)
+ origin_shape = paddle.concat(origin_shape_list)
# bbox_pred.shape: [N, C*4]
# C=num_classes in faster/mask rcnn(bbox_head), C=1 in cascade rcnn(cascade_head)
bbox = paddle.concat(roi)
- if bbox.shape[0] == 0:
- bbox = paddle.zeros([0, bbox_pred.shape[1]], dtype='float32')
- else:
- bbox = delta2bbox(bbox_pred, bbox, self.prior_box_var)
+ bbox = delta2bbox(bbox_pred, bbox, self.prior_box_var)
scores = cls_prob[:, :-1]
# bbox.shape: [N, C, 4]
# bbox.shape[1] must be equal to scores.shape[1]
- bbox_num_class = bbox.shape[1]
- if bbox_num_class == 1:
- bbox = paddle.tile(bbox, [1, self.num_classes, 1])
+ total_num = bbox.shape[0]
+ bbox_dim = bbox.shape[-1]
+ bbox = paddle.expand(bbox, [total_num, self.num_classes, bbox_dim])
origin_h = paddle.unsqueeze(origin_shape[:, 0], axis=1)
origin_w = paddle.unsqueeze(origin_shape[:, 1], axis=1)
@@ -436,7 +515,8 @@ class MultiClassNMS(object):
normalized=True,
nms_eta=1.0,
return_index=False,
- return_rois_num=True):
+ return_rois_num=True,
+ trt=False):
super(MultiClassNMS, self).__init__()
self.score_threshold = score_threshold
self.nms_top_k = nms_top_k
@@ -446,20 +526,21 @@ class MultiClassNMS(object):
self.nms_eta = nms_eta
self.return_index = return_index
self.return_rois_num = return_rois_num
+ self.trt = trt
def __call__(self, bboxes, score, background_label=-1):
"""
- bboxes (Tensor|List[Tensor]): 1. (Tensor) Predicted bboxes with shape
+ bboxes (Tensor|List[Tensor]): 1. (Tensor) Predicted bboxes with shape
[N, M, 4], N is the batch size and M
is the number of bboxes
2. (List[Tensor]) bboxes and bbox_num,
bboxes have shape of [M, C, 4], C
is the class number and bbox_num means
the number of bboxes of each batch with
- shape [N,]
+ shape [N,]
score (Tensor): Predicted scores with shape [N, C, M] or [M, C]
background_label (int): Ignore the background label; For example, RCNN
- is num_classes and YOLO is -1.
+ is num_classes and YOLO is -1.
"""
kwargs = self.__dict__.copy()
if isinstance(bboxes, tuple):
@@ -467,7 +548,20 @@ class MultiClassNMS(object):
kwargs.update({'rois_num': bbox_num})
if background_label > -1:
kwargs.update({'background_label': background_label})
- return ops.multiclass_nms(bboxes, score, **kwargs)
+ kwargs.pop('trt')
+        # TODO(wangxinxin08): Paddle must be develop or 2.3+ to run NMS on TensorRT
+ if self.trt and (int(paddle.version.major) == 0 or
+ (int(paddle.version.major) >= 2 and
+ int(paddle.version.minor) >= 3)):
+            # TODO(wangxinxin08): setting nms_eta > 1 is a trick that forces NMS to run on TensorRT
+ kwargs.update({'nms_eta': 1.1})
+ bbox, bbox_num, _ = ops.multiclass_nms(bboxes, score, **kwargs)
+ bbox = bbox.reshape([1, -1, 6])
+ idx = paddle.nonzero(bbox[..., 0] != -1)
+ bbox = paddle.gather_nd(bbox, idx)
+ return bbox, bbox_num, None
+ else:
+ return ops.multiclass_nms(bboxes, score, **kwargs)
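
The `nms_eta > 1` path pads its output with label `-1` rows so the tensor shape stays static under TensorRT; the nonzero/gather_nd pair above strips that padding. A toy sketch of just the trick (made-up tensor):

```python
import paddle

# one real detection row plus one -1 filler row, [1, M, 6]
bbox = paddle.to_tensor([[[0., 0.9, 1., 1., 5., 5.],
                          [-1., 0., 0., 0., 0., 0.]]])
idx = paddle.nonzero(bbox[..., 0] != -1)   # indices of real rows
print(paddle.gather_nd(bbox, idx))         # filler row dropped
```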
@register
@@ -536,10 +630,15 @@ class YOLOBox(object):
origin_shape = im_shape / scale_factor
origin_shape = paddle.cast(origin_shape, 'int32')
for i, head_out in enumerate(yolo_head_out):
- boxes, scores = ops.yolo_box(head_out, origin_shape, anchors[i],
- self.num_classes, self.conf_thresh,
- self.downsample_ratio // 2**i,
- self.clip_bbox, self.scale_x_y)
+ boxes, scores = paddle.vision.ops.yolo_box(
+ head_out,
+ origin_shape,
+ anchors[i],
+ self.num_classes,
+ self.conf_thresh,
+ self.downsample_ratio // 2**i,
+ self.clip_bbox,
+ scale_x_y=self.scale_x_y)
boxes_list.append(boxes)
scores_list.append(paddle.transpose(scores, perm=[0, 2, 1]))
yolo_boxes = paddle.concat(boxes_list, axis=1)
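
The head now routes through the public `paddle.vision.ops.yolo_box` API instead of the legacy internal op. An assumed toy invocation (anchors and shapes are illustrative only):

```python
import paddle

# one 32-stride head output: [N, na*(5+classes), H, W]
x = paddle.randn([1, 3 * (5 + 80), 13, 13])
img_size = paddle.to_tensor([[608, 608]], dtype="int32")
boxes, scores = paddle.vision.ops.yolo_box(
    x, img_size, anchors=[116, 90, 156, 198, 373, 326],
    class_num=80, conf_thresh=0.01, downsample_ratio=32, scale_x_y=1.05)
print(boxes.shape, scores.shape)  # [1, 507, 4] [1, 507, 80]
```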
@@ -550,9 +649,14 @@ class YOLOBox(object):
@register
@serializable
class SSDBox(object):
- def __init__(self, is_normalized=True):
+ def __init__(self,
+ is_normalized=True,
+ prior_box_var=[0.1, 0.1, 0.2, 0.2],
+ use_fuse_decode=False):
self.is_normalized = is_normalized
self.norm_delta = float(not self.is_normalized)
+ self.prior_box_var = prior_box_var
+ self.use_fuse_decode = use_fuse_decode
def __call__(self,
preds,
@@ -561,128 +665,42 @@ class SSDBox(object):
scale_factor,
var_weight=None):
boxes, scores = preds
- outputs = []
- for box, score, prior_box in zip(boxes, scores, prior_boxes):
- pb_w = prior_box[:, 2] - prior_box[:, 0] + self.norm_delta
- pb_h = prior_box[:, 3] - prior_box[:, 1] + self.norm_delta
- pb_x = prior_box[:, 0] + pb_w * 0.5
- pb_y = prior_box[:, 1] + pb_h * 0.5
- out_x = pb_x + box[:, :, 0] * pb_w * 0.1
- out_y = pb_y + box[:, :, 1] * pb_h * 0.1
- out_w = paddle.exp(box[:, :, 2] * 0.2) * pb_w
- out_h = paddle.exp(box[:, :, 3] * 0.2) * pb_h
-
- if self.is_normalized:
- h = paddle.unsqueeze(
- im_shape[:, 0] / scale_factor[:, 0], axis=-1)
- w = paddle.unsqueeze(
- im_shape[:, 1] / scale_factor[:, 1], axis=-1)
- output = paddle.stack(
- [(out_x - out_w / 2.) * w, (out_y - out_h / 2.) * h,
- (out_x + out_w / 2.) * w, (out_y + out_h / 2.) * h],
- axis=-1)
- else:
- output = paddle.stack(
- [
- out_x - out_w / 2., out_y - out_h / 2.,
- out_x + out_w / 2. - 1., out_y + out_h / 2. - 1.
- ],
- axis=-1)
- outputs.append(output)
- boxes = paddle.concat(outputs, axis=1)
-
- scores = F.softmax(paddle.concat(scores, axis=1))
- scores = paddle.transpose(scores, [0, 2, 1])
-
- return boxes, scores
-
-
-@register
-@serializable
-class AnchorGrid(object):
- """Generate anchor grid
-
- Args:
- image_size (int or list): input image size, may be a single integer or
- list of [h, w]. Default: 512
- min_level (int): min level of the feature pyramid. Default: 3
- max_level (int): max level of the feature pyramid. Default: 7
- anchor_base_scale: base anchor scale. Default: 4
- num_scales: number of anchor scales. Default: 3
- aspect_ratios: aspect ratios. default: [[1, 1], [1.4, 0.7], [0.7, 1.4]]
- """
-
- def __init__(self,
- image_size=512,
- min_level=3,
- max_level=7,
- anchor_base_scale=4,
- num_scales=3,
- aspect_ratios=[[1, 1], [1.4, 0.7], [0.7, 1.4]]):
- super(AnchorGrid, self).__init__()
- if isinstance(image_size, Integral):
- self.image_size = [image_size, image_size]
+ boxes = paddle.concat(boxes, axis=1)
+ prior_boxes = paddle.concat(prior_boxes)
+ if self.use_fuse_decode:
+ output_boxes = ops.box_coder(
+ prior_boxes,
+ self.prior_box_var,
+ boxes,
+ code_type="decode_center_size",
+ box_normalized=self.is_normalized)
else:
- self.image_size = image_size
- for dim in self.image_size:
- assert dim % 2 ** max_level == 0, \
- "image size should be multiple of the max level stride"
- self.min_level = min_level
- self.max_level = max_level
- self.anchor_base_scale = anchor_base_scale
- self.num_scales = num_scales
- self.aspect_ratios = aspect_ratios
+ pb_w = prior_boxes[:, 2] - prior_boxes[:, 0] + self.norm_delta
+ pb_h = prior_boxes[:, 3] - prior_boxes[:, 1] + self.norm_delta
+ pb_x = prior_boxes[:, 0] + pb_w * 0.5
+ pb_y = prior_boxes[:, 1] + pb_h * 0.5
+ out_x = pb_x + boxes[:, :, 0] * pb_w * self.prior_box_var[0]
+ out_y = pb_y + boxes[:, :, 1] * pb_h * self.prior_box_var[1]
+ out_w = paddle.exp(boxes[:, :, 2] * self.prior_box_var[2]) * pb_w
+ out_h = paddle.exp(boxes[:, :, 3] * self.prior_box_var[3]) * pb_h
+ output_boxes = paddle.stack(
+ [
+ out_x - out_w / 2., out_y - out_h / 2., out_x + out_w / 2.,
+ out_y + out_h / 2.
+ ],
+ axis=-1)
+
+ if self.is_normalized:
+ h = (im_shape[:, 0] / scale_factor[:, 0]).unsqueeze(-1)
+ w = (im_shape[:, 1] / scale_factor[:, 1]).unsqueeze(-1)
+ im_shape = paddle.stack([w, h, w, h], axis=-1)
+ output_boxes *= im_shape
+ else:
+ output_boxes[..., -2:] -= 1.0
+ output_scores = F.softmax(paddle.concat(
+ scores, axis=1)).transpose([0, 2, 1])
- @property
- def base_cell(self):
- if not hasattr(self, '_base_cell'):
- self._base_cell = self.make_cell()
- return self._base_cell
-
- def make_cell(self):
- scales = [2**(i / self.num_scales) for i in range(self.num_scales)]
- scales = np.array(scales)
- ratios = np.array(self.aspect_ratios)
- ws = np.outer(scales, ratios[:, 0]).reshape(-1, 1)
- hs = np.outer(scales, ratios[:, 1]).reshape(-1, 1)
- anchors = np.hstack((-0.5 * ws, -0.5 * hs, 0.5 * ws, 0.5 * hs))
- return anchors
-
- def make_grid(self, stride):
- cell = self.base_cell * stride * self.anchor_base_scale
- x_steps = np.arange(stride // 2, self.image_size[1], stride)
- y_steps = np.arange(stride // 2, self.image_size[0], stride)
- offset_x, offset_y = np.meshgrid(x_steps, y_steps)
- offset_x = offset_x.flatten()
- offset_y = offset_y.flatten()
- offsets = np.stack((offset_x, offset_y, offset_x, offset_y), axis=-1)
- offsets = offsets[:, np.newaxis, :]
- return (cell + offsets).reshape(-1, 4)
-
- def generate(self):
- return [
- self.make_grid(2**l)
- for l in range(self.min_level, self.max_level + 1)
- ]
-
- def __call__(self):
- if not hasattr(self, '_anchor_vars'):
- anchor_vars = []
- helper = LayerHelper('anchor_grid')
- for idx, l in enumerate(range(self.min_level, self.max_level + 1)):
- stride = 2**l
- anchors = self.make_grid(stride)
- var = helper.create_parameter(
- attr=ParamAttr(name='anchors_{}'.format(idx)),
- shape=anchors.shape,
- dtype='float32',
- stop_gradient=True,
- default_initializer=NumpyArrayInitializer(anchors))
- anchor_vars.append(var)
- var.persistable = True
- self._anchor_vars = anchor_vars
-
- return self._anchor_vars
+ return output_boxes, output_scores
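
A toy numpy walk-through of the manual center-size decode branch above (hypothetical prior and deltas, normalized coordinates, so `norm_delta` is 0):

```python
import numpy as np

prior = np.array([0.2, 0.2, 0.6, 0.6])        # x1, y1, x2, y2
tx, ty, tw, th = 0.5, -0.5, 0.2, -0.2         # predicted deltas
var = [0.1, 0.1, 0.2, 0.2]                    # prior_box_var

pb_w, pb_h = prior[2] - prior[0], prior[3] - prior[1]
pb_x, pb_y = prior[0] + pb_w * 0.5, prior[1] + pb_h * 0.5
out_x = pb_x + tx * pb_w * var[0]
out_y = pb_y + ty * pb_h * var[1]
out_w = np.exp(tw * var[2]) * pb_w
out_h = np.exp(th * var[3]) * pb_h
print([out_x - out_w / 2, out_y - out_h / 2,
       out_x + out_w / 2, out_y + out_h / 2])  # decoded xyxy
```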
@register
@@ -722,7 +740,7 @@ class FCOSBox(object):
Postprocess each layer of the output with corresponding locations.
Args:
locations (Tensor): anchor points for current layer, [H*W, 2]
- box_cls (Tensor): categories prediction, [N, C, H, W],
+ box_cls (Tensor): categories prediction, [N, C, H, W],
C is the number of classes
box_reg (Tensor): bounding box prediction, [N, 4, H, W]
box_ctn (Tensor): centerness prediction, [N, 1, H, W]
@@ -807,7 +825,6 @@ class TTFBox(object):
# batch size is 1
scores_r = paddle.reshape(scores, [cat, -1])
topk_scores, topk_inds = paddle.topk(scores_r, k)
- topk_scores, topk_inds = paddle.topk(scores_r, k)
topk_ys = topk_inds // width
topk_xs = topk_inds % width
@@ -1198,11 +1215,11 @@ def _convert_attention_mask(attn_mask, dtype):
to prevents attention to some unwanted positions, usually the
paddings or the subsequent positions. It is a tensor with shape
broadcasted to `[batch_size, n_head, sequence_length, sequence_length]`.
- When the data type is bool, the unwanted positions have `False`
- values and the others have `True` values. When the data type is
- int, the unwanted positions have 0 values and the others have 1
- values. When the data type is float, the unwanted positions have
- `-INF` values and the others have 0 values. It can be None when
+ When the data type is bool, the unwanted positions have `False`
+ values and the others have `True` values. When the data type is
+ int, the unwanted positions have 0 values and the others have 1
+ values. When the data type is float, the unwanted positions have
+ `-INF` values and the others have 0 values. It can be None when
nothing wanted or needed to be prevented attention to. Default None.
dtype (VarType): The target type of `attn_mask` we expect.
Returns:
diff --git a/paddlers/models/ppdet/modeling/losses/__init__.py b/paddlers/models/ppdet/modeling/losses/__init__.py
index dc1cc49..a4a09f0 100644
--- a/paddlers/models/ppdet/modeling/losses/__init__.py
+++ b/paddlers/models/ppdet/modeling/losses/__init__.py
@@ -25,6 +25,8 @@ from . import fairmot_loss
from . import gfocal_loss
from . import detr_loss
from . import sparsercnn_loss
+from . import focal_loss
+from . import smooth_l1_loss
from .yolo_loss import *
from .iou_aware_loss import *
@@ -39,3 +41,5 @@ from .fairmot_loss import *
from .gfocal_loss import *
from .detr_loss import *
from .sparsercnn_loss import *
+from .focal_loss import *
+from .smooth_l1_loss import *
diff --git a/paddlers/models/ppdet/modeling/losses/detr_loss.py b/paddlers/models/ppdet/modeling/losses/detr_loss.py
index 5c654d3..7333903 100644
--- a/paddlers/models/ppdet/modeling/losses/detr_loss.py
+++ b/paddlers/models/ppdet/modeling/losses/detr_loss.py
@@ -80,7 +80,7 @@ class DETRLoss(nn.Layer):
target_label = target_label.reshape([bs, num_query_objects])
if self.use_focal_loss:
target_label = F.one_hot(target_label,
- self.num_classes + 1)[:, :, :-1]
+ self.num_classes + 1)[..., :-1]
return {
'loss_class': self.loss_coeff['class'] * sigmoid_focal_loss(
logits, target_label, num_gts / num_query_objects)
diff --git a/paddlers/models/ppdet/modeling/losses/fairmot_loss.py b/paddlers/models/ppdet/modeling/losses/fairmot_loss.py
old mode 100644
new mode 100755
diff --git a/paddlers/models/ppdet/modeling/losses/fcos_loss.py b/paddlers/models/ppdet/modeling/losses/fcos_loss.py
index db90b7d..675bcd6 100644
--- a/paddlers/models/ppdet/modeling/losses/fcos_loss.py
+++ b/paddlers/models/ppdet/modeling/losses/fcos_loss.py
@@ -30,7 +30,7 @@ def flatten_tensor(inputs, channel_first=False):
Flatten a Tensor
Args:
inputs (Tensor): 4-D Tensor with shape [N, C, H, W] or [N, H, W, C]
- channel_first (bool): If true the dimension order of Tensor is
+ channel_first (bool): If true the dimension order of Tensor is
[N, C, H, W], otherwise is [N, H, W, C]
Return:
output_channel_last (Tensor): The flattened Tensor in channel_last style
diff --git a/paddlers/models/ppdet/modeling/losses/focal_loss.py b/paddlers/models/ppdet/modeling/losses/focal_loss.py
new file mode 100644
index 0000000..508a08c
--- /dev/null
+++ b/paddlers/models/ppdet/modeling/losses/focal_loss.py
@@ -0,0 +1,63 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle
+import paddle.nn.functional as F
+import paddle.nn as nn
+from paddlers.models.ppdet.core.workspace import register
+
+__all__ = ['FocalLoss']
+
+
+@register
+class FocalLoss(nn.Layer):
+ """A wrapper around paddle.nn.functional.sigmoid_focal_loss.
+ Args:
+        use_sigmoid (bool): currently only use_sigmoid=True is supported
+ alpha (float): parameter alpha in Focal Loss
+ gamma (float): parameter gamma in Focal Loss
+ loss_weight (float): final loss will be multiplied by this
+ """
+
+ def __init__(self, use_sigmoid=True, alpha=0.25, gamma=2.0,
+ loss_weight=1.0):
+ super(FocalLoss, self).__init__()
+ assert use_sigmoid == True, \
+ 'Focal Loss only supports sigmoid at the moment'
+ self.use_sigmoid = use_sigmoid
+ self.alpha = alpha
+ self.gamma = gamma
+ self.loss_weight = loss_weight
+
+ def forward(self, pred, target, reduction='none'):
+ """forward function.
+ Args:
+ pred (Tensor): logits of class prediction, of shape (N, num_classes)
+ target (Tensor): target class label, of shape (N, )
+ reduction (str): the way to reduce loss, one of (none, sum, mean)
+ """
+ num_classes = pred.shape[1]
+ target = F.one_hot(target, num_classes + 1).cast(pred.dtype)
+ target = target[:, :-1].detach()
+ loss = F.sigmoid_focal_loss(
+ pred,
+ target,
+ alpha=self.alpha,
+ gamma=self.gamma,
+ reduction=reduction)
+ return loss * self.loss_weight
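
A hedged usage sketch (toy shapes): a label equal to `num_classes` one-hots to an all-zero row after the slice, i.e. pure background.

```python
import paddle

loss_fn = FocalLoss(alpha=0.25, gamma=2.0)
pred = paddle.randn([4, 3])                # [N, num_classes] logits
target = paddle.to_tensor([0, 2, 3, 1])    # 3 == background here
print(loss_fn(pred, target, reduction='mean'))
```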
diff --git a/paddlers/models/ppdet/modeling/losses/iou_loss.py b/paddlers/models/ppdet/modeling/losses/iou_loss.py
index 548fbb4..deb3332 100644
--- a/paddlers/models/ppdet/modeling/losses/iou_loss.py
+++ b/paddlers/models/ppdet/modeling/losses/iou_loss.py
@@ -17,13 +17,13 @@ from __future__ import division
from __future__ import print_function
import numpy as np
-
+import math
import paddle
from paddlers.models.ppdet.core.workspace import register, serializable
from ..bbox_utils import bbox_iou
-__all__ = ['IouLoss', 'GIoULoss', 'DIouLoss']
+__all__ = ['IouLoss', 'GIoULoss', 'DIouLoss', 'SIoULoss']
@register
@@ -208,3 +208,88 @@ class DIouLoss(GIoULoss):
diou = paddle.mean((1 - iouk + ciou_term + diou_term) * iou_weight)
return diou * self.loss_weight
+
+
+@register
+@serializable
+class SIoULoss(GIoULoss):
+ """
+ see https://arxiv.org/pdf/2205.12740.pdf
+ Args:
+ loss_weight (float): siou loss weight, default as 1
+ eps (float): epsilon to avoid divide by zero, default as 1e-10
+        theta (float): exponent of the shape cost, default as 4
+ reduction (str): Options are "none", "mean" and "sum". default as none
+ """
+
+ def __init__(self, loss_weight=1., eps=1e-10, theta=4., reduction='none'):
+ super(SIoULoss, self).__init__(loss_weight=loss_weight, eps=eps)
+ self.loss_weight = loss_weight
+ self.eps = eps
+ self.theta = theta
+ self.reduction = reduction
+
+ def __call__(self, pbox, gbox):
+ x1, y1, x2, y2 = paddle.split(pbox, num_or_sections=4, axis=-1)
+ x1g, y1g, x2g, y2g = paddle.split(gbox, num_or_sections=4, axis=-1)
+
+ box1 = [x1, y1, x2, y2]
+ box2 = [x1g, y1g, x2g, y2g]
+ iou = bbox_iou(box1, box2)
+
+ cx = (x1 + x2) / 2
+ cy = (y1 + y2) / 2
+ w = x2 - x1 + self.eps
+ h = y2 - y1 + self.eps
+
+ cxg = (x1g + x2g) / 2
+ cyg = (y1g + y2g) / 2
+ wg = x2g - x1g + self.eps
+ hg = y2g - y1g + self.eps
+
+ x2 = paddle.maximum(x1, x2)
+ y2 = paddle.maximum(y1, y2)
+
+        # smallest enclosing box covering both pred and gt
+ xc1 = paddle.minimum(x1, x1g)
+ yc1 = paddle.minimum(y1, y1g)
+ xc2 = paddle.maximum(x2, x2g)
+ yc2 = paddle.maximum(y2, y2g)
+
+ cw_out = xc2 - xc1
+ ch_out = yc2 - yc1
+
+ ch = paddle.maximum(cy, cyg) - paddle.minimum(cy, cyg)
+ cw = paddle.maximum(cx, cxg) - paddle.minimum(cx, cxg)
+
+ # angle cost
+ dist_intersection = paddle.sqrt((cx - cxg)**2 + (cy - cyg)**2)
+ sin_angle_alpha = ch / dist_intersection
+ sin_angle_beta = cw / dist_intersection
+ thred = paddle.pow(paddle.to_tensor(2), 0.5) / 2
+ thred.stop_gradient = True
+ sin_alpha = paddle.where(sin_angle_alpha > thred, sin_angle_beta,
+ sin_angle_alpha)
+ angle_cost = paddle.cos(paddle.asin(sin_alpha) * 2 - math.pi / 2)
+
+ # distance cost
+ gamma = 2 - angle_cost
+ # gamma.stop_gradient = True
+ beta_x = ((cxg - cx) / cw_out)**2
+ beta_y = ((cyg - cy) / ch_out)**2
+ dist_cost = 1 - paddle.exp(-gamma * beta_x) + 1 - paddle.exp(-gamma *
+ beta_y)
+
+ # shape cost
+ omega_w = paddle.abs(w - wg) / paddle.maximum(w, wg)
+ omega_h = paddle.abs(hg - h) / paddle.maximum(h, hg)
+ omega = (1 - paddle.exp(-omega_w))**self.theta + (
+ 1 - paddle.exp(-omega_h))**self.theta
+ siou_loss = 1 - iou + (omega + dist_cost) / 2
+
+ if self.reduction == 'mean':
+ siou_loss = paddle.mean(siou_loss)
+ elif self.reduction == 'sum':
+ siou_loss = paddle.sum(siou_loss)
+
+ return siou_loss * self.loss_weight
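
A scalar numpy re-derivation of the three cost terms for one box pair (toy values). Taking `min(ch, cw) / dist` reproduces the `thred = sqrt(2)/2` selection above, because the two sines are complementary.

```python
import numpy as np

px1, py1, px2, py2 = 10., 10., 50., 40.   # pred box
gx1, gy1, gx2, gy2 = 20., 15., 60., 50.   # gt box
eps, theta = 1e-10, 4.

cx, cy = (px1 + px2) / 2, (py1 + py2) / 2
cxg, cyg = (gx1 + gx2) / 2, (gy1 + gy2) / 2
w, h = px2 - px1 + eps, py2 - py1 + eps
wg, hg = gx2 - gx1 + eps, gy2 - gy1 + eps
cw_out, ch_out = max(px2, gx2) - min(px1, gx1), max(py2, gy2) - min(py1, gy1)
cw, ch = abs(cxg - cx), abs(cyg - cy)

# angle cost
dist = np.sqrt(cw ** 2 + ch ** 2)
sin_alpha = min(ch / dist, cw / dist)
angle_cost = np.cos(2 * np.arcsin(sin_alpha) - np.pi / 2)

# distance cost, sharpened by the angle term
gamma = 2 - angle_cost
dist_cost = (1 - np.exp(-gamma * ((cxg - cx) / cw_out) ** 2) +
             1 - np.exp(-gamma * ((cyg - cy) / ch_out) ** 2))

# shape cost
omega = ((1 - np.exp(-abs(w - wg) / max(w, wg))) ** theta +
         (1 - np.exp(-abs(h - hg) / max(h, hg))) ** theta)
print(angle_cost, dist_cost, omega)  # siou = 1 - iou + (omega + dist_cost) / 2
```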
diff --git a/paddlers/models/ppdet/modeling/losses/smooth_l1_loss.py b/paddlers/models/ppdet/modeling/losses/smooth_l1_loss.py
new file mode 100644
index 0000000..7fb1eaf
--- /dev/null
+++ b/paddlers/models/ppdet/modeling/losses/smooth_l1_loss.py
@@ -0,0 +1,61 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddlers.models.ppdet.core.workspace import register
+
+__all__ = ['SmoothL1Loss']
+
+
+@register
+class SmoothL1Loss(nn.Layer):
+ """Smooth L1 Loss.
+ Args:
+        beta (float): controls the smooth region; the loss reduces to L1 loss when beta=0.0
+ loss_weight (float): the final loss will be multiplied by this
+ """
+
+ def __init__(self, beta=1.0, loss_weight=1.0):
+ super(SmoothL1Loss, self).__init__()
+ assert beta >= 0
+ self.beta = beta
+ self.loss_weight = loss_weight
+
+ def forward(self, pred, target, reduction='none'):
+ """forward function, based on fvcore.
+ Args:
+ pred (Tensor): prediction tensor
+ target (Tensor): target tensor, pred.shape must be the same as target.shape
+ reduction (str): the way to reduce loss, one of (none, sum, mean)
+ """
+ assert reduction in ('none', 'sum', 'mean')
+ target = target.detach()
+ if self.beta < 1e-5:
+ loss = paddle.abs(pred - target)
+ else:
+ n = paddle.abs(pred - target)
+ cond = n < self.beta
+ loss = paddle.where(cond, 0.5 * n**2 / self.beta,
+ n - 0.5 * self.beta)
+ if reduction == 'mean':
+ loss = loss.mean() if loss.size > 0 else 0.0 * loss.sum()
+ elif reduction == 'sum':
+ loss = loss.sum()
+ return loss * self.loss_weight
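
A quick hand-check of the piecewise definition (usage sketch; assumes the class is importable as defined above):

```python
import paddle

loss_fn = SmoothL1Loss(beta=1.0)
pred = paddle.to_tensor([[0.5, 2.0]])
target = paddle.to_tensor([[0.0, 0.0]])
print(loss_fn(pred, target, reduction='none'))
# |d| < beta -> 0.5 * d^2 / beta = 0.125 ; otherwise d - 0.5 * beta = 1.5
```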
diff --git a/paddlers/models/ppdet/modeling/losses/sparsercnn_loss.py b/paddlers/models/ppdet/modeling/losses/sparsercnn_loss.py
index e7f2ef1..8f99de8 100644
--- a/paddlers/models/ppdet/modeling/losses/sparsercnn_loss.py
+++ b/paddlers/models/ppdet/modeling/losses/sparsercnn_loss.py
@@ -198,7 +198,7 @@ class SparseRCNNLoss(nn.Layer):
# Retrieve the matching between the outputs of the last layer and the targets
indices = self.matcher(outputs_without_aux, targets)
- # Compute the average number of target boxes accross all nodes, for normalization purposes
+ # Compute the average number of target boxes across all nodes, for normalization purposes
num_boxes = sum(len(t["labels"]) for t in targets)
num_boxes = paddle.to_tensor(
[num_boxes],
diff --git a/paddlers/models/ppdet/modeling/losses/ssd_loss.py b/paddlers/models/ppdet/modeling/losses/ssd_loss.py
index 345f095..53c1198 100644
--- a/paddlers/models/ppdet/modeling/losses/ssd_loss.py
+++ b/paddlers/models/ppdet/modeling/losses/ssd_loss.py
@@ -20,8 +20,7 @@ import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppdet.core.workspace import register
-from ..ops import iou_similarity
-from ..bbox_utils import bbox2delta
+from ..bbox_utils import iou_similarity, bbox2delta
__all__ = ['SSDLoss']
diff --git a/paddlers/models/ppdet/modeling/losses/varifocal_loss.py b/paddlers/models/ppdet/modeling/losses/varifocal_loss.py
index 854b253..030e17a 100644
--- a/paddlers/models/ppdet/modeling/losses/varifocal_loss.py
+++ b/paddlers/models/ppdet/modeling/losses/varifocal_loss.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
# The code is based on:
diff --git a/paddlers/models/ppdet/modeling/losses/yolo_loss.py b/paddlers/models/ppdet/modeling/losses/yolo_loss.py
index fadc303..a0d0f0d 100644
--- a/paddlers/models/ppdet/modeling/losses/yolo_loss.py
+++ b/paddlers/models/ppdet/modeling/losses/yolo_loss.py
@@ -21,7 +21,7 @@ import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppdet.core.workspace import register
-from ..bbox_utils import decode_yolo, xywh2xyxy, iou_similarity
+from ..bbox_utils import decode_yolo, xywh2xyxy, batch_iou_similarity
__all__ = ['YOLOv3Loss']
@@ -56,7 +56,7 @@ class YOLOv3Loss(nn.Layer):
downsample (list): downsample ratio for each detection block
scale_x_y (float): scale_x_y factor
iou_loss (object): IoULoss instance
- iou_aware_loss (object): IouAwareLoss instance
+ iou_aware_loss (object): IouAwareLoss instance
"""
super(YOLOv3Loss, self).__init__()
self.num_classes = num_classes
@@ -80,7 +80,7 @@ class YOLOv3Loss(nn.Layer):
gwh = gbox[:, :, 0:2] + gbox[:, :, 2:4] * 0.5
gbox = paddle.concat([gxy, gwh], axis=-1)
- iou = iou_similarity(pbox, gbox)
+ iou = batch_iou_similarity(pbox, gbox)
iou.stop_gradient = True
iou_max = iou.max(2) # [N, M1]
iou_mask = paddle.cast(iou_max <= self.ignore_thresh, dtype=pbox.dtype)
diff --git a/paddlers/models/ppdet/modeling/mot/matching/__init__.py b/paddlers/models/ppdet/modeling/mot/matching/__init__.py
index 9b2f207..7a25400 100644
--- a/paddlers/models/ppdet/modeling/mot/matching/__init__.py
+++ b/paddlers/models/ppdet/modeling/mot/matching/__init__.py
@@ -14,6 +14,8 @@
from . import jde_matching
from . import deepsort_matching
+from . import ocsort_matching
from .jde_matching import *
from .deepsort_matching import *
+from .ocsort_matching import *
diff --git a/paddlers/models/ppdet/modeling/mot/matching/deepsort_matching.py b/paddlers/models/ppdet/modeling/mot/matching/deepsort_matching.py
index 1a50b30..85fba90 100644
--- a/paddlers/models/ppdet/modeling/mot/matching/deepsort_matching.py
+++ b/paddlers/models/ppdet/modeling/mot/matching/deepsort_matching.py
@@ -78,8 +78,8 @@ def iou_cost(tracks, detections, track_indices=None, detection_indices=None):
that should be matched. Defaults to all `detections`.
Returns:
- cost_matrix (ndarray): A cost matrix of shape len(track_indices),
- len(detection_indices) where entry (i, j) is
+ cost_matrix (ndarray): A cost matrix of shape len(track_indices),
+ len(detection_indices) where entry (i, j) is
`1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`.
"""
if track_indices is None:
@@ -152,7 +152,7 @@ class NearestNeighborDistanceMetric(object):
budget (Optional[int]): If not None, fix samples per class to at most
this number. Removes the oldest samples when the budget is reached.
- Attributes:
+ Attributes:
samples (Dict[int -> List[ndarray]]): A dictionary that maps from target
identities to the list of samples that have been observed so far.
"""
@@ -216,8 +216,8 @@ def min_cost_matching(distance_metric,
Args:
distance_metric :
Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray
- The distance metric is given a list of tracks and detections as
- well as a list of N track indices and M detection indices. The
+ The distance metric is given a list of tracks and detections as
+ well as a list of N track indices and M detection indices. The
metric should return the NxM dimensional cost matrix, where element
(i, j) is the association cost between the i-th track in the given
track indices and the j-th detection in the given detection_indices.
@@ -284,8 +284,8 @@ def matching_cascade(distance_metric,
Args:
distance_metric :
Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray
- The distance metric is given a list of tracks and detections as
- well as a list of N track indices and M detection indices. The
+ The distance metric is given a list of tracks and detections as
+ well as a list of N track indices and M detection indices. The
metric should return the NxM dimensional cost matrix, where element
(i, j) is the association cost between the i-th track in the given
track indices and the j-th detection in the given detection_indices.
diff --git a/paddlers/models/ppdet/modeling/mot/matching/jde_matching.py b/paddlers/models/ppdet/modeling/mot/matching/jde_matching.py
index 08a1963..89be751 100644
--- a/paddlers/models/ppdet/modeling/mot/matching/jde_matching.py
+++ b/paddlers/models/ppdet/modeling/mot/matching/jde_matching.py
@@ -15,7 +15,14 @@
This code is based on https://github.com/Zhongdao/Towards-Realtime-MOT/blob/master/tracker/matching.py
"""
-import lap
+try:
+ import lap
+except:
+ print(
+ 'Warning: Unable to use JDE/FairMOT/ByteTrack, please install lap, for example: `pip install lap`, see https://github.com/gatagat/lap'
+ )
+ pass
+
import scipy
import numpy as np
from scipy.spatial.distance import cdist
@@ -26,7 +33,7 @@ warnings.filterwarnings("ignore")
__all__ = [
'merge_matches',
'linear_assignment',
- 'cython_bbox_ious',
+ 'bbox_ious',
'iou_distance',
'embedding_distance',
'fuse_motion',
@@ -53,6 +60,12 @@ def merge_matches(m1, m2, shape):
def linear_assignment(cost_matrix, thresh):
+ try:
+ import lap
+ except Exception as e:
+ raise RuntimeError(
+ 'Unable to use JDE/FairMOT/ByteTrack, please install lap, for example: `pip install lap`, see https://github.com/gatagat/lap'
+ )
if cost_matrix.size == 0:
return np.empty(
(0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(
@@ -68,22 +81,28 @@ def linear_assignment(cost_matrix, thresh):
return matches, unmatched_a, unmatched_b
-def cython_bbox_ious(atlbrs, btlbrs):
- ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float)
- if ious.size == 0:
+def bbox_ious(atlbrs, btlbrs):
+ boxes = np.ascontiguousarray(atlbrs, dtype=np.float)
+ query_boxes = np.ascontiguousarray(btlbrs, dtype=np.float)
+ N = boxes.shape[0]
+ K = query_boxes.shape[0]
+ ious = np.zeros((N, K), dtype=boxes.dtype)
+ if N * K == 0:
return ious
- try:
- import cython_bbox
- except Exception as e:
- print('cython_bbox not found, please install cython_bbox.'
- 'for example: `pip install cython_bbox`.')
- raise e
-
- ious = cython_bbox.bbox_overlaps(
- np.ascontiguousarray(
- atlbrs, dtype=np.float),
- np.ascontiguousarray(
- btlbrs, dtype=np.float))
+
+ for k in range(K):
+ box_area = ((query_boxes[k, 2] - query_boxes[k, 0] + 1) *
+ (query_boxes[k, 3] - query_boxes[k, 1] + 1))
+ for n in range(N):
+ iw = (min(boxes[n, 2], query_boxes[k, 2]) - max(
+ boxes[n, 0], query_boxes[k, 0]) + 1)
+ if iw > 0:
+ ih = (min(boxes[n, 3], query_boxes[k, 3]) - max(
+ boxes[n, 1], query_boxes[k, 1]) + 1)
+ if ih > 0:
+ ua = float((boxes[n, 2] - boxes[n, 0] + 1) * (boxes[
+ n, 3] - boxes[n, 1] + 1) + box_area - iw * ih)
+ ious[n, k] = iw * ih / ua
return ious
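
The plain-Python loop keeps the exact `+1` integer-box convention of the removed cython_bbox dependency. For reference, a vectorized numpy equivalent (a sketch, not part of the patch):

```python
import numpy as np

def bbox_ious_vec(atlbrs, btlbrs):
    a = np.ascontiguousarray(atlbrs, dtype=np.float64)
    b = np.ascontiguousarray(btlbrs, dtype=np.float64)
    if len(a) == 0 or len(b) == 0:
        return np.zeros((len(a), len(b)), dtype=np.float64)
    tl = np.maximum(a[:, None, :2], b[None, :, :2])    # intersection top-left
    br = np.minimum(a[:, None, 2:4], b[None, :, 2:4])  # intersection bottom-right
    wh = np.clip(br - tl + 1, 0, None)                 # +1: inclusive pixels
    inter = wh[..., 0] * wh[..., 1]
    area_a = (a[:, 2] - a[:, 0] + 1) * (a[:, 3] - a[:, 1] + 1)
    area_b = (b[:, 2] - b[:, 0] + 1) * (b[:, 3] - b[:, 1] + 1)
    return inter / (area_a[:, None] + area_b[None, :] - inter)
```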
@@ -98,7 +117,7 @@ def iou_distance(atracks, btracks):
else:
atlbrs = [track.tlbr for track in atracks]
btlbrs = [track.tlbr for track in btracks]
- _ious = cython_bbox_ious(atlbrs, btlbrs)
+ _ious = bbox_ious(atlbrs, btlbrs)
cost_matrix = 1 - _ious
return cost_matrix
diff --git a/paddlers/models/ppdet/modeling/mot/matching/ocsort_matching.py b/paddlers/models/ppdet/modeling/mot/matching/ocsort_matching.py
new file mode 100644
index 0000000..a32d761
--- /dev/null
+++ b/paddlers/models/ppdet/modeling/mot/matching/ocsort_matching.py
@@ -0,0 +1,124 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+This code is based on https://github.com/noahcao/OC_SORT/blob/master/trackers/ocsort_tracker/association.py
+"""
+
+import os
+import numpy as np
+
+
+def iou_batch(bboxes1, bboxes2):
+ bboxes2 = np.expand_dims(bboxes2, 0)
+ bboxes1 = np.expand_dims(bboxes1, 1)
+
+ xx1 = np.maximum(bboxes1[..., 0], bboxes2[..., 0])
+ yy1 = np.maximum(bboxes1[..., 1], bboxes2[..., 1])
+ xx2 = np.minimum(bboxes1[..., 2], bboxes2[..., 2])
+ yy2 = np.minimum(bboxes1[..., 3], bboxes2[..., 3])
+ w = np.maximum(0., xx2 - xx1)
+ h = np.maximum(0., yy2 - yy1)
+ area = w * h
+ iou_matrix = area / ((bboxes1[..., 2] - bboxes1[..., 0]) *
+ (bboxes1[..., 3] - bboxes1[..., 1]) +
+ (bboxes2[..., 2] - bboxes2[..., 0]) *
+ (bboxes2[..., 3] - bboxes2[..., 1]) - area)
+ return iou_matrix
+
+
+def speed_direction_batch(dets, tracks):
+ tracks = tracks[..., np.newaxis]
+ CX1, CY1 = (dets[:, 0] + dets[:, 2]) / 2.0, (dets[:, 1] + dets[:, 3]) / 2.0
+ CX2, CY2 = (tracks[:, 0] + tracks[:, 2]) / 2.0, (
+ tracks[:, 1] + tracks[:, 3]) / 2.0
+ dx = CX1 - CX2
+ dy = CY1 - CY2
+ norm = np.sqrt(dx**2 + dy**2) + 1e-6
+ dx = dx / norm
+ dy = dy / norm
+ return dy, dx
+
+
+def linear_assignment(cost_matrix):
+ try:
+ import lap
+ _, x, y = lap.lapjv(cost_matrix, extend_cost=True)
+ return np.array([[y[i], i] for i in x if i >= 0])
+ except ImportError:
+ from scipy.optimize import linear_sum_assignment
+ x, y = linear_sum_assignment(cost_matrix)
+ return np.array(list(zip(x, y)))
+
+
+def associate(detections, trackers, iou_threshold, velocities, previous_obs,
+ vdc_weight):
+ if (len(trackers) == 0):
+ return np.empty(
+ (0, 2), dtype=int), np.arange(len(detections)), np.empty(
+ (0, 5), dtype=int)
+
+ Y, X = speed_direction_batch(detections, previous_obs)
+ inertia_Y, inertia_X = velocities[:, 0], velocities[:, 1]
+ inertia_Y = np.repeat(inertia_Y[:, np.newaxis], Y.shape[1], axis=1)
+ inertia_X = np.repeat(inertia_X[:, np.newaxis], X.shape[1], axis=1)
+ diff_angle_cos = inertia_X * X + inertia_Y * Y
+ diff_angle_cos = np.clip(diff_angle_cos, a_min=-1, a_max=1)
+ diff_angle = np.arccos(diff_angle_cos)
+ diff_angle = (np.pi / 2.0 - np.abs(diff_angle)) / np.pi
+
+ valid_mask = np.ones(previous_obs.shape[0])
+ valid_mask[np.where(previous_obs[:, 4] < 0)] = 0
+
+ iou_matrix = iou_batch(detections, trackers)
+ scores = np.repeat(
+ detections[:, -1][:, np.newaxis], trackers.shape[0], axis=1)
+    # iou_matrix = iou_matrix * scores  # a trick that sometimes works; we don't encourage it
+ valid_mask = np.repeat(valid_mask[:, np.newaxis], X.shape[1], axis=1)
+
+ angle_diff_cost = (valid_mask * diff_angle) * vdc_weight
+ angle_diff_cost = angle_diff_cost.T
+ angle_diff_cost = angle_diff_cost * scores
+
+ if min(iou_matrix.shape) > 0:
+ a = (iou_matrix > iou_threshold).astype(np.int32)
+ if a.sum(1).max() == 1 and a.sum(0).max() == 1:
+ matched_indices = np.stack(np.where(a), axis=1)
+ else:
+ matched_indices = linear_assignment(-(iou_matrix + angle_diff_cost))
+ else:
+ matched_indices = np.empty(shape=(0, 2))
+
+ unmatched_detections = []
+ for d, det in enumerate(detections):
+ if (d not in matched_indices[:, 0]):
+ unmatched_detections.append(d)
+ unmatched_trackers = []
+ for t, trk in enumerate(trackers):
+ if (t not in matched_indices[:, 1]):
+ unmatched_trackers.append(t)
+
+ # filter out matched with low IOU
+ matches = []
+ for m in matched_indices:
+ if (iou_matrix[m[0], m[1]] < iou_threshold):
+ unmatched_detections.append(m[0])
+ unmatched_trackers.append(m[1])
+ else:
+ matches.append(m.reshape(1, 2))
+ if (len(matches) == 0):
+ matches = np.empty((0, 2), dtype=int)
+ else:
+ matches = np.concatenate(matches, axis=0)
+
+ return matches, np.array(unmatched_detections), np.array(unmatched_trackers)
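
A toy call of `associate` (assumed shapes: detections and trackers are `[x1, y1, x2, y2, score]` rows, with one velocity and one previous observation per tracker):

```python
import numpy as np

dets = np.array([[10., 10., 50., 50., 0.9]])
trks = np.array([[12., 11., 52., 49., 0.8]])
vels = np.array([[0., 1.]])                  # [dy, dx] unit direction per track
prev = np.array([[8., 9., 48., 48., 0.8]])   # last observation per track
m, ud, ut = associate(dets, trks, iou_threshold=0.3,
                      velocities=vels, previous_obs=prev, vdc_weight=0.2)
print(m, ud, ut)  # -> [[0 0]] [] []  (the IoU here is ~0.86)
```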
diff --git a/paddlers/models/ppdet/modeling/mot/motion/kalman_filter.py b/paddlers/models/ppdet/modeling/mot/motion/kalman_filter.py
index 6de6198..6714a00 100644
--- a/paddlers/models/ppdet/modeling/mot/motion/kalman_filter.py
+++ b/paddlers/models/ppdet/modeling/mot/motion/kalman_filter.py
@@ -83,7 +83,7 @@ class KalmanFilter(object):
Returns:
The mean vector (8 dimensional) and covariance matrix (8x8
- dimensional) of the new track. Unobserved velocities are
+ dimensional) of the new track. Unobserved velocities are
initialized to 0 mean.
"""
mean_pos = measurement
@@ -112,7 +112,7 @@ class KalmanFilter(object):
object state at the previous time step.
Returns:
- The mean vector and covariance matrix of the predicted state.
+ The mean vector and covariance matrix of the predicted state.
Unobserved velocities are initialized to 0 mean.
"""
std_pos = [
@@ -157,7 +157,7 @@ class KalmanFilter(object):
def multi_predict(self, mean, covariance):
"""
Run Kalman filter prediction step (Vectorized version).
-
+
Args:
mean (ndarray): The Nx8 dimensional mean matrix of the object states
at the previous time step.
@@ -231,7 +231,7 @@ class KalmanFilter(object):
A suitable distance threshold can be obtained from `chi2inv95`. If
`only_position` is False, the chi-square distribution has 4 degrees of
freedom, otherwise 2.
-
+
Args:
mean (ndarray): Mean vector over the state distribution (8
dimensional).
@@ -240,7 +240,7 @@ class KalmanFilter(object):
measurements (ndarray): An Nx4 dimensional matrix of N measurements,
each in format (x, y, a, h) where (x, y) is the bounding box center
position, a the aspect ratio, and h the height.
- only_position (Optional[bool]): If True, distance computation is
+ only_position (Optional[bool]): If True, distance computation is
done with respect to the bounding box center position only.
metric (str): Metric type, 'gaussian' or 'maha'.
diff --git a/paddlers/models/ppdet/modeling/mot/tracker/__init__.py b/paddlers/models/ppdet/modeling/mot/tracker/__init__.py
index f97fe45..8019cac 100644
--- a/paddlers/models/ppdet/modeling/mot/tracker/__init__.py
+++ b/paddlers/models/ppdet/modeling/mot/tracker/__init__.py
@@ -16,8 +16,10 @@ from . import base_jde_tracker
from . import base_sde_tracker
from . import jde_tracker
from . import deepsort_tracker
+from . import ocsort_tracker
from .base_jde_tracker import *
from .base_sde_tracker import *
from .jde_tracker import *
from .deepsort_tracker import *
+from .ocsort_tracker import *
diff --git a/paddlers/models/ppdet/modeling/mot/tracker/base_jde_tracker.py b/paddlers/models/ppdet/modeling/mot/tracker/base_jde_tracker.py
index 37b2501..6ee42a0 100644
--- a/paddlers/models/ppdet/modeling/mot/tracker/base_jde_tracker.py
+++ b/paddlers/models/ppdet/modeling/mot/tracker/base_jde_tracker.py
@@ -51,7 +51,7 @@ class BaseTrack(object):
history = OrderedDict()
features = []
- curr_feature = None
+ curr_feat = None
score = 0
start_frame = 0
frame_id = 0
diff --git a/paddlers/models/ppdet/modeling/mot/tracker/deepsort_tracker.py b/paddlers/models/ppdet/modeling/mot/tracker/deepsort_tracker.py
index 221890a..4000fe6 100644
--- a/paddlers/models/ppdet/modeling/mot/tracker/deepsort_tracker.py
+++ b/paddlers/models/ppdet/modeling/mot/tracker/deepsort_tracker.py
@@ -47,12 +47,12 @@ class DeepSORTTracker(object):
Removes the oldest samples when the budget is reached.
max_age (int): maximum number of missed misses before a track is deleted
n_init (float): Number of frames that a track remains in initialization
- phase. Number of consecutive detections before the track is confirmed.
- The track state is set to `Deleted` if a miss occurs within the first
+ phase. Number of consecutive detections before the track is confirmed.
+ The track state is set to `Deleted` if a miss occurs within the first
`n_init` frames.
- metric_type (str): either "euclidean" or "cosine", the distance metric
+ metric_type (str): either "euclidean" or "cosine", the distance metric
used for measurement to track association.
- matching_threshold (float): samples with larger distance are
+ matching_threshold (float): samples with larger distance are
considered an invalid match.
max_iou_distance (float): max iou distance threshold
motion (object): KalmanFilter instance
@@ -96,13 +96,16 @@ class DeepSORTTracker(object):
Perform measurement update and track management.
Args:
pred_dets (np.array): Detection results of the image, the shape is
- [N, 6], means 'x0, y0, x1, y1, score, cls_id'.
+ [N, 6], means 'cls_id, score, x0, y0, x1, y1'.
pred_embs (np.array): Embedding results of the image, the shape is
[N, 128], usually pred_embs.shape[1] is a multiple of 128.
"""
- pred_tlwhs = pred_dets[:, :4]
- pred_scores = pred_dets[:, 4:5]
- pred_cls_ids = pred_dets[:, 5:]
+ pred_cls_ids = pred_dets[:, 0:1]
+ pred_scores = pred_dets[:, 1:2]
+ pred_xyxys = pred_dets[:, 2:6]
+ pred_tlwhs = np.concatenate(
+ (pred_xyxys[:, 0:2], pred_xyxys[:, 2:4] - pred_xyxys[:, 0:2] + 1),
+ axis=1)
detections = [
Detection(tlwh, score, feat, cls_id)
diff --git a/paddlers/models/ppdet/modeling/mot/tracker/jde_tracker.py b/paddlers/models/ppdet/modeling/mot/tracker/jde_tracker.py
index 2d0d308..d2ac7fe 100644
--- a/paddlers/models/ppdet/modeling/mot/tracker/jde_tracker.py
+++ b/paddlers/models/ppdet/modeling/mot/tracker/jde_tracker.py
@@ -38,22 +38,30 @@ class JDETracker(object):
JDE tracker, support single class and multi classes
Args:
+        use_byte (bool): Whether to use ByteTracker, default False
num_classes (int): the number of classes
det_thresh (float): threshold of detection score
track_buffer (int): buffer for tracker
min_box_area (int): min box area to filter out low quality boxes
vertical_ratio (float): w/h, the vertical ratio of the bbox to filter
- bad results. If set <0 means no need to filter bboxes,usually set
+        bad results. If set <= 0, bboxes are not filtered; usually set
1.6 for pedestrian tracking.
- tracked_thresh (float): linear assignment threshold of tracked
+ tracked_thresh (float): linear assignment threshold of tracked
stracks and detections
- r_tracked_thresh (float): linear assignment threshold of
+ r_tracked_thresh (float): linear assignment threshold of
tracked stracks and unmatched detections
- unconfirmed_thresh (float): linear assignment threshold of
+ unconfirmed_thresh (float): linear assignment threshold of
unconfirmed stracks and unmatched detections
+ conf_thres (float): confidence threshold for tracking, also used in
+            ByteTracker as the higher confidence threshold
+ match_thres (float): linear assignment threshold of tracked
+ stracks and detections in ByteTracker
+ low_conf_thres (float): lower confidence threshold for tracking in
+ ByteTracker
+        input_size (list): input feature map size for the ReID model, in [h, w] format,
+ [64, 192] as default.
motion (str): motion model, KalmanFilter as default
- conf_thres (float): confidence threshold for tracking
- metric_type (str): either "euclidean" or "cosine", the distance metric
+ metric_type (str): either "euclidean" or "cosine", the distance metric
used for measurement to track association.
"""
@@ -62,14 +70,15 @@ class JDETracker(object):
num_classes=1,
det_thresh=0.3,
track_buffer=30,
- min_box_area=200,
- vertical_ratio=1.6,
+ min_box_area=0,
+ vertical_ratio=0,
tracked_thresh=0.7,
r_tracked_thresh=0.5,
unconfirmed_thresh=0.7,
conf_thres=0,
match_thres=0.8,
low_conf_thres=0.2,
+ input_size=[64, 192],
motion='KalmanFilter',
metric_type='euclidean'):
self.use_byte = use_byte
@@ -86,6 +95,7 @@ class JDETracker(object):
self.match_thres = match_thres
self.low_conf_thres = low_conf_thres
+ self.input_size = input_size
if motion == 'KalmanFilter':
self.motion = KalmanFilter()
self.metric_type = metric_type
@@ -106,13 +116,13 @@ class JDETracker(object):
Args:
pred_dets (np.array): Detection results of the image, the shape is
- [N, 6], means 'x0, y0, x1, y1, score, cls_id'.
+ [N, 6], means 'cls_id, score, x0, y0, x1, y1'.
pred_embs (np.array): Embedding results of the image, the shape is
[N, 128] or [N, 512].
Return:
output_stracks_dict (dict(list)): The list contains information
- regarding the online_tracklets for the recieved image tensor.
+ regarding the online_tracklets for the received image tensor.
"""
self.frame_id += 1
if self.frame_id == 1:
@@ -128,7 +138,7 @@ class JDETracker(object):
# unify single and multi classes detection and embedding results
for cls_id in range(self.num_classes):
- cls_idx = (pred_dets[:, 5:] == cls_id).squeeze(-1)
+ cls_idx = (pred_dets[:, 0:1] == cls_id).squeeze(-1)
pred_dets_dict[cls_id] = pred_dets[cls_idx]
if pred_embs is not None:
pred_embs_dict[cls_id] = pred_embs[cls_idx]
@@ -139,14 +149,15 @@ class JDETracker(object):
""" Step 1: Get detections by class"""
pred_dets_cls = pred_dets_dict[cls_id]
pred_embs_cls = pred_embs_dict[cls_id]
- remain_inds = (pred_dets_cls[:, 4:5] > self.conf_thres).squeeze(-1)
+ remain_inds = (pred_dets_cls[:, 1:2] > self.conf_thres).squeeze(-1)
if remain_inds.sum() > 0:
pred_dets_cls = pred_dets_cls[remain_inds]
- if self.use_byte:
+ if pred_embs_cls is None:
+ # in original ByteTrack
detections = [
STrack(
- STrack.tlbr_to_tlwh(tlbrs[:4]),
- tlbrs[4],
+ STrack.tlbr_to_tlwh(tlbrs[2:6]),
+ tlbrs[1],
cls_id,
30,
temp_feat=None) for tlbrs in pred_dets_cls
@@ -155,7 +166,7 @@ class JDETracker(object):
pred_embs_cls = pred_embs_cls[remain_inds]
detections = [
STrack(
- STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], cls_id,
+ STrack.tlbr_to_tlwh(tlbrs[2:6]), tlbrs[1], cls_id,
30, temp_feat)
for (tlbrs, temp_feat
) in zip(pred_dets_cls, pred_embs_cls)
@@ -181,11 +192,12 @@ class JDETracker(object):
# Predict the current location with KalmanFilter
STrack.multi_predict(track_pool_dict[cls_id], self.motion)
- if self.use_byte:
+ if pred_embs_cls is None:
+ # in original ByteTrack
dists = matching.iou_distance(track_pool_dict[cls_id],
detections)
matches, u_track, u_detection = matching.linear_assignment(
- dists, thresh=self.match_thres) #
+ dists, thresh=self.match_thres) # not self.tracked_thresh
else:
dists = matching.embedding_distance(
track_pool_dict[cls_id],
@@ -213,22 +225,34 @@ class JDETracker(object):
# None of the steps below happen if there are no undetected tracks.
""" Step 3: Second association, with IOU"""
if self.use_byte:
- inds_low = pred_dets_dict[cls_id][:, 4:5] > self.low_conf_thres
- inds_high = pred_dets_dict[cls_id][:, 4:5] < self.conf_thres
+ inds_low = pred_dets_dict[cls_id][:, 1:2] > self.low_conf_thres
+ inds_high = pred_dets_dict[cls_id][:, 1:2] < self.conf_thres
inds_second = np.logical_and(inds_low, inds_high).squeeze(-1)
pred_dets_cls_second = pred_dets_dict[cls_id][inds_second]
# association the untrack to the low score detections
if len(pred_dets_cls_second) > 0:
- detections_second = [
- STrack(
- STrack.tlbr_to_tlwh(tlbrs[:4]),
- tlbrs[4],
- cls_id,
- 30,
- temp_feat=None)
- for tlbrs in pred_dets_cls_second[:, :5]
- ]
+ if pred_embs_dict[cls_id] is None:
+ # in original ByteTrack
+ detections_second = [
+ STrack(
+ STrack.tlbr_to_tlwh(tlbrs[2:6]),
+ tlbrs[1],
+ cls_id,
+ 30,
+ temp_feat=None)
+ for tlbrs in pred_dets_cls_second
+ ]
+ else:
+ pred_embs_cls_second = pred_embs_dict[cls_id][
+ inds_second]
+ detections_second = [
+ STrack(
+ STrack.tlbr_to_tlwh(tlbrs[2:6]), tlbrs[1],
+ cls_id, 30, temp_feat)
+ for (tlbrs, temp_feat) in zip(pred_dets_cls_second,
+ pred_embs_cls_second)
+ ]
else:
detections_second = []
r_tracked_stracks = [
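
A toy sketch of how the re-ordered detection rows are sliced in the updated tracker (hypothetical values):

```python
import numpy as np

# pred_dets rows are now [cls_id, score, x0, y0, x1, y1]
pred_dets = np.array([
    [0, 0.92, 10, 10, 50, 80],
    [0, 0.35, 60, 20, 90, 70],
])
cls_idx = (pred_dets[:, 0:1] == 0).squeeze(-1)            # per-class split
remain = (pred_dets[cls_idx][:, 1:2] > 0.5).squeeze(-1)   # conf_thres filter
print(pred_dets[cls_idx][remain])  # keeps only the 0.92 detection
```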
diff --git a/paddlers/models/ppdet/modeling/mot/tracker/ocsort_tracker.py b/paddlers/models/ppdet/modeling/mot/tracker/ocsort_tracker.py
new file mode 100644
index 0000000..b86eb5e
--- /dev/null
+++ b/paddlers/models/ppdet/modeling/mot/tracker/ocsort_tracker.py
@@ -0,0 +1,369 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+This code is based on https://github.com/noahcao/OC_SORT/blob/master/trackers/ocsort_tracker/ocsort.py
+"""
+
+import numpy as np
+try:
+ from filterpy.kalman import KalmanFilter
+except:
+ print(
+ 'Warning: Unable to use OC-SORT, please install filterpy, for example: `pip install filterpy`, see https://github.com/rlabbe/filterpy'
+ )
+ pass
+
+from ..matching.ocsort_matching import associate, linear_assignment, iou_batch
+from paddlers.models.ppdet.core.workspace import register, serializable
+
+
+def k_previous_obs(observations, cur_age, k):
+ if len(observations) == 0:
+ return [-1, -1, -1, -1, -1]
+ for i in range(k):
+ dt = k - i
+ if cur_age - dt in observations:
+ return observations[cur_age - dt]
+ max_age = max(observations.keys())
+ return observations[max_age]
+
+
+def convert_bbox_to_z(bbox):
+ """
+    Takes a bounding box in the form [x1,y1,x2,y2] and returns z in the form
+    [x,y,s,r], where (x, y) is the centre of the box, s is the scale (area),
+    and r is the aspect ratio.
+ """
+ w = bbox[2] - bbox[0]
+ h = bbox[3] - bbox[1]
+ x = bbox[0] + w / 2.
+ y = bbox[1] + h / 2.
+ s = w * h # scale is just area
+ r = w / float(h + 1e-6)
+ return np.array([x, y, s, r]).reshape((4, 1))
+
+
+def convert_x_to_bbox(x, score=None):
+ """
+    Takes a bounding box in the centre form [x,y,s,r] and returns it in the form
+    [x1,y1,x2,y2], where (x1, y1) is the top-left and (x2, y2) is the
+    bottom-right corner.
+ """
+ w = np.sqrt(x[2] * x[3])
+ h = x[2] / w
+    if score is None:
+ return np.array(
+ [x[0] - w / 2., x[1] - h / 2., x[0] + w / 2.,
+ x[1] + h / 2.]).reshape((1, 4))
+ else:
+ score = np.array([score])
+ return np.array([
+ x[0] - w / 2., x[1] - h / 2., x[0] + w / 2., x[1] + h / 2., score
+ ]).reshape((1, 5))
+
+
+def speed_direction(bbox1, bbox2):
+ cx1, cy1 = (bbox1[0] + bbox1[2]) / 2.0, (bbox1[1] + bbox1[3]) / 2.0
+ cx2, cy2 = (bbox2[0] + bbox2[2]) / 2.0, (bbox2[1] + bbox2[3]) / 2.0
+ speed = np.array([cy2 - cy1, cx2 - cx1])
+ norm = np.sqrt((cy2 - cy1)**2 + (cx2 - cx1)**2) + 1e-6
+ return speed / norm
+
+
+class KalmanBoxTracker(object):
+ """
+ This class represents the internal state of individual tracked objects observed as bbox.
+
+ Args:
+ bbox (np.array): bbox in [x1,y1,x2,y2,score] format.
+        delta_t (int): time step gap to the previous observation
+ """
+ count = 0
+
+ def __init__(self, bbox, delta_t=3):
+ try:
+ from filterpy.kalman import KalmanFilter
+        except ImportError:
+ raise RuntimeError(
+ 'Unable to use OC-SORT, please install filterpy, for example: `pip install filterpy`, see https://github.com/rlabbe/filterpy'
+ )
+ self.kf = KalmanFilter(dim_x=7, dim_z=4)
+ self.kf.F = np.array([[1, 0, 0, 0, 1, 0, 0], [0, 1, 0, 0, 0, 1, 0],
+ [0, 0, 1, 0, 0, 0, 1], [0, 0, 0, 1, 0, 0, 0],
+ [0, 0, 0, 0, 1, 0, 0], [0, 0, 0, 0, 0, 1, 0],
+ [0, 0, 0, 0, 0, 0, 1]])
+ self.kf.H = np.array([[1, 0, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0, 0],
+ [0, 0, 1, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0, 0]])
+ self.kf.R[2:, 2:] *= 10.
+ self.kf.P[4:, 4:] *= 1000.
+ # give high uncertainty to the unobservable initial velocities
+ self.kf.P *= 10.
+ self.kf.Q[-1, -1] *= 0.01
+ self.kf.Q[4:, 4:] *= 0.01
+
+ self.score = bbox[4]
+ self.kf.x[:4] = convert_bbox_to_z(bbox)
+ self.time_since_update = 0
+ self.id = KalmanBoxTracker.count
+ KalmanBoxTracker.count += 1
+ self.history = []
+ self.hits = 0
+ self.hit_streak = 0
+ self.age = 0
+ """
+        NOTE: [-1,-1,-1,-1,-1] is a placeholder for the non-observation status; the
+        return of k_previous_obs uses the same convention. It is ugly, but it allows
+        the observation array to be built in a fast, uniform way (see
+        k_observations = np.array([k_previous_obs(...)]) below), so bear with it for now.
+ """
+ self.last_observation = np.array([-1, -1, -1, -1, -1]) # placeholder
+ self.observations = dict()
+ self.history_observations = []
+ self.velocity = None
+ self.delta_t = delta_t
+
+ def update(self, bbox):
+ """
+ Updates the state vector with observed bbox.
+ """
+ if bbox is not None:
+            if self.last_observation.sum() >= 0:  # a previous observation exists
+ previous_box = None
+ for i in range(self.delta_t):
+ dt = self.delta_t - i
+ if self.age - dt in self.observations:
+ previous_box = self.observations[self.age - dt]
+ break
+ if previous_box is None:
+ previous_box = self.last_observation
+ """
+ Estimate the track speed direction with observations \Delta t steps away
+ """
+ self.velocity = speed_direction(previous_box, bbox)
+ """
+            Insert new observations. This is an ugly way to maintain both self.observations
+            and self.history_observations, but bear with it for the moment.
+ """
+ self.last_observation = bbox
+ self.observations[self.age] = bbox
+ self.history_observations.append(bbox)
+
+ self.time_since_update = 0
+ self.history = []
+ self.hits += 1
+ self.hit_streak += 1
+ self.kf.update(convert_bbox_to_z(bbox))
+ else:
+ self.kf.update(bbox)
+
+ def predict(self):
+ """
+ Advances the state vector and returns the predicted bounding box estimate.
+ """
+ if ((self.kf.x[6] + self.kf.x[2]) <= 0):
+ self.kf.x[6] *= 0.0
+
+ self.kf.predict()
+ self.age += 1
+ if (self.time_since_update > 0):
+ self.hit_streak = 0
+ self.time_since_update += 1
+ self.history.append(convert_x_to_bbox(self.kf.x, score=self.score))
+ return self.history[-1]
+
+ def get_state(self):
+ return convert_x_to_bbox(self.kf.x, score=self.score)
+
+
+@register
+@serializable
+class OCSORTTracker(object):
+ """
+    OCSORT tracker, supporting a single class only.
+
+    Args:
+        det_thresh (float): threshold of detection score
+        max_age (int): maximum number of consecutive missed detections before
+            a track is deleted
+        min_hits (int): minimum number of hits needed to confirm a track
+        iou_threshold (float): IoU threshold for association
+        delta_t (int): time step gap to the previous observation
+        inertia (float): weight (vdc_weight) of the angle_diff_cost term in
+            association
+        vertical_ratio (float): w/h aspect ratio used to filter out bad boxes.
+            A value <= 0 disables filtering; 1.6 is a common choice for
+            pedestrian tracking.
+        min_box_area (int): minimum box area used to filter out low-quality boxes
+        use_byte (bool): whether to use BYTE association, default False
+ """
+
+ def __init__(self,
+ det_thresh=0.6,
+ max_age=30,
+ min_hits=3,
+ iou_threshold=0.3,
+ delta_t=3,
+ inertia=0.2,
+ vertical_ratio=-1,
+ min_box_area=0,
+ use_byte=False):
+ self.det_thresh = det_thresh
+ self.max_age = max_age
+ self.min_hits = min_hits
+ self.iou_threshold = iou_threshold
+ self.delta_t = delta_t
+ self.inertia = inertia
+ self.vertical_ratio = vertical_ratio
+ self.min_box_area = min_box_area
+ self.use_byte = use_byte
+
+ self.trackers = []
+ self.frame_count = 0
+ KalmanBoxTracker.count = 0
+
+ def update(self, pred_dets, pred_embs=None):
+ """
+ Args:
+            pred_dets (np.array): Detection results of the image, with shape
+                [N, 6] in the format 'cls_id, score, x0, y0, x1, y1'.
+            pred_embs (np.array): Embedding results of the image, with shape
+                [N, 128] or [N, 512], default None.
+
+        Return:
+            tracking boxes (np.array): [M, 6], in the format 'x0, y0, x1, y1, score, id'.
+ """
+ if pred_dets is None:
+ return np.empty((0, 6))
+
+ self.frame_count += 1
+
+ bboxes = pred_dets[:, 2:]
+ scores = pred_dets[:, 1:2]
+ dets = np.concatenate((bboxes, scores), axis=1)
+ scores = scores.squeeze(-1)
+
+ inds_low = scores > 0.1
+ inds_high = scores < self.det_thresh
+ inds_second = np.logical_and(inds_low, inds_high)
+ # self.det_thresh > score > 0.1, for second matching
+ dets_second = dets[inds_second] # detections for second matching
+ remain_inds = scores > self.det_thresh
+ dets = dets[remain_inds]
+
+ # get predicted locations from existing trackers.
+ trks = np.zeros((len(self.trackers), 5))
+ to_del = []
+ ret = []
+ for t, trk in enumerate(trks):
+ pos = self.trackers[t].predict()[0]
+ trk[:] = [pos[0], pos[1], pos[2], pos[3], 0]
+ if np.any(np.isnan(pos)):
+ to_del.append(t)
+ trks = np.ma.compress_rows(np.ma.masked_invalid(trks))
+ for t in reversed(to_del):
+ self.trackers.pop(t)
+
+ velocities = np.array([
+ trk.velocity if trk.velocity is not None else np.array((0, 0))
+ for trk in self.trackers
+ ])
+ last_boxes = np.array([trk.last_observation for trk in self.trackers])
+ k_observations = np.array([
+ k_previous_obs(trk.observations, trk.age, self.delta_t)
+ for trk in self.trackers
+ ])
+ """
+ First round of association
+ """
+ matched, unmatched_dets, unmatched_trks = associate(
+ dets, trks, self.iou_threshold, velocities, k_observations,
+ self.inertia)
+ for m in matched:
+ self.trackers[m[1]].update(dets[m[0], :])
+ """
+        Second round of association by OCR (observation-centric recovery)
+ """
+ # BYTE association
+ if self.use_byte and len(dets_second) > 0 and unmatched_trks.shape[
+ 0] > 0:
+ u_trks = trks[unmatched_trks]
+ iou_left = iou_batch(
+ dets_second,
+ u_trks) # iou between low score detections and unmatched tracks
+ iou_left = np.array(iou_left)
+ if iou_left.max() > self.iou_threshold:
+ """
+                NOTE: using a lower threshold, e.g. self.iou_threshold - 0.1, may yield
+                higher performance, especially on the MOT17/MOT20 datasets, but we keep
+                it uniform here for simplicity.
+ """
+ matched_indices = linear_assignment(-iou_left)
+ to_remove_trk_indices = []
+ for m in matched_indices:
+ det_ind, trk_ind = m[0], unmatched_trks[m[1]]
+ if iou_left[m[0], m[1]] < self.iou_threshold:
+ continue
+ self.trackers[trk_ind].update(dets_second[det_ind, :])
+ to_remove_trk_indices.append(trk_ind)
+ unmatched_trks = np.setdiff1d(unmatched_trks,
+ np.array(to_remove_trk_indices))
+
+ if unmatched_dets.shape[0] > 0 and unmatched_trks.shape[0] > 0:
+ left_dets = dets[unmatched_dets]
+ left_trks = last_boxes[unmatched_trks]
+ iou_left = iou_batch(left_dets, left_trks)
+ iou_left = np.array(iou_left)
+ if iou_left.max() > self.iou_threshold:
+ """
+                NOTE: using a lower threshold, e.g. self.iou_threshold - 0.1, may yield
+                higher performance, especially on the MOT17/MOT20 datasets, but we keep
+                it uniform here for simplicity.
+ """
+ rematched_indices = linear_assignment(-iou_left)
+ to_remove_det_indices = []
+ to_remove_trk_indices = []
+ for m in rematched_indices:
+ det_ind, trk_ind = unmatched_dets[m[0]], unmatched_trks[m[
+ 1]]
+ if iou_left[m[0], m[1]] < self.iou_threshold:
+ continue
+ self.trackers[trk_ind].update(dets[det_ind, :])
+ to_remove_det_indices.append(det_ind)
+ to_remove_trk_indices.append(trk_ind)
+ unmatched_dets = np.setdiff1d(unmatched_dets,
+ np.array(to_remove_det_indices))
+ unmatched_trks = np.setdiff1d(unmatched_trks,
+ np.array(to_remove_trk_indices))
+
+ for m in unmatched_trks:
+ self.trackers[m].update(None)
+
+ # create and initialise new trackers for unmatched detections
+ for i in unmatched_dets:
+ trk = KalmanBoxTracker(dets[i, :], delta_t=self.delta_t)
+ self.trackers.append(trk)
+ i = len(self.trackers)
+ for trk in reversed(self.trackers):
+ if trk.last_observation.sum() < 0:
+ d = trk.get_state()[0]
+ else:
+ d = trk.last_observation # tlbr + score
+ if (trk.time_since_update < 1) and (
+ trk.hit_streak >= self.min_hits or
+ self.frame_count <= self.min_hits):
+ # +1 as MOT benchmark requires positive
+ ret.append(np.concatenate((d, [trk.id + 1])).reshape(1, -1))
+ i -= 1
+ # remove dead tracklet
+ if (trk.time_since_update > self.max_age):
+ self.trackers.pop(i)
+ if (len(ret) > 0):
+ return np.concatenate(ret)
+ return np.empty((0, 6))
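
For reference, a minimal per-frame usage sketch of the OCSORTTracker added above
(the detections are synthetic, and filterpy must be installed):

    import numpy as np
    from paddlers.models.ppdet.modeling.mot.tracker.ocsort_tracker import OCSORTTracker

    tracker = OCSORTTracker(det_thresh=0.6, max_age=30, min_hits=3)

    # One frame of detections in the documented [N, 6] layout:
    # 'cls_id, score, x0, y0, x1, y1'.
    pred_dets = np.array([
        [0., 0.92, 10., 20., 50., 80.],
        [0., 0.30, 30., 40., 90., 120.],  # low score, only used when use_byte=True
    ])
    online_targets = tracker.update(pred_dets)  # [M, 6]: 'x0, y0, x1, y1, score, id'
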
diff --git a/paddlers/models/ppdet/modeling/mot/utils.py b/paddlers/models/ppdet/modeling/mot/utils.py
index a33fd0c..cf3069e 100644
--- a/paddlers/models/ppdet/modeling/mot/utils.py
+++ b/paddlers/models/ppdet/modeling/mot/utils.py
@@ -77,7 +77,7 @@ class Detection(object):
tlwh (Tensor): Bounding box in format `(top left x, top left y,
width, height)`.
score (Tensor): Bounding box confidence score.
- feature (Tensor): A feature vector that describes the object
+ feature (Tensor): A feature vector that describes the object
contained in this image.
cls_id (Tensor): Bounding box category id.
"""
@@ -205,8 +205,8 @@ def load_det_results(det_file, num_frames):
def scale_coords(coords, input_shape, im_shape, scale_factor):
# Note: ratio has only one value, scale_factor[0] == scale_factor[1]
- #
- # This function only used for JDE YOLOv3 or other detectors with
+ #
+ # This function only used for JDE YOLOv3 or other detectors with
# LetterBoxResize and JDEBBoxPostProcess, coords output from detector had
# not scaled back to the origin image.
diff --git a/paddlers/models/ppdet/modeling/necks/__init__.py b/paddlers/models/ppdet/modeling/necks/__init__.py
index 197ef56..529b5e2 100644
--- a/paddlers/models/ppdet/modeling/necks/__init__.py
+++ b/paddlers/models/ppdet/modeling/necks/__init__.py
@@ -19,6 +19,9 @@ from . import ttf_fpn
from . import centernet_fpn
from . import bifpn
from . import csp_pan
+from . import es_pan
+from . import lc_pan
+from . import custom_pan
from .fpn import *
from .yolo_fpn import *
@@ -28,3 +31,6 @@ from .centernet_fpn import *
from .blazeface_fpn import *
from .bifpn import *
from .csp_pan import *
+from .es_pan import *
+from .lc_pan import *
+from .custom_pan import *
diff --git a/paddlers/models/ppdet/modeling/necks/centernet_fpn.py b/paddlers/models/ppdet/modeling/necks/centernet_fpn.py
old mode 100644
new mode 100755
index d5a7322..81a3681
--- a/paddlers/models/ppdet/modeling/necks/centernet_fpn.py
+++ b/paddlers/models/ppdet/modeling/necks/centernet_fpn.py
@@ -164,11 +164,11 @@ class IDAUp(nn.Layer):
for i in range(start_level + 1, end_level):
upsample = getattr(self, 'up_' + str(i - start_level))
project = getattr(self, 'proj_' + str(i - start_level))
-
inputs[i] = project(inputs[i])
inputs[i] = upsample(inputs[i])
node = getattr(self, 'node_' + str(i - start_level))
inputs[i] = node(paddle.add(inputs[i], inputs[i - 1]))
+ return inputs
class DLAUp(nn.Layer):
@@ -197,8 +197,8 @@ class DLAUp(nn.Layer):
out = [inputs[-1]] # start with 32
for i in range(len(inputs) - self.start_level - 1):
ida = getattr(self, 'ida_{}'.format(i))
- ida(inputs, len(inputs) - i - 2, len(inputs))
- out.insert(0, inputs[-1])
+ outputs = ida(inputs, len(inputs) - i - 2, len(inputs))
+ out.insert(0, outputs[-1])
return out
@@ -259,7 +259,9 @@ class CenterNetDLAFPN(nn.Layer):
def forward(self, body_feats):
- dla_up_feats = self.dla_up(body_feats)
+ inputs = [body_feats[i] for i in range(len(body_feats))]
+
+ dla_up_feats = self.dla_up(inputs)
ida_up_feats = []
for i in range(self.last_level - self.first_level):
diff --git a/paddlers/models/ppdet/modeling/necks/csp_pan.py b/paddlers/models/ppdet/modeling/necks/csp_pan.py
index 0843462..2558b55 100644
--- a/paddlers/models/ppdet/modeling/necks/csp_pan.py
+++ b/paddlers/models/ppdet/modeling/necks/csp_pan.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
# The code is based on:
@@ -19,7 +19,6 @@ import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
-from paddle.regularizer import L2Decay
from paddlers.models.ppdet.core.workspace import register, serializable
from ..shape_spec import ShapeSpec
@@ -36,8 +35,6 @@ class ConvBNLayer(nn.Layer):
act='leaky_relu'):
super(ConvBNLayer, self).__init__()
initializer = nn.initializer.KaimingUniform()
- self.act = act
- assert self.act in ['leaky_relu', "hard_swish"]
self.conv = nn.Conv2D(
in_channels=in_channel,
out_channels=out_channel,
@@ -48,13 +45,14 @@ class ConvBNLayer(nn.Layer):
weight_attr=ParamAttr(initializer=initializer),
bias_attr=False)
self.bn = nn.BatchNorm2D(out_channel)
+ if act == "hard_swish":
+ act = 'hardswish'
+ self.act = act
def forward(self, x):
x = self.bn(self.conv(x))
- if self.act == "leaky_relu":
- x = F.leaky_relu(x)
- elif self.act == "hard_swish":
- x = F.hardswish(x)
+ if self.act:
+ x = getattr(F, self.act)(x)
return x
@@ -75,10 +73,11 @@ class DPModule(nn.Layer):
out_channel=96,
kernel_size=3,
stride=1,
- act='leaky_relu'):
+ act='leaky_relu',
+ use_act_in_out=True):
super(DPModule, self).__init__()
initializer = nn.initializer.KaimingUniform()
- self.act = act
+ self.use_act_in_out = use_act_in_out
self.dwconv = nn.Conv2D(
in_channels=in_channel,
out_channels=out_channel,
@@ -98,17 +97,17 @@ class DPModule(nn.Layer):
weight_attr=ParamAttr(initializer=initializer),
bias_attr=False)
self.bn2 = nn.BatchNorm2D(out_channel)
-
- def act_func(self, x):
- if self.act == "leaky_relu":
- x = F.leaky_relu(x)
- elif self.act == "hard_swish":
- x = F.hardswish(x)
- return x
+ if act == "hard_swish":
+ act = 'hardswish'
+ self.act = act
def forward(self, x):
- x = self.act_func(self.bn1(self.dwconv(x)))
- x = self.act_func(self.bn2(self.pwconv(x)))
+ x = self.bn1(self.dwconv(x))
+ if self.act:
+ x = getattr(F, self.act)(x)
+ x = self.bn2(self.pwconv(x))
+ if self.use_act_in_out and self.act:
+ x = getattr(F, self.act)(x)
return x
diff --git a/paddlers/models/ppdet/modeling/necks/custom_pan.py b/paddlers/models/ppdet/modeling/necks/custom_pan.py
new file mode 100644
index 0000000..76388e9
--- /dev/null
+++ b/paddlers/models/ppdet/modeling/necks/custom_pan.py
@@ -0,0 +1,225 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddlers.models.ppdet.core.workspace import register, serializable
+from paddlers.models.ppdet.modeling.layers import DropBlock
+from paddlers.models.ppdet.modeling.ops import get_act_fn
+from ..backbones.cspresnet import ConvBNLayer, BasicBlock
+from ..shape_spec import ShapeSpec
+
+__all__ = ['CustomCSPPAN']
+
+
+class SPP(nn.Layer):
+ def __init__(self,
+ ch_in,
+ ch_out,
+ k,
+ pool_size,
+ act='swish',
+ data_format='NCHW'):
+ super(SPP, self).__init__()
+ self.pool = []
+ self.data_format = data_format
+ for i, size in enumerate(pool_size):
+ pool = self.add_sublayer(
+ 'pool{}'.format(i),
+ nn.MaxPool2D(
+ kernel_size=size,
+ stride=1,
+ padding=size // 2,
+ data_format=data_format,
+ ceil_mode=False))
+ self.pool.append(pool)
+ self.conv = ConvBNLayer(ch_in, ch_out, k, padding=k // 2, act=act)
+
+ def forward(self, x):
+ outs = [x]
+ for pool in self.pool:
+ outs.append(pool(x))
+ if self.data_format == 'NCHW':
+ y = paddle.concat(outs, axis=1)
+ else:
+ y = paddle.concat(outs, axis=-1)
+
+ y = self.conv(y)
+ return y
+
+
+class CSPStage(nn.Layer):
+ def __init__(self, block_fn, ch_in, ch_out, n, act='swish', spp=False):
+ super(CSPStage, self).__init__()
+
+ ch_mid = int(ch_out // 2)
+ self.conv1 = ConvBNLayer(ch_in, ch_mid, 1, act=act)
+ self.conv2 = ConvBNLayer(ch_in, ch_mid, 1, act=act)
+ self.convs = nn.Sequential()
+ next_ch_in = ch_mid
+ for i in range(n):
+ self.convs.add_sublayer(
+ str(i),
+ eval(block_fn)(next_ch_in, ch_mid, act=act, shortcut=False))
+ if i == (n - 1) // 2 and spp:
+ self.convs.add_sublayer(
+ 'spp', SPP(ch_mid * 4, ch_mid, 1, [5, 9, 13], act=act))
+ next_ch_in = ch_mid
+ self.conv3 = ConvBNLayer(ch_mid * 2, ch_out, 1, act=act)
+
+ def forward(self, x):
+ y1 = self.conv1(x)
+ y2 = self.conv2(x)
+ y2 = self.convs(y2)
+ y = paddle.concat([y1, y2], axis=1)
+ y = self.conv3(y)
+ return y
+
+
+@register
+@serializable
+class CustomCSPPAN(nn.Layer):
+ __shared__ = ['norm_type', 'data_format', 'width_mult', 'depth_mult', 'trt']
+
+ def __init__(self,
+ in_channels=[256, 512, 1024],
+ out_channels=[1024, 512, 256],
+ norm_type='bn',
+ act='leaky',
+ stage_fn='CSPStage',
+ block_fn='BasicBlock',
+ stage_num=1,
+ block_num=3,
+ drop_block=False,
+ block_size=3,
+ keep_prob=0.9,
+ spp=False,
+ data_format='NCHW',
+ width_mult=1.0,
+ depth_mult=1.0,
+ trt=False):
+
+ super(CustomCSPPAN, self).__init__()
+ out_channels = [max(round(c * width_mult), 1) for c in out_channels]
+ block_num = max(round(block_num * depth_mult), 1)
+ act = get_act_fn(
+ act, trt=trt) if act is None or isinstance(act,
+ (str, dict)) else act
+ self.num_blocks = len(in_channels)
+ self.data_format = data_format
+ self._out_channels = out_channels
+ in_channels = in_channels[::-1]
+ fpn_stages = []
+ fpn_routes = []
+ for i, (ch_in, ch_out) in enumerate(zip(in_channels, out_channels)):
+ if i > 0:
+ ch_in += ch_pre // 2
+
+ stage = nn.Sequential()
+ for j in range(stage_num):
+ stage.add_sublayer(
+ str(j),
+ eval(stage_fn)(block_fn,
+ ch_in if j == 0 else ch_out,
+ ch_out,
+ block_num,
+ act=act,
+ spp=(spp and i == 0)))
+
+ if drop_block:
+ stage.add_sublayer('drop', DropBlock(block_size, keep_prob))
+
+ fpn_stages.append(stage)
+
+ if i < self.num_blocks - 1:
+ fpn_routes.append(
+ ConvBNLayer(
+ ch_in=ch_out,
+ ch_out=ch_out // 2,
+ filter_size=1,
+ stride=1,
+ padding=0,
+ act=act))
+
+ ch_pre = ch_out
+
+ self.fpn_stages = nn.LayerList(fpn_stages)
+ self.fpn_routes = nn.LayerList(fpn_routes)
+
+ pan_stages = []
+ pan_routes = []
+ for i in reversed(range(self.num_blocks - 1)):
+ pan_routes.append(
+ ConvBNLayer(
+ ch_in=out_channels[i + 1],
+ ch_out=out_channels[i + 1],
+ filter_size=3,
+ stride=2,
+ padding=1,
+ act=act))
+
+ ch_in = out_channels[i] + out_channels[i + 1]
+ ch_out = out_channels[i]
+ stage = nn.Sequential()
+ for j in range(stage_num):
+ stage.add_sublayer(
+ str(j),
+ eval(stage_fn)(block_fn,
+ ch_in if j == 0 else ch_out,
+ ch_out,
+ block_num,
+ act=act,
+ spp=False))
+ if drop_block:
+ stage.add_sublayer('drop', DropBlock(block_size, keep_prob))
+
+ pan_stages.append(stage)
+
+ self.pan_stages = nn.LayerList(pan_stages[::-1])
+ self.pan_routes = nn.LayerList(pan_routes[::-1])
+
+ def forward(self, blocks, for_mot=False):
+ blocks = blocks[::-1]
+ fpn_feats = []
+
+ for i, block in enumerate(blocks):
+ if i > 0:
+ block = paddle.concat([route, block], axis=1)
+ route = self.fpn_stages[i](block)
+ fpn_feats.append(route)
+
+ if i < self.num_blocks - 1:
+ route = self.fpn_routes[i](route)
+ route = F.interpolate(
+ route, scale_factor=2., data_format=self.data_format)
+
+ pan_feats = [fpn_feats[-1], ]
+ route = fpn_feats[-1]
+ for i in reversed(range(self.num_blocks - 1)):
+ block = fpn_feats[i]
+ route = self.pan_routes[i](route)
+ block = paddle.concat([route, block], axis=1)
+ route = self.pan_stages[i](block)
+ pan_feats.append(route)
+
+ return pan_feats[::-1]
+
+ @classmethod
+ def from_config(cls, cfg, input_shape):
+ return {'in_channels': [i.channels for i in input_shape], }
+
+ @property
+ def out_shape(self):
+ return [ShapeSpec(channels=c) for c in self._out_channels]
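
A minimal forward-shape sketch for the new CustomCSPPAN (the channel counts and
spatial sizes are illustrative, mimicking strides 8/16/32 on a 256x256 input):

    import paddle
    from paddlers.models.ppdet.modeling.necks.custom_pan import CustomCSPPAN

    neck = CustomCSPPAN(in_channels=[256, 512, 1024],
                        out_channels=[1024, 512, 256],
                        act='swish', spp=True)
    feats = [paddle.rand([1, 256, 32, 32]),
             paddle.rand([1, 512, 16, 16]),
             paddle.rand([1, 1024, 8, 8])]
    outs = neck(feats)  # three levels, deepest first, channels follow out_channels
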
diff --git a/paddlers/models/ppdet/modeling/necks/es_pan.py b/paddlers/models/ppdet/modeling/necks/es_pan.py
new file mode 100644
index 0000000..1d7d31a
--- /dev/null
+++ b/paddlers/models/ppdet/modeling/necks/es_pan.py
@@ -0,0 +1,212 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle import ParamAttr
+from paddle.regularizer import L2Decay
+from paddlers.models.ppdet.core.workspace import register, serializable
+
+from ..shape_spec import ShapeSpec
+from ..backbones.esnet import SEModule
+from .csp_pan import ConvBNLayer, Channel_T, DPModule
+
+__all__ = ['ESPAN']
+
+
+class ES_Block(nn.Layer):
+ def __init__(self,
+ in_channels,
+ mid_channels,
+ out_channels,
+ kernel_size=5,
+ stride=1,
+ act='leaky_relu'):
+ super(ES_Block, self).__init__()
+ self._residual = ConvBNLayer(
+ in_channel=in_channels,
+ out_channel=out_channels,
+ kernel_size=1,
+ stride=1,
+ groups=1,
+ act=act)
+ self._conv_pw = ConvBNLayer(
+ in_channel=in_channels,
+ out_channel=mid_channels // 2,
+ kernel_size=1,
+ stride=1,
+ groups=1,
+ act=act)
+ self._conv_dw = ConvBNLayer(
+ in_channel=mid_channels // 2,
+ out_channel=mid_channels // 2,
+ kernel_size=kernel_size,
+ stride=stride,
+ groups=mid_channels // 2,
+ act=None)
+ self._se = SEModule(mid_channels)
+
+ self._conv_linear = ConvBNLayer(
+ in_channel=mid_channels,
+ out_channel=out_channels,
+ kernel_size=1,
+ stride=1,
+ groups=1,
+ act=act)
+
+ self._out_conv = ConvBNLayer(
+ in_channel=out_channels * 2,
+ out_channel=out_channels,
+ kernel_size=1,
+ stride=1,
+ groups=1,
+ act=act)
+
+ def forward(self, inputs):
+ x1 = self._residual(inputs)
+ x2 = self._conv_pw(inputs)
+ x3 = self._conv_dw(x2)
+ x3 = paddle.concat([x2, x3], axis=1)
+ x3 = self._se(x3)
+ x3 = self._conv_linear(x3)
+ out = paddle.concat([x1, x3], axis=1)
+ out = self._out_conv(out)
+ return out
+
+
+@register
+@serializable
+class ESPAN(nn.Layer):
+ """Path Aggregation Network with ES module.
+
+ Args:
+ in_channels (List[int]): Number of input channels per scale.
+ out_channels (int): Number of output channels (used at each scale)
+ kernel_size (int): The conv2d kernel size of this Module.
+ num_features (int): Number of output features of CSPPAN module.
+ num_csp_blocks (int): Number of bottlenecks in CSPLayer. Default: 1
+        use_depthwise (bool): Whether to use depthwise separable convolutions
+            in the blocks. Default: True
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ kernel_size=5,
+ num_features=3,
+ use_depthwise=True,
+ act='hard_swish',
+ spatial_scales=[0.125, 0.0625, 0.03125]):
+ super(ESPAN, self).__init__()
+ self.conv_t = Channel_T(in_channels, out_channels, act=act)
+ in_channels = [out_channels] * len(spatial_scales)
+ self.in_channels = in_channels
+ self.out_channels = out_channels
+ self.spatial_scales = spatial_scales
+ self.num_features = num_features
+ conv_func = DPModule if use_depthwise else ConvBNLayer
+
+ if self.num_features == 4:
+ self.first_top_conv = conv_func(
+ in_channels[0], in_channels[0], kernel_size, stride=2, act=act)
+ self.second_top_conv = conv_func(
+ in_channels[0], in_channels[0], kernel_size, stride=2, act=act)
+ self.spatial_scales.append(self.spatial_scales[-1] / 2)
+
+ # build top-down blocks
+ self.upsample = nn.Upsample(scale_factor=2, mode='nearest')
+ self.top_down_blocks = nn.LayerList()
+ for idx in range(len(in_channels) - 1, 0, -1):
+ self.top_down_blocks.append(
+ ES_Block(
+ in_channels[idx - 1] * 2,
+ in_channels[idx - 1],
+ in_channels[idx - 1],
+ kernel_size=kernel_size,
+ stride=1,
+ act=act))
+
+ # build bottom-up blocks
+ self.downsamples = nn.LayerList()
+ self.bottom_up_blocks = nn.LayerList()
+ for idx in range(len(in_channels) - 1):
+ self.downsamples.append(
+ conv_func(
+ in_channels[idx],
+ in_channels[idx],
+ kernel_size=kernel_size,
+ stride=2,
+ act=act))
+ self.bottom_up_blocks.append(
+ ES_Block(
+ in_channels[idx] * 2,
+ in_channels[idx + 1],
+ in_channels[idx + 1],
+ kernel_size=kernel_size,
+ stride=1,
+ act=act))
+
+ def forward(self, inputs):
+ """
+ Args:
+ inputs (tuple[Tensor]): input features.
+
+ Returns:
+ tuple[Tensor]: CSPPAN features.
+ """
+ assert len(inputs) == len(self.in_channels)
+ inputs = self.conv_t(inputs)
+
+ # top-down path
+ inner_outs = [inputs[-1]]
+ for idx in range(len(self.in_channels) - 1, 0, -1):
+ feat_heigh = inner_outs[0]
+ feat_low = inputs[idx - 1]
+
+ upsample_feat = self.upsample(feat_heigh)
+
+ inner_out = self.top_down_blocks[len(self.in_channels) - 1 - idx](
+ paddle.concat([upsample_feat, feat_low], 1))
+ inner_outs.insert(0, inner_out)
+
+ # bottom-up path
+ outs = [inner_outs[0]]
+ for idx in range(len(self.in_channels) - 1):
+ feat_low = outs[-1]
+ feat_height = inner_outs[idx + 1]
+ downsample_feat = self.downsamples[idx](feat_low)
+ out = self.bottom_up_blocks[idx](paddle.concat(
+ [downsample_feat, feat_height], 1))
+ outs.append(out)
+
+ top_features = None
+ if self.num_features == 4:
+ top_features = self.first_top_conv(inputs[-1])
+ top_features = top_features + self.second_top_conv(outs[-1])
+ outs.append(top_features)
+
+ return tuple(outs)
+
+ @property
+ def out_shape(self):
+ return [
+ ShapeSpec(
+ channels=self.out_channels, stride=1. / s)
+ for s in self.spatial_scales
+ ]
+
+ @classmethod
+ def from_config(cls, cfg, input_shape):
+ return {'in_channels': [i.channels for i in input_shape], }
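
A minimal sketch of running the new ESPAN forward (the channel counts are
illustrative; num_features=4 demonstrates the extra top level):

    import paddle
    from paddlers.models.ppdet.modeling.necks.es_pan import ESPAN

    neck = ESPAN(in_channels=[116, 232, 464], out_channels=96, num_features=4)
    feats = [paddle.rand([1, 116, 40, 40]),
             paddle.rand([1, 232, 20, 20]),
             paddle.rand([1, 464, 10, 10])]
    outs = neck(feats)
    # len(outs) == 4: the extra level comes from first_top_conv/second_top_conv,
    # and every level carries out_channels (96) channels.
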
diff --git a/paddlers/models/ppdet/modeling/necks/fpn.py b/paddlers/models/ppdet/modeling/necks/fpn.py
index 6bf1f94..472101c 100644
--- a/paddlers/models/ppdet/modeling/necks/fpn.py
+++ b/paddlers/models/ppdet/modeling/necks/fpn.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.nn as nn
@@ -31,28 +31,28 @@ class FPN(nn.Layer):
Feature Pyramid Network, see https://arxiv.org/abs/1612.03144
Args:
- in_channels (list[int]): input channels of each level which can be
+ in_channels (list[int]): input channels of each level which can be
derived from the output shape of backbone by from_config
- out_channel (list[int]): output channel of each level
+ out_channel (int): output channel of each level
spatial_scales (list[float]): the spatial scales between input feature
- maps and original input image which can be derived from the output
+ maps and original input image which can be derived from the output
shape of backbone by from_config
has_extra_convs (bool): whether to add extra conv to the last level.
default False
extra_stage (int): the number of extra stages added to the last level.
default 1
- use_c5 (bool): Whether to use c5 as the input of extra stage,
+ use_c5 (bool): Whether to use c5 as the input of extra stage,
otherwise p5 is used. default True
- norm_type (string|None): The normalization type in FPN module. If
- norm_type is None, norm will not be used after conv and if
+ norm_type (string|None): The normalization type in FPN module. If
+ norm_type is None, norm will not be used after conv and if
norm_type is string, bn, gn, sync_bn are available. default None
norm_decay (float): weight decay for normalization layer weights.
default 0.
- freeze_norm (bool): whether to freeze normalization layer.
+ freeze_norm (bool): whether to freeze normalization layer.
default False
relu_before_extra_convs (bool): whether to add relu before extra convs.
default False
-
+
"""
def __init__(self,
diff --git a/paddlers/models/ppdet/modeling/necks/hrfpn.py b/paddlers/models/ppdet/modeling/necks/hrfpn.py
index 785c572..0a17ea1 100644
--- a/paddlers/models/ppdet/modeling/necks/hrfpn.py
+++ b/paddlers/models/ppdet/modeling/necks/hrfpn.py
@@ -37,7 +37,8 @@ class HRFPN(nn.Layer):
out_channel=256,
share_conv=False,
extra_stage=1,
- spatial_scales=[1. / 4, 1. / 8, 1. / 16, 1. / 32]):
+ spatial_scales=[1. / 4, 1. / 8, 1. / 16, 1. / 32],
+ use_bias=False):
super(HRFPN, self).__init__()
in_channel = sum(in_channels)
self.in_channel = in_channel
@@ -47,12 +48,14 @@ class HRFPN(nn.Layer):
spatial_scales = spatial_scales + [spatial_scales[-1] / 2.]
self.spatial_scales = spatial_scales
self.num_out = len(self.spatial_scales)
+ self.use_bias = use_bias
+        bias_attr = None if use_bias else False
self.reduction = nn.Conv2D(
in_channels=in_channel,
out_channels=out_channel,
kernel_size=1,
- bias_attr=False)
+ bias_attr=bias_attr)
if share_conv:
self.fpn_conv = nn.Conv2D(
@@ -60,7 +63,7 @@ class HRFPN(nn.Layer):
out_channels=out_channel,
kernel_size=3,
padding=1,
- bias_attr=False)
+ bias_attr=bias_attr)
else:
self.fpn_conv = []
for i in range(self.num_out):
@@ -72,7 +75,7 @@ class HRFPN(nn.Layer):
out_channels=out_channel,
kernel_size=3,
padding=1,
- bias_attr=False))
+ bias_attr=bias_attr))
self.fpn_conv.append(conv)
def forward(self, body_feats):
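
The new use_bias switch relies on Paddle's bias_attr convention: bias_attr=False
removes the bias parameter entirely, while bias_attr=None keeps the framework
default (a learnable bias). A two-line illustration:

    import paddle.nn as nn

    conv_no_bias = nn.Conv2D(4, 8, 1, bias_attr=False)    # use_bias=False
    conv_with_bias = nn.Conv2D(4, 8, 1, bias_attr=None)   # use_bias=True
    assert conv_no_bias.bias is None and conv_with_bias.bias is not None
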
diff --git a/paddlers/models/ppdet/modeling/necks/lc_pan.py b/paddlers/models/ppdet/modeling/necks/lc_pan.py
new file mode 100644
index 0000000..0faf32b
--- /dev/null
+++ b/paddlers/models/ppdet/modeling/necks/lc_pan.py
@@ -0,0 +1,168 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle import ParamAttr
+from paddle.regularizer import L2Decay
+from paddlers.models.ppdet.core.workspace import register, serializable
+
+from ..shape_spec import ShapeSpec
+from ..backbones.lcnet import DepthwiseSeparable
+from .csp_pan import ConvBNLayer, Channel_T, DPModule
+
+__all__ = ['LCPAN']
+
+
+@register
+@serializable
+class LCPAN(nn.Layer):
+ """Path Aggregation Network with LCNet module.
+ Args:
+ in_channels (List[int]): Number of input channels per scale.
+ out_channels (int): Number of output channels (used at each scale)
+ kernel_size (int): The conv2d kernel size of this Module.
+ num_features (int): Number of output features of CSPPAN module.
+ num_csp_blocks (int): Number of bottlenecks in CSPLayer. Default: 1
+        use_depthwise (bool): Whether to use depthwise separable convolutions
+            in the blocks. Default: True
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ kernel_size=5,
+ num_features=3,
+ use_depthwise=True,
+ act='hard_swish',
+ spatial_scales=[0.125, 0.0625, 0.03125]):
+ super(LCPAN, self).__init__()
+ self.conv_t = Channel_T(in_channels, out_channels, act=act)
+ in_channels = [out_channels] * len(spatial_scales)
+ self.in_channels = in_channels
+ self.out_channels = out_channels
+ self.spatial_scales = spatial_scales
+ self.num_features = num_features
+ conv_func = DPModule if use_depthwise else ConvBNLayer
+
+ NET_CONFIG = {
+ #k, in_c, out_c, stride, use_se
+ "block1": [
+ [kernel_size, out_channels * 2, out_channels * 2, 1, False],
+ [kernel_size, out_channels * 2, out_channels, 1, False],
+ ],
+ "block2": [
+ [kernel_size, out_channels * 2, out_channels * 2, 1, False],
+ [kernel_size, out_channels * 2, out_channels, 1, False],
+ ]
+ }
+
+ if self.num_features == 4:
+ self.first_top_conv = conv_func(
+ in_channels[0], in_channels[0], kernel_size, stride=2, act=act)
+ self.second_top_conv = conv_func(
+ in_channels[0], in_channels[0], kernel_size, stride=2, act=act)
+ self.spatial_scales.append(self.spatial_scales[-1] / 2)
+
+ # build top-down blocks
+ self.upsample = nn.Upsample(scale_factor=2, mode='nearest')
+ self.top_down_blocks = nn.LayerList()
+ for idx in range(len(in_channels) - 1, 0, -1):
+ self.top_down_blocks.append(
+ nn.Sequential(*[
+ DepthwiseSeparable(
+ num_channels=in_c,
+ num_filters=out_c,
+ dw_size=k,
+ stride=s,
+ use_se=se)
+ for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG[
+ "block1"])
+ ]))
+
+ # build bottom-up blocks
+ self.downsamples = nn.LayerList()
+ self.bottom_up_blocks = nn.LayerList()
+ for idx in range(len(in_channels) - 1):
+ self.downsamples.append(
+ conv_func(
+ in_channels[idx],
+ in_channels[idx],
+ kernel_size=kernel_size,
+ stride=2,
+ act=act))
+ self.bottom_up_blocks.append(
+ nn.Sequential(*[
+ DepthwiseSeparable(
+ num_channels=in_c,
+ num_filters=out_c,
+ dw_size=k,
+ stride=s,
+ use_se=se)
+ for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG[
+ "block2"])
+ ]))
+
+ def forward(self, inputs):
+ """
+ Args:
+ inputs (tuple[Tensor]): input features.
+ Returns:
+ tuple[Tensor]: CSPPAN features.
+ """
+ assert len(inputs) == len(self.in_channels)
+ inputs = self.conv_t(inputs)
+
+ # top-down path
+ inner_outs = [inputs[-1]]
+ for idx in range(len(self.in_channels) - 1, 0, -1):
+ feat_heigh = inner_outs[0]
+ feat_low = inputs[idx - 1]
+
+ upsample_feat = self.upsample(feat_heigh)
+
+ inner_out = self.top_down_blocks[len(self.in_channels) - 1 - idx](
+ paddle.concat([upsample_feat, feat_low], 1))
+ inner_outs.insert(0, inner_out)
+
+ # bottom-up path
+ outs = [inner_outs[0]]
+ for idx in range(len(self.in_channels) - 1):
+ feat_low = outs[-1]
+ feat_height = inner_outs[idx + 1]
+ downsample_feat = self.downsamples[idx](feat_low)
+ out = self.bottom_up_blocks[idx](paddle.concat(
+ [downsample_feat, feat_height], 1))
+ outs.append(out)
+
+ top_features = None
+ if self.num_features == 4:
+ top_features = self.first_top_conv(inputs[-1])
+ top_features = top_features + self.second_top_conv(outs[-1])
+ outs.append(top_features)
+
+ return tuple(outs)
+
+ @property
+ def out_shape(self):
+ return [
+ ShapeSpec(
+ channels=self.out_channels, stride=1. / s)
+ for s in self.spatial_scales
+ ]
+
+ @classmethod
+ def from_config(cls, cfg, input_shape):
+ return {'in_channels': [i.channels for i in input_shape], }
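
Each NET_CONFIG row above is unpacked as (kernel, in_channels, out_channels,
stride, use_se) into a DepthwiseSeparable block from the LCNet backbone. A
single-row sketch (the concrete numbers are illustrative):

    from paddlers.models.ppdet.modeling.backbones.lcnet import DepthwiseSeparable

    # One "block1" row with kernel_size=5 and out_channels=96:
    k, in_c, out_c, s, se = 5, 96 * 2, 96 * 2, 1, False
    block = DepthwiseSeparable(
        num_channels=in_c, num_filters=out_c, dw_size=k, stride=s, use_se=se)
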
diff --git a/paddlers/models/ppdet/modeling/necks/ttf_fpn.py b/paddlers/models/ppdet/modeling/necks/ttf_fpn.py
index ae2f245..f25cfc1 100644
--- a/paddlers/models/ppdet/modeling/necks/ttf_fpn.py
+++ b/paddlers/models/ppdet/modeling/necks/ttf_fpn.py
@@ -166,9 +166,9 @@ class TTFFPN(nn.Layer):
shortcut_num (list): the number of convolution layers in each shortcut.
[3,2,1] by default, means DarkNet53 backbone return_idx_1 has 3 convs
in its shortcut, return_idx_2 has 2 convs and return_idx_3 has 1 conv.
- norm_type (string): norm type, 'sync_bn', 'bn', 'gn' are optional.
+ norm_type (string): norm type, 'sync_bn', 'bn', 'gn' are optional.
bn by default
- lite_neck (bool): whether to use lite conv in TTFNet FPN,
+ lite_neck (bool): whether to use lite conv in TTFNet FPN,
False by default
fusion_method (string): the method to fusion upsample and lateral layer.
'add' and 'concat' are optional, add by default
diff --git a/paddlers/models/ppdet/modeling/necks/yolo_fpn.py b/paddlers/models/ppdet/modeling/necks/yolo_fpn.py
index a859dee..bd667a2 100644
--- a/paddlers/models/ppdet/modeling/necks/yolo_fpn.py
+++ b/paddlers/models/ppdet/modeling/necks/yolo_fpn.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
@@ -17,10 +17,12 @@ import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppdet.core.workspace import register, serializable
from paddlers.models.ppdet.modeling.layers import DropBlock
+from paddlers.models.ppdet.modeling.ops import get_act_fn
from ..backbones.darknet import ConvBNLayer
from ..shape_spec import ShapeSpec
+from ..backbones.csp_darknet import BaseConv, DWConv, CSPLayer
-__all__ = ['YOLOv3FPN', 'PPYOLOFPN', 'PPYOLOTinyFPN', 'PPYOLOPAN']
+__all__ = ['YOLOv3FPN', 'PPYOLOFPN', 'PPYOLOTinyFPN', 'PPYOLOPAN', 'YOLOCSPPAN']
def add_coord(x, data_format):
@@ -114,7 +116,7 @@ class SPP(nn.Layer):
ch_out,
k,
pool_size,
- norm_type,
+ norm_type='bn',
freeze_norm=False,
name='',
act='leaky',
@@ -267,7 +269,7 @@ class PPYOLOTinyDetBlock(nn.Layer):
self.conv_module = nn.Sequential()
cfgs = [
- # name, in channels, out channels, filter_size,
+ # name, in channels, out channels, filter_size,
# stride, padding, groups
['.0', ch_in, ch_out, 1, 1, 0, 1],
['.1', ch_out, ch_out, 5, 1, 2, ch_out],
@@ -679,7 +681,7 @@ class PPYOLOTinyFPN(nn.Layer):
detection_block_channels (list): channels in fpn
norm_type (str): batch norm type, default bn
data_format (str): data format, NCHW or NHWC
- kwargs: extra key-value pairs, such as parameter of DropBlock and spp
+ kwargs: extra key-value pairs, such as parameter of DropBlock and spp
"""
super(PPYOLOTinyFPN, self).__init__()
assert len(in_channels) > 0, "in_channels length should > 0"
@@ -986,3 +988,112 @@ class PPYOLOPAN(nn.Layer):
@property
def out_shape(self):
return [ShapeSpec(channels=c) for c in self._out_channels]
+
+
+@register
+@serializable
+class YOLOCSPPAN(nn.Layer):
+ """
+ YOLO CSP-PAN, used in YOLOv5 and YOLOX.
+ """
+ __shared__ = ['depth_mult', 'data_format', 'act', 'trt']
+
+ def __init__(self,
+ depth_mult=1.0,
+ in_channels=[256, 512, 1024],
+ depthwise=False,
+ data_format='NCHW',
+ act='silu',
+ trt=False):
+ super(YOLOCSPPAN, self).__init__()
+ self.in_channels = in_channels
+ self._out_channels = in_channels
+ Conv = DWConv if depthwise else BaseConv
+
+ self.data_format = data_format
+ act = get_act_fn(
+ act, trt=trt) if act is None or isinstance(act,
+ (str, dict)) else act
+ self.upsample = nn.Upsample(scale_factor=2, mode="nearest")
+
+ # top-down fpn
+ self.lateral_convs = nn.LayerList()
+ self.fpn_blocks = nn.LayerList()
+ for idx in range(len(in_channels) - 1, 0, -1):
+ self.lateral_convs.append(
+ BaseConv(
+ int(in_channels[idx]),
+ int(in_channels[idx - 1]),
+ 1,
+ 1,
+ act=act))
+ self.fpn_blocks.append(
+ CSPLayer(
+ int(in_channels[idx - 1] * 2),
+ int(in_channels[idx - 1]),
+ round(3 * depth_mult),
+ shortcut=False,
+ depthwise=depthwise,
+ act=act))
+
+ # bottom-up pan
+ self.downsample_convs = nn.LayerList()
+ self.pan_blocks = nn.LayerList()
+ for idx in range(len(in_channels) - 1):
+ self.downsample_convs.append(
+ Conv(
+ int(in_channels[idx]),
+ int(in_channels[idx]),
+ 3,
+ stride=2,
+ act=act))
+ self.pan_blocks.append(
+ CSPLayer(
+ int(in_channels[idx] * 2),
+ int(in_channels[idx + 1]),
+ round(3 * depth_mult),
+ shortcut=False,
+ depthwise=depthwise,
+ act=act))
+
+ def forward(self, feats, for_mot=False):
+ assert len(feats) == len(self.in_channels)
+
+ # top-down fpn
+ inner_outs = [feats[-1]]
+ for idx in range(len(self.in_channels) - 1, 0, -1):
+ feat_heigh = inner_outs[0]
+ feat_low = feats[idx - 1]
+ feat_heigh = self.lateral_convs[len(self.in_channels) - 1 - idx](
+ feat_heigh)
+ inner_outs[0] = feat_heigh
+
+ upsample_feat = F.interpolate(
+ feat_heigh,
+ scale_factor=2.,
+ mode="nearest",
+ data_format=self.data_format)
+ inner_out = self.fpn_blocks[len(self.in_channels) - 1 - idx](
+ paddle.concat(
+ [upsample_feat, feat_low], axis=1))
+ inner_outs.insert(0, inner_out)
+
+ # bottom-up pan
+ outs = [inner_outs[0]]
+ for idx in range(len(self.in_channels) - 1):
+ feat_low = outs[-1]
+ feat_height = inner_outs[idx + 1]
+ downsample_feat = self.downsample_convs[idx](feat_low)
+ out = self.pan_blocks[idx](paddle.concat(
+ [downsample_feat, feat_height], axis=1))
+ outs.append(out)
+
+ return outs
+
+ @classmethod
+ def from_config(cls, cfg, input_shape):
+ return {'in_channels': [i.channels for i in input_shape], }
+
+ @property
+ def out_shape(self):
+ return [ShapeSpec(channels=c) for c in self._out_channels]
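
A minimal forward sketch for the new YOLOCSPPAN (the shapes are illustrative,
e.g. strides 8/16/32 on a 256x256 input; per-level channel counts pass through
unchanged):

    import paddle
    from paddlers.models.ppdet.modeling.necks.yolo_fpn import YOLOCSPPAN

    neck = YOLOCSPPAN(depth_mult=0.33, in_channels=[128, 256, 512], act='silu')
    feats = [paddle.rand([1, 128, 32, 32]),
             paddle.rand([1, 256, 16, 16]),
             paddle.rand([1, 512, 8, 8])]
    outs = neck(feats)  # three levels with channels [128, 256, 512]
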
diff --git a/paddlers/models/ppdet/modeling/ops.py b/paddlers/models/ppdet/modeling/ops.py
index 005a131..26a3171 100644
--- a/paddlers/models/ppdet/modeling/ops.py
+++ b/paddlers/models/ppdet/modeling/ops.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
@@ -17,31 +17,72 @@ import paddle.nn.functional as F
import paddle.nn as nn
from paddle import ParamAttr
from paddle.regularizer import L2Decay
+try:
+ import paddle._legacy_C_ops as C_ops
+except ImportError:
+ import paddle._C_ops as C_ops
-from paddle.fluid.framework import Variable, in_dygraph_mode
-from paddle.fluid import core
-from paddle.fluid.layer_helper import LayerHelper
-from paddle.fluid.data_feeder import check_variable_and_dtype, check_type, check_dtype
+from paddle import in_dynamic_mode
+from paddle.common_ops_import import Variable, LayerHelper, check_variable_and_dtype, check_type, check_dtype
__all__ = [
- 'roi_pool',
- 'roi_align',
'prior_box',
'generate_proposals',
- 'iou_similarity',
'box_coder',
- 'yolo_box',
'multiclass_nms',
'distribute_fpn_proposals',
- 'collect_fpn_proposals',
'matrix_nms',
'batch_norm',
'mish',
+ 'silu',
+ 'swish',
+ 'identity',
]
+def identity(x):
+ return x
+
+
def mish(x):
- return x * paddle.tanh(F.softplus(x))
+    return F.mish(x) if hasattr(F, 'mish') else x * F.tanh(F.softplus(x))
+
+
+def silu(x):
+ return F.silu(x)
+
+
+def swish(x):
+ return x * F.sigmoid(x)
+
+
+TRT_ACT_SPEC = {'swish': swish, 'silu': swish}
+
+ACT_SPEC = {'mish': mish, 'silu': silu}
+
+
+def get_act_fn(act=None, trt=False):
+ assert act is None or isinstance(act, (
+ str, dict)), 'name of activation should be str, dict or None'
+ if not act:
+ return identity
+
+ if isinstance(act, dict):
+ name = act['name']
+ act.pop('name')
+ kwargs = act
+ else:
+ name = act
+ kwargs = dict()
+
+ if trt and name in TRT_ACT_SPEC:
+ fn = TRT_ACT_SPEC[name]
+ elif name in ACT_SPEC:
+ fn = ACT_SPEC[name]
+ else:
+ fn = getattr(F, name)
+
+ return lambda x: fn(x, **kwargs)
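+
+
+# Illustrative usage of get_act_fn (examples only):
+#   get_act_fn('mish')(x)  -> the mish wrapper above
+#   get_act_fn({'name': 'leaky_relu', 'negative_slope': 0.1})(x)
+#       -> F.leaky_relu(x, negative_slope=0.1)
+#   get_act_fn('silu', trt=True)(x)  -> the TensorRT-friendly swish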
def batch_norm(ch,
@@ -77,390 +118,6 @@ def batch_norm(ch,
return norm_layer
-@paddle.jit.not_to_static
-def roi_pool(input,
- rois,
- output_size,
- spatial_scale=1.0,
- rois_num=None,
- name=None):
- """
-
- This operator implements the roi_pooling layer.
- Region of interest pooling (also known as RoI pooling) is to perform max pooling on inputs of nonuniform sizes to obtain fixed-size feature maps (e.g. 7*7).
-
- The operator has three steps:
-
- 1. Dividing each region proposal into equal-sized sections with output_size(h, w);
- 2. Finding the largest value in each section;
- 3. Copying these max values to the output buffer.
-
- For more information, please refer to https://stackoverflow.com/questions/43430056/what-is-roi-layer-in-fast-rcnn
-
- Args:
- input (Tensor): Input feature, 4D-Tensor with the shape of [N,C,H,W],
- where N is the batch size, C is the input channel, H is Height, W is weight.
- The data type is float32 or float64.
- rois (Tensor): ROIs (Regions of Interest) to pool over.
- 2D-Tensor or 2D-LoDTensor with the shape of [num_rois,4], the lod level is 1.
- Given as [[x1, y1, x2, y2], ...], (x1, y1) is the top left coordinates,
- and (x2, y2) is the bottom right coordinates.
- output_size (int or tuple[int, int]): The pooled output size(h, w), data type is int32. If int, h and w are both equal to output_size.
- spatial_scale (float, optional): Multiplicative spatial scale factor to translate ROI coords from their input scale to the scale used when pooling. Default: 1.0
- rois_num (Tensor): The number of RoIs in each image. Default: None
- name(str, optional): For detailed information, please refer
- to :ref:`api_guide_Name`. Usually name is no need to set and
- None by default.
-
-
- Returns:
- Tensor: The pooled feature, 4D-Tensor with the shape of [num_rois, C, output_size[0], output_size[1]].
-
-
- Examples:
-
- .. code-block:: python
-
- import paddle
- from paddlers.models.ppdet.modeling import ops
- paddle.enable_static()
-
- x = paddle.static.data(
- name='data', shape=[None, 256, 32, 32], dtype='float32')
- rois = paddle.static.data(
- name='rois', shape=[None, 4], dtype='float32')
- rois_num = paddle.static.data(name='rois_num', shape=[None], dtype='int32')
-
- pool_out = ops.roi_pool(
- input=x,
- rois=rois,
- output_size=(1, 1),
- spatial_scale=1.0,
- rois_num=rois_num)
- """
- check_type(output_size, 'output_size', (int, tuple), 'roi_pool')
- if isinstance(output_size, int):
- output_size = (output_size, output_size)
-
- pooled_height, pooled_width = output_size
- if in_dygraph_mode():
- assert rois_num is not None, "rois_num should not be None in dygraph mode."
- pool_out, argmaxes = core.ops.roi_pool(
- input, rois, rois_num, "pooled_height", pooled_height,
- "pooled_width", pooled_width, "spatial_scale", spatial_scale)
- return pool_out, argmaxes
-
- else:
- check_variable_and_dtype(input, 'input', ['float32'], 'roi_pool')
- check_variable_and_dtype(rois, 'rois', ['float32'], 'roi_pool')
- helper = LayerHelper('roi_pool', **locals())
- dtype = helper.input_dtype()
- pool_out = helper.create_variable_for_type_inference(dtype)
- argmaxes = helper.create_variable_for_type_inference(dtype='int32')
-
- inputs = {
- "X": input,
- "ROIs": rois,
- }
- if rois_num is not None:
- inputs['RoisNum'] = rois_num
- helper.append_op(
- type="roi_pool",
- inputs=inputs,
- outputs={"Out": pool_out,
- "Argmax": argmaxes},
- attrs={
- "pooled_height": pooled_height,
- "pooled_width": pooled_width,
- "spatial_scale": spatial_scale
- })
- return pool_out, argmaxes
-
-
-@paddle.jit.not_to_static
-def roi_align(input,
- rois,
- output_size,
- spatial_scale=1.0,
- sampling_ratio=-1,
- rois_num=None,
- aligned=True,
- name=None):
- """
-
- Region of interest align (also known as RoI align) is to perform
- bilinear interpolation on inputs of nonuniform sizes to obtain
- fixed-size feature maps (e.g. 7*7)
-
- Dividing each region proposal into equal-sized sections with
- the pooled_width and pooled_height. Location remains the origin
- result.
-
- In each ROI bin, the value of the four regularly sampled locations
- are computed directly through bilinear interpolation. The output is
- the mean of four locations.
- Thus avoid the misaligned problem.
-
- Args:
- input (Tensor): Input feature, 4D-Tensor with the shape of [N,C,H,W],
- where N is the batch size, C is the input channel, H is Height, W is weight.
- The data type is float32 or float64.
- rois (Tensor): ROIs (Regions of Interest) to pool over.It should be
- a 2-D Tensor or 2-D LoDTensor of shape (num_rois, 4), the lod level is 1.
- The data type is float32 or float64. Given as [[x1, y1, x2, y2], ...],
- (x1, y1) is the top left coordinates, and (x2, y2) is the bottom right coordinates.
- output_size (int or tuple[int, int]): The pooled output size(h, w), data type is int32. If int, h and w are both equal to output_size.
- spatial_scale (float32, optional): Multiplicative spatial scale factor to translate ROI coords
- from their input scale to the scale used when pooling. Default: 1.0
- sampling_ratio(int32, optional): number of sampling points in the interpolation grid.
- If <=0, then grid points are adaptive to roi_width and pooled_w, likewise for height. Default: -1
- rois_num (Tensor): The number of RoIs in each image. Default: None
- name(str, optional): For detailed information, please refer
- to :ref:`api_guide_Name`. Usually name is no need to set and
- None by default.
-
- Returns:
- Tensor:
-
- Output: The output of ROIAlignOp is a 4-D tensor with shape (num_rois, channels, pooled_h, pooled_w). The data type is float32 or float64.
-
-
- Examples:
- .. code-block:: python
-
- import paddle
- from paddlers.models.ppdet.modeling import ops
- paddle.enable_static()
-
- x = paddle.static.data(
- name='data', shape=[None, 256, 32, 32], dtype='float32')
- rois = paddle.static.data(
- name='rois', shape=[None, 4], dtype='float32')
- rois_num = paddle.static.data(name='rois_num', shape=[None], dtype='int32')
- align_out = ops.roi_align(input=x,
- rois=rois,
- ouput_size=(7, 7),
- spatial_scale=0.5,
- sampling_ratio=-1,
- rois_num=rois_num)
- """
- check_type(output_size, 'output_size', (int, tuple), 'roi_align')
- if isinstance(output_size, int):
- output_size = (output_size, output_size)
-
- pooled_height, pooled_width = output_size
-
- if in_dygraph_mode():
- assert rois_num is not None, "rois_num should not be None in dygraph mode."
- align_out = core.ops.roi_align(
- input, rois, rois_num, "pooled_height", pooled_height,
- "pooled_width", pooled_width, "spatial_scale", spatial_scale,
- "sampling_ratio", sampling_ratio, "aligned", aligned)
- return align_out
-
- else:
- check_variable_and_dtype(input, 'input', ['float32', 'float64'],
- 'roi_align')
- check_variable_and_dtype(rois, 'rois', ['float32', 'float64'],
- 'roi_align')
- helper = LayerHelper('roi_align', **locals())
- dtype = helper.input_dtype()
- align_out = helper.create_variable_for_type_inference(dtype)
- inputs = {
- "X": input,
- "ROIs": rois,
- }
- if rois_num is not None:
- inputs['RoisNum'] = rois_num
- helper.append_op(
- type="roi_align",
- inputs=inputs,
- outputs={"Out": align_out},
- attrs={
- "pooled_height": pooled_height,
- "pooled_width": pooled_width,
- "spatial_scale": spatial_scale,
- "sampling_ratio": sampling_ratio,
- "aligned": aligned,
- })
- return align_out
-
-
-@paddle.jit.not_to_static
-def iou_similarity(x, y, box_normalized=True, name=None):
- """
- Computes intersection-over-union (IOU) between two box lists.
- Box list 'X' should be a LoDTensor and 'Y' is a common Tensor,
- boxes in 'Y' are shared by all instance of the batched inputs of X.
- Given two boxes A and B, the calculation of IOU is as follows:
-
- $$
- IOU(A, B) =
- \\frac{area(A\\cap B)}{area(A)+area(B)-area(A\\cap B)}
- $$
-
- Args:
- x (Tensor): Box list X is a 2-D Tensor with shape [N, 4] holds N
- boxes, each box is represented as [xmin, ymin, xmax, ymax],
- the shape of X is [N, 4]. [xmin, ymin] is the left top
- coordinate of the box if the input is image feature map, they
- are close to the origin of the coordinate system.
- [xmax, ymax] is the right bottom coordinate of the box.
- The data type is float32 or float64.
- y (Tensor): Box list Y holds M boxes, each box is represented as
- [xmin, ymin, xmax, ymax], the shape of Y is [M, 4].
- [xmin, ymin] is the left top coordinate of the box if the
- input is image feature map, and [xmax, ymax] is the right
- bottom coordinate of the box. The data type is float32 or float64.
- box_normalized(bool): Whether treat the priorbox as a normalized box.
- Set true by default.
- name(str, optional): For detailed information, please refer
- to :ref:`api_guide_Name`. Usually name is no need to set and
- None by default.
-
- Returns:
- Tensor: The output of iou_similarity op, a tensor with shape [N, M]
- representing pairwise iou scores. The data type is same with x.
-
- Examples:
- .. code-block:: python
-
- import paddle
- from paddlers.models.ppdet.modeling import ops
- paddle.enable_static()
-
- x = paddle.static.data(name='x', shape=[None, 4], dtype='float32')
- y = paddle.static.data(name='y', shape=[None, 4], dtype='float32')
- iou = ops.iou_similarity(x=x, y=y)
- """
-
- if in_dygraph_mode():
- out = core.ops.iou_similarity(x, y, 'box_normalized', box_normalized)
- return out
- else:
- helper = LayerHelper("iou_similarity", **locals())
- out = helper.create_variable_for_type_inference(dtype=x.dtype)
-
- helper.append_op(
- type="iou_similarity",
- inputs={"X": x,
- "Y": y},
- attrs={"box_normalized": box_normalized},
- outputs={"Out": out})
- return out
-
-
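The IOU formula above reduces to a few lines of NumPy. A minimal sketch with a hypothetical pairwise_iou helper (not part of ppdet; the +1 offset used for non-normalized boxes elsewhere in this file is omitted):

import numpy as np

def pairwise_iou(x, y):
    # x: (N, 4), y: (M, 4), boxes as [xmin, ymin, xmax, ymax] -> (N, M) IoU
    area_x = (x[:, 2] - x[:, 0]) * (x[:, 3] - x[:, 1])
    area_y = (y[:, 2] - y[:, 0]) * (y[:, 3] - y[:, 1])
    lt = np.maximum(x[:, None, :2], y[None, :, :2])  # intersection left-top
    rb = np.minimum(x[:, None, 2:], y[None, :, 2:])  # intersection right-bottom
    wh = np.clip(rb - lt, 0, None)
    inter = wh[..., 0] * wh[..., 1]
    return inter / (area_x[:, None] + area_y[None, :] - inter)

x = np.array([[0., 0., 10., 10.]])
y = np.array([[5., 5., 15., 15.], [20., 20., 30., 30.]])
print(pairwise_iou(x, y))  # [[0.1428..., 0.]]
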
-@paddle.jit.not_to_static
-def collect_fpn_proposals(multi_rois,
- multi_scores,
- min_level,
- max_level,
- post_nms_top_n,
- rois_num_per_level=None,
- name=None):
- """
-
- **This OP only supports LoDTensor as input**. Concat multi-level RoIs
- (Region of Interest) and select N RoIs with respect to multi_scores.
- This operation performs the following steps:
-
- 1. Choose num_level RoIs and scores as input: num_level = max_level - min_level + 1
- 2. Concat multi-level RoIs and scores
- 3. Sort scores and select post_nms_top_n scores
- 4. Gather RoIs by selected indices from scores
- 5. Re-sort RoIs by corresponding batch_id
-
- Args:
- multi_rois(list): List of RoIs to collect. Element in list is 2-D
- LoDTensor with shape [N, 4] and data type is float32 or float64,
- N is the number of RoIs.
- multi_scores(list): List of scores of RoIs to collect. Element in list
- is 2-D LoDTensor with shape [N, 1] and data type is float32 or
- float64, N is the number of RoIs.
- min_level(int): The lowest level of FPN layer to collect
- max_level(int): The highest level of FPN layer to collect
- post_nms_top_n(int): The number of selected RoIs
- rois_num_per_level(list, optional): The List of RoIs' numbers.
- Each element is 1-D Tensor which contains the RoIs' number of each
- image on each level and the shape is [B] and data type is
- int32, B is the number of images. If it is not None then return
- a 1-D Tensor contains the output RoIs' number of each image and
- the shape is [B]. Default: None
- name(str, optional): For detailed information, please refer
- to :ref:`api_guide_Name`. Usually name is no need to set and
- None by default.
-
- Returns:
- Variable:
-
- fpn_rois(Variable): 2-D LoDTensor with shape [N, 4] and data type is
- float32 or float64. Selected RoIs.
-
- rois_num(Tensor): 1-D Tensor contains the RoIs's number of each
- image. The shape is [B] and data type is int32. B is the number of
- images.
-
- Examples:
- .. code-block:: python
-
- import paddle
- from paddlers.models.ppdet.modeling import ops
- paddle.enable_static()
- multi_rois = []
- multi_scores = []
- for i in range(4):
- multi_rois.append(paddle.static.data(
- name='roi_'+str(i), shape=[None, 4], dtype='float32', lod_level=1))
- for i in range(4):
- multi_scores.append(paddle.static.data(
- name='score_'+str(i), shape=[None, 1], dtype='float32', lod_level=1))
-
- fpn_rois = ops.collect_fpn_proposals(
- multi_rois=multi_rois,
- multi_scores=multi_scores,
- min_level=2,
- max_level=5,
- post_nms_top_n=2000)
- """
- check_type(multi_rois, 'multi_rois', list, 'collect_fpn_proposals')
- check_type(multi_scores, 'multi_scores', list, 'collect_fpn_proposals')
- num_lvl = max_level - min_level + 1
- input_rois = multi_rois[:num_lvl]
- input_scores = multi_scores[:num_lvl]
-
- if in_dygraph_mode():
- assert rois_num_per_level is not None, "rois_num_per_level should not be None in dygraph mode."
- attrs = ('post_nms_topN', post_nms_top_n)
- output_rois, rois_num = core.ops.collect_fpn_proposals(
- input_rois, input_scores, rois_num_per_level, *attrs)
- return output_rois, rois_num
-
- else:
- helper = LayerHelper('collect_fpn_proposals', **locals())
- dtype = helper.input_dtype('multi_rois')
- check_dtype(dtype, 'multi_rois', ['float32', 'float64'],
- 'collect_fpn_proposals')
- output_rois = helper.create_variable_for_type_inference(dtype)
- output_rois.stop_gradient = True
-
- inputs = {
- 'MultiLevelRois': input_rois,
- 'MultiLevelScores': input_scores,
- }
- outputs = {'FpnRois': output_rois}
- if rois_num_per_level is not None:
- inputs['MultiLevelRoIsNum'] = rois_num_per_level
- rois_num = helper.create_variable_for_type_inference(dtype='int32')
- rois_num.stop_gradient = True
- outputs['RoisNum'] = rois_num
- helper.append_op(
- type='collect_fpn_proposals',
- inputs=inputs,
- outputs=outputs,
- attrs={'post_nms_topN': post_nms_top_n})
- return output_rois, rois_num
-
-
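For one image, steps 1-4 above amount to a concat, a sort, and a gather. An illustrative NumPy sketch (the real op also re-sorts the selected RoIs by batch id across a batch, which this single-image toy skips):

import numpy as np

def collect_fpn_proposals_np(multi_rois, multi_scores, post_nms_top_n):
    rois = np.concatenate(multi_rois, axis=0)            # step 2: concat levels
    scores = np.concatenate(multi_scores, axis=0).reshape(-1)
    order = np.argsort(-scores)[:post_nms_top_n]         # step 3: top-N scores
    return rois[order]                                   # step 4: gather RoIs

lvl2_rois = np.array([[0., 0., 8., 8.], [1., 1., 9., 9.]])
lvl3_rois = np.array([[2., 2., 20., 20.]])
scores = [np.array([[0.3], [0.9]]), np.array([[0.5]])]
print(collect_fpn_proposals_np([lvl2_rois, lvl3_rois], scores, post_nms_top_n=2))
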
@paddle.jit.not_to_static
def distribute_fpn_proposals(fpn_rois,
min_level,
@@ -471,14 +128,14 @@ def distribute_fpn_proposals(fpn_rois,
rois_num=None,
name=None):
r"""
-
- **This op only takes LoDTensor as input.** In Feature Pyramid Networks
- (FPN) models, it is needed to distribute all proposals into different FPN
- level, with respect to scale of the proposals, the referring scale and the
- referring level. Besides, to restore the order of proposals, we return an
- array which indicates the original index of rois in current proposals.
+
+ **This op only takes LoDTensor as input.** In Feature Pyramid Networks
+ (FPN) models, all proposals need to be distributed to different FPN
+ levels according to the scale of each proposal, the referring scale and the
+ referring level. Besides, to restore the order of proposals, we return an
+ array which indicates the original index of rois in the current proposals.
To compute FPN level for each roi, the formula is given as follows:
-
+
.. math::
roi\_scale &= \sqrt{BBoxArea(fpn\_roi)}
@@ -489,36 +146,36 @@ def distribute_fpn_proposals(fpn_rois,
Args:
- fpn_rois(Variable): 2-D Tensor with shape [N, 4] and data type is
+ fpn_rois(Variable): 2-D Tensor with shape [N, 4] and data type is
float32 or float64. The input fpn_rois.
- min_level(int32): The lowest level of FPN layer where the proposals come
+ min_level(int32): The lowest level of FPN layer where the proposals come
from.
max_level(int32): The highest level of FPN layer where the proposals
come from.
refer_level(int32): The referring level of FPN layer with specified scale.
refer_scale(int32): The referring scale of FPN layer with specified level.
- rois_num(Tensor): 1-D Tensor contains the number of RoIs in each image.
+ rois_num(Tensor): 1-D Tensor contains the number of RoIs in each image.
The shape is [B] and data type is int32. B is the number of images.
- If it is not None then return a list of 1-D Tensor. Each element
+ If it is not None then return a list of 1-D Tensor. Each element
is the output RoIs' number of each image on the corresponding level
and the shape is [B]. None by default.
- name(str, optional): For detailed information, please refer
- to :ref:`api_guide_Name`. Usually name is no need to set and
- None by default.
+ name(str, optional): For detailed information, please refer
+ to :ref:`api_guide_Name`. Usually there is no need to set name, and it is
+ None by default.
Returns:
Tuple:
- multi_rois(List) : A list of 2-D LoDTensor with shape [M, 4]
- and data type of float32 and float64. The length is
+ multi_rois(List) : A list of 2-D LoDTensor with shape [M, 4]
+ and data type of float32 or float64. The length is
max_level-min_level+1. The proposals in each FPN level.
- restore_ind(Variable): A 2-D Tensor with shape [N, 1], N is
+ restore_ind(Variable): A 2-D Tensor with shape [N, 1], N is
the number of total rois. The data type is int32. It is
used to restore the order of fpn_rois.
- rois_num_per_level(List): A list of 1-D Tensor and each Tensor is
- the RoIs' number in each image on the corresponding level. The shape
+ rois_num_per_level(List): A list of 1-D Tensor and each Tensor is
+ the RoIs' number in each image on the corresponding level. The shape
is [B] and data type of int32. B is the number of images
@@ -539,13 +196,14 @@ def distribute_fpn_proposals(fpn_rois,
"""
num_lvl = max_level - min_level + 1
- if in_dygraph_mode():
+ if in_dynamic_mode():
assert rois_num is not None, "rois_num should not be None in dygraph mode."
attrs = ('min_level', min_level, 'max_level', max_level, 'refer_level',
refer_level, 'refer_scale', refer_scale, 'pixel_offset',
pixel_offset)
- multi_rois, restore_ind, rois_num_per_level = core.ops.distribute_fpn_proposals(
+ multi_rois, restore_ind, rois_num_per_level = C_ops.distribute_fpn_proposals(
fpn_rois, rois_num, num_lvl, num_lvl, *attrs)
+
return multi_rois, restore_ind, rois_num_per_level
else:
@@ -573,6 +231,8 @@ def distribute_fpn_proposals(fpn_rois,
for i in range(num_lvl)
]
outputs['MultiLevelRoIsNum'] = rois_num_per_level
+ else:
+ rois_num_per_level = None
helper.append_op(
type='distribute_fpn_proposals',
@@ -588,143 +248,6 @@ def distribute_fpn_proposals(fpn_rois,
return multi_rois, restore_ind, rois_num_per_level
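The rule this op implements is the formula above: each RoI is assigned to level floor(log2(sqrt(area) / refer_scale) + refer_level), clipped to [min_level, max_level]. A NumPy sketch of that assignment (illustrative; the real op also applies a pixel offset when pixel_offset is True):

import numpy as np

def roi_levels(rois, min_level, max_level, refer_level, refer_scale):
    w = rois[:, 2] - rois[:, 0]
    h = rois[:, 3] - rois[:, 1]
    roi_scale = np.sqrt(w * h)                       # sqrt(BBoxArea(fpn_roi))
    lvl = np.floor(np.log2(roi_scale / refer_scale + 1e-8) + refer_level)
    return np.clip(lvl, min_level, max_level).astype(np.int32)

rois = np.array([[0., 0., 56., 56.],       # scale 56  -> level 2
                 [0., 0., 448., 448.]])    # scale 448 -> level 5
print(roi_levels(rois, min_level=2, max_level=5, refer_level=4, refer_scale=224))
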
-@paddle.jit.not_to_static
-def yolo_box(
- x,
- origin_shape,
- anchors,
- class_num,
- conf_thresh,
- downsample_ratio,
- clip_bbox=True,
- scale_x_y=1.,
- name=None, ):
- """
-
- This operator generates YOLO detection boxes from output of YOLOv3 network.
-
- The output of the previous network is in shape [N, C, H, W], where H and W
- should be the same and specify the grid size. Each grid point predicts a
- given number of boxes; this number, hereafter denoted as S, is specified by
- the number of anchors. In the second (channel) dimension, C should be equal
- to S * (5 + class_num), where class_num is the number of object categories
- in the source dataset (such as 80 in the COCO dataset). So, apart from the 4
- box location coordinates x, y, w, h, the second (channel) dimension also
- includes the confidence score of the box and the class one-hot key of each
- anchor box.
- Assume the 4 location coordinates are :math:`t_x, t_y, t_w, t_h`, the box
- predictions should be as follows:
- $$
- b_x = \\sigma(t_x) + c_x
- $$
- $$
- b_y = \\sigma(t_y) + c_y
- $$
- $$
- b_w = p_w e^{t_w}
- $$
- $$
- b_h = p_h e^{t_h}
- $$
- in the equation above, :math:`c_x, c_y` is the left top corner of current grid
- and :math:`p_w, p_h` is specified by anchors.
- The logistic regression value of the 5th channel of each anchor prediction box
- represents the confidence score of that prediction box, and the logistic
- regression values of the last :attr:`class_num` channels of each anchor prediction
- box represent the classification scores. Boxes with confidence scores less than
- :attr:`conf_thresh` should be ignored, and the final box score is the product of
- the confidence score and the classification score.
- $$
- score_{pred} = score_{conf} * score_{class}
- $$
-
- Args:
- x (Tensor): The input tensor of YoloBox operator is a 4-D tensor with shape of [N, C, H, W].
- The second dimension(C) stores box locations, confidence score and
- classification one-hot keys of each anchor box. Generally, X should be the output of YOLOv3 network.
- The data type is float32 or float64.
- origin_shape (Tensor): The image size tensor of YoloBox operator. This is a 2-D tensor with shape of [N, 2].
- This tensor holds height and width of each input image used for resizing output box in input image
- scale. The data type is int32.
- anchors (list|tuple): The anchor width and height, it will be parsed pair by pair.
- class_num (int): The number of classes to predict.
- conf_thresh (float): The confidence scores threshold of detection boxes. Boxes with confidence scores
- under threshold should be ignored.
- downsample_ratio (int): The downsample ratio from network input to YoloBox operator input,
- so 32, 16, 8 should be set for the first, second, and third YoloBox operators.
- clip_bbox (bool): Whether clip output bonding box in Input(ImgSize) boundary. Default true.
- scale_x_y (float): Scale the center point of decoded bounding box. Default 1.0.
- name (string): The default value is None. Normally there is no need
- for user to set this property. For more information,
- please refer to :ref:`api_guide_Name`
-
- Returns:
- boxes Tensor: A 3-D tensor with shape [N, M, 4], the coordinates of boxes, N is the batch num,
- M is output box number, and the 3rd dimension stores [xmin, ymin, xmax, ymax] coordinates of boxes.
- scores Tensor: A 3-D tensor with shape [N, M, :attr:`class_num`], the classification scores of boxes, N is the batch num,
- M is output box number.
-
- Raises:
- TypeError: Attr anchors of yolo box must be list or tuple
- TypeError: Attr class_num of yolo box must be an integer
- TypeError: Attr conf_thresh of yolo box must be a float number
-
- Examples:
-
- .. code-block:: python
-
- import paddle
- from paddlers.models.ppdet.modeling import ops
-
- paddle.enable_static()
- x = paddle.static.data(name='x', shape=[None, 255, 13, 13], dtype='float32')
- img_size = paddle.static.data(name='img_size',shape=[None, 2],dtype='int64')
- anchors = [10, 13, 16, 30, 33, 23]
- boxes, scores = ops.yolo_box(x=x, origin_shape=img_size, class_num=80, anchors=anchors,
- conf_thresh=0.01, downsample_ratio=32)
- """
- helper = LayerHelper('yolo_box', **locals())
-
- if not isinstance(anchors, list) and not isinstance(anchors, tuple):
- raise TypeError("Attr anchors of yolo_box must be list or tuple")
- if not isinstance(class_num, int):
- raise TypeError("Attr class_num of yolo_box must be an integer")
- if not isinstance(conf_thresh, float):
- raise TypeError("Attr ignore_thresh of yolo_box must be a float number")
-
- if in_dygraph_mode():
- attrs = ('anchors', anchors, 'class_num', class_num, 'conf_thresh',
- conf_thresh, 'downsample_ratio', downsample_ratio, 'clip_bbox',
- clip_bbox, 'scale_x_y', scale_x_y)
- boxes, scores = core.ops.yolo_box(x, origin_shape, *attrs)
- return boxes, scores
- else:
- boxes = helper.create_variable_for_type_inference(dtype=x.dtype)
- scores = helper.create_variable_for_type_inference(dtype=x.dtype)
-
- attrs = {
- "anchors": anchors,
- "class_num": class_num,
- "conf_thresh": conf_thresh,
- "downsample_ratio": downsample_ratio,
- "clip_bbox": clip_bbox,
- "scale_x_y": scale_x_y,
- }
-
- helper.append_op(
- type='yolo_box',
- inputs={
- "X": x,
- "ImgSize": origin_shape,
- },
- outputs={
- 'Boxes': boxes,
- 'Scores': scores,
- },
- attrs=attrs)
- return boxes, scores
-
-
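The decoding equations above are easy to trace for a single grid cell and anchor. An illustrative NumPy sketch (it omits clip_bbox, scale_x_y, and the rescaling to the original image that the full op performs):

import numpy as np

def sigmoid(v):
    return 1.0 / (1.0 + np.exp(-v))

t_x, t_y, t_w, t_h = 0.2, -0.1, 0.3, 0.05  # raw network outputs
c_x, c_y = 6, 4                            # left-top corner of the grid cell
p_w, p_h = 3.0, 5.0                        # anchor size in grid units

b_x = sigmoid(t_x) + c_x
b_y = sigmoid(t_y) + c_y
b_w = p_w * np.exp(t_w)
b_h = p_h * np.exp(t_h)
print(b_x, b_y, b_w, b_h)

score_pred = sigmoid(1.2) * sigmoid(2.0)   # confidence score * class score
print(score_pred)
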
@paddle.jit.not_to_static
def prior_box(input,
image,
@@ -769,7 +292,7 @@ def prior_box(input,
Caffe. Please note, this order affects the weights order of
convolution layer followed by and does not affect the final
detection results. Default: False.
- name(str, optional): The default value is None. Normally there is no need for
+ name(str, optional): The default value is None. Normally there is no need for
user to set this property. For more information, please refer to :ref:`api_guide_Name`
Returns:
@@ -827,14 +350,14 @@ def prior_box(input,
max_sizes = [max_sizes]
cur_max_sizes = max_sizes
- if in_dygraph_mode():
+ if in_dynamic_mode():
attrs = ('min_sizes', min_sizes, 'aspect_ratios', aspect_ratios,
'variances', variance, 'flip', flip, 'clip', clip, 'step_w',
steps[0], 'step_h', steps[1], 'offset', offset,
'min_max_aspect_ratios_order', min_max_aspect_ratios_order)
if cur_max_sizes is not None:
attrs += ('max_sizes', cur_max_sizes)
- box, var = core.ops.prior_box(input, image, *attrs)
+ box, var = C_ops.prior_box(input, image, *attrs)
return box, var
else:
attrs = {
@@ -929,9 +452,9 @@ def multiclass_nms(bboxes,
step. -1 means keeping all bboxes after NMS step.
normalized (bool): Whether detections are normalized. Default: True
return_index(bool): Whether return selected index. Default: False
- rois_num(Tensor): 1-D Tensor contains the number of RoIs in each image.
+ rois_num(Tensor): 1-D Tensor contains the number of RoIs in each image.
The shape is [B] and data type is int32. B is the number of images.
- If it is not None then return a list of 1-D Tensor. Each element
+ If it is not None then return a list of 1-D Tensor. Each element
is the output RoIs' number of each image on the corresponding level
and the shape is [B]. None by default.
name(str): Name of the multiclass nms op. Default: None.
@@ -972,13 +495,13 @@ def multiclass_nms(bboxes,
"""
helper = LayerHelper('multiclass_nms3', **locals())
- if in_dygraph_mode():
+ if in_dynamic_mode():
attrs = ('background_label', background_label, 'score_threshold',
score_threshold, 'nms_top_k', nms_top_k, 'nms_threshold',
nms_threshold, 'keep_top_k', keep_top_k, 'nms_eta', nms_eta,
'normalized', normalized)
- output, index, nms_rois_num = core.ops.multiclass_nms3(bboxes, scores,
- rois_num, *attrs)
+ output, index, nms_rois_num = C_ops.multiclass_nms3(bboxes, scores,
+ rois_num, *attrs)
if not return_index:
index = None
return output, nms_rois_num, index
@@ -1086,7 +609,7 @@ def matrix_nms(bboxes,
from {0} to {1})
Index (Tensor): A 2-D Tensor with shape [No, 1] containing the
selected indices, which are absolute values cross batches.
- rois_num (Tensor): A 1-D Tensor with shape [N] containing
+ rois_num (Tensor): A 1-D Tensor with shape [N] containing
the number of detected boxes in each image.
Examples:
.. code-block:: python
@@ -1113,13 +636,13 @@ def matrix_nms(bboxes,
check_type(gaussian_sigma, 'gaussian_sigma', float, 'matrix_nms')
check_type(background_label, 'background_label', int, 'matrix_nms')
- if in_dygraph_mode():
+ if in_dynamic_mode():
attrs = ('background_label', background_label, 'score_threshold',
score_threshold, 'post_threshold', post_threshold, 'nms_top_k',
nms_top_k, 'gaussian_sigma', gaussian_sigma, 'use_gaussian',
use_gaussian, 'keep_top_k', keep_top_k, 'normalized',
normalized)
- out, index, rois_num = core.ops.matrix_nms(bboxes, scores, *attrs)
+ out, index, rois_num = C_ops.matrix_nms(bboxes, scores, *attrs)
if not return_index:
index = None
if not return_rois_num:
@@ -1158,111 +681,6 @@ def matrix_nms(bboxes,
return output, rois_num, index
-def bipartite_match(dist_matrix,
- match_type=None,
- dist_threshold=None,
- name=None):
- """
-
- This operator implements a greedy bipartite matching algorithm, which is
- used to obtain the matching with the maximum distance based on the input
- distance matrix. For input 2D matrix, the bipartite matching algorithm can
- find the matched column for each row (matched means the largest distance),
- and can also find the matched row for each column. This operator only
- calculates matched indices from column to row. For each instance,
- the number of matched indices is the column number of the input distance
- matrix. **The OP only supports CPU**.
-
- There are two outputs, matched indices and distances.
- Put simply, this algorithm matches the best (maximum-distance)
- row entity to each column entity, and the matched row indices are not
- duplicated within each row of ColToRowMatchIndices. If a column entity is
- not matched to any row entity, -1 is set in ColToRowMatchIndices.
-
- NOTE: the input DistMat can be LoDTensor (with LoD) or Tensor.
- If LoDTensor with LoD, the height of ColToRowMatchIndices is batch size.
- If Tensor, the height of ColToRowMatchIndices is 1.
-
- NOTE: This API is a very low level API. It is used by :code:`ssd_loss`
- layer. Please consider to use :code:`ssd_loss` instead.
-
- Args:
- dist_matrix(Tensor): This input is a 2-D LoDTensor with shape
- [K, M]. The data type is float32 or float64. It is pair-wise
- distance matrix between the entities represented by each row and
- each column. For example, assumed one entity is A with shape [K],
- another entity is B with shape [M]. The dist_matrix[i][j] is the
- distance between A[i] and B[j]. The bigger the distance is, the
- better matching the pairs are. NOTE: This tensor can contain LoD
- information to represent a batch of inputs. One instance of this
- batch can contain different numbers of entities.
- match_type(str, optional): The type of matching method, should be
- 'bipartite' or 'per_prediction'. None ('bipartite') by default.
- dist_threshold(float32, optional): If `match_type` is 'per_prediction',
- this threshold is to determine the extra matching bboxes based
- on the maximum distance, 0.5 by default.
- name(str, optional): For detailed information, please refer
- to :ref:`api_guide_Name`. Usually name is no need to set and
- None by default.
-
- Returns:
- Tuple:
-
- matched_indices(Tensor): A 2-D Tensor with shape [N, M]. The data
- type is int32. N is the batch size. If match_indices[i][j] is -1, it
- means B[j] does not match any entity in i-th instance.
- Otherwise, it means B[j] is matched to row
- match_indices[i][j] in i-th instance. The row number of
- i-th instance is saved in match_indices[i][j].
-
- matched_distance(Tensor): A 2-D Tensor with shape [N, M]. The data
- type is float32. N is batch size. If match_indices[i][j] is -1,
- match_distance[i][j] is also -1.0. Otherwise, assumed
- match_distance[i][j] = d, and the row offsets of each instance
- are called LoD. Then match_distance[i][j] =
- dist_matrix[d+LoD[i]][j].
-
- Examples:
-
- .. code-block:: python
-
- import paddle
- from paddlers.models.ppdet.modeling import ops
- from paddlers.models.ppdet.modeling.utils import iou_similarity
-
- paddle.enable_static()
-
- x = paddle.static.data(name='x', shape=[None, 4], dtype='float32')
- y = paddle.static.data(name='y', shape=[None, 4], dtype='float32')
- iou = iou_similarity(x=x, y=y)
- matched_indices, matched_dist = ops.bipartite_match(iou)
- """
- check_variable_and_dtype(dist_matrix, 'dist_matrix',
- ['float32', 'float64'], 'bipartite_match')
-
- if in_dygraph_mode():
- match_indices, match_distance = core.ops.bipartite_match(
- dist_matrix, "match_type", match_type, "dist_threshold",
- dist_threshold)
- return match_indices, match_distance
-
- helper = LayerHelper('bipartite_match', **locals())
- match_indices = helper.create_variable_for_type_inference(dtype='int32')
- match_distance = helper.create_variable_for_type_inference(
- dtype=dist_matrix.dtype)
- helper.append_op(
- type='bipartite_match',
- inputs={'DistMat': dist_matrix},
- attrs={
- 'match_type': match_type,
- 'dist_threshold': dist_threshold,
- },
- outputs={
- 'ColToRowMatchIndices': match_indices,
- 'ColToRowMatchDist': match_distance
- })
- return match_indices, match_distance
-
-
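For the default 'bipartite' mode described above, the greedy procedure repeatedly takes the global maximum of the distance matrix and pairs its row with its column. A single-instance NumPy sketch ('per_prediction' mode is omitted):

import numpy as np

def bipartite_match_np(dist):
    dist = dist.astype(float).copy()
    K, M = dist.shape
    match_indices = np.full(M, -1, dtype=np.int32)
    match_dist = np.zeros(M, dtype=np.float32)
    for _ in range(min(K, M)):
        i, j = np.unravel_index(np.argmax(dist), dist.shape)
        if dist[i, j] <= 0:
            break
        match_indices[j] = i
        match_dist[j] = dist[i, j]
        dist[i, :] = -1.0  # row i is taken
        dist[:, j] = -1.0  # column j is taken
    return match_indices, match_dist

iou = np.array([[0.8, 0.1], [0.4, 0.3], [0.0, 0.9]])
print(bipartite_match_np(iou))  # indices [0, 2], distances [0.8, 0.9]
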
@paddle.jit.not_to_static
def box_coder(prior_box,
prior_box_var,
@@ -1274,74 +692,74 @@ def box_coder(prior_box,
r"""
**Box Coder Layer**
Encode/Decode the target bounding box with the priorbox information.
-
+
The Encoding schema described below:
.. math::
ox = (tx - px) / pw / pxv
oy = (ty - py) / ph / pyv
- ow = \log(\abs(tw / pw)) / pwv
- oh = \log(\abs(th / ph)) / phv
+ ow = \log(|tw / pw|) / pwv
+ oh = \log(|th / ph|) / phv
The Decoding schema described below:
-
+
.. math::
-
+
ox = (pw * pxv * tx + px) - tw / 2
oy = (ph * pyv * ty + py) - th / 2
ow = \exp(pwv * tw) * pw + tw / 2
- oh = \exp(phv * th) * ph + th / 2
- where `tx`, `ty`, `tw`, `th` denote the target box's center coordinates,
- width and height respectively. Similarly, `px`, `py`, `pw`, `ph` denote
- the priorbox's (anchor) center coordinates, width and height. `pxv`,
- `pyv`, `pwv`, `phv` denote the variance of the priorbox and `ox`, `oy`,
- `ow`, `oh` denote the encoded/decoded coordinates, width and height.
- During Box Decoding, two modes for broadcast are supported. Say target
- box has shape [N, M, 4], and the shape of prior box can be [N, 4] or
- [M, 4]. Then prior box will broadcast to target box along the
- assigned axis.
+ oh = \exp(phv * th) * ph + th / 2
+ where `tx`, `ty`, `tw`, `th` denote the target box's center coordinates,
+ width and height respectively. Similarly, `px`, `py`, `pw`, `ph` denote
+ the priorbox's (anchor) center coordinates, width and height. `pxv`,
+ `pyv`, `pwv`, `phv` denote the variance of the priorbox and `ox`, `oy`,
+ `ow`, `oh` denote the encoded/decoded coordinates, width and height.
+ During Box Decoding, two modes for broadcast are supported. Say target
+ box has shape [N, M, 4], and the shape of prior box can be [N, 4] or
+ [M, 4]. Then prior box will broadcast to target box along the
+ assigned axis.
Args:
- prior_box(Tensor): Box list prior_box is a 2-D Tensor with shape
+ prior_box(Tensor): Box list prior_box is a 2-D Tensor with shape
[M, 4] holds M boxes and data type is float32 or float64. Each box
- is represented as [xmin, ymin, xmax, ymax], [xmin, ymin] is the
+ is represented as [xmin, ymin, xmax, ymax], [xmin, ymin] is the
left top coordinate of the anchor box, if the input is image feature
- map, they are close to the origin of the coordinate system.
- [xmax, ymax] is the right bottom coordinate of the anchor box.
- prior_box_var(List|Tensor|None): prior_box_var supports three types
- of input. One is Tensor with shape [M, 4] which holds M group and
- data type is float32 or float64. The second is list consist of
- 4 elements shared by all boxes and data type is float32 or float64.
- Other is None and not involved in calculation.
- target_box(Tensor): This input can be a 2-D LoDTensor with shape
- [N, 4] when code_type is 'encode_center_size'. This input also can
- be a 3-D Tensor with shape [N, M, 4] when code_type is
- 'decode_center_size'. Each box is represented as
- [xmin, ymin, xmax, ymax]. The data type is float32 or float64.
+ map, they are close to the origin of the coordinate system.
+ [xmax, ymax] is the right bottom coordinate of the anchor box.
+ prior_box_var(List|Tensor|None): prior_box_var supports three types
+ of input. One is a Tensor with shape [M, 4] which holds M groups and
+ whose data type is float32 or float64. The second is a list consisting of
+ 4 elements shared by all boxes, with data type float32 or float64.
+ The other is None, which is not involved in the calculation.
+ target_box(Tensor): This input can be a 2-D LoDTensor with shape
+ [N, 4] when code_type is 'encode_center_size'. This input also can
+ be a 3-D Tensor with shape [N, M, 4] when code_type is
+ 'decode_center_size'. Each box is represented as
+ [xmin, ymin, xmax, ymax]. The data type is float32 or float64.
code_type(str): The code type used with the target box. It can be
- `encode_center_size` or `decode_center_size`. `encode_center_size`
+ `encode_center_size` or `decode_center_size`. `encode_center_size`
by default.
box_normalized(bool): Whether treat the priorbox as a normalized box.
Set true by default.
- axis(int): Which axis in PriorBox to broadcast for box decode,
- for example, if axis is 0 and TargetBox has shape [N, M, 4] and
+ axis(int): Which axis in PriorBox to broadcast for box decode,
+ for example, if axis is 0 and TargetBox has shape [N, M, 4] and
PriorBox has shape [M, 4], then PriorBox will broadcast to [N, M, 4]
- for decoding. It is only valid when code type is
- `decode_center_size`. Set 0 by default.
- name(str, optional): For detailed information, please refer
- to :ref:`api_guide_Name`. Usually name is no need to set and
- None by default.
+ for decoding. It is only valid when code type is
+ `decode_center_size`. Set 0 by default.
+ name(str, optional): For detailed information, please refer
+ to :ref:`api_guide_Name`. Usually there is no need to set name, and it is
+ None by default.
Returns:
Tensor:
- output_box(Tensor): When code_type is 'encode_center_size', the
- output tensor of box_coder_op with shape [N, M, 4] representing the
- result of N target boxes encoded with M Prior boxes and variances.
- When code_type is 'decode_center_size', N represents the batch size
+ output_box(Tensor): When code_type is 'encode_center_size', the
+ output tensor of box_coder_op with shape [N, M, 4] representing the
+ result of N target boxes encoded with M Prior boxes and variances.
+ When code_type is 'decode_center_size', N represents the batch size
and M represents the number of decoded boxes.
Examples:
-
+
.. code-block:: python
-
+
import paddle
from paddlers.models.ppdet.modeling import ops
paddle.enable_static()
@@ -1375,14 +793,14 @@ def box_coder(prior_box,
check_variable_and_dtype(target_box, 'target_box', ['float32', 'float64'],
'box_coder')
- if in_dygraph_mode():
+ if in_dynamic_mode():
if isinstance(prior_box_var, Variable):
- output_box = core.ops.box_coder(
+ output_box = C_ops.box_coder(
prior_box, prior_box_var, target_box, "code_type", code_type,
"box_normalized", box_normalized, "axis", axis)
elif isinstance(prior_box_var, list):
- output_box = core.ops.box_coder(
+ output_box = C_ops.box_coder(
prior_box, None, target_box, "code_type", code_type,
"box_normalized", box_normalized, "axis", axis, "variance",
prior_box_var)
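With all boxes already in center-size form, the encode/decode schema above round-trips exactly. A NumPy sketch (illustrative; the op itself converts corner boxes, broadcasts over M priors, and emits corner-form output, which is where the -tw/2 terms in its decode formulas come from):

import numpy as np

px, py, pw, ph = 5.0, 5.0, 10.0, 10.0    # prior box center and size
pxv, pyv, pwv, phv = 0.1, 0.1, 0.2, 0.2  # prior box variances
tx, ty, tw, th = 6.0, 4.0, 8.0, 12.0     # target box center and size

# encode
ox = (tx - px) / pw / pxv
oy = (ty - py) / ph / pyv
ow = np.log(abs(tw / pw)) / pwv
oh = np.log(abs(th / ph)) / phv

# decode back (center form)
print(pw * pxv * ox + px, ph * pyv * oy + py,        # recovers 6.0, 4.0
      np.exp(pwv * ow) * pw, np.exp(phv * oh) * ph)  # recovers 8.0, 12.0
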
@@ -1434,16 +852,16 @@ def generate_proposals(scores,
"""
**Generate proposal Faster-RCNN**
This operation proposes RoIs according to each box with their
- probability to be a foreground object and
+ probability to be a foreground object and
the boxes can be calculated by anchors. Bbox deltas and scores
indicating objectness are the output of the RPN. Final proposals
could be used to train a detection net.
For generating proposals, this operation performs following steps:
1. Transposes and resizes scores and bbox_deltas in size of
(H*W*A, 1) and (H*W*A, 4)
- 2. Calculate box locations as proposals candidates.
+ 2. Calculate box locations as proposal candidates.
3. Clip boxes to image
- 4. Remove predicted boxes with small area.
+ 4. Remove predicted boxes with small area.
5. Apply NMS to get final proposals as output.
Args:
scores(Tensor): A 4-D Tensor with shape [N, A, H, W] represents
@@ -1454,7 +872,7 @@ def generate_proposals(scores,
represents the difference between predicted box location and
anchor location. The data type must be float32.
im_shape(Tensor): A 2-D Tensor with shape [N, 2] represents H, W, the
- origin image size or input size. The data type can be float32 or
+ origin image size or input size. The data type can be float32 or
float64.
anchors(Tensor): A 4-D Tensor represents the anchors with a layout
of [H, W, A, 4]. H and W are height and width of the feature map,
@@ -1472,13 +890,13 @@ def generate_proposals(scores,
width < min_size. The data type must be float32. `0.1` by default.
eta(float): Apply in adaptive NMS, if adaptive `threshold > 0.5`,
`adaptive_threshold = adaptive_threshold * eta` in each iteration.
- return_rois_num(bool): When setting True, it will return a 1D Tensor with shape [N, ] that includes Rois's
+ return_rois_num(bool): When set to True, it will return a 1D Tensor with shape [N, ] that includes the RoI
num of each image in one batch. N is the number of images. For example, the tensor has values [4,5], which represents
- the first image has 4 Rois, the second image has 5 Rois. It only used in rcnn model.
- 'False' by default.
- name(str, optional): For detailed information, please refer
- to :ref:`api_guide_Name`. Usually name is no need to set and
- None by default.
+ the first image has 4 RoIs and the second image has 5 RoIs. It is only used in RCNN models.
+ False by default.
+ name(str, optional): For detailed information, please refer
+ to :ref:`api_guide_Name`. Usually there is no need to set name, and it is
+ None by default.
Returns:
tuple:
@@ -1488,7 +906,7 @@ def generate_proposals(scores,
Examples:
.. code-block:: python
-
+
import paddle
from paddlers.models.ppdet.modeling import ops
paddle.enable_static()
@@ -1500,13 +918,15 @@ def generate_proposals(scores,
rois, roi_probs = ops.generate_proposals(scores, bbox_deltas,
im_shape, anchors, variances)
"""
- if in_dygraph_mode():
+ if in_dynamic_mode():
assert return_rois_num, "return_rois_num should be True in dygraph mode."
attrs = ('pre_nms_topN', pre_nms_top_n, 'post_nms_topN', post_nms_top_n,
'nms_thresh', nms_thresh, 'min_size', min_size, 'eta', eta,
'pixel_offset', pixel_offset)
- rpn_rois, rpn_roi_probs, rpn_rois_num = core.ops.generate_proposals_v2(
+ rpn_rois, rpn_roi_probs, rpn_rois_num = C_ops.generate_proposals_v2(
scores, bbox_deltas, im_shape, anchors, variances, *attrs)
+ if not return_rois_num:
+ rpn_rois_num = None
return rpn_rois, rpn_roi_probs, rpn_rois_num
else:
@@ -1557,6 +977,8 @@ def generate_proposals(scores,
outputs=outputs)
rpn_rois.stop_gradient = True
rpn_roi_probs.stop_gradient = True
+ if not return_rois_num:
+ rpn_rois_num = None
return rpn_rois, rpn_roi_probs, rpn_rois_num
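A condensed single-image NumPy sketch of steps 2-5 above (illustrative only: delta decoding is simplified to plain offsets rather than the full box-coder transform, and a plain top-k stands in for NMS):

import numpy as np

def toy_generate_proposals(anchors, deltas, scores, im_hw, min_size, top_n):
    proposals = anchors + deltas                               # 2. decode candidates
    proposals[:, 0::2] = proposals[:, 0::2].clip(0, im_hw[1])  # 3. clip x to width
    proposals[:, 1::2] = proposals[:, 1::2].clip(0, im_hw[0])  #    clip y to height
    w = proposals[:, 2] - proposals[:, 0]
    h = proposals[:, 3] - proposals[:, 1]
    keep = (w >= min_size) & (h >= min_size)                   # 4. drop small boxes
    proposals, scores = proposals[keep], scores[keep]
    order = np.argsort(-scores)[:top_n]                        # 5. top-k in place of NMS
    return proposals[order], scores[order]

anchors = np.array([[0., 0., 16., 16.], [8., 8., 40., 40.]])
deltas = np.array([[1., 1., 1., 1.], [-2., -2., 30., 30.]])
scores = np.array([0.7, 0.9])
print(toy_generate_proposals(anchors, deltas, scores, (32, 32), 4.0, 1))
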
diff --git a/paddlers/models/ppdet/modeling/post_process.py b/paddlers/models/ppdet/modeling/post_process.py
index b9e556e..61f6b03 100644
--- a/paddlers/models/ppdet/modeling/post_process.py
+++ b/paddlers/models/ppdet/modeling/post_process.py
@@ -17,7 +17,7 @@ import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppdet.core.workspace import register
-from paddlers.models.ppdet.modeling.bbox_utils import nonempty_bbox, rbox2poly
+from paddlers.models.ppdet.modeling.bbox_utils import nonempty_bbox
from paddlers.models.ppdet.modeling.layers import TTFBox
from .transformers import bbox_cxcywh_to_xyxy
try:
@@ -27,23 +27,30 @@ except Exception:
__all__ = [
'BBoxPostProcess', 'MaskPostProcess', 'FCOSPostProcess',
- 'S2ANetBBoxPostProcess', 'JDEBBoxPostProcess', 'CenterNetPostProcess',
- 'DETRBBoxPostProcess', 'SparsePostProcess'
+ 'JDEBBoxPostProcess', 'CenterNetPostProcess', 'DETRBBoxPostProcess',
+ 'SparsePostProcess'
]
@register
-class BBoxPostProcess(nn.Layer):
- __shared__ = ['num_classes']
+class BBoxPostProcess(object):
+ __shared__ = ['num_classes', 'export_onnx', 'export_eb']
__inject__ = ['decode', 'nms']
- def __init__(self, num_classes=80, decode=None, nms=None):
+ def __init__(self,
+ num_classes=80,
+ decode=None,
+ nms=None,
+ export_onnx=False,
+ export_eb=False):
super(BBoxPostProcess, self).__init__()
self.num_classes = num_classes
self.decode = decode
self.nms = nms
+ self.export_onnx = export_onnx
+ self.export_eb = export_eb
- def forward(self, head_out, rois, im_shape, scale_factor):
+ def __call__(self, head_out, rois, im_shape, scale_factor):
"""
Decode the bbox and do NMS if needed.
@@ -52,6 +59,7 @@ class BBoxPostProcess(nn.Layer):
rois (tuple): roi and rois_num of rpn_head output.
im_shape (Tensor): The shape of the input image.
scale_factor (Tensor): The scale factor of the input image.
+ export_onnx (bool): whether to export the model to ONNX.
Returns:
bbox_pred (Tensor): The output prediction with shape [N, 6], including
labels, scores and bboxes. The size of bboxes are corresponding
@@ -62,15 +70,26 @@ class BBoxPostProcess(nn.Layer):
if self.nms is not None:
bboxes, score = self.decode(head_out, rois, im_shape, scale_factor)
bbox_pred, bbox_num, _ = self.nms(bboxes, score, self.num_classes)
+
else:
bbox_pred, bbox_num = self.decode(head_out, rois, im_shape,
scale_factor)
+
+ if self.export_onnx:
+ # add fake box after postprocess when exporting onnx
+ fake_bboxes = paddle.to_tensor(
+ np.array(
+ [[0., 0.0, 0.0, 0.0, 1.0, 1.0]], dtype='float32'))
+
+ bbox_pred = paddle.concat([bbox_pred, fake_bboxes])
+ bbox_num = bbox_num + 1
+
return bbox_pred, bbox_num
def get_pred(self, bboxes, bbox_num, im_shape, scale_factor):
"""
- Rescale, clip and filter the bbox from the output of NMS to
- get final prediction.
+ Rescale, clip and filter the bbox from the output of NMS to
+ get final prediction.
Notes:
Currently only support bs = 1.
@@ -86,46 +105,59 @@ class BBoxPostProcess(nn.Layer):
pred_result (Tensor): The final prediction results with shape [N, 6]
including labels, scores and bboxes.
"""
-
- bboxes_list = []
- bbox_num_list = []
- id_start = 0
- fake_bboxes = paddle.to_tensor(
- np.array(
- [[-1, 0.0, 0.0, 0.0, 0.0, 0.0]], dtype='float32'))
- fake_bbox_num = paddle.to_tensor(np.array([1], dtype='int32'))
-
- # add fake bbox when output is empty for each batch
- for i in range(bbox_num.shape[0]):
- if bbox_num[i] == 0:
- bboxes_i = fake_bboxes
- bbox_num_i = fake_bbox_num
- id_start += 1
- else:
- bboxes_i = bboxes[id_start:id_start + bbox_num[i], :]
- bbox_num_i = bbox_num[i]
- id_start += bbox_num[i]
- bboxes_list.append(bboxes_i)
- bbox_num_list.append(bbox_num_i)
- bboxes = paddle.concat(bboxes_list)
- bbox_num = paddle.concat(bbox_num_list)
+ if self.export_eb:
+ # let RCNN models on EdgeBoard hardware skip the following postprocess.
+ return bboxes, bboxes, bbox_num
+
+ if not self.export_onnx:
+ bboxes_list = []
+ bbox_num_list = []
+ id_start = 0
+ fake_bboxes = paddle.to_tensor(
+ np.array(
+ [[0., 0.0, 0.0, 0.0, 1.0, 1.0]], dtype='float32'))
+ fake_bbox_num = paddle.to_tensor(np.array([1], dtype='int32'))
+
+ # add fake bbox when output is empty for each batch
+ for i in range(bbox_num.shape[0]):
+ if bbox_num[i] == 0:
+ bboxes_i = fake_bboxes
+ bbox_num_i = fake_bbox_num
+ else:
+ bboxes_i = bboxes[id_start:id_start + bbox_num[i], :]
+ bbox_num_i = bbox_num[i]
+ id_start += bbox_num[i]
+ bboxes_list.append(bboxes_i)
+ bbox_num_list.append(bbox_num_i)
+ bboxes = paddle.concat(bboxes_list)
+ bbox_num = paddle.concat(bbox_num_list)
origin_shape = paddle.floor(im_shape / scale_factor + 0.5)
- origin_shape_list = []
- scale_factor_list = []
- # scale_factor: scale_y, scale_x
- for i in range(bbox_num.shape[0]):
- expand_shape = paddle.expand(origin_shape[i:i + 1, :],
- [bbox_num[i], 2])
- scale_y, scale_x = scale_factor[i][0], scale_factor[i][1]
- scale = paddle.concat([scale_x, scale_y, scale_x, scale_y])
- expand_scale = paddle.expand(scale, [bbox_num[i], 4])
- origin_shape_list.append(expand_shape)
- scale_factor_list.append(expand_scale)
-
- self.origin_shape_list = paddle.concat(origin_shape_list)
- scale_factor_list = paddle.concat(scale_factor_list)
+ if not self.export_onnx:
+ origin_shape_list = []
+ scale_factor_list = []
+ # scale_factor: scale_y, scale_x
+ for i in range(bbox_num.shape[0]):
+ expand_shape = paddle.expand(origin_shape[i:i + 1, :],
+ [bbox_num[i], 2])
+ scale_y, scale_x = scale_factor[i][0], scale_factor[i][1]
+ scale = paddle.concat([scale_x, scale_y, scale_x, scale_y])
+ expand_scale = paddle.expand(scale, [bbox_num[i], 4])
+ origin_shape_list.append(expand_shape)
+ scale_factor_list.append(expand_scale)
+
+ self.origin_shape_list = paddle.concat(origin_shape_list)
+ scale_factor_list = paddle.concat(scale_factor_list)
+
+ else:
+ # simplify the computation for bs=1 when exporting onnx
+ scale_y, scale_x = scale_factor[0][0], scale_factor[0][1]
+ scale = paddle.concat(
+ [scale_x, scale_y, scale_x, scale_y]).unsqueeze(0)
+ self.origin_shape_list = paddle.expand(origin_shape,
+ [bbox_num[0], 2])
+ scale_factor_list = paddle.expand(scale, [bbox_num[0], 4])
# bboxes: [N, 6], label, score, bbox
pred_label = bboxes[:, 0:1]
@@ -148,7 +180,7 @@ class BBoxPostProcess(nn.Layer):
pred_label = paddle.where(keep_mask, pred_label,
paddle.ones_like(pred_label) * -1)
pred_result = paddle.concat([pred_label, pred_score, pred_bbox], axis=1)
- return pred_result
+ return bboxes, pred_result, bbox_num
def get_origin_shape(self, ):
return self.origin_shape_list
@@ -156,6 +188,7 @@ class BBoxPostProcess(nn.Layer):
@register
class MaskPostProcess(object):
+ __shared__ = ['export_onnx', 'assign_on_cpu']
"""
refer to:
https://github.com/facebookresearch/detectron2/layers/mask_ops.py
@@ -163,27 +196,36 @@ class MaskPostProcess(object):
Get Mask output according to the output from model
"""
- def __init__(self, binary_thresh=0.5):
+ def __init__(self,
+ binary_thresh=0.5,
+ export_onnx=False,
+ assign_on_cpu=False):
super(MaskPostProcess, self).__init__()
self.binary_thresh = binary_thresh
+ self.export_onnx = export_onnx
+ self.assign_on_cpu = assign_on_cpu
def paste_mask(self, masks, boxes, im_h, im_w):
"""
Paste the mask prediction to the original image.
"""
-
+ x0_int, y0_int = 0, 0
+ x1_int, y1_int = im_w, im_h
x0, y0, x1, y1 = paddle.split(boxes, 4, axis=1)
- masks = paddle.unsqueeze(masks, [0, 1])
- img_y = paddle.arange(0, im_h, dtype='float32') + 0.5
- img_x = paddle.arange(0, im_w, dtype='float32') + 0.5
+ N = masks.shape[0]
+ img_y = paddle.arange(y0_int, y1_int) + 0.5
+ img_x = paddle.arange(x0_int, x1_int) + 0.5
+
img_y = (img_y - y0) / (y1 - y0) * 2 - 1
img_x = (img_x - x0) / (x1 - x0) * 2 - 1
- img_x = paddle.unsqueeze(img_x, [1])
- img_y = paddle.unsqueeze(img_y, [2])
- N = boxes.shape[0]
-
- gx = paddle.expand(img_x, [N, img_y.shape[1], img_x.shape[2]])
- gy = paddle.expand(img_y, [N, img_y.shape[1], img_x.shape[2]])
+ # img_x, img_y have shapes (N, w), (N, h)
+
+ if self.assign_on_cpu:
+ paddle.set_device('cpu')
+ gx = img_x[:, None, :].expand(
+ [N, paddle.shape(img_y)[1], paddle.shape(img_x)[1]])
+ gy = img_y[:, :, None].expand(
+ [N, paddle.shape(img_y)[1], paddle.shape(img_x)[1]])
grid = paddle.stack([gx, gy], axis=3)
img_masks = F.grid_sample(masks, grid, align_corners=False)
return img_masks[:, 0]
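The key trick in paste_mask is the normalization it feeds to F.grid_sample: image pixel centers are mapped into the [-1, 1] extent of each box, so sampling the low-resolution mask pastes it into box coordinates, while out-of-box pixels fall outside [-1, 1] and sample to zero. A one-axis NumPy sketch:

import numpy as np

x0, x1 = 4.0, 12.0                     # box extent along x
img_x = np.arange(0, 16) + 0.5         # pixel centers of a 16-px-wide image
gx = (img_x - x0) / (x1 - x0) * 2 - 1  # -1 at x0, +1 at x1
print(gx[4], gx[11])                   # -0.875, 0.875: inside the box
print(gx[0])                           # -1.875: outside, grid_sample pads zeros
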
@@ -206,22 +248,38 @@ class MaskPostProcess(object):
"""
num_mask = mask_out.shape[0]
origin_shape = paddle.cast(origin_shape, 'int32')
- # TODO: support bs > 1 and mask output dtype is bool
- pred_result = paddle.zeros(
- [num_mask, origin_shape[0][0], origin_shape[0][1]], dtype='int32')
- if (len(bbox_num) == 1 and bbox_num[0] == 1) and bboxes[0][0] == -1:
- return pred_result
-
- # TODO: optimize chunk paste
- pred_result = []
- for i in range(bboxes.shape[0]):
- im_h, im_w = origin_shape[i][0], origin_shape[i][1]
- pred_mask = self.paste_mask(mask_out[i], bboxes[i:i + 1, 2:], im_h,
- im_w)
- pred_mask = pred_mask >= self.binary_thresh
- pred_mask = paddle.cast(pred_mask, 'int32')
- pred_result.append(pred_mask)
- pred_result = paddle.concat(pred_result)
+ device = paddle.device.get_device()
+
+ if self.export_onnx:
+ h, w = origin_shape[0][0], origin_shape[0][1]
+ mask_onnx = self.paste_mask(mask_out[:, None, :, :], bboxes[:, 2:],
+ h, w)
+ mask_onnx = mask_onnx >= self.binary_thresh
+ pred_result = paddle.cast(mask_onnx, 'int32')
+
+ else:
+ max_h = paddle.max(origin_shape[:, 0])
+ max_w = paddle.max(origin_shape[:, 1])
+ pred_result = paddle.zeros(
+ [num_mask, max_h, max_w], dtype='int32') - 1
+
+ id_start = 0
+ for i in range(paddle.shape(bbox_num)[0]):
+ bboxes_i = bboxes[id_start:id_start + bbox_num[i], :]
+ mask_out_i = mask_out[id_start:id_start + bbox_num[i], :, :]
+ im_h = origin_shape[i, 0]
+ im_w = origin_shape[i, 1]
+ bbox_num_i = bbox_num[id_start]
+ pred_mask = self.paste_mask(mask_out_i[:, None, :, :],
+ bboxes_i[:, 2:], im_h, im_w)
+ pred_mask = paddle.cast(pred_mask >= self.binary_thresh,
+ 'int32')
+ pred_result[id_start:id_start + bbox_num[i], :im_h, :
+ im_w] = pred_mask
+ id_start += bbox_num[i]
+ if self.assign_on_cpu:
+ paddle.set_device(device)
+
return pred_result
@@ -245,109 +303,6 @@ class FCOSPostProcess(object):
return bbox_pred, bbox_num
-@register
-class S2ANetBBoxPostProcess(nn.Layer):
- __shared__ = ['num_classes']
- __inject__ = ['nms']
-
- def __init__(self, num_classes=15, nms_pre=2000, min_bbox_size=0, nms=None):
- super(S2ANetBBoxPostProcess, self).__init__()
- self.num_classes = num_classes
- self.nms_pre = paddle.to_tensor(nms_pre)
- self.min_bbox_size = min_bbox_size
- self.nms = nms
- self.origin_shape_list = []
- self.fake_pred_cls_score_bbox = paddle.to_tensor(
- np.array(
- [[-1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]],
- dtype='float32'))
- self.fake_bbox_num = paddle.to_tensor(np.array([1], dtype='int32'))
-
- def forward(self, pred_scores, pred_bboxes):
- """
- pred_scores : [N, M] score
- pred_bboxes : [N, 5] xc, yc, w, h, a
- im_shape : [N, 2] im_shape
- scale_factor : [N, 2] scale_factor
- """
- pred_ploys0 = rbox2poly(pred_bboxes)
- pred_ploys = paddle.unsqueeze(pred_ploys0, axis=0)
-
- # pred_scores [NA, 16] --> [16, NA]
- pred_scores0 = paddle.transpose(pred_scores, [1, 0])
- pred_scores = paddle.unsqueeze(pred_scores0, axis=0)
-
- pred_cls_score_bbox, bbox_num, _ = self.nms(pred_ploys, pred_scores,
- self.num_classes)
- # Prevent empty bbox_pred from decode or NMS.
- # Bboxes and score before NMS may be empty due to the score threshold.
- if pred_cls_score_bbox.shape[0] <= 0 or pred_cls_score_bbox.shape[
- 1] <= 1:
- pred_cls_score_bbox = self.fake_pred_cls_score_bbox
- bbox_num = self.fake_bbox_num
-
- pred_cls_score_bbox = paddle.reshape(pred_cls_score_bbox, [-1, 10])
- return pred_cls_score_bbox, bbox_num
-
- def get_pred(self, bboxes, bbox_num, im_shape, scale_factor):
- """
- Rescale, clip and filter the bbox from the output of NMS to
- get final prediction.
- Args:
- bboxes(Tensor): bboxes [N, 10]
- bbox_num(Tensor): bbox_num
- im_shape(Tensor): [1 2]
- scale_factor(Tensor): [1 2]
- Returns:
- bbox_pred(Tensor): The output is the prediction with shape [N, 8]
- including labels, scores and bboxes. The size of
- bboxes are corresponding to the original image.
- """
- origin_shape = paddle.floor(im_shape / scale_factor + 0.5)
-
- origin_shape_list = []
- scale_factor_list = []
- # scale_factor: scale_y, scale_x
- for i in range(bbox_num.shape[0]):
- expand_shape = paddle.expand(origin_shape[i:i + 1, :],
- [bbox_num[i], 2])
- scale_y, scale_x = scale_factor[i][0], scale_factor[i][1]
- scale = paddle.concat([
- scale_x, scale_y, scale_x, scale_y, scale_x, scale_y, scale_x,
- scale_y
- ])
- expand_scale = paddle.expand(scale, [bbox_num[i], 8])
- origin_shape_list.append(expand_shape)
- scale_factor_list.append(expand_scale)
-
- origin_shape_list = paddle.concat(origin_shape_list)
- scale_factor_list = paddle.concat(scale_factor_list)
-
- # bboxes: [N, 10], label, score, bbox
- pred_label_score = bboxes[:, 0:2]
- pred_bbox = bboxes[:, 2:]
-
- # rescale bbox to original image
- pred_bbox = pred_bbox.reshape([-1, 8])
- scaled_bbox = pred_bbox / scale_factor_list
- origin_h = origin_shape_list[:, 0]
- origin_w = origin_shape_list[:, 1]
-
- bboxes = scaled_bbox
- zeros = paddle.zeros_like(origin_h)
- x1 = paddle.maximum(paddle.minimum(bboxes[:, 0], origin_w - 1), zeros)
- y1 = paddle.maximum(paddle.minimum(bboxes[:, 1], origin_h - 1), zeros)
- x2 = paddle.maximum(paddle.minimum(bboxes[:, 2], origin_w - 1), zeros)
- y2 = paddle.maximum(paddle.minimum(bboxes[:, 3], origin_h - 1), zeros)
- x3 = paddle.maximum(paddle.minimum(bboxes[:, 4], origin_w - 1), zeros)
- y3 = paddle.maximum(paddle.minimum(bboxes[:, 5], origin_h - 1), zeros)
- x4 = paddle.maximum(paddle.minimum(bboxes[:, 6], origin_w - 1), zeros)
- y4 = paddle.maximum(paddle.minimum(bboxes[:, 7], origin_h - 1), zeros)
- pred_bbox = paddle.stack([x1, y1, x2, y2, x3, y3, x4, y4], axis=-1)
- pred_result = paddle.concat([pred_label_score, pred_bbox], axis=1)
- return pred_result
-
-
@register
class JDEBBoxPostProcess(nn.Layer):
__shared__ = ['num_classes']
@@ -378,18 +333,18 @@ class JDEBBoxPostProcess(nn.Layer):
def forward(self, head_out, anchors):
"""
- Decode the bbox and do NMS for JDE model.
+ Decode the bbox and do NMS for JDE model.
Args:
head_out (list): Bbox_pred and cls_prob of bbox_head output.
anchors (list): Anchors of JDE model.
Returns:
- boxes_idx (Tensor): The index of kept bboxes after decode 'JDEBox'.
+ boxes_idx (Tensor): The index of kept bboxes after decode 'JDEBox'.
bbox_pred (Tensor): The output is the prediction with shape [N, 6]
including labels, scores and bboxes.
bbox_num (Tensor): The number of prediction of each batch with shape [N].
- nms_keep_idx (Tensor): The index of kept bboxes after NMS.
+ nms_keep_idx (Tensor): The index of kept bboxes after NMS.
"""
boxes_idx, yolo_boxes_scores = self.decode(head_out, anchors)
@@ -484,7 +439,7 @@ class CenterNetPostProcess(TTFBox):
x2 = xs + wh[:, 0:1] / 2
y2 = ys + wh[:, 1:2] / 2
- n, c, feat_h, feat_w = hm.shape[:]
+ n, c, feat_h, feat_w = paddle.shape(hm)
padw = (feat_w * self.down_ratio - im_shape[0, 1]) / 2
padh = (feat_h * self.down_ratio - im_shape[0, 0]) / 2
x1 = x1 * self.down_ratio
@@ -505,11 +460,10 @@ class CenterNetPostProcess(TTFBox):
boxes_shape = bboxes.shape[:]
scale_expand = paddle.expand(scale_expand, shape=boxes_shape)
bboxes = paddle.divide(bboxes, scale_expand)
+ results = paddle.concat([clses, scores, bboxes], axis=1)
if self.for_mot:
- results = paddle.concat([bboxes, scores, clses], axis=1)
return results, inds, topk_clses
else:
- results = paddle.concat([clses, scores, bboxes], axis=1)
return results, paddle.shape(results)[0:1], topk_clses
@@ -672,8 +626,23 @@ class SparsePostProcess(object):
return bbox_pred, bbox_num
-def nms(dets, thresh):
- """Apply classic DPM-style greedy NMS."""
+def multiclass_nms(bboxs, num_classes, match_threshold=0.6, match_metric='iou'):
+ final_boxes = []
+ for c in range(num_classes):
+ idxs = bboxs[:, 0] == c
+ if np.count_nonzero(idxs) == 0: continue
+ r = nms(bboxs[idxs, 1:], match_threshold, match_metric)
+ final_boxes.append(np.concatenate([np.full((r.shape[0], 1), c), r], 1))
+ return final_boxes
+
+
+def nms(dets, match_threshold=0.6, match_metric='iou'):
+ """ Apply NMS to avoid detecting too many overlapping bounding boxes.
+ Args:
+ dets: shape [N, 5], [score, x1, y1, x2, y2]
+ match_metric: 'iou' or 'ios'
+ match_threshold: overlap thresh for match metric.
+ """
if dets.shape[0] == 0:
return dets[[], :]
scores = dets[:, 0]
@@ -681,25 +650,12 @@ def nms(dets, thresh):
y1 = dets[:, 2]
x2 = dets[:, 3]
y2 = dets[:, 4]
-
areas = (x2 - x1 + 1) * (y2 - y1 + 1)
order = scores.argsort()[::-1]
ndets = dets.shape[0]
suppressed = np.zeros((ndets), dtype=np.int32)
- # nominal indices
- # _i, _j
- # sorted indices
- # i, j
- # temp variables for box i's (the box currently under consideration)
- # ix1, iy1, ix2, iy2, iarea
-
- # variables for computing overlap with box j (lower scoring box)
- # xx1, yy1, xx2, yy2
- # w, h
- # inter, ovr
-
for _i in range(ndets):
i = order[_i]
if suppressed[i] == 1:
@@ -720,8 +676,15 @@ def nms(dets, thresh):
w = max(0.0, xx2 - xx1 + 1)
h = max(0.0, yy2 - yy1 + 1)
inter = w * h
- ovr = inter / (iarea + areas[j] - inter)
- if ovr >= thresh:
+ if match_metric == 'iou':
+ union = iarea + areas[j] - inter
+ match_value = inter / union
+ elif match_metric == 'ios':
+ smaller = min(iarea, areas[j])
+ match_value = inter / smaller
+ else:
+ raise ValueError("match_metric must be 'iou' or 'ios'")
+ if match_value >= match_threshold:
suppressed[j] = 1
keep = np.where(suppressed == 0)[0]
dets = dets[keep, :]
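A usage sketch for the rewritten helpers, with illustrative inputs and assuming the two functions above are in scope (dets rows are [score, x1, y1, x2, y2]; multiclass_nms rows prepend a class id):

import numpy as np

dets = np.array([
    [0.9, 0., 0., 10., 10.],
    [0.8, 1., 1., 10., 10.],    # heavy overlap with the first box
    [0.7, 20., 20., 30., 30.],
])
print(nms(dets, match_threshold=0.6, match_metric='iou'))  # drops the 0.8 box

# 'ios' divides the intersection by the smaller area, so a box nested inside
# a larger one is suppressed more aggressively than with plain IoU.
print(nms(dets, match_threshold=0.6, match_metric='ios'))

bboxs = np.hstack([np.array([[0.], [0.], [1.]]), dets])    # prepend class ids
print(multiclass_nms(bboxs, num_classes=2))
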
diff --git a/paddlers/models/ppdet/modeling/proposal_generator/anchor_generator.py b/paddlers/models/ppdet/modeling/proposal_generator/anchor_generator.py
index 40538a6..ae5b074 100644
--- a/paddlers/models/ppdet/modeling/proposal_generator/anchor_generator.py
+++ b/paddlers/models/ppdet/modeling/proposal_generator/anchor_generator.py
@@ -12,16 +12,19 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-# The code is based on
+# The code is based on
# https://github.com/facebookresearch/detectron2/blob/main/detectron2/modeling/anchor_generator.py
import math
import paddle
import paddle.nn as nn
+import numpy as np
from paddlers.models.ppdet.core.workspace import register
+__all__ = ['AnchorGenerator', 'RetinaAnchorGenerator', 'S2ANetAnchorGenerator']
+
@register
class AnchorGenerator(nn.Layer):
@@ -29,18 +32,18 @@ class AnchorGenerator(nn.Layer):
Generate anchors according to the feature maps
Args:
- anchor_sizes (list[float] | list[list[float]]): The anchor sizes at
- each feature point. list[float] means all feature levels share the
- same sizes. list[list[float]] means the anchor sizes for
+ anchor_sizes (list[float] | list[list[float]]): The anchor sizes at
+ each feature point. list[float] means all feature levels share the
+ same sizes. list[list[float]] means the anchor sizes for
each level. The sizes stand for the scale of input size.
aspect_ratios (list[float] | list[list[float]]): The aspect ratios at
each feature point. list[float] means all feature levels share the
same ratios. list[list[float]] means the aspect ratios for
each level.
- strides (list[float]): The strides of feature maps which generate
+ strides (list[float]): The strides of feature maps which generate
anchors
offset (float): The offset of the coordinate of anchors, default 0.
-
+
"""
def __init__(self,
@@ -129,3 +132,135 @@ class AnchorGenerator(nn.Layer):
For FPN models, `num_anchors` on every feature map is the same.
"""
return len(self.cell_anchors[0])
+
+
+@register
+class RetinaAnchorGenerator(AnchorGenerator):
+ def __init__(self,
+ octave_base_scale=4,
+ scales_per_octave=3,
+ aspect_ratios=[0.5, 1.0, 2.0],
+ strides=[8.0, 16.0, 32.0, 64.0, 128.0],
+ variance=[1.0, 1.0, 1.0, 1.0],
+ offset=0.0):
+ anchor_sizes = []
+ for s in strides:
+ anchor_sizes.append([
+ s * octave_base_scale * 2**(i/scales_per_octave) \
+ for i in range(scales_per_octave)])
+ super(RetinaAnchorGenerator, self).__init__(
+ anchor_sizes=anchor_sizes,
+ aspect_ratios=aspect_ratios,
+ strides=strides,
+ variance=variance,
+ offset=offset)
+
+
+@register
+class S2ANetAnchorGenerator(nn.Layer):
+ """
+ S2ANet anchor generator implemented with Paddle APIs.
+ """
+
+ def __init__(self, base_size, scales, ratios, scale_major=True, ctr=None):
+ super(S2ANetAnchorGenerator, self).__init__()
+ self.base_size = base_size
+ self.scales = paddle.to_tensor(scales)
+ self.ratios = paddle.to_tensor(ratios)
+ self.scale_major = scale_major
+ self.ctr = ctr
+ self.base_anchors = self.gen_base_anchors()
+
+ @property
+ def num_base_anchors(self):
+ return self.base_anchors.shape[0]
+
+ def gen_base_anchors(self):
+ w = self.base_size
+ h = self.base_size
+ if self.ctr is None:
+ x_ctr = 0.5 * (w - 1)
+ y_ctr = 0.5 * (h - 1)
+ else:
+ x_ctr, y_ctr = self.ctr
+
+ h_ratios = paddle.sqrt(self.ratios)
+ w_ratios = 1 / h_ratios
+ if self.scale_major:
+ ws = (w * w_ratios[:] * self.scales[:]).reshape([-1])
+ hs = (h * h_ratios[:] * self.scales[:]).reshape([-1])
+ else:
+ ws = (w * self.scales[:] * w_ratios[:]).reshape([-1])
+ hs = (h * self.scales[:] * h_ratios[:]).reshape([-1])
+
+ base_anchors = paddle.stack(
+ [
+ x_ctr - 0.5 * (ws - 1), y_ctr - 0.5 * (hs - 1),
+ x_ctr + 0.5 * (ws - 1), y_ctr + 0.5 * (hs - 1)
+ ],
+ axis=-1)
+ base_anchors = paddle.round(base_anchors)
+ return base_anchors
+
+ def _meshgrid(self, x, y, row_major=True):
+ yy, xx = paddle.meshgrid(y, x)
+ yy = yy.reshape([-1])
+ xx = xx.reshape([-1])
+ if row_major:
+ return xx, yy
+ else:
+ return yy, xx
+
+ def forward(self, featmap_size, stride=16):
+ # featmap_size * stride projects the anchors back onto the original image
+
+ feat_h = featmap_size[0]
+ feat_w = featmap_size[1]
+ shift_x = paddle.arange(0, feat_w, 1, 'int32') * stride
+ shift_y = paddle.arange(0, feat_h, 1, 'int32') * stride
+ shift_xx, shift_yy = self._meshgrid(shift_x, shift_y)
+ shifts = paddle.stack([shift_xx, shift_yy, shift_xx, shift_yy], axis=-1)
+
+ all_anchors = self.base_anchors[:, :] + shifts[:, :]
+ all_anchors = all_anchors.cast(paddle.float32).reshape(
+ [feat_h * feat_w, 4])
+ all_anchors = self.rect2rbox(all_anchors)
+ return all_anchors
+
+ def valid_flags(self, featmap_size, valid_size):
+ feat_h, feat_w = featmap_size
+ valid_h, valid_w = valid_size
+ assert valid_h <= feat_h and valid_w <= feat_w
+ valid_x = paddle.zeros([feat_w], dtype='int32')
+ valid_y = paddle.zeros([feat_h], dtype='int32')
+ valid_x[:valid_w] = 1
+ valid_y[:valid_h] = 1
+ valid_xx, valid_yy = self._meshgrid(valid_x, valid_y)
+ valid = valid_xx & valid_yy
+ valid = paddle.reshape(valid, [-1, 1])
+ valid = paddle.expand(valid, [-1, self.num_base_anchors]).reshape([-1])
+ return valid
+
+ def rect2rbox(self, bboxes):
+ """
+ :param bboxes: shape (L, 4) (xmin, ymin, xmax, ymax)
+ :return: dbboxes: shape (L, 5) (x_ctr, y_ctr, w, h, angle)
+ """
+ x1, y1, x2, y2 = paddle.split(bboxes, 4, axis=-1)
+
+ x_ctr = (x1 + x2) / 2.0
+ y_ctr = (y1 + y2) / 2.0
+ edges1 = paddle.abs(x2 - x1)
+ edges2 = paddle.abs(y2 - y1)
+
+ rbox_w = paddle.maximum(edges1, edges2)
+ rbox_h = paddle.minimum(edges1, edges2)
+
+ # set angle
+ inds = edges1 < edges2
+ inds = paddle.cast(inds, paddle.float32)
+ rboxes_angle = inds * np.pi / 2.0
+
+ rboxes = paddle.concat(
+ (x_ctr, y_ctr, rbox_w, rbox_h, rboxes_angle), axis=-1)
+ return rboxes
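A quick numeric check of rect2rbox (illustrative values, assuming the class above is importable): a box wider than tall keeps angle 0, a box taller than wide gets angle pi/2, and (w, h) come out as the (longer, shorter) edges:

import paddle

gen = S2ANetAnchorGenerator(base_size=4, scales=[1.0], ratios=[1.0])
boxes = paddle.to_tensor([[0., 0., 8., 4.],    # wider than tall -> angle 0
                          [0., 0., 4., 8.]])   # taller than wide -> angle pi/2
print(gen.rect2rbox(boxes))
# [[4., 2., 8., 4., 0.], [2., 4., 8., 4., 1.5708]]
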
diff --git a/paddlers/models/ppdet/modeling/proposal_generator/proposal_generator.py b/paddlers/models/ppdet/modeling/proposal_generator/proposal_generator.py
index 2af84bc..1fc9544 100644
--- a/paddlers/models/ppdet/modeling/proposal_generator/proposal_generator.py
+++ b/paddlers/models/ppdet/modeling/proposal_generator/proposal_generator.py
@@ -24,7 +24,7 @@ class ProposalGenerator(object):
"""
Proposal generation module
- For more details, please refer to the document of generate_proposals
+ For more details, please refer to the document of generate_proposals
in ppdet/modeling/ops.py
Args:
@@ -38,8 +38,8 @@ class ProposalGenerator(object):
eta (float): Apply in adaptive NMS, if adaptive `threshold > 0.5`,
`adaptive_threshold = adaptive_threshold * eta` in each iteration.
default 1.
- topk_after_collect (bool): whether to adopt topk after batch
- collection. If topk_after_collect is true, box filter will not be
+ topk_after_collect (bool): whether to adopt topk after batch
+ collection. If topk_after_collect is true, box filter will not be
used after NMS at each image in proposal generation. default false
"""
@@ -62,16 +62,31 @@ class ProposalGenerator(object):
top_n = self.pre_nms_top_n if self.topk_after_collect else self.post_nms_top_n
variances = paddle.ones_like(anchors)
- rpn_rois, rpn_rois_prob, rpn_rois_num = ops.generate_proposals(
- scores,
- bbox_deltas,
- im_shape,
- anchors,
- variances,
- pre_nms_top_n=self.pre_nms_top_n,
- post_nms_top_n=top_n,
- nms_thresh=self.nms_thresh,
- min_size=self.min_size,
- eta=self.eta,
- return_rois_num=True)
+ if hasattr(paddle.vision.ops, "generate_proposals"):
+ rpn_rois, rpn_rois_prob, rpn_rois_num = paddle.vision.ops.generate_proposals(
+ scores,
+ bbox_deltas,
+ im_shape,
+ anchors,
+ variances,
+ pre_nms_top_n=self.pre_nms_top_n,
+ post_nms_top_n=top_n,
+ nms_thresh=self.nms_thresh,
+ min_size=self.min_size,
+ eta=self.eta,
+ return_rois_num=True)
+ else:
+ rpn_rois, rpn_rois_prob, rpn_rois_num = ops.generate_proposals(
+ scores,
+ bbox_deltas,
+ im_shape,
+ anchors,
+ variances,
+ pre_nms_top_n=self.pre_nms_top_n,
+ post_nms_top_n=top_n,
+ nms_thresh=self.nms_thresh,
+ min_size=self.min_size,
+ eta=self.eta,
+ return_rois_num=True)
+
return rpn_rois, rpn_rois_prob, rpn_rois_num, self.post_nms_top_n
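This fallback relies on feature detection rather than version parsing: recent Paddle builds expose `generate_proposals` under `paddle.vision.ops`, while older installs only have the bundled `ops` module. A minimal sketch of the same pattern, resolving the callable once at import time instead of branching on every call (module paths as in the hunk above):

```python
import paddle
from paddlers.models.ppdet.modeling import ops

# Both implementations take the same arguments, so callers don't change.
if hasattr(paddle.vision.ops, "generate_proposals"):
    generate_proposals = paddle.vision.ops.generate_proposals  # newer Paddle
else:
    generate_proposals = ops.generate_proposals  # legacy bundled op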
diff --git a/paddlers/models/ppdet/modeling/proposal_generator/rpn_head.py b/paddlers/models/ppdet/modeling/proposal_generator/rpn_head.py
index e0605cc..f301da9 100644
--- a/paddlers/models/ppdet/modeling/proposal_generator/rpn_head.py
+++ b/paddlers/models/ppdet/modeling/proposal_generator/rpn_head.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
@@ -21,6 +21,7 @@ from paddlers.models.ppdet.core.workspace import register
from .anchor_generator import AnchorGenerator
from .target_layer import RPNTargetAssign
from .proposal_generator import ProposalGenerator
+from ..cls_utils import _get_class_default_kwargs
class RPNFeat(nn.Layer):
@@ -66,18 +67,24 @@ class RPNHead(nn.Layer):
in_channel (int): channel of input feature maps which can be
derived by from_config
"""
+ __shared__ = ['export_onnx']
+ __inject__ = ['loss_rpn_bbox']
def __init__(self,
- anchor_generator=AnchorGenerator().__dict__,
- rpn_target_assign=RPNTargetAssign().__dict__,
- train_proposal=ProposalGenerator(12000, 2000).__dict__,
- test_proposal=ProposalGenerator().__dict__,
- in_channel=1024):
+ anchor_generator=_get_class_default_kwargs(AnchorGenerator),
+ rpn_target_assign=_get_class_default_kwargs(RPNTargetAssign),
+ train_proposal=_get_class_default_kwargs(ProposalGenerator,
+ 12000, 2000),
+ test_proposal=_get_class_default_kwargs(ProposalGenerator),
+ in_channel=1024,
+ export_onnx=False,
+ loss_rpn_bbox=None):
super(RPNHead, self).__init__()
self.anchor_generator = anchor_generator
self.rpn_target_assign = rpn_target_assign
self.train_proposal = train_proposal
self.test_proposal = test_proposal
+ self.export_onnx = export_onnx
if isinstance(anchor_generator, dict):
self.anchor_generator = AnchorGenerator(**anchor_generator)
if isinstance(rpn_target_assign, dict):
@@ -86,6 +93,7 @@ class RPNHead(nn.Layer):
self.train_proposal = ProposalGenerator(**train_proposal)
if isinstance(test_proposal, dict):
self.test_proposal = ProposalGenerator(**test_proposal)
+ self.loss_rpn_bbox = loss_rpn_bbox
num_anchors = self.anchor_generator.num_anchors
self.rpn_feat = RPNFeat(in_channel, in_channel)
@@ -149,49 +157,90 @@ class RPNHead(nn.Layer):
# Collect multi-level proposals for each batch
# Get 'topk' of them as final output
- bs_rois_collect = []
- bs_rois_num_collect = []
- batch_size = paddle.slice(paddle.shape(im_shape), [0], [0], [1])
- # Generate proposals for each level and each batch.
- # Discard batch-computing to avoid sorting bbox cross different batches.
- for i in range(batch_size):
- rpn_rois_list = []
- rpn_prob_list = []
- rpn_rois_num_list = []
+ if self.export_onnx:
+ # bs = 1 when exporting onnx
+ onnx_rpn_rois_list = []
+ onnx_rpn_prob_list = []
+ onnx_rpn_rois_num_list = []
for rpn_score, rpn_delta, anchor in zip(scores, bbox_deltas,
anchors):
- rpn_rois, rpn_rois_prob, rpn_rois_num, post_nms_top_n = prop_gen(
- scores=rpn_score[i:i + 1],
- bbox_deltas=rpn_delta[i:i + 1],
+ onnx_rpn_rois, onnx_rpn_rois_prob, onnx_rpn_rois_num, onnx_post_nms_top_n = prop_gen(
+ scores=rpn_score[0:1],
+ bbox_deltas=rpn_delta[0:1],
anchors=anchor,
- im_shape=im_shape[i:i + 1])
- if rpn_rois.shape[0] > 0:
+ im_shape=im_shape[0:1])
+ onnx_rpn_rois_list.append(onnx_rpn_rois)
+ onnx_rpn_prob_list.append(onnx_rpn_rois_prob)
+ onnx_rpn_rois_num_list.append(onnx_rpn_rois_num)
+
+ onnx_rpn_rois = paddle.concat(onnx_rpn_rois_list)
+ onnx_rpn_prob = paddle.concat(onnx_rpn_prob_list).flatten()
+
+ onnx_top_n = paddle.to_tensor(onnx_post_nms_top_n).cast('int32')
+ onnx_num_rois = paddle.shape(onnx_rpn_prob)[0].cast('int32')
+ k = paddle.minimum(onnx_top_n, onnx_num_rois)
+ onnx_topk_prob, onnx_topk_inds = paddle.topk(onnx_rpn_prob, k)
+ onnx_topk_rois = paddle.gather(onnx_rpn_rois, onnx_topk_inds)
+ # TODO(wangguanzhong): Now bs_rois_collect in export_onnx is moved outside conditional branch
+ # due to problems in dy2static of paddle. Will fix it when updating paddle framework.
+ # bs_rois_collect = [onnx_topk_rois]
+ # bs_rois_num_collect = paddle.shape(onnx_topk_rois)[0]
+
+ else:
+ bs_rois_collect = []
+ bs_rois_num_collect = []
+
+ batch_size = paddle.slice(paddle.shape(im_shape), [0], [0], [1])
+
+ # Generate proposals for each level and each batch.
+ # Discard batch-computing to avoid sorting bbox cross different batches.
+ for i in range(batch_size):
+ rpn_rois_list = []
+ rpn_prob_list = []
+ rpn_rois_num_list = []
+
+ for rpn_score, rpn_delta, anchor in zip(scores, bbox_deltas,
+ anchors):
+ rpn_rois, rpn_rois_prob, rpn_rois_num, post_nms_top_n = prop_gen(
+ scores=rpn_score[i:i + 1],
+ bbox_deltas=rpn_delta[i:i + 1],
+ anchors=anchor,
+ im_shape=im_shape[i:i + 1])
rpn_rois_list.append(rpn_rois)
rpn_prob_list.append(rpn_rois_prob)
rpn_rois_num_list.append(rpn_rois_num)
- if len(scores) > 1:
- rpn_rois = paddle.concat(rpn_rois_list)
- rpn_prob = paddle.concat(rpn_prob_list).flatten()
-
- if rpn_prob.shape[0] > post_nms_top_n:
- topk_prob, topk_inds = paddle.topk(rpn_prob, post_nms_top_n)
- topk_rois = paddle.gather(rpn_rois, topk_inds)
+ if len(scores) > 1:
+ rpn_rois = paddle.concat(rpn_rois_list)
+ rpn_prob = paddle.concat(rpn_prob_list).flatten()
+
+ num_rois = paddle.shape(rpn_prob)[0].cast('int32')
+ if num_rois > post_nms_top_n:
+ topk_prob, topk_inds = paddle.topk(rpn_prob,
+ post_nms_top_n)
+ topk_rois = paddle.gather(rpn_rois, topk_inds)
+ else:
+ topk_rois = rpn_rois
+ topk_prob = rpn_prob
else:
- topk_rois = rpn_rois
- topk_prob = rpn_prob
- else:
- topk_rois = rpn_rois_list[0]
- topk_prob = rpn_prob_list[0].flatten()
+ topk_rois = rpn_rois_list[0]
+ topk_prob = rpn_prob_list[0].flatten()
- bs_rois_collect.append(topk_rois)
- bs_rois_num_collect.append(paddle.shape(topk_rois)[0])
+ bs_rois_collect.append(topk_rois)
+ bs_rois_num_collect.append(paddle.shape(topk_rois)[0])
- bs_rois_num_collect = paddle.concat(bs_rois_num_collect)
+ bs_rois_num_collect = paddle.concat(bs_rois_num_collect)
- return bs_rois_collect, bs_rois_num_collect
+ if self.export_onnx:
+ output_rois = [onnx_topk_rois]
+ output_rois_num = paddle.shape(onnx_topk_rois)[0]
+ else:
+ output_rois = bs_rois_collect
+ output_rois_num = bs_rois_num_collect
+
+ return output_rois, output_rois_num
def get_loss(self, pred_scores, pred_deltas, anchors, inputs):
"""
@@ -252,7 +301,12 @@ class RPNHead(nn.Layer):
loc_tgt = paddle.concat(loc_tgt)
loc_tgt = paddle.gather(loc_tgt, pos_ind)
loc_tgt.stop_gradient = True
- loss_rpn_reg = paddle.abs(loc_pred - loc_tgt).sum()
+
+ if self.loss_rpn_bbox is None:
+ loss_rpn_reg = paddle.abs(loc_pred - loc_tgt).sum()
+ else:
+ loss_rpn_reg = self.loss_rpn_bbox(loc_pred, loc_tgt).sum()
+
return {
'loss_rpn_cls': loss_rpn_cls / norm,
'loss_rpn_reg': loss_rpn_reg / norm
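With `loss_rpn_bbox` injected via `__inject__`, the RPN regression loss becomes pluggable; `None` keeps the original summed L1. A small sketch of the resulting dispatch, here exercised with Paddle's stock `SmoothL1Loss` (the free-function name is illustrative):

```python
import paddle

def rpn_reg_loss(loc_pred, loc_tgt, loss_rpn_bbox=None):
    if loss_rpn_bbox is None:
        # original behavior: summed L1 over matched anchors
        return paddle.abs(loc_pred - loc_tgt).sum()
    return loss_rpn_bbox(loc_pred, loc_tgt).sum()

pred, tgt = paddle.rand([8, 4]), paddle.rand([8, 4])
print(rpn_reg_loss(pred, tgt))  # default L1
print(rpn_reg_loss(pred, tgt, paddle.nn.SmoothL1Loss(reduction='none')))
```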
diff --git a/paddlers/models/ppdet/modeling/proposal_generator/target.py b/paddlers/models/ppdet/modeling/proposal_generator/target.py
index b92d0b3..82930be 100644
--- a/paddlers/models/ppdet/modeling/proposal_generator/target.py
+++ b/paddlers/models/ppdet/modeling/proposal_generator/target.py
@@ -39,7 +39,7 @@ def rpn_anchor_target(anchors,
matches, match_labels = label_box(
anchors, gt_bbox, rpn_positive_overlap, rpn_negative_overlap, True,
ignore_thresh, is_crowd_i, assign_on_cpu)
- # Step2: sample anchor
+ # Step2: sample anchor
fg_inds, bg_inds = subsample_labels(match_labels, rpn_batch_size_per_im,
rpn_fg_fraction, 0, use_random)
# Fill with the ignore label (-1), then set positive and negative labels
@@ -48,7 +48,7 @@ def rpn_anchor_target(anchors,
labels = paddle.scatter(labels, bg_inds, paddle.zeros_like(bg_inds))
if fg_inds.shape[0] > 0:
labels = paddle.scatter(labels, fg_inds, paddle.ones_like(fg_inds))
- # Step3: make output
+ # Step3: make output
if gt_bbox.shape[0] == 0:
matched_gt_boxes = paddle.zeros([matches.shape[0], 4])
tgt_delta = paddle.zeros([matches.shape[0], 4])
@@ -74,9 +74,11 @@ def label_box(anchors,
is_crowd=None,
assign_on_cpu=False):
if assign_on_cpu:
+ device = paddle.device.get_device()
paddle.set_device("cpu")
iou = bbox_overlaps(gt_boxes, anchors)
- paddle.set_device("gpu")
+ paddle.set_device(device)
+
else:
iou = bbox_overlaps(gt_boxes, anchors)
n_gt = gt_boxes.shape[0]
@@ -89,7 +91,7 @@ def label_box(anchors,
default_matches = paddle.full((iou.shape[1], ), 0, dtype='int64')
default_match_labels = paddle.full((iou.shape[1], ), 0, dtype='int32')
return default_matches, default_match_labels
- # if ignore_thresh > 0, remove anchor if it is closed to
+ # if ignore_thresh > 0, remove anchor if it is close to
# one of the crowded ground-truth
if n_gt_crowd > 0:
N_a = anchors.shape[0]
@@ -184,7 +186,8 @@ def generate_proposal_target(rpn_rois,
use_random=True,
is_cascade=False,
cascade_iou=0.5,
- assign_on_cpu=False):
+ assign_on_cpu=False,
+ add_gt_as_proposals=True):
rois_with_gt = []
tgt_labels = []
@@ -202,7 +205,7 @@ def generate_proposal_target(rpn_rois,
gt_class = paddle.squeeze(gt_classes[i], axis=-1)
# Concat RoIs and gt boxes except cascade rcnn or none gt
- if not is_cascade and gt_bbox.shape[0] > 0:
+ if add_gt_as_proposals and gt_bbox.shape[0] > 0:
bbox = paddle.concat([rpn_roi, gt_bbox])
else:
bbox = rpn_roi
@@ -211,12 +214,12 @@ def generate_proposal_target(rpn_rois,
matches, match_labels = label_box(bbox, gt_bbox, fg_thresh, bg_thresh,
False, ignore_thresh, is_crowd_i,
assign_on_cpu)
- # Step2: sample bbox
+ # Step2: sample bbox
sampled_inds, sampled_gt_classes = sample_bbox(
matches, match_labels, gt_class, batch_size_per_im, fg_fraction,
num_classes, use_random, is_cascade)
- # Step3: make output
+ # Step3: make output
rois_per_image = bbox if is_cascade else paddle.gather(bbox,
sampled_inds)
sampled_gt_ind = matches if is_cascade else paddle.gather(matches,
@@ -337,7 +340,7 @@ def generate_mask_target(gt_segms, rois, labels_int32, sampled_gt_inds,
# generate fake roi if foreground is empty
if fg_inds.numel() == 0:
has_fg = False
- fg_inds = paddle.ones([1], dtype='int32')
+ fg_inds = paddle.ones([1, 1], dtype='int64')
inds_per_im = sampled_gt_inds[k]
inds_per_im = paddle.gather(inds_per_im, fg_inds)
@@ -356,7 +359,7 @@ def generate_mask_target(gt_segms, rois, labels_int32, sampled_gt_inds,
fg_inds_new = fg_inds.reshape([-1]).numpy()
results = []
if len(gt_segms_per_im) > 0:
- for j in fg_inds_new:
+ for j in range(fg_inds_new.shape[0]):
results.append(
rasterize_polygons_within_box(new_segm[j], boxes[j],
resolution))
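The `label_box` fix above stops hard-coding `paddle.set_device("gpu")` after a CPU-side IoU and instead restores whatever device the caller was on, so `assign_on_cpu` also works on CPU-only or XPU installs. The same save/switch/restore pattern as a reusable sketch (helper name illustrative):

```python
import paddle

def run_on_cpu(fn, *args, **kwargs):
    device = paddle.device.get_device()  # e.g. 'gpu:0', 'cpu', 'xpu:0'
    paddle.set_device("cpu")
    try:
        return fn(*args, **kwargs)
    finally:
        paddle.set_device(device)  # always restore the caller's device

# e.g. iou = run_on_cpu(bbox_overlaps, gt_boxes, anchors)
```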
diff --git a/paddlers/models/ppdet/modeling/proposal_generator/target_layer.py b/paddlers/models/ppdet/modeling/proposal_generator/target_layer.py
index a52ead1..57e5539 100644
--- a/paddlers/models/ppdet/modeling/proposal_generator/target_layer.py
+++ b/paddlers/models/ppdet/modeling/proposal_generator/target_layer.py
@@ -1,15 +1,15 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import paddle
@@ -29,27 +29,27 @@ class RPNTargetAssign(object):
The assignment consists of three steps:
1. Match anchor and ground-truth box, label the anchor with foreground
or background sample
- 2. Sample anchors to keep the properly ratio between foreground and
+ 2. Sample anchors to keep the proper ratio between foreground and
background
3. Generate the targets for classification and regression branch
Args:
- batch_size_per_im (int): Total number of RPN samples per image.
+ batch_size_per_im (int): Total number of RPN samples per image.
default 256
fg_fraction (float): Fraction of anchors that is labeled
foreground, default 0.5
positive_overlap (float): Minimum overlap required between an anchor
- and ground-truth box for the (anchor, gt box) pair to be
+ and ground-truth box for the (anchor, gt box) pair to be
a foreground sample. default 0.7
negative_overlap (float): Maximum overlap allowed between an anchor
- and ground-truth box for the (anchor, gt box) pair to be
+ and ground-truth box for the (anchor, gt box) pair to be
a background sample. default 0.3
ignore_thresh(float): Threshold for ignoring the is_crowd ground-truth
if the value is larger than zero.
- use_random (bool): Use random sampling to choose foreground and
+ use_random (bool): Use random sampling to choose foreground and
background boxes, default true.
- assign_on_cpu (bool): In case the number of gt box is too large,
+ assign_on_cpu (bool): In case the number of gt box is too large,
compute IoU on CPU, default false.
"""
@@ -104,13 +104,13 @@ class BBoxAssigner(object):
The assignment consists of three steps:
1. Match RoIs and ground-truth box, label the RoIs with foreground
or background sample
- 2. Sample anchors to keep the properly ratio between foreground and
+ 2. Sample anchors to keep the proper ratio between foreground and
background
3. Generate the targets for classification and regression branch
Args:
- batch_size_per_im (int): Total number of RoIs per image.
- default 512
+ batch_size_per_im (int): Total number of RoIs per image.
+ default 512
fg_fraction (float): Fraction of RoIs that is labeled
foreground, default 0.25
fg_thresh (float): Minimum overlap required between a RoI
@@ -121,12 +121,12 @@ class BBoxAssigner(object):
a background sample. default 0.5
ignore_thresh(float): Threshold for ignoring the is_crowd ground-truth
if the value is larger than zero.
- use_random (bool): Use random sampling to choose foreground and
+ use_random (bool): Use random sampling to choose foreground and
background boxes, default true
cascade_iou (list[iou]): The list of overlap to select foreground and
background of each stage, which is only used In Cascade RCNN.
num_classes (int): The number of class.
- assign_on_cpu (bool): In case the number of gt box is too large,
+ assign_on_cpu (bool): In case the number of gt box is too large,
compute IoU on CPU, default false.
"""
@@ -156,7 +156,8 @@ class BBoxAssigner(object):
rpn_rois_num,
inputs,
stage=0,
- is_cascade=False):
+ is_cascade=False,
+ add_gt_as_proposals=True):
gt_classes = inputs['gt_class']
gt_boxes = inputs['gt_bbox']
is_crowd = inputs.get('is_crowd', None)
@@ -166,7 +167,7 @@ class BBoxAssigner(object):
rpn_rois, gt_classes, gt_boxes, self.batch_size_per_im,
self.fg_fraction, self.fg_thresh, self.bg_thresh, self.num_classes,
self.ignore_thresh, is_crowd, self.use_random, is_cascade,
- self.cascade_iou[stage], self.assign_on_cpu)
+ self.cascade_iou[stage], self.assign_on_cpu, add_gt_as_proposals)
rois = outs[0]
rois_num = outs[-1]
# tgt_labels, tgt_bboxes, tgt_gt_inds
@@ -254,7 +255,7 @@ class MaskAssigner(object):
The assignment consists of three steps:
1. Select RoIs labels with foreground.
- 2. Encode the RoIs and corresponding gt polygons to generate
+ 2. Encode the RoIs and corresponding gt polygons to generate
mask target
Args:
@@ -365,21 +366,11 @@ class RBoxAssigner(object):
def assign_anchor(self,
anchors,
gt_bboxes,
- gt_lables,
+ gt_labels,
pos_iou_thr,
neg_iou_thr,
min_iou_thr=0.0,
ignore_iof_thr=-2):
- """
-
- Args:
- anchors:
- gt_bboxes:[M, 5] rc,yc,w,h,angle
- gt_lables:
-
- Returns:
-
- """
assert anchors.shape[1] == 4 or anchors.shape[1] == 5
assert gt_bboxes.shape[1] == 4 or gt_bboxes.shape[1] == 5
anchors_xc_yc = anchors
@@ -392,9 +383,9 @@ class RBoxAssigner(object):
gt_bboxes_xc_yc = paddle.to_tensor(gt_bboxes_xc_yc)
try:
- from rbox_iou_ops import rbox_iou
+ from ext_op import rbox_iou
except Exception as e:
- print("import custom_ops error, try install rbox_iou_ops " \
+ print("import custom_ops error, try install ext_op " \
"following ppdet/ext_op/README.md", e)
sys.stdout.flush()
sys.exit(-1)
@@ -428,12 +419,12 @@ class RBoxAssigner(object):
# (4) assign max_iou as pos_ids >=0
anchor_gt_bbox_iou_inds = anchor_gt_bbox_inds[gt_bbox_anchor_iou_inds]
# gt_bbox_anchor_iou_inds = np.logical_and(gt_bbox_anchor_iou_inds, anchor_gt_bbox_iou >= min_iou_thr)
- labels[gt_bbox_anchor_iou_inds] = gt_lables[anchor_gt_bbox_iou_inds]
+ labels[gt_bbox_anchor_iou_inds] = gt_labels[anchor_gt_bbox_iou_inds]
# (5) assign >= pos_iou_thr as pos_ids
iou_pos_iou_thr_ids = anchor_gt_bbox_iou >= pos_iou_thr
iou_pos_iou_thr_ids_box_inds = anchor_gt_bbox_inds[iou_pos_iou_thr_ids]
- labels[iou_pos_iou_thr_ids] = gt_lables[iou_pos_iou_thr_ids_box_inds]
+ labels[iou_pos_iou_thr_ids] = gt_labels[iou_pos_iou_thr_ids_box_inds]
return anchor_gt_bbox_inds, anchor_gt_bbox_iou, labels
def __call__(self, anchors, gt_bboxes, gt_labels, is_crowd):
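The rename from `rbox_iou_ops` to `ext_op` keeps the guarded-import shape used above: a missing compiled extension exits with a pointer to the build instructions instead of a bare traceback. The pattern in isolation:

```python
import sys

try:
    from ext_op import rbox_iou  # compiled rotated-IoU custom op
except Exception as e:
    print("import custom_ops error, try install ext_op "
          "following ppdet/ext_op/README.md", e)
    sys.stdout.flush()
    sys.exit(-1)
```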
diff --git a/paddlers/models/ppdet/modeling/rbox_utils.py b/paddlers/models/ppdet/modeling/rbox_utils.py
new file mode 100644
index 0000000..19bca8d
--- /dev/null
+++ b/paddlers/models/ppdet/modeling/rbox_utils.py
@@ -0,0 +1,159 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+import paddle
+import numpy as np
+import cv2
+
+
+def norm_angle(angle, range=[-np.pi / 4, np.pi]):
+ return (angle - range[0]) % range[1] + range[0]
+
+
+# rbox helper functions implemented with NumPy
+def poly2rbox_le135_np(poly):
+ """convert poly to rbox [-pi / 4, 3 * pi / 4]
+
+ Args:
+ poly: [x1, y1, x2, y2, x3, y3, x4, y4]
+
+ Returns:
+ rbox: [cx, cy, w, h, angle]
+ """
+ poly = np.array(poly[:8], dtype=np.float32)
+
+ pt1 = (poly[0], poly[1])
+ pt2 = (poly[2], poly[3])
+ pt3 = (poly[4], poly[5])
+ pt4 = (poly[6], poly[7])
+
+ edge1 = np.sqrt((pt1[0] - pt2[0]) * (pt1[0] - pt2[0]) + (pt1[1] - pt2[1]) *
+ (pt1[1] - pt2[1]))
+ edge2 = np.sqrt((pt2[0] - pt3[0]) * (pt2[0] - pt3[0]) + (pt2[1] - pt3[1]) *
+ (pt2[1] - pt3[1]))
+
+ width = max(edge1, edge2)
+ height = min(edge1, edge2)
+
+ rbox_angle = 0
+ if edge1 > edge2:
+ rbox_angle = np.arctan2(float(pt2[1] - pt1[1]), float(pt2[0] - pt1[0]))
+ elif edge2 >= edge1:
+ rbox_angle = np.arctan2(float(pt4[1] - pt1[1]), float(pt4[0] - pt1[0]))
+
+ rbox_angle = norm_angle(rbox_angle)
+
+ x_ctr = float(pt1[0] + pt3[0]) / 2
+ y_ctr = float(pt1[1] + pt3[1]) / 2
+ return [x_ctr, y_ctr, width, height, rbox_angle]
+
+
+def poly2rbox_oc_np(poly):
+ """convert poly to rbox (0, pi / 2]
+
+ Args:
+ poly: [x1, y1, x2, y2, x3, y3, x4, y4]
+
+ Returns:
+ rbox: [cx, cy, w, h, angle]
+ """
+ points = np.array(poly, dtype=np.float32).reshape((-1, 2))
+ (cx, cy), (w, h), angle = cv2.minAreaRect(points)
+ # using the new OpenCV Rotated BBox definition since 4.5.1
+ # if angle < 0, opencv is older than 4.5.1, angle is in [-90, 0)
+ if angle < 0:
+ angle += 90
+ w, h = h, w
+
+ # convert angle to [0, 90)
+ if angle == -0.0:
+ angle = 0.0
+ if angle == 90.0:
+ angle = 0.0
+ w, h = h, w
+
+ angle = angle / 180 * np.pi
+ return [cx, cy, w, h, angle]
+
+
+def poly2rbox_np(polys, rbox_type='oc'):
+ """
+ polys: [x0,y0,x1,y1,x2,y2,x3,y3]
+ to
+ rboxes: [x_ctr,y_ctr,w,h,angle]
+ """
+ assert rbox_type in ['oc', 'le135'], 'only oc or le135 is supported now'
+ poly2rbox_fn = poly2rbox_oc_np if rbox_type == 'oc' else poly2rbox_le135_np
+ rboxes = []
+ for poly in polys:
+ x, y, w, h, angle = poly2rbox_fn(poly)
+ rbox = np.array([x, y, w, h, angle], dtype=np.float32)
+ rboxes.append(rbox)
+
+ return np.array(rboxes)
+
+
+def cal_line_length(point1, point2):
+ return math.sqrt(
+ math.pow(point1[0] - point2[0], 2) + math.pow(point1[1] - point2[1], 2))
+
+
+def get_best_begin_point_single(coordinate):
+ x1, y1, x2, y2, x3, y3, x4, y4 = coordinate
+ xmin = min(x1, x2, x3, x4)
+ ymin = min(y1, y2, y3, y4)
+ xmax = max(x1, x2, x3, x4)
+ ymax = max(y1, y2, y3, y4)
+ combinate = [[[x1, y1], [x2, y2], [x3, y3], [x4, y4]],
+ [[x4, y4], [x1, y1], [x2, y2], [x3, y3]],
+ [[x3, y3], [x4, y4], [x1, y1], [x2, y2]],
+ [[x2, y2], [x3, y3], [x4, y4], [x1, y1]]]
+ dst_coordinate = [[xmin, ymin], [xmax, ymin], [xmax, ymax], [xmin, ymax]]
+ force = 100000000.0
+ force_flag = 0
+ for i in range(4):
+ temp_force = cal_line_length(combinate[i][0], dst_coordinate[0]) \
+ + cal_line_length(combinate[i][1], dst_coordinate[1]) \
+ + cal_line_length(combinate[i][2], dst_coordinate[2]) \
+ + cal_line_length(combinate[i][3], dst_coordinate[3])
+ if temp_force < force:
+ force = temp_force
+ force_flag = i
+ if force_flag != 0:
+ pass
+ return np.array(combinate[force_flag]).reshape(8)
+
+
+def rbox2poly_np(rboxes):
+ """
+ rboxes:[x_ctr,y_ctr,w,h,angle]
+ to
+ poly:[x0,y0,x1,y1,x2,y2,x3,y3]
+ """
+ polys = []
+ for i in range(len(rboxes)):
+ x_ctr, y_ctr, width, height, angle = rboxes[i][:5]
+ tl_x, tl_y, br_x, br_y = -width / 2, -height / 2, width / 2, height / 2
+ rect = np.array([[tl_x, br_x, br_x, tl_x], [tl_y, tl_y, br_y, br_y]])
+ R = np.array([[np.cos(angle), -np.sin(angle)],
+ [np.sin(angle), np.cos(angle)]])
+ poly = R.dot(rect)
+ x0, x1, x2, x3 = poly[0, :4] + x_ctr
+ y0, y1, y2, y3 = poly[1, :4] + y_ctr
+ poly = np.array([x0, y0, x1, y1, x2, y2, x3, y3], dtype=np.float32)
+ poly = get_best_begin_point_single(poly)
+ polys.append(poly)
+ polys = np.array(polys)
+ return polys
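A quick round trip through the new `rbox_utils` helpers shows the two box representations side by side; this assumes the module is importable at the path above and that OpenCV is installed (required by the `'oc'` converter):

```python
import numpy as np
from paddlers.models.ppdet.modeling.rbox_utils import poly2rbox_np, rbox2poly_np

polys = np.array([[0., 0., 10., 0., 10., 4., 0., 4.]])  # axis-aligned quad
rboxes = poly2rbox_np(polys, rbox_type='oc')  # -> [[cx, cy, w, h, angle]]
print(rboxes)
print(rbox2poly_np(rboxes))  # back to 8-point polygons, best begin point first
```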
diff --git a/paddlers/models/ppdet/modeling/reid/__init__.py b/paddlers/models/ppdet/modeling/reid/__init__.py
index 968e95c..2630ecf 100644
--- a/paddlers/models/ppdet/modeling/reid/__init__.py
+++ b/paddlers/models/ppdet/modeling/reid/__init__.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
from . import jde_embedding_head
@@ -17,9 +17,11 @@ from . import fairmot_embedding_head
from . import resnet
from . import pyramidal_embedding
from . import pplcnet_embedding
+from . import resnet_embedding
from .fairmot_embedding_head import *
from .jde_embedding_head import *
from .resnet import *
from .pyramidal_embedding import *
from .pplcnet_embedding import *
+from .resnet_embedding import *
diff --git a/paddlers/models/ppdet/modeling/reid/fairmot_embedding_head.py b/paddlers/models/ppdet/modeling/reid/fairmot_embedding_head.py
old mode 100644
new mode 100755
index e4d5364..88fda65
--- a/paddlers/models/ppdet/modeling/reid/fairmot_embedding_head.py
+++ b/paddlers/models/ppdet/modeling/reid/fairmot_embedding_head.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
@@ -33,7 +33,7 @@ class FairMOTEmbeddingHead(nn.Layer):
ch_head (int): the channel of features before fed into embedding, 256 by default.
ch_emb (int): the channel of the embedding feature, 128 by default.
num_identities_dict (dict): the number of identities of each category,
- support single class and multi-calss, {0: 14455} as default.
+ support single class and multi-class, {0: 14455} as default.
"""
def __init__(self,
diff --git a/paddlers/models/ppdet/modeling/reid/jde_embedding_head.py b/paddlers/models/ppdet/modeling/reid/jde_embedding_head.py
index b13e2d3..8764692 100644
--- a/paddlers/models/ppdet/modeling/reid/jde_embedding_head.py
+++ b/paddlers/models/ppdet/modeling/reid/jde_embedding_head.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
@@ -152,9 +152,8 @@ class JDEEmbeddingHead(nn.Layer):
scale_factor = targets['scale_factor'][0].numpy()
bboxes[:, 2:] = self.scale_coords(bboxes[:, 2:], input_shape,
im_shape, scale_factor)
- # tlwhs, scores, cls_ids
- pred_dets = paddle.concat(
- (bboxes[:, 2:], bboxes[:, 1:2], bboxes[:, 0:1]), axis=1)
+ # cls_ids, scores, tlwhs
+ pred_dets = bboxes
return pred_dets, pred_embs
def scale_coords(self, coords, input_shape, im_shape, scale_factor):
diff --git a/paddlers/models/ppdet/modeling/reid/pplcnet_embedding.py b/paddlers/models/ppdet/modeling/reid/pplcnet_embedding.py
index 1da21d2..1915a60 100644
--- a/paddlers/models/ppdet/modeling/reid/pplcnet_embedding.py
+++ b/paddlers/models/ppdet/modeling/reid/pplcnet_embedding.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
diff --git a/paddlers/models/ppdet/modeling/reid/pyramidal_embedding.py b/paddlers/models/ppdet/modeling/reid/pyramidal_embedding.py
index 9099ecd..6ee384d 100644
--- a/paddlers/models/ppdet/modeling/reid/pyramidal_embedding.py
+++ b/paddlers/models/ppdet/modeling/reid/pyramidal_embedding.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
@@ -21,7 +21,7 @@ import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn.initializer import Normal, Constant
from paddle import ParamAttr
-from .resnet import *
+from .resnet import ResNet50, ResNet101
from paddlers.models.ppdet.core.workspace import register
__all__ = ['PCBPyramid']
@@ -46,6 +46,7 @@ class PCBPyramid(nn.Layer):
def __init__(self,
input_ch=2048,
+ model_name='ResNet101',
num_stripes=6,
used_levels=(1, 1, 1, 1, 1, 1),
num_classes=751,
@@ -60,10 +61,11 @@ class PCBPyramid(nn.Layer):
self.num_in_each_level = [i for i in range(self.num_stripes, 0, -1)]
self.num_branches = sum(self.num_in_each_level)
- self.base = ResNet101(
- lr_mult=0.1,
- last_conv_stride=last_conv_stride,
- last_conv_dilation=last_conv_dilation)
+ assert model_name in ['ResNet50', 'ResNet101'
+ ], "Unsupported ReID arch: {}".format(model_name)
+ self.base = eval(model_name)(lr_mult=0.1,
+ last_conv_stride=last_conv_stride,
+ last_conv_dilation=last_conv_dilation)
self.dropout_layer = nn.Dropout(p=0.2)
self.pyramid_conv_list0, self.pyramid_fc_list0 = self.basic_branch(
num_conv_out_channels, input_ch)
diff --git a/paddlers/models/ppdet/modeling/reid/resnet.py b/paddlers/models/ppdet/modeling/reid/resnet.py
index 4316f83..2e2a855 100644
--- a/paddlers/models/ppdet/modeling/reid/resnet.py
+++ b/paddlers/models/ppdet/modeling/reid/resnet.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -55,7 +55,7 @@ class ConvBNLayer(nn.Layer):
bias_attr=False,
data_format=data_format)
- self._batch_norm = nn.BatchNorm2D(num_filters, data_layout=data_format)
+ self._batch_norm = nn.BatchNorm2D(num_filters)
self.act = act
def forward(self, inputs):
diff --git a/paddlers/models/ppdet/modeling/reid/resnet_embedding.py b/paddlers/models/ppdet/modeling/reid/resnet_embedding.py
new file mode 100644
index 0000000..32d17be
--- /dev/null
+++ b/paddlers/models/ppdet/modeling/reid/resnet_embedding.py
@@ -0,0 +1,43 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import paddle
+import paddle.nn.functional as F
+from paddle import nn
+from .resnet import ResNet50, ResNet101
+from paddlers.models.ppdet.core.workspace import register
+
+__all__ = ['ResNetEmbedding']
+
+
+@register
+class ResNetEmbedding(nn.Layer):
+ in_planes = 2048
+
+ def __init__(self, model_name='ResNet50', last_stride=1):
+ super(ResNetEmbedding, self).__init__()
+ assert model_name in ['ResNet50', 'ResNet101'
+ ], "Unsupported ReID arch: {}".format(model_name)
+ self.base = eval(model_name)(last_conv_stride=last_stride)
+ self.gap = nn.AdaptiveAvgPool2D(output_size=1)
+ self.flatten = nn.Flatten(start_axis=1, stop_axis=-1)
+ self.bn = nn.BatchNorm1D(self.in_planes, bias_attr=False)
+
+ def forward(self, x):
+ base_out = self.base(x)
+ global_feat = self.gap(base_out)
+ global_feat = self.flatten(global_feat)
+ global_feat = self.bn(global_feat)
+ return global_feat
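A minimal usage sketch for the new `ResNetEmbedding` head; the crop size and batch are illustrative and weights are randomly initialized, so the printed values are meaningless, but the shapes show the contract:

```python
import paddle
from paddlers.models.ppdet.modeling.reid.resnet_embedding import ResNetEmbedding

model = ResNetEmbedding(model_name='ResNet50', last_stride=1)
model.eval()
crops = paddle.rand([2, 3, 256, 128])  # NCHW person crops
feats = model(crops)
print(feats.shape)  # [2, 2048] BN-normalized global embeddings
```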
diff --git a/paddlers/models/ppdet/modeling/shape_spec.py b/paddlers/models/ppdet/modeling/shape_spec.py
index d42042a..bf6d11e 100644
--- a/paddlers/models/ppdet/modeling/shape_spec.py
+++ b/paddlers/models/ppdet/modeling/shape_spec.py
@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
# limitations under the License.
# The code is based on:
diff --git a/paddlers/models/ppdet/modeling/tests/__init__.py b/paddlers/models/ppdet/modeling/tests/__init__.py
new file mode 100644
index 0000000..5135585
--- /dev/null
+++ b/paddlers/models/ppdet/modeling/tests/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/paddlers/models/ppdet/modeling/tests/imgs/coco2017_val2017_000000000139.jpg b/paddlers/models/ppdet/modeling/tests/imgs/coco2017_val2017_000000000139.jpg
new file mode 100644
index 0000000..19023f7
Binary files /dev/null and b/paddlers/models/ppdet/modeling/tests/imgs/coco2017_val2017_000000000139.jpg differ
diff --git a/paddlers/models/ppdet/modeling/tests/imgs/coco2017_val2017_000000000724.jpg b/paddlers/models/ppdet/modeling/tests/imgs/coco2017_val2017_000000000724.jpg
new file mode 100644
index 0000000..2a17e0c
Binary files /dev/null and b/paddlers/models/ppdet/modeling/tests/imgs/coco2017_val2017_000000000724.jpg differ
diff --git a/paddlers/models/ppdet/modeling/tests/test_architectures.py b/paddlers/models/ppdet/modeling/tests/test_architectures.py
new file mode 100644
index 0000000..31cac3d
--- /dev/null
+++ b/paddlers/models/ppdet/modeling/tests/test_architectures.py
@@ -0,0 +1,69 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import unittest
+import paddlers.models.ppdet as ppdet
+
+
+class TestFasterRCNN(unittest.TestCase):
+ def setUp(self):
+ self.set_config()
+
+ def set_config(self):
+ self.cfg_file = 'configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.yml'
+
+ def test_trainer(self):
+ # Trainer __init__ will build model and DataLoader
+ # 'train' and 'eval' mode include dataset loading
+ # use 'test' mode to simplify tests
+ cfg = ppdet.core.workspace.load_config(self.cfg_file)
+ trainer = ppdet.engine.Trainer(cfg, mode='test')
+
+
+class TestMaskRCNN(TestFasterRCNN):
+ def set_config(self):
+ self.cfg_file = 'configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.yml'
+
+
+class TestCascadeRCNN(TestFasterRCNN):
+ def set_config(self):
+ self.cfg_file = 'configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.yml'
+
+
+class TestYolov3(TestFasterRCNN):
+ def set_config(self):
+ self.cfg_file = 'configs/yolov3/yolov3_darknet53_270e_coco.yml'
+
+
+class TestSSD(TestFasterRCNN):
+ def set_config(self):
+ self.cfg_file = 'configs/ssd/ssd_vgg16_300_240e_voc.yml'
+
+
+class TestGFL(TestFasterRCNN):
+ def set_config(self):
+ self.cfg_file = 'configs/gfl/gfl_r50_fpn_1x_coco.yml'
+
+
+class TestPicoDet(TestFasterRCNN):
+ def set_config(self):
+ self.cfg_file = 'configs/picodet/picodet_s_320_coco_lcnet.yml'
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/paddlers/models/ppdet/modeling/tests/test_base.py b/paddlers/models/ppdet/modeling/tests/test_base.py
new file mode 100644
index 0000000..0123cfd
--- /dev/null
+++ b/paddlers/models/ppdet/modeling/tests/test_base.py
@@ -0,0 +1,70 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+import unittest
+
+import contextlib
+
+import paddle
+from paddle.static import Program
+
+
+class LayerTest(unittest.TestCase):
+ @classmethod
+ def setUpClass(cls):
+ cls.seed = 111
+
+ @classmethod
+ def tearDownClass(cls):
+ pass
+
+ def _get_place(self, force_to_use_cpu=False):
+ # this option for ops that only have cpu kernel
+ if force_to_use_cpu:
+ return 'cpu'
+ else:
+ return paddle.device.get_device()
+
+ @contextlib.contextmanager
+ def static_graph(self):
+ paddle.enable_static()
+ scope = paddle.static.Scope()
+ program = Program()
+ with paddle.static.scope_guard(scope):
+ with paddle.static.program_guard(program):
+ paddle.seed(self.seed)
+ paddle.framework.random._manual_program_seed(self.seed)
+ yield
+
+ def get_static_graph_result(self,
+ feed,
+ fetch_list,
+ with_lod=False,
+ force_to_use_cpu=False):
+ exe = paddle.static.Executor(self._get_place(force_to_use_cpu))
+ exe.run(paddle.static.default_startup_program())
+ return exe.run(paddle.static.default_main_program(),
+ feed=feed,
+ fetch_list=fetch_list,
+ return_numpy=(not with_lod))
+
+ @contextlib.contextmanager
+ def dynamic_graph(self, force_to_use_cpu=False):
+ paddle.disable_static()
+ place = self._get_place(force_to_use_cpu=force_to_use_cpu)
+ paddle.device.set_device(place)
+ paddle.seed(self.seed)
+ paddle.framework.random._manual_program_seed(self.seed)
+ yield
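`LayerTest` above is the backbone of the op tests that follow: run an op once in a static graph and once eagerly under the same seed, then compare results. A minimal subclass sketch (assuming `LayerTest` is in scope as defined above; the `+ 1.0` op stands in for any ppdet op under test):

```python
import numpy as np
import paddle

class TestAddOne(LayerTest):
    def test_add_one(self):
        x_np = np.random.rand(2, 3).astype('float32')
        with self.static_graph():
            x = paddle.static.data(name='x', shape=[2, 3], dtype='float32')
            out_st, = self.get_static_graph_result(
                feed={'x': x_np}, fetch_list=[x + 1.0])
        with self.dynamic_graph():
            out_dy = (paddle.to_tensor(x_np) + 1.0).numpy()
        self.assertTrue(np.allclose(out_st, out_dy))
```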
diff --git a/paddlers/models/ppdet/modeling/tests/test_mstest.py b/paddlers/models/ppdet/modeling/tests/test_mstest.py
new file mode 100644
index 0000000..ba62333
--- /dev/null
+++ b/paddlers/models/ppdet/modeling/tests/test_mstest.py
@@ -0,0 +1,62 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import unittest
+from paddlers.models.ppdet.core.workspace import load_config
+from paddlers.models.ppdet.engine import Trainer
+
+
+class TestMultiScaleInference(unittest.TestCase):
+ def setUp(self):
+ self.set_config()
+
+ def set_config(self):
+ self.mstest_cfg_file = 'configs/faster_rcnn/faster_rcnn_r34_fpn_multiscaletest_1x_coco.yml'
+
+ # test evaluation with multi scale test
+ def test_eval_mstest(self):
+ cfg = load_config(self.mstest_cfg_file)
+ trainer = Trainer(cfg, mode='eval')
+
+ cfg.weights = 'https://paddledet.bj.bcebos.com/models/faster_rcnn_r34_fpn_1x_coco.pdparams'
+ trainer.load_weights(cfg.weights)
+
+ trainer.evaluate()
+
+ # test inference with multi scale test
+ def test_infer_mstest(self):
+ cfg = load_config(self.mstest_cfg_file)
+ trainer = Trainer(cfg, mode='test')
+
+ cfg.weights = 'https://paddledet.bj.bcebos.com/models/faster_rcnn_r34_fpn_1x_coco.pdparams'
+ trainer.load_weights(cfg.weights)
+ tests_img_root = os.path.join(os.path.dirname(__file__), 'imgs')
+
+ # input images to predict
+ imgs = [
+ 'coco2017_val2017_000000000139.jpg',
+ 'coco2017_val2017_000000000724.jpg'
+ ]
+ imgs = [os.path.join(tests_img_root, img) for img in imgs]
+ trainer.predict(
+ imgs, draw_threshold=0.5, output_dir='output', save_results=False)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/paddlers/models/ppdet/modeling/tests/test_ops.py b/paddlers/models/ppdet/modeling/tests/test_ops.py
new file mode 100644
index 0000000..3bf2f28
--- /dev/null
+++ b/paddlers/models/ppdet/modeling/tests/test_ops.py
@@ -0,0 +1,584 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+import os, sys
+# add python path of PaddleDetection to sys.path
+parent_path = os.path.abspath(os.path.join(__file__, *(['..'] * 4)))
+if parent_path not in sys.path:
+ sys.path.append(parent_path)
+
+import unittest
+import numpy as np
+
+import paddle
+
+import paddlers.models.ppdet.modeling.ops as ops
+from paddlers.models.ppdet.modeling.tests.test_base import LayerTest
+
+
+def make_rois(h, w, rois_num, output_size):
+ rois = np.zeros((0, 4)).astype('float32')
+ for roi_num in rois_num:
+ roi = np.zeros((roi_num, 4)).astype('float32')
+ roi[:, 0] = np.random.randint(0, h - output_size[0], size=roi_num)
+ roi[:, 1] = np.random.randint(0, w - output_size[1], size=roi_num)
+ roi[:, 2] = np.random.randint(roi[:, 0] + output_size[0], h)
+ roi[:, 3] = np.random.randint(roi[:, 1] + output_size[1], w)
+ rois = np.vstack((rois, roi))
+ return rois
+
+
+def softmax(x):
+ # clip to shiftx, otherwise, when calc loss with
+ # log(exp(shiftx)), may get log(0)=INF
+ shiftx = (x - np.max(x)).clip(-64.)
+ exps = np.exp(shiftx)
+ return exps / np.sum(exps)
+
+
+class TestDistributeFpnProposals(LayerTest):
+ def test_distribute_fpn_proposals(self):
+ rois_np = np.random.rand(10, 4).astype('float32')
+ rois_num_np = np.array([4, 6]).astype('int32')
+ with self.static_graph():
+ rois = paddle.static.data(
+ name='rois', shape=[10, 4], dtype='float32')
+ rois_num = paddle.static.data(
+ name='rois_num', shape=[None], dtype='int32')
+ multi_rois, restore_ind, rois_num_per_level = ops.distribute_fpn_proposals(
+ fpn_rois=rois,
+ min_level=2,
+ max_level=5,
+ refer_level=4,
+ refer_scale=224,
+ rois_num=rois_num)
+ fetch_list = multi_rois + [restore_ind] + rois_num_per_level
+ output_stat = self.get_static_graph_result(
+ feed={'rois': rois_np,
+ 'rois_num': rois_num_np},
+ fetch_list=fetch_list,
+ with_lod=True)
+ output_stat_np = []
+ for output in output_stat:
+ output_np = np.array(output)
+ if len(output_np) > 0:
+ output_stat_np.append(output_np)
+
+ with self.dynamic_graph():
+ rois_dy = paddle.to_tensor(rois_np)
+ rois_num_dy = paddle.to_tensor(rois_num_np)
+ multi_rois_dy, restore_ind_dy, rois_num_per_level_dy = ops.distribute_fpn_proposals(
+ fpn_rois=rois_dy,
+ min_level=2,
+ max_level=5,
+ refer_level=4,
+ refer_scale=224,
+ rois_num=rois_num_dy)
+ output_dy = multi_rois_dy + [restore_ind_dy] + rois_num_per_level_dy
+ output_dy_np = []
+ for output in output_dy:
+ output_np = output.numpy()
+ if len(output_np) > 0:
+ output_dy_np.append(output_np)
+
+ for res_stat, res_dy in zip(output_stat_np, output_dy_np):
+ self.assertTrue(np.array_equal(res_stat, res_dy))
+
+ def test_distribute_fpn_proposals_error(self):
+ with self.static_graph():
+ fpn_rois = paddle.static.data(
+ name='data_error', shape=[10, 4], dtype='int32', lod_level=1)
+ self.assertRaises(
+ TypeError,
+ ops.distribute_fpn_proposals,
+ fpn_rois=fpn_rois,
+ min_level=2,
+ max_level=5,
+ refer_level=4,
+ refer_scale=224)
+
+ paddle.disable_static()
+
+
+class TestROIAlign(LayerTest):
+ def test_roi_align(self):
+ b, c, h, w = 2, 12, 20, 20
+ inputs_np = np.random.rand(b, c, h, w).astype('float32')
+ rois_num = [4, 6]
+ output_size = (7, 7)
+ rois_np = make_rois(h, w, rois_num, output_size)
+ rois_num_np = np.array(rois_num).astype('int32')
+ with self.static_graph():
+ inputs = paddle.static.data(
+ name='inputs', shape=[b, c, h, w], dtype='float32')
+ rois = paddle.static.data(
+ name='rois', shape=[10, 4], dtype='float32')
+ rois_num = paddle.static.data(
+ name='rois_num', shape=[None], dtype='int32')
+
+ output = paddle.vision.ops.roi_align(
+ x=inputs,
+ boxes=rois,
+ boxes_num=rois_num,
+ output_size=output_size)
+ output_np, = self.get_static_graph_result(
+ feed={
+ 'inputs': inputs_np,
+ 'rois': rois_np,
+ 'rois_num': rois_num_np
+ },
+ fetch_list=output,
+ with_lod=False)
+
+ with self.dynamic_graph():
+ inputs_dy = paddle.to_tensor(inputs_np)
+ rois_dy = paddle.to_tensor(rois_np)
+ rois_num_dy = paddle.to_tensor(rois_num_np)
+
+ output_dy = paddle.vision.ops.roi_align(
+ x=inputs_dy,
+ boxes=rois_dy,
+ boxes_num=rois_num_dy,
+ output_size=output_size)
+ output_dy_np = output_dy.numpy()
+
+ self.assertTrue(np.array_equal(output_np, output_dy_np))
+
+ def test_roi_align_error(self):
+ with self.static_graph():
+ inputs = paddle.static.data(
+ name='inputs', shape=[2, 12, 20, 20], dtype='float32')
+ rois = paddle.static.data(
+ name='data_error', shape=[10, 4], dtype='int32', lod_level=1)
+ self.assertRaises(
+ TypeError,
+ paddle.vision.ops.roi_align,
+ input=inputs,
+ rois=rois,
+ output_size=(7, 7))
+
+ paddle.disable_static()
+
+
+class TestROIPool(LayerTest):
+ def test_roi_pool(self):
+ b, c, h, w = 2, 12, 20, 20
+ inputs_np = np.random.rand(b, c, h, w).astype('float32')
+ rois_num = [4, 6]
+ output_size = (7, 7)
+ rois_np = make_rois(h, w, rois_num, output_size)
+ rois_num_np = np.array(rois_num).astype('int32')
+ with self.static_graph():
+ inputs = paddle.static.data(
+ name='inputs', shape=[b, c, h, w], dtype='float32')
+ rois = paddle.static.data(
+ name='rois', shape=[10, 4], dtype='float32')
+ rois_num = paddle.static.data(
+ name='rois_num', shape=[None], dtype='int32')
+
+ output = paddle.vision.ops.roi_pool(
+ x=inputs,
+ boxes=rois,
+ boxes_num=rois_num,
+ output_size=output_size)
+ output_np, = self.get_static_graph_result(
+ feed={
+ 'inputs': inputs_np,
+ 'rois': rois_np,
+ 'rois_num': rois_num_np
+ },
+ fetch_list=[output],
+ with_lod=False)
+
+ with self.dynamic_graph():
+ inputs_dy = paddle.to_tensor(inputs_np)
+ rois_dy = paddle.to_tensor(rois_np)
+ rois_num_dy = paddle.to_tensor(rois_num_np)
+
+ output_dy = paddle.vision.ops.roi_pool(
+ x=inputs_dy,
+ boxes=rois_dy,
+ boxes_num=rois_num_dy,
+ output_size=output_size)
+ output_dy_np = output_dy.numpy()
+
+ self.assertTrue(np.array_equal(output_np, output_dy_np))
+
+ def test_roi_pool_error(self):
+ with self.static_graph():
+ inputs = paddle.static.data(
+ name='inputs', shape=[2, 12, 20, 20], dtype='float32')
+ rois = paddle.static.data(
+ name='data_error', shape=[10, 4], dtype='int32', lod_level=1)
+ self.assertRaises(
+ TypeError,
+ paddle.vision.ops.roi_pool,
+ input=inputs,
+ rois=rois,
+ output_size=(7, 7))
+
+ paddle.disable_static()
+
+
+class TestPriorBox(LayerTest):
+ def test_prior_box(self):
+ input_np = np.random.rand(2, 10, 32, 32).astype('float32')
+ image_np = np.random.rand(2, 10, 40, 40).astype('float32')
+ min_sizes = [2, 4]
+ with self.static_graph():
+ input = paddle.static.data(
+ name='input', shape=[2, 10, 32, 32], dtype='float32')
+ image = paddle.static.data(
+ name='image', shape=[2, 10, 40, 40], dtype='float32')
+
+ box, var = ops.prior_box(
+ input=input,
+ image=image,
+ min_sizes=min_sizes,
+ clip=True,
+ flip=True)
+ box_np, var_np = self.get_static_graph_result(
+ feed={
+ 'input': input_np,
+ 'image': image_np,
+ },
+ fetch_list=[box, var],
+ with_lod=False)
+
+ with self.dynamic_graph():
+ inputs_dy = paddle.to_tensor(input_np)
+ image_dy = paddle.to_tensor(image_np)
+
+ box_dy, var_dy = ops.prior_box(
+ input=inputs_dy,
+ image=image_dy,
+ min_sizes=min_sizes,
+ clip=True,
+ flip=True)
+ box_dy_np = box_dy.numpy()
+ var_dy_np = var_dy.numpy()
+
+ self.assertTrue(np.array_equal(box_np, box_dy_np))
+ self.assertTrue(np.array_equal(var_np, var_dy_np))
+
+ def test_prior_box_error(self):
+ with self.static_graph():
+ input = paddle.static.data(
+ name='input', shape=[2, 10, 32, 32], dtype='int32')
+ image = paddle.static.data(
+ name='image', shape=[2, 10, 40, 40], dtype='int32')
+ self.assertRaises(
+ TypeError,
+ ops.prior_box,
+ input=input,
+ image=image,
+ min_sizes=[2, 4],
+ clip=True,
+ flip=True)
+
+ paddle.disable_static()
+
+
+class TestMulticlassNms(LayerTest):
+ def test_multiclass_nms(self):
+ boxes_np = np.random.rand(10, 81, 4).astype('float32')
+ scores_np = np.random.rand(10, 81).astype('float32')
+ rois_num_np = np.array([2, 8]).astype('int32')
+ with self.static_graph():
+ boxes = paddle.static.data(
+ name='bboxes',
+ shape=[None, 81, 4],
+ dtype='float32',
+ lod_level=1)
+ scores = paddle.static.data(
+ name='scores', shape=[None, 81], dtype='float32', lod_level=1)
+ rois_num = paddle.static.data(
+ name='rois_num', shape=[None], dtype='int32')
+
+ output = ops.multiclass_nms(
+ bboxes=boxes,
+ scores=scores,
+ background_label=0,
+ score_threshold=0.5,
+ nms_top_k=400,
+ nms_threshold=0.3,
+ keep_top_k=200,
+ normalized=False,
+ return_index=True,
+ rois_num=rois_num)
+ out_np, index_np, nms_rois_num_np = self.get_static_graph_result(
+ feed={
+ 'bboxes': boxes_np,
+ 'scores': scores_np,
+ 'rois_num': rois_num_np
+ },
+ fetch_list=output,
+ with_lod=True)
+ out_np = np.array(out_np)
+ index_np = np.array(index_np)
+ nms_rois_num_np = np.array(nms_rois_num_np)
+
+ with self.dynamic_graph():
+ boxes_dy = paddle.to_tensor(boxes_np)
+ scores_dy = paddle.to_tensor(scores_np)
+ rois_num_dy = paddle.to_tensor(rois_num_np)
+
+ out_dy, index_dy, nms_rois_num_dy = ops.multiclass_nms(
+ bboxes=boxes_dy,
+ scores=scores_dy,
+ background_label=0,
+ score_threshold=0.5,
+ nms_top_k=400,
+ nms_threshold=0.3,
+ keep_top_k=200,
+ normalized=False,
+ return_index=True,
+ rois_num=rois_num_dy)
+ out_dy_np = out_dy.numpy()
+ index_dy_np = index_dy.numpy()
+ nms_rois_num_dy_np = nms_rois_num_dy.numpy()
+
+ self.assertTrue(np.array_equal(out_np, out_dy_np))
+ self.assertTrue(np.array_equal(index_np, index_dy_np))
+ self.assertTrue(np.array_equal(nms_rois_num_np, nms_rois_num_dy_np))
+
+ def test_multiclass_nms_error(self):
+ with self.static_graph():
+ boxes = paddle.static.data(
+ name='bboxes', shape=[81, 4], dtype='float32', lod_level=1)
+ scores = paddle.static.data(
+ name='scores', shape=[81], dtype='float32', lod_level=1)
+ rois_num = paddle.static.data(
+ name='rois_num', shape=[40, 41], dtype='int32')
+ self.assertRaises(
+ TypeError,
+ ops.multiclass_nms,
+                bboxes=boxes,
+ scores=scores,
+ background_label=0,
+ score_threshold=0.5,
+ nms_top_k=400,
+ nms_threshold=0.3,
+ keep_top_k=200,
+ normalized=False,
+ return_index=True,
+ rois_num=rois_num)
+
+
+class TestMatrixNMS(LayerTest):
+ def test_matrix_nms(self):
+ N, M, C = 7, 1200, 21
+ BOX_SIZE = 4
+ nms_top_k = 400
+ keep_top_k = 200
+ score_threshold = 0.01
+ post_threshold = 0.
+
+ scores_np = np.random.random((N * M, C)).astype('float32')
+ scores_np = np.apply_along_axis(softmax, 1, scores_np)
+ scores_np = np.reshape(scores_np, (N, M, C))
+ scores_np = np.transpose(scores_np, (0, 2, 1))
+
+ boxes_np = np.random.random((N, M, BOX_SIZE)).astype('float32')
+ boxes_np[:, :, 0:2] = boxes_np[:, :, 0:2] * 0.5
+ boxes_np[:, :, 2:4] = boxes_np[:, :, 2:4] * 0.5 + 0.5
+
+ with self.static_graph():
+ boxes = paddle.static.data(
+ name='boxes', shape=[N, M, BOX_SIZE], dtype='float32')
+ scores = paddle.static.data(
+ name='scores', shape=[N, C, M], dtype='float32')
+ out, index, _ = ops.matrix_nms(
+ bboxes=boxes,
+ scores=scores,
+ score_threshold=score_threshold,
+ post_threshold=post_threshold,
+ nms_top_k=nms_top_k,
+ keep_top_k=keep_top_k,
+ return_index=True)
+ out_np, index_np = self.get_static_graph_result(
+ feed={'boxes': boxes_np,
+ 'scores': scores_np},
+ fetch_list=[out, index],
+ with_lod=True)
+
+ with self.dynamic_graph():
+ boxes_dy = paddle.to_tensor(boxes_np)
+ scores_dy = paddle.to_tensor(scores_np)
+
+ out_dy, index_dy, _ = ops.matrix_nms(
+ bboxes=boxes_dy,
+ scores=scores_dy,
+ score_threshold=score_threshold,
+ post_threshold=post_threshold,
+ nms_top_k=nms_top_k,
+ keep_top_k=keep_top_k,
+ return_index=True)
+ out_dy_np = out_dy.numpy()
+ index_dy_np = index_dy.numpy()
+
+ self.assertTrue(np.array_equal(out_np, out_dy_np))
+ self.assertTrue(np.array_equal(index_np, index_dy_np))
+
+ def test_matrix_nms_error(self):
+ with self.static_graph():
+ bboxes = paddle.static.data(
+ name='bboxes', shape=[7, 1200, 4], dtype='float32')
+ scores = paddle.static.data(
+ name='data_error', shape=[7, 21, 1200], dtype='int32')
+ self.assertRaises(
+ TypeError,
+ ops.matrix_nms,
+ bboxes=bboxes,
+ scores=scores,
+ score_threshold=0.01,
+ post_threshold=0.,
+ nms_top_k=400,
+ keep_top_k=200,
+ return_index=True)
+
+ paddle.disable_static()
+
+
+class TestBoxCoder(LayerTest):
+ def test_box_coder(self):
+
+ prior_box_np = np.random.random((81, 4)).astype('float32')
+ prior_box_var_np = np.random.random((81, 4)).astype('float32')
+ target_box_np = np.random.random((20, 81, 4)).astype('float32')
+
+ # static
+ with self.static_graph():
+ prior_box = paddle.static.data(
+ name='prior_box', shape=[81, 4], dtype='float32')
+ prior_box_var = paddle.static.data(
+ name='prior_box_var', shape=[81, 4], dtype='float32')
+ target_box = paddle.static.data(
+ name='target_box', shape=[20, 81, 4], dtype='float32')
+
+ boxes = ops.box_coder(
+ prior_box=prior_box,
+ prior_box_var=prior_box_var,
+ target_box=target_box,
+ code_type="decode_center_size",
+ box_normalized=False)
+
+ boxes_np, = self.get_static_graph_result(
+ feed={
+ 'prior_box': prior_box_np,
+ 'prior_box_var': prior_box_var_np,
+ 'target_box': target_box_np,
+ },
+ fetch_list=[boxes],
+ with_lod=False)
+
+ # dygraph
+ with self.dynamic_graph():
+ prior_box_dy = paddle.to_tensor(prior_box_np)
+ prior_box_var_dy = paddle.to_tensor(prior_box_var_np)
+ target_box_dy = paddle.to_tensor(target_box_np)
+
+ boxes_dy = ops.box_coder(
+ prior_box=prior_box_dy,
+ prior_box_var=prior_box_var_dy,
+ target_box=target_box_dy,
+ code_type="decode_center_size",
+ box_normalized=False)
+
+ boxes_dy_np = boxes_dy.numpy()
+
+ self.assertTrue(np.array_equal(boxes_np, boxes_dy_np))
+
+ def test_box_coder_error(self):
+ with self.static_graph():
+ prior_box = paddle.static.data(
+ name='prior_box', shape=[81, 4], dtype='int32')
+ prior_box_var = paddle.static.data(
+ name='prior_box_var', shape=[81, 4], dtype='float32')
+ target_box = paddle.static.data(
+ name='target_box', shape=[20, 81, 4], dtype='float32')
+
+ self.assertRaises(TypeError, ops.box_coder, prior_box,
+ prior_box_var, target_box)
+
+ paddle.disable_static()
+
+
+class TestGenerateProposals(LayerTest):
+ def test_generate_proposals(self):
+ scores_np = np.random.rand(2, 3, 4, 4).astype('float32')
+ bbox_deltas_np = np.random.rand(2, 12, 4, 4).astype('float32')
+ im_shape_np = np.array([[8, 8], [6, 6]]).astype('float32')
+ anchors_np = np.reshape(np.arange(4 * 4 * 3 * 4),
+ [4, 4, 3, 4]).astype('float32')
+ variances_np = np.ones((4, 4, 3, 4)).astype('float32')
+
+ with self.static_graph():
+ scores = paddle.static.data(
+ name='scores', shape=[2, 3, 4, 4], dtype='float32')
+ bbox_deltas = paddle.static.data(
+ name='bbox_deltas', shape=[2, 12, 4, 4], dtype='float32')
+ im_shape = paddle.static.data(
+ name='im_shape', shape=[2, 2], dtype='float32')
+ anchors = paddle.static.data(
+ name='anchors', shape=[4, 4, 3, 4], dtype='float32')
+ variances = paddle.static.data(
+ name='var', shape=[4, 4, 3, 4], dtype='float32')
+ rois, roi_probs, rois_num = ops.generate_proposals(
+ scores,
+ bbox_deltas,
+ im_shape,
+ anchors,
+ variances,
+ pre_nms_top_n=10,
+ post_nms_top_n=5,
+ return_rois_num=True)
+ rois_stat, roi_probs_stat, rois_num_stat = self.get_static_graph_result(
+ feed={
+ 'scores': scores_np,
+ 'bbox_deltas': bbox_deltas_np,
+ 'im_shape': im_shape_np,
+ 'anchors': anchors_np,
+ 'var': variances_np
+ },
+ fetch_list=[rois, roi_probs, rois_num],
+ with_lod=True)
+
+ with self.dynamic_graph():
+ scores_dy = paddle.to_tensor(scores_np)
+ bbox_deltas_dy = paddle.to_tensor(bbox_deltas_np)
+ im_shape_dy = paddle.to_tensor(im_shape_np)
+ anchors_dy = paddle.to_tensor(anchors_np)
+ variances_dy = paddle.to_tensor(variances_np)
+ rois, roi_probs, rois_num = ops.generate_proposals(
+ scores_dy,
+ bbox_deltas_dy,
+ im_shape_dy,
+ anchors_dy,
+ variances_dy,
+ pre_nms_top_n=10,
+ post_nms_top_n=5,
+ return_rois_num=True)
+ rois_dy = rois.numpy()
+ roi_probs_dy = roi_probs.numpy()
+ rois_num_dy = rois_num.numpy()
+
+ self.assertTrue(np.array_equal(np.array(rois_stat), rois_dy))
+ self.assertTrue(np.array_equal(np.array(roi_probs_stat), roi_probs_dy))
+ self.assertTrue(np.array_equal(np.array(rois_num_stat), rois_num_dy))
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/paddlers/models/ppdet/modeling/tests/test_yolov3_loss.py b/paddlers/models/ppdet/modeling/tests/test_yolov3_loss.py
new file mode 100644
index 0000000..af41c97
--- /dev/null
+++ b/paddlers/models/ppdet/modeling/tests/test_yolov3_loss.py
@@ -0,0 +1,406 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import division
+
+import unittest
+
+import paddle
+import paddle.nn.functional as F
+# add python path of PaddleDetection to sys.path
+import os
+import sys
+parent_path = os.path.abspath(os.path.join(__file__, *(['..'] * 4)))
+if parent_path not in sys.path:
+ sys.path.append(parent_path)
+
+from paddlers.models.ppdet.modeling.losses import YOLOv3Loss
+from paddlers.models.ppdet.data.transform.op_helper import jaccard_overlap
+from paddlers.models.ppdet.modeling.bbox_utils import iou_similarity
+import numpy as np
+np.random.seed(0)
+
+
+def _split_output(output, an_num, num_classes):
+ """
+    Split the output feature map into x, y, w, h, objectness, classification
+    along the channel dimension.
+ """
+ x = paddle.strided_slice(
+ output,
+ axes=[1],
+ starts=[0],
+ ends=[output.shape[1]],
+ strides=[5 + num_classes])
+ y = paddle.strided_slice(
+ output,
+ axes=[1],
+ starts=[1],
+ ends=[output.shape[1]],
+ strides=[5 + num_classes])
+ w = paddle.strided_slice(
+ output,
+ axes=[1],
+ starts=[2],
+ ends=[output.shape[1]],
+ strides=[5 + num_classes])
+ h = paddle.strided_slice(
+ output,
+ axes=[1],
+ starts=[3],
+ ends=[output.shape[1]],
+ strides=[5 + num_classes])
+ obj = paddle.strided_slice(
+ output,
+ axes=[1],
+ starts=[4],
+ ends=[output.shape[1]],
+ strides=[5 + num_classes])
+ clss = []
+ stride = output.shape[1] // an_num
+ for m in range(an_num):
+ clss.append(
+ paddle.slice(
+ output,
+ axes=[1],
+ starts=[stride * m + 5],
+ ends=[stride * m + 5 + num_classes]))
+ cls = paddle.transpose(paddle.stack(clss, axis=1), perm=[0, 1, 3, 4, 2])
+ return (x, y, w, h, obj, cls)
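+
+# Channel-layout note (illustrative values, not part of the original test):
+# with an_num=3 and num_classes=80, each anchor occupies 5 + 80 = 85 channels
+# laid out as [x, y, w, h, obj, 80 class scores], so channels 0/85/170 hold x
+# for the three anchors; hence the stride of 5 + num_classes in the
+# strided_slice calls above.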
+
+
+def _split_target(target):
+ """
+    Split target into x, y, w, h, objectness, classification
+    along dimension 2.
+    target is of shape [N, an_num, 6 + class_num, H, W]
+ """
+ tx = target[:, :, 0, :, :]
+ ty = target[:, :, 1, :, :]
+ tw = target[:, :, 2, :, :]
+ th = target[:, :, 3, :, :]
+ tscale = target[:, :, 4, :, :]
+ tobj = target[:, :, 5, :, :]
+ tcls = paddle.transpose(target[:, :, 6:, :, :], perm=[0, 1, 3, 4, 2])
+ tcls.stop_gradient = True
+ return (tx, ty, tw, th, tscale, tobj, tcls)
+
+
+def _calc_obj_loss(output, obj, tobj, gt_box, batch_size, anchors, num_classes,
+ downsample, ignore_thresh, scale_x_y):
+    # If a prediction bbox overlaps any gt bbox with IoU above ignore_thresh,
+    # its objectness loss is ignored. The process is as follows:
+    # 1. get pred bbox, which is the same as YOLOv3 infer mode; use yolo_box here
+    # NOTE: img_size is set as 1.0 to get normalized pred bbox
+ bbox, prob = paddle.vision.ops.yolo_box(
+ x=output,
+ img_size=paddle.ones(
+ shape=[batch_size, 2], dtype="int32"),
+ anchors=anchors,
+ class_num=num_classes,
+ conf_thresh=0.,
+ downsample_ratio=downsample,
+ clip_bbox=False,
+ scale_x_y=scale_x_y)
+ # 2. split pred bbox and gt bbox by sample, calculate IoU between pred bbox
+ # and gt bbox in each sample
+ if batch_size > 1:
+ preds = paddle.split(bbox, batch_size, axis=0)
+ gts = paddle.split(gt_box, batch_size, axis=0)
+ else:
+ preds = [bbox]
+ gts = [gt_box]
+ ious = []
+ for pred, gt in zip(preds, gts):
+
+ def box_xywh2xyxy(box):
+ x = box[:, 0]
+ y = box[:, 1]
+ w = box[:, 2]
+ h = box[:, 3]
+ return paddle.stack(
+ [
+ x - w / 2.,
+ y - h / 2.,
+ x + w / 2.,
+ y + h / 2.,
+ ], axis=1)
+
+ pred = paddle.squeeze(pred, axis=[0])
+ gt = box_xywh2xyxy(paddle.squeeze(gt, axis=[0]))
+ ious.append(iou_similarity(pred, gt))
+ iou = paddle.stack(ious, axis=0)
+    # 3. Get iou_mask from the IoU between gt bboxes and prediction bboxes,
+    #    get obj_mask from tobj (which holds gt_score), then calculate the objectness loss
+ max_iou = paddle.max(iou, axis=-1)
+ iou_mask = paddle.cast(max_iou <= ignore_thresh, dtype="float32")
+ output_shape = paddle.shape(output)
+ an_num = len(anchors) // 2
+ iou_mask = paddle.reshape(iou_mask, (-1, an_num, output_shape[2],
+ output_shape[3]))
+ iou_mask.stop_gradient = True
+ # NOTE: tobj holds gt_score, obj_mask holds object existence mask
+ obj_mask = paddle.cast(tobj > 0., dtype="float32")
+ obj_mask.stop_gradient = True
+    # For positive objectness grids, the objectness loss is always calculated;
+    # for negative grids, it is calculated only where iou_mask == 1.0
+ obj_sigmoid = F.sigmoid(obj)
+ loss_obj = F.binary_cross_entropy(obj_sigmoid, obj_mask, reduction='none')
+ loss_obj_pos = paddle.sum(loss_obj * tobj, axis=[1, 2, 3])
+ loss_obj_neg = paddle.sum(loss_obj * (1.0 - obj_mask) * iou_mask,
+ axis=[1, 2, 3])
+ return loss_obj_pos, loss_obj_neg
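+
+# Illustration (assumed values): with ignore_thresh=0.7, a negative grid cell
+# whose best IoU against any gt box is 0.8 gets iou_mask=0, so its objectness
+# loss is dropped, while a cell with best IoU 0.5 keeps iou_mask=1 and
+# contributes to loss_obj_neg.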
+
+
+def fine_grained_loss(output,
+ target,
+ gt_box,
+ batch_size,
+ num_classes,
+ anchors,
+ ignore_thresh,
+ downsample,
+ scale_x_y=1.,
+ eps=1e-10):
+ an_num = len(anchors) // 2
+ x, y, w, h, obj, cls = _split_output(output, an_num, num_classes)
+ tx, ty, tw, th, tscale, tobj, tcls = _split_target(target)
+
+ tscale_tobj = tscale * tobj
+
+ if (abs(scale_x_y - 1.0) < eps):
+ x = F.sigmoid(x)
+ y = F.sigmoid(y)
+ loss_x = F.binary_cross_entropy(x, tx, reduction='none') * tscale_tobj
+ loss_x = paddle.sum(loss_x, axis=[1, 2, 3])
+ loss_y = F.binary_cross_entropy(y, ty, reduction='none') * tscale_tobj
+ loss_y = paddle.sum(loss_y, axis=[1, 2, 3])
+ else:
+ dx = scale_x_y * F.sigmoid(x) - 0.5 * (scale_x_y - 1.0)
+ dy = scale_x_y * F.sigmoid(y) - 0.5 * (scale_x_y - 1.0)
+ loss_x = paddle.abs(dx - tx) * tscale_tobj
+ loss_x = paddle.sum(loss_x, axis=[1, 2, 3])
+ loss_y = paddle.abs(dy - ty) * tscale_tobj
+ loss_y = paddle.sum(loss_y, axis=[1, 2, 3])
+
+    # NOTE: we refine the (w, h) loss to an L1 loss
+ loss_w = paddle.abs(w - tw) * tscale_tobj
+ loss_w = paddle.sum(loss_w, axis=[1, 2, 3])
+ loss_h = paddle.abs(h - th) * tscale_tobj
+ loss_h = paddle.sum(loss_h, axis=[1, 2, 3])
+
+ loss_obj_pos, loss_obj_neg = _calc_obj_loss(
+ output, obj, tobj, gt_box, batch_size, anchors, num_classes, downsample,
+ ignore_thresh, scale_x_y)
+
+ cls = F.sigmoid(cls)
+ loss_cls = F.binary_cross_entropy(cls, tcls, reduction='none')
+ tobj = paddle.unsqueeze(tobj, axis=-1)
+
+ loss_cls = paddle.multiply(loss_cls, tobj)
+ loss_cls = paddle.sum(loss_cls, axis=[1, 2, 3, 4])
+
+ loss_xys = paddle.mean(loss_x + loss_y)
+ loss_whs = paddle.mean(loss_w + loss_h)
+ loss_objs = paddle.mean(loss_obj_pos + loss_obj_neg)
+ loss_clss = paddle.mean(loss_cls)
+
+ losses_all = {
+ "loss_xy": paddle.sum(loss_xys),
+ "loss_wh": paddle.sum(loss_whs),
+ "loss_loc": paddle.sum(loss_xys) + paddle.sum(loss_whs),
+ "loss_obj": paddle.sum(loss_objs),
+ "loss_cls": paddle.sum(loss_clss),
+ }
+ return losses_all, x, y, tx, ty
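+
+# Note on scale_x_y (illustrative): with scale_x_y=1.2 the decoded center
+# offset is dx = 1.2 * sigmoid(x) - 0.1, spanning (-0.1, 1.1) instead of
+# (0, 1), so predictions can reach grid-cell borders; the loss then switches
+# from BCE on sigmoid(x) to an L1 penalty on dx.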
+
+
+def gt2yolotarget(gt_bbox, gt_class, gt_score, anchors, mask, num_classes, size,
+ stride):
+ grid_h, grid_w = size
+ h, w = grid_h * stride, grid_w * stride
+ an_hw = np.array(anchors) / np.array([[w, h]])
+ target = np.zeros(
+ (len(mask), 6 + num_classes, grid_h, grid_w), dtype=np.float32)
+ for b in range(gt_bbox.shape[0]):
+ gx, gy, gw, gh = gt_bbox[b, :]
+ cls = gt_class[b]
+ score = gt_score[b]
+ if gw <= 0. or gh <= 0. or score <= 0.:
+ continue
+
+ # find best match anchor index
+ best_iou = 0.
+ best_idx = -1
+ for an_idx in range(an_hw.shape[0]):
+ iou = jaccard_overlap([0., 0., gw, gh],
+ [0., 0., an_hw[an_idx, 0], an_hw[an_idx, 1]])
+ if iou > best_iou:
+ best_iou = iou
+ best_idx = an_idx
+
+ gi = int(gx * grid_w)
+ gj = int(gy * grid_h)
+
+        # the gt box should be regressed in this layer if its best-match
+        # anchor index is in this layer's anchor mask
+ if best_idx in mask:
+ best_n = mask.index(best_idx)
+
+ # x, y, w, h, scale
+ target[best_n, 0, gj, gi] = gx * grid_w - gi
+ target[best_n, 1, gj, gi] = gy * grid_h - gj
+ target[best_n, 2, gj, gi] = np.log(gw * w / anchors[best_idx][0])
+ target[best_n, 3, gj, gi] = np.log(gh * h / anchors[best_idx][1])
+ target[best_n, 4, gj, gi] = 2.0 - gw * gh
+
+ # objectness record gt_score
+ # if target[best_n, 5, gj, gi] > 0:
+ # print('find 1 duplicate')
+ target[best_n, 5, gj, gi] = score
+
+ # classification
+ target[best_n, 6 + cls, gj, gi] = 1.
+
+ return target
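+
+# Worked example (hypothetical numbers): for a normalized gt box with
+# gx=0.55, gy=0.30 on a 19x19 grid, gi = int(0.55 * 19) = 10 and
+# gj = int(0.30 * 19) = 5, so the target is written at cell (gj=5, gi=10) for
+# the best-matching anchor, with tx = 0.55 * 19 - 10 = 0.45 and
+# ty = 0.30 * 19 - 5 = 0.7.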
+
+
+class TestYolov3LossOp(unittest.TestCase):
+ def setUp(self):
+ self.initTestCase()
+ x = np.random.uniform(0, 1, self.x_shape).astype('float64')
+ gtbox = np.random.random(size=self.gtbox_shape).astype('float64')
+ gtlabel = np.random.randint(0, self.class_num, self.gtbox_shape[:2])
+ gtmask = np.random.randint(0, 2, self.gtbox_shape[:2])
+ gtbox = gtbox * gtmask[:, :, np.newaxis]
+ gtlabel = gtlabel * gtmask
+
+ gtscore = np.ones(self.gtbox_shape[:2]).astype('float64')
+ if self.gtscore:
+ gtscore = np.random.random(self.gtbox_shape[:2]).astype('float64')
+
+ target = []
+ for box, label, score in zip(gtbox, gtlabel, gtscore):
+ target.append(
+ gt2yolotarget(box, label, score, self.anchors, self.anchor_mask,
+ self.class_num, (self.h, self.w
+ ), self.downsample_ratio))
+
+ self.target = np.array(target).astype('float64')
+
+ self.mask_anchors = []
+ for i in self.anchor_mask:
+ self.mask_anchors.extend(self.anchors[i])
+ self.x = x
+ self.gtbox = gtbox
+ self.gtlabel = gtlabel
+ self.gtscore = gtscore
+
+ def initTestCase(self):
+ self.b = 8
+ self.h = 19
+ self.w = 19
+ self.anchors = [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45],
+ [59, 119], [116, 90], [156, 198], [373, 326]]
+ self.anchor_mask = [6, 7, 8]
+ self.na = len(self.anchor_mask)
+ self.class_num = 80
+ self.ignore_thresh = 0.7
+ self.downsample_ratio = 32
+ self.x_shape = (self.b, len(self.anchor_mask) * (5 + self.class_num),
+ self.h, self.w)
+ self.gtbox_shape = (self.b, 40, 4)
+ self.gtscore = True
+ self.use_label_smooth = False
+ self.scale_x_y = 1.
+
+ def test_loss(self):
+ x, gtbox, gtlabel, gtscore, target = self.x, self.gtbox, self.gtlabel, self.gtscore, self.target
+ yolo_loss = YOLOv3Loss(
+ ignore_thresh=self.ignore_thresh,
+ label_smooth=self.use_label_smooth,
+ num_classes=self.class_num,
+ downsample=self.downsample_ratio,
+ scale_x_y=self.scale_x_y)
+ x = paddle.to_tensor(x.astype(np.float32))
+ gtbox = paddle.to_tensor(gtbox.astype(np.float32))
+ gtlabel = paddle.to_tensor(gtlabel.astype(np.float32))
+ gtscore = paddle.to_tensor(gtscore.astype(np.float32))
+ t = paddle.to_tensor(target.astype(np.float32))
+ anchor = [self.anchors[i] for i in self.anchor_mask]
+ (yolo_loss1, px, py, tx, ty) = fine_grained_loss(
+ output=x,
+ target=t,
+ gt_box=gtbox,
+ batch_size=self.b,
+ num_classes=self.class_num,
+ anchors=self.mask_anchors,
+ ignore_thresh=self.ignore_thresh,
+ downsample=self.downsample_ratio,
+ scale_x_y=self.scale_x_y)
+ yolo_loss2 = yolo_loss.yolov3_loss(
+ x, t, gtbox, anchor, self.downsample_ratio, self.scale_x_y)
+ for k in yolo_loss2:
+ self.assertAlmostEqual(
+ yolo_loss1[k].numpy()[0],
+ yolo_loss2[k].numpy()[0],
+ delta=1e-2,
+ msg=k)
+
+
+class TestYolov3LossNoGTScore(TestYolov3LossOp):
+ def initTestCase(self):
+ self.b = 1
+ self.h = 76
+ self.w = 76
+ self.anchors = [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45],
+ [59, 119], [116, 90], [156, 198], [373, 326]]
+ self.anchor_mask = [0, 1, 2]
+ self.na = len(self.anchor_mask)
+ self.class_num = 80
+ self.ignore_thresh = 0.7
+ self.downsample_ratio = 8
+ self.x_shape = (self.b, len(self.anchor_mask) * (5 + self.class_num),
+ self.h, self.w)
+ self.gtbox_shape = (self.b, 40, 4)
+ self.gtscore = False
+ self.use_label_smooth = False
+ self.scale_x_y = 1.
+
+
+class TestYolov3LossWithScaleXY(TestYolov3LossOp):
+ def initTestCase(self):
+ self.b = 5
+ self.h = 38
+ self.w = 38
+ self.anchors = [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45],
+ [59, 119], [116, 90], [156, 198], [373, 326]]
+ self.anchor_mask = [3, 4, 5]
+ self.na = len(self.anchor_mask)
+ self.class_num = 80
+ self.ignore_thresh = 0.7
+ self.downsample_ratio = 16
+ self.x_shape = (self.b, len(self.anchor_mask) * (5 + self.class_num),
+ self.h, self.w)
+ self.gtbox_shape = (self.b, 40, 4)
+ self.gtscore = True
+ self.use_label_smooth = False
+ self.scale_x_y = 1.2
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/paddlers/models/ppdet/modeling/transformers/deformable_transformer.py b/paddlers/models/ppdet/modeling/transformers/deformable_transformer.py
index 76ff1bb..5c79004 100644
--- a/paddlers/models/ppdet/modeling/transformers/deformable_transformer.py
+++ b/paddlers/models/ppdet/modeling/transformers/deformable_transformer.py
@@ -13,7 +13,7 @@
# limitations under the License.
#
# Modified from Deformable-DETR (https://github.com/fundamentalvision/Deformable-DETR)
-# Copyright (c) 2022 SenseTime. All Rights Reserved.
+# Copyright (c) 2020 SenseTime. All Rights Reserved.
from __future__ import absolute_import
from __future__ import division
diff --git a/paddlers/models/ppdet/optimizer/__init__.py b/paddlers/models/ppdet/optimizer/__init__.py
new file mode 100644
index 0000000..6173792
--- /dev/null
+++ b/paddlers/models/ppdet/optimizer/__init__.py
@@ -0,0 +1,16 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .optimizer import *
+from .ema import ModelEMA
diff --git a/paddlers/models/ppdet/optimizer/adamw.py b/paddlers/models/ppdet/optimizer/adamw.py
new file mode 100644
index 0000000..821135d
--- /dev/null
+++ b/paddlers/models/ppdet/optimizer/adamw.py
@@ -0,0 +1,244 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from paddle.optimizer import AdamW
+from functools import partial
+import re
+
+
+def layerwise_lr_decay(decay_rate, name_dict, n_layers, param):
+ """
+ Args:
+ decay_rate (float):
+ The layer-wise decay ratio.
+        name_dict (dict):
+            The keys of name_dict are the model's dynamic parameter names
+            and the values are the static names.
+            Use model.named_parameters() to build name_dict.
+        n_layers (int):
+            Total number of layers in the transformer encoder.
+        param (Tensor):
+            The parameter whose `optimize_attr['learning_rate']` is scaled
+            in place.
+ """
+ ratio = 1.0
+ static_name = name_dict[param.name]
+ if 'blocks.' in static_name or 'layers.' in static_name:
+ idx_1 = static_name.find('blocks.')
+ idx_2 = static_name.find('layers.')
+        assert any([x >= 0 for x in [idx_1, idx_2]]), \
+            'expected "blocks." or "layers." in the static parameter name'
+ idx = idx_1 if idx_1 >= 0 else idx_2
+ # idx = re.findall('[blocks|layers]\.(\d+)\.', static_name)[0]
+
+ layer = int(static_name[idx:].split('.')[1])
+ ratio = decay_rate**(n_layers - layer)
+
+ elif 'cls_token' in static_name or 'patch_embed' in static_name:
+ ratio = decay_rate**(n_layers + 1)
+
+ param.optimize_attr['learning_rate'] *= ratio
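+
+# Example (hypothetical names): with decay_rate=0.8 and n_layers=12, a
+# parameter whose static name contains 'blocks.10.' gets
+# ratio = 0.8 ** (12 - 10) = 0.64, while 'patch_embed' weights get
+# ratio = 0.8 ** 13, i.e. the earliest layers receive the smallest rates.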
+
+
+class AdamWDL(AdamW):
+ r"""
+    The AdamWDL optimizer is based on AdamW with a dynamic learning rate
+    setting. It is generally used for transformer models.
+
+    We use "layerwise_lr_decay" as the default dynamic lr setting method of AdamWDL.
+ “Layer-wise decay” means exponentially decaying the learning rates of individual
+ layers in a top-down manner. For example, suppose the 24-th layer uses a learning
+ rate l, and the Layer-wise decay rate is α, then the learning rate of layer m
+ is lα^(24-m). See more details on: https://arxiv.org/abs/1906.08237.
+
+ .. math::
+ & t = t + 1
+
+ & moment\_1\_out = {\beta}_1 * moment\_1 + (1 - {\beta}_1) * grad
+
+ & moment\_2\_out = {\beta}_2 * moment\_2 + (1 - {\beta}_2) * grad * grad
+
+ & learning\_rate = learning\_rate * \frac{\sqrt{1 - {\beta}_2^t}}{1 - {\beta}_1^t}
+
+ & param\_out = param - learning\_rate * (\frac{moment\_1}{\sqrt{moment\_2} + \epsilon} + \lambda * param)
+
+ Args:
+ learning_rate (float|LRScheduler, optional): The learning rate used to update ``Parameter``.
+ It can be a float value or a LRScheduler. The default value is 0.001.
+ beta1 (float, optional): The exponential decay rate for the 1st moment estimates.
+ It should be a float number or a Tensor with shape [1] and data type as float32.
+ The default value is 0.9.
+ beta2 (float, optional): The exponential decay rate for the 2nd moment estimates.
+ It should be a float number or a Tensor with shape [1] and data type as float32.
+ The default value is 0.999.
+ epsilon (float, optional): A small float value for numerical stability.
+ It should be a float number or a Tensor with shape [1] and data type as float32.
+ The default value is 1e-08.
+ parameters (list|tuple, optional): List/Tuple of ``Tensor`` to update to minimize ``loss``. \
+ This parameter is required in dygraph mode. \
+ The default value is None in static mode, at this time all parameters will be updated.
+ weight_decay (float, optional): The weight decay coefficient, it can be float or Tensor. The default value is 0.01.
+ apply_decay_param_fun (function|None, optional): If it is not None,
+ only tensors that makes apply_decay_param_fun(Tensor.name)==True
+ will be updated. It only works when we want to specify tensors.
+ Default: None.
+        grad_clip (GradientClipBase, optional): Gradient clipping strategy; it is an instance of
+            some derived class of ``GradientClipBase`` . There are three clipping strategies
+ ( :ref:`api_fluid_clip_GradientClipByGlobalNorm` , :ref:`api_fluid_clip_GradientClipByNorm` ,
+ :ref:`api_fluid_clip_GradientClipByValue` ). Default None, meaning there is no gradient clipping.
+        lazy_mode (bool, optional): The official Adam algorithm has two moving-average accumulators.
+            The accumulators are updated at every step. Every element of the two moving averages
+            is updated in both dense mode and sparse mode. If the parameter is very large,
+            the update may be very slow. Lazy mode updates only the elements that have
+            gradients in the current mini-batch, so it is much faster. But this mode has
+            different semantics from the original Adam algorithm and may lead to different
+            results. The default value is False.
+ multi_precision (bool, optional): Whether to use multi-precision during weight updating. Default is false.
+ layerwise_decay (float, optional): The layer-wise decay ratio. Defaults to 1.0.
+ n_layers (int, optional): The total number of encoder layers. Defaults to 12.
+        set_param_lr_func (function|None, optional): If it is not None, set_param_lr_func() will set the parameter
+            learning rate before the Adam operator executes. Default: None; :ref:`layerwise_lr_decay` is typically used.
+        name_dict (dict, optional): The keys of name_dict are the model's dynamic parameter names
+            and the values are the static names. Use model.named_parameters() to build name_dict.
+ name (str, optional): Normally there is no need for user to set this property.
+ For more information, please refer to :ref:`api_guide_Name`.
+ The default value is None.
+
+ Examples:
+ .. code-block:: python
+
+ import paddle
+ from paddlenlp.ops.optimizer import AdamWDL
+ def simple_lr_setting(decay_rate, name_dict, n_layers, param):
+ ratio = 1.0
+ static_name = name_dict[param.name]
+ if "weight" in static_name:
+ ratio = decay_rate**0.5
+ param.optimize_attr["learning_rate"] *= ratio
+
+ linear = paddle.nn.Linear(10, 10)
+
+ name_dict = dict()
+ for n, p in linear.named_parameters():
+ name_dict[p.name] = n
+
+ inp = paddle.rand([10,10], dtype="float32")
+ out = linear(inp)
+ loss = paddle.mean(out)
+
+ adamwdl = AdamWDL(
+ learning_rate=1e-4,
+ parameters=linear.parameters(),
+                set_param_lr_func=simple_lr_setting,
+ layerwise_decay=0.8,
+ name_dict=name_dict)
+
+ loss.backward()
+ adamwdl.step()
+ adamwdl.clear_grad()
+ """
+
+ def __init__(self,
+ learning_rate=0.001,
+ beta1=0.9,
+ beta2=0.999,
+ epsilon=1e-8,
+ parameters=None,
+ weight_decay=0.01,
+ apply_decay_param_fun=None,
+ grad_clip=None,
+ lazy_mode=False,
+ multi_precision=False,
+ layerwise_decay=1.0,
+ n_layers=12,
+ set_param_lr_func=None,
+ name_dict=None,
+ name=None):
+ if not isinstance(layerwise_decay, float):
+            raise TypeError("layerwise_decay should be a float.")
+ self.layerwise_decay = layerwise_decay
+ self.n_layers = n_layers
+ self.set_param_lr_func = partial(
+ set_param_lr_func, layerwise_decay, name_dict,
+ n_layers) if set_param_lr_func is not None else set_param_lr_func
+ super(AdamWDL, self).__init__(
+ learning_rate=learning_rate,
+ parameters=parameters,
+ beta1=beta1,
+ beta2=beta2,
+ epsilon=epsilon,
+ grad_clip=grad_clip,
+ name=name,
+ apply_decay_param_fun=apply_decay_param_fun,
+ weight_decay=weight_decay,
+ lazy_mode=lazy_mode,
+ multi_precision=multi_precision)
+
+ def _append_optimize_op(self, block, param_and_grad):
+ if self.set_param_lr_func is None:
+ return super(AdamWDL, self)._append_optimize_op(block,
+ param_and_grad)
+
+ self._append_decoupled_weight_decay(block, param_and_grad)
+ prev_lr = param_and_grad[0].optimize_attr["learning_rate"]
+ self.set_param_lr_func(param_and_grad[0])
+        # execute the Adam op
+ res = super(AdamW, self)._append_optimize_op(block, param_and_grad)
+ param_and_grad[0].optimize_attr["learning_rate"] = prev_lr
+ return res
+
+
+def build_adamwdl(model,
+ lr=1e-4,
+ weight_decay=0.05,
+ betas=(0.9, 0.999),
+ layer_decay=0.65,
+ num_layers=None,
+ filter_bias_and_bn=True,
+ skip_decay_names=None,
+ set_param_lr_func='layerwise_lr_decay'):
+
+    decay_dict = None  # only built when skip_decay_names filtering applies
+ if skip_decay_names and filter_bias_and_bn:
+ decay_dict = {
+ param.name: not (len(param.shape) == 1 or name.endswith('.bias') or
+ any([_n in name for _n in skip_decay_names]))
+ for name, param in model.named_parameters()
+ }
+ parameters = [p for p in model.parameters()]
+
+ else:
+ parameters = model.parameters()
+
+ opt_args = dict(
+ parameters=parameters, learning_rate=lr, weight_decay=weight_decay)
+
+ if decay_dict is not None:
+ opt_args['apply_decay_param_fun'] = lambda n: decay_dict[n]
+
+ if isinstance(set_param_lr_func, str):
+ set_param_lr_func = eval(set_param_lr_func)
+ opt_args['set_param_lr_func'] = set_param_lr_func
+
+ opt_args['beta1'] = betas[0]
+ opt_args['beta2'] = betas[1]
+
+ opt_args['layerwise_decay'] = layer_decay
+ name_dict = {p.name: n for n, p in model.named_parameters()}
+
+ opt_args['name_dict'] = name_dict
+ opt_args['n_layers'] = num_layers
+
+ optimizer = AdamWDL(**opt_args)
+
+ return optimizer
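+
+# Usage sketch (illustrative; assumes a ViT-style `model` with 12 encoder
+# layers):
+#
+#     opt = build_adamwdl(model, lr=1e-4, layer_decay=0.65, num_layers=12,
+#                         skip_decay_names=['norm', 'pos_embed'])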
diff --git a/paddlers/models/ppdet/optimizer/ema.py b/paddlers/models/ppdet/optimizer/ema.py
new file mode 100644
index 0000000..927d357
--- /dev/null
+++ b/paddlers/models/ppdet/optimizer/ema.py
@@ -0,0 +1,136 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+import paddle
+import weakref
+
+
+class ModelEMA(object):
+ """
+    Exponential Weighted Average for Deep Neural Networks
+ Args:
+        model (nn.Layer): The detection model.
+        decay (float): The decay used for updating the ema parameters.
+            Ema's parameters are updated with the formula:
+            `ema_param = decay * ema_param + (1 - decay) * cur_param`.
+            Default is 0.9998.
+        ema_decay_type (str): type in ['threshold', 'normal', 'exponential'],
+            'threshold' as default.
+        cycle_epoch (int): The interval (in epochs) at which to reset
+            ema_param and step. Default is -1, which means no reset. It adds
+            a regularization effect to EMA; the value is set empirically and
+            is effective when the total number of training epochs is large.
+ ema_black_list (set|list|tuple, optional): The custom EMA black_list.
+ Blacklist of weight names that will not participate in EMA
+ calculation. Default: None.
+ """
+
+ def __init__(self,
+ model,
+ decay=0.9998,
+ ema_decay_type='threshold',
+ cycle_epoch=-1,
+ ema_black_list=None):
+ self.step = 0
+ self.epoch = 0
+ self.decay = decay
+ self.ema_decay_type = ema_decay_type
+ self.cycle_epoch = cycle_epoch
+ self.ema_black_list = self._match_ema_black_list(
+ model.state_dict().keys(), ema_black_list)
+ self.state_dict = dict()
+ for k, v in model.state_dict().items():
+ if k in self.ema_black_list:
+ self.state_dict[k] = v
+ else:
+ self.state_dict[k] = paddle.zeros_like(v)
+
+ self._model_state = {
+ k: weakref.ref(p)
+ for k, p in model.state_dict().items()
+ }
+
+ def reset(self):
+ self.step = 0
+ self.epoch = 0
+ for k, v in self.state_dict.items():
+ if k in self.ema_black_list:
+ self.state_dict[k] = v
+ else:
+ self.state_dict[k] = paddle.zeros_like(v)
+
+ def resume(self, state_dict, step=0):
+ for k, v in state_dict.items():
+ if k in self.state_dict:
+ if self.state_dict[k].dtype == v.dtype:
+ self.state_dict[k] = v
+ else:
+ self.state_dict[k] = v.astype(self.state_dict[k].dtype)
+ self.step = step
+
+ def update(self, model=None):
+ if self.ema_decay_type == 'threshold':
+ decay = min(self.decay, (1 + self.step) / (10 + self.step))
+ elif self.ema_decay_type == 'exponential':
+ decay = self.decay * (1 - math.exp(-(self.step + 1) / 2000))
+ else:
+ decay = self.decay
+ self._decay = decay
+
+ if model is not None:
+ model_dict = model.state_dict()
+ else:
+ model_dict = {k: p() for k, p in self._model_state.items()}
+        assert all([v is not None for _, v in model_dict.items()]), \
+            'Model weights were garbage-collected; keep a reference to the model alive.'
+
+ for k, v in self.state_dict.items():
+ if k not in self.ema_black_list:
+ v = decay * v + (1 - decay) * model_dict[k]
+ v.stop_gradient = True
+ self.state_dict[k] = v
+ self.step += 1
+
+ def apply(self):
+ if self.step == 0:
+ return self.state_dict
+ state_dict = dict()
+ for k, v in self.state_dict.items():
+ if k in self.ema_black_list:
+ v.stop_gradient = True
+ state_dict[k] = v
+ else:
+ if self.ema_decay_type != 'exponential':
+ v = v / (1 - self._decay**self.step)
+ v.stop_gradient = True
+ state_dict[k] = v
+ self.epoch += 1
+ if self.cycle_epoch > 0 and self.epoch == self.cycle_epoch:
+ self.reset()
+
+ return state_dict
+
+ def _match_ema_black_list(self, weight_name, ema_black_list=None):
+ out_list = set()
+ if ema_black_list:
+ for name in weight_name:
+ for key in ema_black_list:
+ if key in name:
+ out_list.add(name)
+ return out_list
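+
+# Usage sketch (illustrative; assumes a `model`, an `optimizer` and a data
+# `loader` exist):
+#
+#     ema = ModelEMA(model, decay=0.9998, ema_decay_type='threshold')
+#     for data in loader:
+#         loss = model(data)['loss']
+#         loss.backward()
+#         optimizer.step()
+#         optimizer.clear_grad()
+#         ema.update()  # refresh the shadow weights after each step
+#     # bias-corrected weights for evaluation (snapshot the raw weights first
+#     # if training continues afterwards)
+#     model.set_state_dict(ema.apply())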
diff --git a/paddlers/models/ppdet/optimizer.py b/paddlers/models/ppdet/optimizer/optimizer.py
similarity index 59%
rename from paddlers/models/ppdet/optimizer.py
rename to paddlers/models/ppdet/optimizer/optimizer.py
index c13df2d..7566dd8 100644
--- a/paddlers/models/ppdet/optimizer.py
+++ b/paddlers/models/ppdet/optimizer/optimizer.py
@@ -16,6 +16,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
+import sys
import math
import paddle
import paddle.nn as nn
@@ -24,6 +25,9 @@ import paddle.optimizer as optimizer
import paddle.regularizer as regularizer
from paddlers.models.ppdet.core.workspace import register, serializable
+import copy
+
+from .adamw import AdamWDL, build_adamwdl
__all__ = ['LearningRate', 'OptimizerBuilder']
@@ -40,12 +44,21 @@ class CosineDecay(object):
max_epochs (int): max epochs for the training process.
if you commbine cosine decay with warmup, it is recommended that
the max_iters is much larger than the warmup iter
+ use_warmup (bool): whether to use warmup. Default: True.
+ min_lr_ratio (float): minimum learning rate ratio. Default: 0.
+ last_plateau_epochs (int): use minimum learning rate in
+ the last few epochs. Default: 0.
"""
- def __init__(self, max_epochs=1000, use_warmup=True, eta_min=0):
+ def __init__(self,
+ max_epochs=1000,
+ use_warmup=True,
+ min_lr_ratio=0.,
+ last_plateau_epochs=0):
self.max_epochs = max_epochs
self.use_warmup = use_warmup
- self.eta_min = eta_min
+ self.min_lr_ratio = min_lr_ratio
+ self.last_plateau_epochs = last_plateau_epochs
def __call__(self,
base_lr=None,
@@ -55,20 +68,38 @@ class CosineDecay(object):
assert base_lr is not None, "either base LR or values should be provided"
max_iters = self.max_epochs * int(step_per_epoch)
-
+ last_plateau_iters = self.last_plateau_epochs * int(step_per_epoch)
+ min_lr = base_lr * self.min_lr_ratio
if boundary is not None and value is not None and self.use_warmup:
+ # use warmup
warmup_iters = len(boundary)
for i in range(int(boundary[-1]), max_iters):
boundary.append(i)
-
- decayed_lr = base_lr * 0.5 * (math.cos(
- (i - warmup_iters) * math.pi /
- (max_iters - warmup_iters)) + 1)
- value.append(decayed_lr)
+ if i < max_iters - last_plateau_iters:
+ decayed_lr = min_lr + (base_lr - min_lr) * 0.5 * (math.cos(
+ (i - warmup_iters) * math.pi /
+ (max_iters - warmup_iters - last_plateau_iters)) + 1)
+ value.append(decayed_lr)
+ else:
+ value.append(min_lr)
+ return optimizer.lr.PiecewiseDecay(boundary, value)
+ elif last_plateau_iters > 0:
+ # not use warmup, but set `last_plateau_epochs` > 0
+ boundary = []
+ value = []
+ for i in range(max_iters):
+ if i < max_iters - last_plateau_iters:
+ decayed_lr = min_lr + (base_lr - min_lr) * 0.5 * (math.cos(
+ i * math.pi / (max_iters - last_plateau_iters)) + 1)
+ value.append(decayed_lr)
+ else:
+ value.append(min_lr)
+ if i > 0:
+ boundary.append(i)
return optimizer.lr.PiecewiseDecay(boundary, value)
return optimizer.lr.CosineAnnealingDecay(
- base_lr, T_max=max_iters, eta_min=self.eta_min)
+ base_lr, T_max=max_iters, eta_min=min_lr)
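+
+# Schedule sketch (illustrative numbers): with base_lr=0.01, min_lr_ratio=0.1
+# and last_plateau_epochs=20 out of max_epochs=300, the lr follows a cosine
+# from 0.01 down to min_lr=0.001 over the first 280 epochs, then holds at
+# 0.001 for the final 20 epochs.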
@serializable
@@ -130,19 +161,25 @@ class LinearWarmup(object):
Args:
steps (int): warm up steps
start_factor (float): initial learning rate factor
+ epochs (int|None): use epochs as warm up steps, the priority
+ of `epochs` is higher than `steps`. Default: None.
"""
- def __init__(self, steps=500, start_factor=1. / 3):
+ def __init__(self, steps=500, start_factor=1. / 3, epochs=None):
super(LinearWarmup, self).__init__()
self.steps = steps
self.start_factor = start_factor
+ self.epochs = epochs
def __call__(self, base_lr, step_per_epoch):
boundary = []
value = []
- for i in range(self.steps + 1):
- if self.steps > 0:
- alpha = i / self.steps
+ warmup_steps = self.epochs * step_per_epoch \
+ if self.epochs is not None else self.steps
+ warmup_steps = max(warmup_steps, 1)
+ for i in range(warmup_steps + 1):
+ if warmup_steps > 0:
+ alpha = i / warmup_steps
factor = self.start_factor * (1 - alpha) + alpha
lr = base_lr * factor
value.append(lr)
@@ -152,25 +189,30 @@ class LinearWarmup(object):
@serializable
-class BurninWarmup(object):
+class ExpWarmup(object):
"""
- Warm up learning rate in burnin mode
+ Warm up learning rate in exponential mode
Args:
- steps (int): warm up steps
+ steps (int): warm up steps.
+ epochs (int|None): use epochs as warm up steps, the priority
+ of `epochs` is higher than `steps`. Default: None.
+ power (int): Exponential coefficient. Default: 2.
"""
- def __init__(self, steps=1000):
- super(BurninWarmup, self).__init__()
+ def __init__(self, steps=1000, epochs=None, power=2):
+ super(ExpWarmup, self).__init__()
self.steps = steps
+ self.epochs = epochs
+ self.power = power
def __call__(self, base_lr, step_per_epoch):
boundary = []
value = []
- burnin = min(self.steps, step_per_epoch)
- for i in range(burnin + 1):
- factor = (i * 1.0 / burnin)**4
- lr = base_lr * factor
- value.append(lr)
+ warmup_steps = self.epochs * step_per_epoch if self.epochs is not None else self.steps
+ warmup_steps = max(warmup_steps, 1)
+ for i in range(warmup_steps + 1):
+ factor = (i / float(warmup_steps))**self.power
+ value.append(base_lr * factor)
if i > 0:
boundary.append(i)
return boundary, value
@@ -192,7 +234,18 @@ class LearningRate(object):
schedulers=[PiecewiseDecay(), LinearWarmup()]):
super(LearningRate, self).__init__()
self.base_lr = base_lr
- self.schedulers = schedulers
+ self.schedulers = []
+
+ schedulers = copy.deepcopy(schedulers)
+ for sched in schedulers:
+ if isinstance(sched, dict):
+ # support dict sched instantiate
+ module = sys.modules[__name__]
+ type = sched.pop("name")
+ scheduler = getattr(module, type)(**sched)
+ self.schedulers.append(scheduler)
+ else:
+ self.schedulers.append(sched)
def __call__(self, step_per_epoch):
assert len(self.schedulers) >= 1
@@ -245,93 +298,53 @@ class OptimizerBuilder():
optim_args = self.optimizer.copy()
optim_type = optim_args['type']
del optim_args['type']
+
+ if optim_type == 'AdamWDL':
+ return build_adamwdl(model, lr=learning_rate, **optim_args)
+
if optim_type != 'AdamW':
optim_args['weight_decay'] = regularization
+
op = getattr(optimizer, optim_type)
- if 'without_weight_decay_params' in optim_args:
- keys = optim_args['without_weight_decay_params']
- params = [{
- 'params': [
- p for n, p in model.named_parameters()
- if any([k in n for k in keys])
- ],
- 'weight_decay': 0.
- }, {
- 'params': [
- p for n, p in model.named_parameters()
- if all([k not in n for k in keys])
- ]
- }]
- del optim_args['without_weight_decay_params']
+ if 'param_groups' in optim_args:
+            assert isinstance(optim_args['param_groups'],
+                              list), 'param_groups must be a list'
+
+ param_groups = optim_args.pop('param_groups')
+
+ params, visited = [], []
+ for group in param_groups:
+                assert isinstance(group, dict) and 'params' in group and \
+                    isinstance(group['params'], list), \
+                    'each group must be a dict containing a list under "params"'
+ _params = {
+ n: p
+ for n, p in model.named_parameters()
+ if any([k in n
+ for k in group['params']]) and p.trainable is True
+ }
+ _group = group.copy()
+ _group.update({'params': list(_params.values())})
+
+ params.append(_group)
+ visited.extend(list(_params.keys()))
+
+ ext_params = [
+ p for n, p in model.named_parameters()
+ if n not in visited and p.trainable is True
+ ]
+
+ if len(ext_params) < len(model.parameters()):
+ params.append({'params': ext_params})
+
+ elif len(ext_params) > len(model.parameters()):
+                raise RuntimeError(
+                    'param_groups matched more parameters than the model owns')
+
else:
- params = model.parameters()
+ _params = model.parameters()
+ params = [param for param in _params if param.trainable is True]
return op(learning_rate=learning_rate,
parameters=params,
grad_clip=grad_clip,
**optim_args)
-
-
-class ModelEMA(object):
- """
- Exponential Weighted Average for Deep Neutal Networks
- Args:
- model (nn.Layer): Detector of model.
- decay (int): The decay used for updating ema parameter.
- Ema's parameter are updated with the formula:
- `ema_param = decay * ema_param + (1 - decay) * cur_param`.
- Defaults is 0.9998.
- use_thres_step (bool): Whether set decay by thres_step or not
- cycle_epoch (int): The epoch of interval to reset ema_param and
- step. Defaults is -1, which means not reset. Its function is to
- add a regular effect to ema, which is set according to experience
- and is effective when the total training epoch is large.
- """
-
- def __init__(self,
- model,
- decay=0.9998,
- use_thres_step=False,
- cycle_epoch=-1):
- self.step = 0
- self.epoch = 0
- self.decay = decay
- self.state_dict = dict()
- for k, v in model.state_dict().items():
- self.state_dict[k] = paddle.zeros_like(v)
- self.use_thres_step = use_thres_step
- self.cycle_epoch = cycle_epoch
-
- def reset(self):
- self.step = 0
- self.epoch = 0
- for k, v in self.state_dict.items():
- self.state_dict[k] = paddle.zeros_like(v)
-
- def update(self, model):
- if self.use_thres_step:
- decay = min(self.decay, (1 + self.step) / (10 + self.step))
- else:
- decay = self.decay
- self._decay = decay
- model_dict = model.state_dict()
- for k, v in self.state_dict.items():
- v = decay * v + (1 - decay) * model_dict[k]
- v.stop_gradient = True
- self.state_dict[k] = v
- self.step += 1
-
- def apply(self):
- if self.step == 0:
- return self.state_dict
- state_dict = dict()
- for k, v in self.state_dict.items():
- v = v / (1 - self._decay**self.step)
- v.stop_gradient = True
- state_dict[k] = v
- self.epoch += 1
- if self.cycle_epoch > 0 and self.epoch == self.cycle_epoch:
- self.reset()
-
- return state_dict
diff --git a/paddlers/models/ppdet/slim/__init__.py b/paddlers/models/ppdet/slim/__init__.py
index 1cc1541..6a3f210 100644
--- a/paddlers/models/ppdet/slim/__init__.py
+++ b/paddlers/models/ppdet/slim/__init__.py
@@ -21,6 +21,7 @@ from .prune import *
from .quant import *
from .distill import *
from .unstructured_prune import *
+from .ofa import *
import yaml
from paddlers.models.ppdet.core.workspace import load_config
@@ -34,8 +35,21 @@ def build_slim_model(cfg, slim_cfg, mode='train'):
return cfg
if slim_load_cfg['slim'] == 'Distill':
- model = DistillModel(cfg, slim_cfg)
+ if "slim_method" in slim_load_cfg and slim_load_cfg[
+ 'slim_method'] == "FGD":
+ model = FGDDistillModel(cfg, slim_cfg)
+ else:
+ model = DistillModel(cfg, slim_cfg)
cfg['model'] = model
+ cfg['slim_type'] = cfg.slim
+ elif slim_load_cfg['slim'] == 'OFA':
+ load_config(slim_cfg)
+ model = create(cfg.architecture)
+ load_pretrain_weight(model, cfg.weights)
+ slim = create(cfg.slim)
+ cfg['slim'] = slim
+ cfg['model'] = slim(model, model.state_dict())
+ cfg['slim_type'] = cfg.slim
elif slim_load_cfg['slim'] == 'DistillPrune':
if mode == 'train':
model = DistillModel(cfg, slim_cfg)
@@ -55,9 +69,9 @@ def build_slim_model(cfg, slim_cfg, mode='train'):
load_config(slim_cfg)
load_pretrain_weight(model, cfg.weights)
slim = create(cfg.slim)
- cfg['slim_type'] = cfg.slim
- cfg['model'] = slim(model)
cfg['slim'] = slim
+ cfg['model'] = slim(model)
+ cfg['slim_type'] = cfg.slim
elif slim_load_cfg['slim'] == 'UnstructuredPruner':
load_config(slim_cfg)
slim = create(cfg.slim)
@@ -72,7 +86,7 @@ def build_slim_model(cfg, slim_cfg, mode='train'):
slim = create(cfg.slim)
cfg['slim_type'] = cfg.slim
# TODO: fix quant export model in framework.
- if mode == 'test' and slim_load_cfg['slim'] == 'QAT':
+ if mode == 'test' and 'QAT' in slim_load_cfg['slim']:
slim.quant_config['activation_preprocess_type'] = None
cfg['model'] = slim(model)
cfg['slim'] = slim
diff --git a/paddlers/models/ppdet/slim/distill.py b/paddlers/models/ppdet/slim/distill.py
index 2562363..9fb29b1 100644
--- a/paddlers/models/ppdet/slim/distill.py
+++ b/paddlers/models/ppdet/slim/distill.py
@@ -19,6 +19,7 @@ from __future__ import print_function
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
+from paddle import ParamAttr
from paddlers.models.ppdet.core.workspace import register, create, load_config
from paddlers.models.ppdet.modeling import ops
@@ -63,6 +64,111 @@ class DistillModel(nn.Layer):
return self.student_model(inputs)
+class FGDDistillModel(nn.Layer):
+ """
+ Build FGD distill model.
+ Args:
+ cfg: The student config.
+ slim_cfg: The teacher and distill config.
+ """
+
+ def __init__(self, cfg, slim_cfg):
+ super(FGDDistillModel, self).__init__()
+
+ self.is_inherit = True
+ # build student model before load slim config
+ self.student_model = create(cfg.architecture)
+ self.arch = cfg.architecture
+ stu_pretrain = cfg['pretrain_weights']
+ slim_cfg = load_config(slim_cfg)
+ self.teacher_cfg = slim_cfg
+ self.loss_cfg = slim_cfg
+
+ self.teacher_model = create(self.teacher_cfg.architecture)
+ self.teacher_model.eval()
+
+ for param in self.teacher_model.parameters():
+ param.trainable = False
+
+ if 'pretrain_weights' in cfg and stu_pretrain:
+ if self.is_inherit and 'pretrain_weights' in self.teacher_cfg and self.teacher_cfg.pretrain_weights:
+ load_pretrain_weight(self.student_model,
+ self.teacher_cfg.pretrain_weights)
+ logger.debug(
+ "Inheriting! loading teacher weights to student model!")
+
+ load_pretrain_weight(self.student_model, stu_pretrain)
+
+ if 'pretrain_weights' in self.teacher_cfg and self.teacher_cfg.pretrain_weights:
+ load_pretrain_weight(self.teacher_model,
+ self.teacher_cfg.pretrain_weights)
+
+ self.fgd_loss_dic = self.build_loss(
+ self.loss_cfg.distill_loss,
+ name_list=self.loss_cfg['distill_loss_name'])
+
+ def build_loss(self,
+ cfg,
+ name_list=[
+ 'neck_f_4', 'neck_f_3', 'neck_f_2', 'neck_f_1',
+ 'neck_f_0'
+ ]):
+ loss_func = dict()
+ for idx, k in enumerate(name_list):
+ loss_func[k] = create(cfg)
+ return loss_func
+
+ def forward(self, inputs):
+ if self.training:
+ s_body_feats = self.student_model.backbone(inputs)
+ s_neck_feats = self.student_model.neck(s_body_feats)
+
+ with paddle.no_grad():
+ t_body_feats = self.teacher_model.backbone(inputs)
+ t_neck_feats = self.teacher_model.neck(t_body_feats)
+
+ loss_dict = {}
+ for idx, k in enumerate(self.fgd_loss_dic):
+ loss_dict[k] = self.fgd_loss_dic[k](s_neck_feats[idx],
+ t_neck_feats[idx], inputs)
+ if self.arch == "RetinaNet":
+ loss = self.student_model.head(s_neck_feats, inputs)
+ elif self.arch == "PicoDet":
+ head_outs = self.student_model.head(
+ s_neck_feats, self.student_model.export_post_process)
+ loss_gfl = self.student_model.head.get_loss(head_outs, inputs)
+ total_loss = paddle.add_n(list(loss_gfl.values()))
+ loss = {}
+ loss.update(loss_gfl)
+ loss.update({'loss': total_loss})
+ else:
+ raise ValueError(f"Unsupported model {self.arch}")
+ for k in loss_dict:
+ loss['loss'] += loss_dict[k]
+ loss[k] = loss_dict[k]
+ return loss
+ else:
+ body_feats = self.student_model.backbone(inputs)
+ neck_feats = self.student_model.neck(body_feats)
+ head_outs = self.student_model.head(neck_feats)
+ if self.arch == "RetinaNet":
+ bbox, bbox_num = self.student_model.head.post_process(
+ head_outs, inputs['im_shape'], inputs['scale_factor'])
+ return {'bbox': bbox, 'bbox_num': bbox_num}
+ elif self.arch == "PicoDet":
+ head_outs = self.student_model.head(
+ neck_feats, self.student_model.export_post_process)
+ scale_factor = inputs['scale_factor']
+ bboxes, bbox_num = self.student_model.head.post_process(
+ head_outs,
+ scale_factor,
+ export_nms=self.student_model.export_nms)
+ return {'bbox': bboxes, 'bbox_num': bbox_num}
+ else:
+ raise ValueError(f"Unsupported model {self.arch}")
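+
+    # Training-flow note (descriptive): the FGD losses are computed per FPN
+    # level between student and teacher neck features and then added into the
+    # student's task loss dict under the keys from `fgd_loss_dic`
+    # ('neck_f_4' ... 'neck_f_0' by default).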
+
+
@register
class DistillYOLOv3Loss(nn.Layer):
def __init__(self, weight=1000):
@@ -107,3 +213,278 @@ class DistillYOLOv3Loss(nn.Layer):
loss = (distill_reg_loss + distill_cls_loss + distill_obj_loss
) * self.weight
return loss
+
+
+def parameter_init(mode="kaiming", value=0.):
+ if mode == "kaiming":
+ weight_attr = paddle.nn.initializer.KaimingUniform()
+ elif mode == "constant":
+ weight_attr = paddle.nn.initializer.Constant(value=value)
+ else:
+ weight_attr = paddle.nn.initializer.KaimingUniform()
+
+ weight_init = ParamAttr(initializer=weight_attr)
+ return weight_init
+
+
+@register
+class FGDFeatureLoss(nn.Layer):
+ """
+    The code is adapted from https://github.com/yzd-v/FGD/blob/master/mmdet/distillation/losses/fgd.py
+ Paddle version of `Focal and Global Knowledge Distillation for Detectors`
+
+ Args:
+        student_channels(int): The number of channels in the student's FPN feature map. Defaults to 256.
+        teacher_channels(int): The number of channels in the teacher's FPN feature map. Defaults to 256.
+ temp (float, optional): The temperature coefficient. Defaults to 0.5.
+ alpha_fgd (float, optional): The weight of fg_loss. Defaults to 0.001
+ beta_fgd (float, optional): The weight of bg_loss. Defaults to 0.0005
+ gamma_fgd (float, optional): The weight of mask_loss. Defaults to 0.001
+ lambda_fgd (float, optional): The weight of relation_loss. Defaults to 0.000005
+ """
+
+ def __init__(self,
+ student_channels=256,
+ teacher_channels=256,
+ temp=0.5,
+ alpha_fgd=0.001,
+ beta_fgd=0.0005,
+ gamma_fgd=0.001,
+ lambda_fgd=0.000005):
+ super(FGDFeatureLoss, self).__init__()
+ self.temp = temp
+ self.alpha_fgd = alpha_fgd
+ self.beta_fgd = beta_fgd
+ self.gamma_fgd = gamma_fgd
+ self.lambda_fgd = lambda_fgd
+
+ kaiming_init = parameter_init("kaiming")
+ zeros_init = parameter_init("constant", 0.0)
+
+ if student_channels != teacher_channels:
+ self.align = nn.Conv2D(
+ student_channels,
+ teacher_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ weight_attr=kaiming_init)
+ student_channels = teacher_channels
+ else:
+ self.align = None
+
+ self.conv_mask_s = nn.Conv2D(
+ student_channels, 1, kernel_size=1, weight_attr=kaiming_init)
+ self.conv_mask_t = nn.Conv2D(
+ teacher_channels, 1, kernel_size=1, weight_attr=kaiming_init)
+
+ self.stu_conv_block = nn.Sequential(
+ nn.Conv2D(
+ student_channels,
+ student_channels // 2,
+ kernel_size=1,
+ weight_attr=zeros_init),
+ nn.LayerNorm([student_channels // 2, 1, 1]),
+ nn.ReLU(),
+ nn.Conv2D(
+ student_channels // 2,
+ student_channels,
+ kernel_size=1,
+ weight_attr=zeros_init))
+ self.tea_conv_block = nn.Sequential(
+ nn.Conv2D(
+ teacher_channels,
+ teacher_channels // 2,
+ kernel_size=1,
+ weight_attr=zeros_init),
+ nn.LayerNorm([teacher_channels // 2, 1, 1]),
+ nn.ReLU(),
+ nn.Conv2D(
+ teacher_channels // 2,
+ teacher_channels,
+ kernel_size=1,
+ weight_attr=zeros_init))
+
+ def spatial_channel_attention(self, x, t=0.5):
+ shape = paddle.shape(x)
+ N, C, H, W = shape
+
+ _f = paddle.abs(x)
+ spatial_map = paddle.reshape(
+ paddle.mean(
+ _f, axis=1, keepdim=True) / t, [N, -1])
+ spatial_map = F.softmax(spatial_map, axis=1, dtype="float32") * H * W
+ spatial_att = paddle.reshape(spatial_map, [N, H, W])
+
+ channel_map = paddle.mean(
+ paddle.mean(
+ _f, axis=2, keepdim=False), axis=2, keepdim=False)
+ channel_att = F.softmax(channel_map / t, axis=1, dtype="float32") * C
+ return [spatial_att, channel_att]
+
+ def spatial_pool(self, x, mode="teacher"):
+ batch, channel, width, height = x.shape
+ x_copy = x
+ x_copy = paddle.reshape(x_copy, [batch, channel, height * width])
+ x_copy = x_copy.unsqueeze(1)
+ if mode.lower() == "student":
+ context_mask = self.conv_mask_s(x)
+ else:
+ context_mask = self.conv_mask_t(x)
+
+ context_mask = paddle.reshape(context_mask, [batch, 1, height * width])
+ context_mask = F.softmax(context_mask, axis=2)
+ context_mask = context_mask.unsqueeze(-1)
+ context = paddle.matmul(x_copy, context_mask)
+ context = paddle.reshape(context, [batch, channel, 1, 1])
+
+ return context
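+
+    # In effect, spatial_pool performs global-context pooling: a 1x1 conv
+    # scores every spatial position, softmax turns the scores into weights
+    # over H*W, and the weighted sum collapses the feature map into a single
+    # C-dim context vector per image.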
+
+ def mask_loss(self, stu_channel_att, tea_channel_att, stu_spatial_att,
+ tea_spatial_att):
+ def _func(a, b):
+ return paddle.sum(paddle.abs(a - b)) / len(a)
+
+ mask_loss = _func(stu_channel_att, tea_channel_att) + _func(
+ stu_spatial_att, tea_spatial_att)
+
+ return mask_loss
+
+ def feature_loss(self, stu_feature, tea_feature, Mask_fg, Mask_bg,
+ tea_channel_att, tea_spatial_att):
+
+ Mask_fg = Mask_fg.unsqueeze(axis=1)
+ Mask_bg = Mask_bg.unsqueeze(axis=1)
+
+ tea_channel_att = tea_channel_att.unsqueeze(axis=-1)
+ tea_channel_att = tea_channel_att.unsqueeze(axis=-1)
+
+ tea_spatial_att = tea_spatial_att.unsqueeze(axis=1)
+
+ fea_t = paddle.multiply(tea_feature, paddle.sqrt(tea_spatial_att))
+ fea_t = paddle.multiply(fea_t, paddle.sqrt(tea_channel_att))
+ fg_fea_t = paddle.multiply(fea_t, paddle.sqrt(Mask_fg))
+ bg_fea_t = paddle.multiply(fea_t, paddle.sqrt(Mask_bg))
+
+ fea_s = paddle.multiply(stu_feature, paddle.sqrt(tea_spatial_att))
+ fea_s = paddle.multiply(fea_s, paddle.sqrt(tea_channel_att))
+ fg_fea_s = paddle.multiply(fea_s, paddle.sqrt(Mask_fg))
+ bg_fea_s = paddle.multiply(fea_s, paddle.sqrt(Mask_bg))
+
+ fg_loss = F.mse_loss(fg_fea_s, fg_fea_t, reduction="sum") / len(Mask_fg)
+ bg_loss = F.mse_loss(bg_fea_s, bg_fea_t, reduction="sum") / len(Mask_bg)
+
+ return fg_loss, bg_loss
+
+ def relation_loss(self, stu_feature, tea_feature):
+ context_s = self.spatial_pool(stu_feature, "student")
+ context_t = self.spatial_pool(tea_feature, "teacher")
+
+ out_s = stu_feature + self.stu_conv_block(context_s)
+ out_t = tea_feature + self.tea_conv_block(context_t)
+
+ rela_loss = F.mse_loss(out_s, out_t, reduction="sum") / len(out_s)
+
+ return rela_loss
+
+ def mask_value(self, mask, xl, xr, yl, yr, value):
+ mask[xl:xr, yl:yr] = paddle.maximum(mask[xl:xr, yl:yr], value)
+ return mask
+
+ def forward(self, stu_feature, tea_feature, inputs):
+ """Forward function.
+ Args:
+ stu_feature(Tensor): Bs*C*H*W, student's feature map
+ tea_feature(Tensor): Bs*C*H*W, teacher's feature map
+ inputs: The inputs with gt bbox and input shape info.
+ """
+ assert stu_feature.shape[-2:] == tea_feature.shape[-2:], \
+ f'The shape of student feature {stu_feature.shape} and teacher feature {tea_feature.shape} should be the same.'
+ assert "gt_bbox" in inputs.keys() and "im_shape" in inputs.keys(
+ ), "ERROR! FGDFeatureLoss need gt_bbox and im_shape as inputs."
+ gt_bboxes = inputs['gt_bbox']
+ ins_shape = [
+ inputs['im_shape'][i] for i in range(inputs['im_shape'].shape[0])
+ ]
+
+ index_gt = []
+ for i in range(len(gt_bboxes)):
+ if gt_bboxes[i].size > 2:
+ index_gt.append(i)
+ # Only distill features for images that have labeled GT boxes.
+ if len(index_gt) != len(gt_bboxes):
+ index_gt_t = paddle.to_tensor(index_gt)
+ stu_feature = paddle.index_select(stu_feature, index_gt_t)
+ tea_feature = paddle.index_select(tea_feature, index_gt_t)
+
+ ins_shape = [ins_shape[c] for c in index_gt]
+ gt_bboxes = [gt_bboxes[c] for c in index_gt]
+ assert len(gt_bboxes) == tea_feature.shape[
+ 0], f"The number of selected GT boxes [{len(gt_bboxes)}] should match the first dim of the input tensor [{tea_feature.shape[0]}]."
+
+ if self.align is not None:
+ stu_feature = self.align(stu_feature)
+
+ N, C, H, W = stu_feature.shape
+
+ tea_spatial_att, tea_channel_att = self.spatial_channel_attention(
+ tea_feature, self.temp)
+ stu_spatial_att, stu_channel_att = self.spatial_channel_attention(
+ stu_feature, self.temp)
+
+ Mask_fg = paddle.zeros(tea_spatial_att.shape)
+ Mask_bg = paddle.ones_like(tea_spatial_att)
+ one_tmp = paddle.ones([*tea_spatial_att.shape[1:]])
+ zero_tmp = paddle.zeros([*tea_spatial_att.shape[1:]])
+ Mask_fg.stop_gradient = True
+ Mask_bg.stop_gradient = True
+ one_tmp.stop_gradient = True
+ zero_tmp.stop_gradient = True
+
+ wmin, wmax, hmin, hmax, area = [], [], [], [], []
+
+ for i in range(N):
+ tmp_box = paddle.ones_like(gt_bboxes[i])
+ tmp_box.stop_gradient = True
+ tmp_box[:, 0] = gt_bboxes[i][:, 0] / ins_shape[i][1] * W
+ tmp_box[:, 2] = gt_bboxes[i][:, 2] / ins_shape[i][1] * W
+ tmp_box[:, 1] = gt_bboxes[i][:, 1] / ins_shape[i][0] * H
+ tmp_box[:, 3] = gt_bboxes[i][:, 3] / ins_shape[i][0] * H
+
+ zero = paddle.zeros_like(tmp_box[:, 0], dtype="int32")
+ ones = paddle.ones_like(tmp_box[:, 2], dtype="int32")
+ zero.stop_gradient = True
+ ones.stop_gradient = True
+
+ wmin.append(
+ paddle.cast(paddle.floor(tmp_box[:, 0]), "int32").maximum(zero))
+ wmax.append(paddle.cast(paddle.ceil(tmp_box[:, 2]), "int32"))
+ hmin.append(
+ paddle.cast(paddle.floor(tmp_box[:, 1]), "int32").maximum(zero))
+ hmax.append(paddle.cast(paddle.ceil(tmp_box[:, 3]), "int32"))
+
+ area_recip = 1.0 / (
+ hmax[i].reshape([1, -1]) + 1 - hmin[i].reshape([1, -1])) / (
+ wmax[i].reshape([1, -1]) + 1 - wmin[i].reshape([1, -1]))
+
+ for j in range(len(gt_bboxes[i])):
+ Mask_fg[i] = self.mask_value(Mask_fg[i], hmin[i][j],
+ hmax[i][j] + 1, wmin[i][j],
+ wmax[i][j] + 1, area_recip[0][j])
+
+ Mask_bg[i] = paddle.where(Mask_fg[i] > zero_tmp, zero_tmp, one_tmp)
+
+ if paddle.sum(Mask_bg[i]):
+ Mask_bg[i] /= paddle.sum(Mask_bg[i])
+
+ fg_loss, bg_loss = self.feature_loss(stu_feature, tea_feature, Mask_fg,
+ Mask_bg, tea_channel_att,
+ tea_spatial_att)
+ mask_loss = self.mask_loss(stu_channel_att, tea_channel_att,
+ stu_spatial_att, tea_spatial_att)
+ rela_loss = self.relation_loss(stu_feature, tea_feature)
+
+ loss = self.alpha_fgd * fg_loss + self.beta_fgd * bg_loss \
+ + self.gamma_fgd * mask_loss + self.lambda_fgd * rela_loss
+
+ return loss
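For orientation, a minimal usage sketch of the loss above. The constructor lives earlier in this file, so the argument names here are assumptions inferred from the attributes referenced in forward() (temp, alpha_fgd, beta_fgd, gamma_fgd, lambda_fgd); treat this as illustrative rather than the real signature.

```python
import paddle

# Hypothetical instantiation; argument names and values are assumed.
loss_fn = FGDFeatureLoss(
    student_channels=256, teacher_channels=256, temp=0.5,
    alpha_fgd=0.001, beta_fgd=0.0005, gamma_fgd=0.001, lambda_fgd=5e-6)

stu_feat = paddle.randn([2, 256, 32, 32])  # student FPN level, NCHW
tea_feat = paddle.randn([2, 256, 32, 32])  # teacher FPN level, NCHW
inputs = {
    # one [num_gt, 4] (x1, y1, x2, y2) box tensor per image
    'gt_bbox': [paddle.to_tensor([[10., 20., 100., 120.]]),
                paddle.to_tensor([[5., 5., 60., 80.]])],
    # per-image (height, width) of the network input
    'im_shape': paddle.to_tensor([[256., 256.], [256., 256.]]),
}
loss = loss_fn(stu_feat, tea_feat, inputs)  # scalar distillation loss
```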
diff --git a/paddlers/models/ppdet/slim/ofa.py b/paddlers/models/ppdet/slim/ofa.py
new file mode 100644
index 0000000..8e6f942
--- /dev/null
+++ b/paddlers/models/ppdet/slim/ofa.py
@@ -0,0 +1,89 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+from paddlers.models.ppdet.core.workspace import load_config, merge_config, create
+from paddlers.models.ppdet.utils.checkpoint import load_weight, load_pretrain_weight
+from paddlers.models.ppdet.utils.logger import setup_logger
+from paddlers.models.ppdet.core.workspace import register, serializable
+
+from paddle.utils import try_import
+
+logger = setup_logger(__name__)
+
+
+@register
+@serializable
+class OFA(object):
+ def __init__(self, ofa_config):
+ super(OFA, self).__init__()
+ self.ofa_config = ofa_config
+
+ def __call__(self, model, param_state_dict):
+
+ paddleslim = try_import('paddleslim')
+ from paddleslim.nas.ofa import OFA, RunConfig, utils
+ from paddleslim.nas.ofa.convert_super import Convert, supernet
+ task = self.ofa_config['task']
+ expand_ratio = self.ofa_config['expand_ratio']
+
+ skip_neck = self.ofa_config['skip_neck']
+ skip_head = self.ofa_config['skip_head']
+
+ run_config = self.ofa_config['RunConfig']
+ if 'skip_layers' in run_config:
+ skip_layers = run_config['skip_layers']
+ else:
+ skip_layers = []
+
+ # supernet config
+ sp_config = supernet(expand_ratio=expand_ratio)
+ # convert to supernet
+ model = Convert(sp_config).convert(model)
+
+ skip_names = []
+ if skip_neck:
+ skip_names.append('neck.')
+ if skip_head:
+ skip_names.append('head.')
+
+ for name, sublayer in model.named_sublayers():
+ for n in skip_names:
+ if n in name:
+ skip_layers.append(name)
+
+ run_config['skip_layers'] = skip_layers
+ run_config = RunConfig(**run_config)
+
+ # build ofa model
+ ofa_model = OFA(model, run_config=run_config)
+
+ ofa_model.set_epoch(0)
+ ofa_model.set_task(task)
+
+ input_spec = [{
+ "image": paddle.ones(
+ shape=[1, 3, 640, 640], dtype='float32'),
+ "im_shape": paddle.full(
+ [1, 2], 640, dtype='float32'),
+ "scale_factor": paddle.ones(
+ shape=[1, 2], dtype='float32')
+ }]
+
+ ofa_model._clear_search_space(input_spec=input_spec)
+ ofa_model._build_ss = True
+ check_ss = ofa_model._sample_config('expand_ratio', phase=None)
+ # tokenize the search space
+ ofa_model.tokenize()
+ # check token map, search cands and search space
+ logger.info('Token map is {}'.format(ofa_model.token_map))
+ logger.info('Search candidates are {}'.format(ofa_model.search_cands))
+ logger.info('The length of search_space is {}, search_space is {}'.
+ format(len(ofa_model._ofa_layers), ofa_model._ofa_layers))
+ # set model state dict into ofa model
+ utils.set_state_dict(ofa_model.model, param_state_dict)
+ return ofa_model
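A sketch of the ofa_config this class consumes. The top-level keys match what __call__ reads above; the RunConfig entries are illustrative placeholders for PaddleSlim's RunConfig, not values taken from this diff.

```python
ofa_config = {
    'task': 'expand_ratio',                  # passed to ofa_model.set_task()
    'expand_ratio': [0.25, 0.5, 0.75, 1.0],  # supernet expand ratios
    'skip_neck': True,                       # exclude 'neck.' sublayers
    'skip_head': True,                       # exclude 'head.' sublayers
    'RunConfig': {
        'skip_layers': [],                   # optional; extended above with
                                             # neck/head layer names
    },
}
slimmer = OFA(ofa_config)
# ofa_model = slimmer(model, param_state_dict)  # requires paddleslim
```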
diff --git a/paddlers/models/ppdet/slim/prune.py b/paddlers/models/ppdet/slim/prune.py
index 5c7928e..4ba287f 100644
--- a/paddlers/models/ppdet/slim/prune.py
+++ b/paddlers/models/ppdet/slim/prune.py
@@ -83,3 +83,69 @@ class Pruner(object):
pruned_flops, (ori_flops - pruned_flops) / ori_flops))
return model
+
+
+@register
+@serializable
+class PrunerQAT(object):
+ def __init__(self, criterion, pruned_params, pruned_ratios,
+ print_prune_params, quant_config, print_qat_model):
+ super(PrunerQAT, self).__init__()
+ assert criterion in ['l1_norm', 'fpgm'], \
+ "unsupported prune criterion: {}".format(criterion)
+ # Pruner hyperparameter
+ self.criterion = criterion
+ self.pruned_params = pruned_params
+ self.pruned_ratios = pruned_ratios
+ self.print_prune_params = print_prune_params
+ # QAT hyperparameter
+ self.quant_config = quant_config
+ self.print_qat_model = print_qat_model
+
+ def __call__(self, model):
+ # FIXME: Adapt to cases where the training and inference network
+ # graphs differ; currently only the inference graph can be pruned.
+ model.eval()
+ paddleslim = try_import('paddleslim')
+ from paddleslim.analysis import dygraph_flops as flops
+ input_spec = [{
+ "image": paddle.ones(
+ shape=[1, 3, 640, 640], dtype='float32'),
+ "im_shape": paddle.full(
+ [1, 2], 640, dtype='float32'),
+ "scale_factor": paddle.ones(
+ shape=[1, 2], dtype='float32')
+ }]
+ if self.print_prune_params:
+ print_prune_params(model)
+
+ ori_flops = flops(model, input_spec) / 1000
+ logger.info("FLOPs before pruning: {}GFLOPs".format(ori_flops))
+ if self.criterion == 'fpgm':
+ pruner = paddleslim.dygraph.FPGMFilterPruner(model, input_spec)
+ elif self.criterion == 'l1_norm':
+ pruner = paddleslim.dygraph.L1NormFilterPruner(model, input_spec)
+
+ logger.info("pruned params: {}".format(self.pruned_params))
+ pruned_ratios = [float(n) for n in self.pruned_ratios]
+ ratios = {}
+ for i, param in enumerate(self.pruned_params):
+ ratios[param] = pruned_ratios[i]
+ pruner.prune_vars(ratios, [0])
+ pruned_flops = flops(model, input_spec) / 1000
+ logger.info("FLOPs after pruning: {}GFLOPs; pruned ratio: {}".format(
+ pruned_flops, (ori_flops - pruned_flops) / ori_flops))
+
+ self.quanter = paddleslim.dygraph.quant.QAT(config=self.quant_config)
+
+ self.quanter.quantize(model)
+
+ if self.print_qat_model:
+ logger.info("Quantized model:")
+ logger.info(model)
+
+ return model
+
+ def save_quantized_model(self, layer, path, input_spec=None, **config):
+ self.quanter.save_quantized_model(
+ model=layer, path=path, input_spec=input_spec, **config)
diff --git a/paddlers/models/ppdet/slim/quant.py b/paddlers/models/ppdet/slim/quant.py
index a0fb0e6..7c4d40b 100644
--- a/paddlers/models/ppdet/slim/quant.py
+++ b/paddlers/models/ppdet/slim/quant.py
@@ -38,6 +38,11 @@ class QAT(object):
logger.info("Model before quant:")
logger.info(model)
+ # For PP-YOLOE, convert the model to deploy mode first.
+ for layer in model.sublayers():
+ if hasattr(layer, 'convert_to_deploy'):
+ layer.convert_to_deploy()
+
self.quanter.quantize(model)
if self.print_model:
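The loop added above is duck-typed: any sublayer exposing convert_to_deploy() is asked to fold its training-time branches before fake-quant ops are inserted. A self-contained sketch of such a layer (illustrative only, not PP-YOLOE's actual block): a 1x1 branch is folded into the 3x3 conv, since a centered 1x1 kernel equals a zero-padded 3x3 kernel.

```python
import paddle.nn as nn
import paddle.nn.functional as F

class RepBlock(nn.Layer):
    """Toy reparameterizable block with a convert_to_deploy() method."""

    def __init__(self, ch):
        super().__init__()
        self.conv3 = nn.Conv2D(ch, ch, 3, padding=1)
        self.conv1 = nn.Conv2D(ch, ch, 1)
        self.deployed = False

    def convert_to_deploy(self):
        # Pad the 1x1 kernel to 3x3 and sum the branches into one conv.
        w = self.conv3.weight + F.pad(self.conv1.weight, [1, 1, 1, 1])
        self.conv3.weight.set_value(w)
        self.conv3.bias.set_value(self.conv3.bias + self.conv1.bias)
        self.deployed = True

    def forward(self, x):
        if self.deployed:
            return self.conv3(x)
        return self.conv3(x) + self.conv1(x)
```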
diff --git a/paddlers/models/ppdet/utils/check.py b/paddlers/models/ppdet/utils/check.py
index 4a0a176..3480893 100644
--- a/paddlers/models/ppdet/utils/check.py
+++ b/paddlers/models/ppdet/utils/check.py
@@ -20,12 +20,14 @@ import sys
import paddle
import six
-import paddle.version as fluid_version
+import paddle.version as paddle_version
from .logger import setup_logger
logger = setup_logger(__name__)
-__all__ = ['check_gpu', 'check_npu', 'check_version', 'check_config']
+__all__ = [
+ 'check_gpu', 'check_npu', 'check_xpu', 'check_version', 'check_config'
+]
def check_npu(use_npu):
@@ -47,6 +49,25 @@ def check_npu(use_npu):
pass
+def check_xpu(use_xpu):
+ """
+ Log an error and exit when use_xpu is set to true but the installed
+ paddlepaddle is a cpu/gpu/npu build.
+ """
+ err = "Config use_xpu cannot be set as true while you are " \
+ "using a paddlepaddle cpu/gpu/npu build! \nPlease try: \n" \
+ "\t1. Install paddlepaddle-xpu to run the model on XPU \n" \
+ "\t2. Set use_xpu as false in the config file to run the " \
+ "model on CPU/GPU/NPU"
+
+ try:
+ if use_xpu and not paddle.is_compiled_with_xpu():
+ logger.error(err)
+ sys.exit(1)
+ except Exception:
+ pass
+
+
def check_gpu(use_gpu):
"""
Log error and exit when set use_gpu=true in paddlepaddle
@@ -66,21 +87,23 @@ def check_gpu(use_gpu):
pass
-def check_version(version='2.0'):
+def check_version(version='2.2'):
"""
Log error and exit when the installed version of paddlepaddle is
not satisfied.
"""
err = "PaddlePaddle version {} or higher is required, " \
- "or a suitable develop version is satisfied as well. \n" \
+ "or a suitable release/2.5 version is satisfied as well. \n" \
"Please make sure the version is good with your code.".format(version)
version_installed = [
- fluid_version.major, fluid_version.minor, fluid_version.patch,
- fluid_version.rc
+ paddle_version.major, paddle_version.minor, paddle_version.patch,
+ paddle_version.rc
]
+
if version_installed == ['0', '0', '0', '0']:
return
+
version_split = version.split('.')
length = min(len(version_installed), len(version_split))
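The comparison that follows this hunk is unchanged and therefore not shown; condensed, the idea is an element-wise comparison of the two version lists over their common prefix. A sketch under that assumption:

```python
version_installed = ['2', '4', '1', '0']   # e.g. installed paddle 2.4.1
version_split = '2.2'.split('.')           # required version -> ['2', '2']
length = min(len(version_installed), len(version_split))
satisfied = tuple(int(v) for v in version_installed[:length]) >= \
    tuple(int(v) for v in version_split[:length])
assert satisfied  # 2.4 >= 2.2
```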
diff --git a/paddlers/models/ppdet/utils/checkpoint.py b/paddlers/models/ppdet/utils/checkpoint.py
index 3a6087c..04aec4d 100644
--- a/paddlers/models/ppdet/utils/checkpoint.py
+++ b/paddlers/models/ppdet/utils/checkpoint.py
@@ -62,7 +62,7 @@ def _strip_postfix(path):
return path
-def load_weight(model, weight, optimizer=None):
+def load_weight(model, weight, optimizer=None, ema=None):
if is_url(weight):
weight = get_weights_path(weight)
@@ -72,14 +72,26 @@ def load_weight(model, weight, optimizer=None):
raise ValueError("Model pretrain path {} does not "
"exists.".format(pdparam_path))
- param_state_dict = paddle.load(pdparam_path)
+ if ema is not None and os.path.exists(path + '.pdema'):
+ # Exchange model and ema_model to load
+ ema_state_dict = paddle.load(pdparam_path)
+ param_state_dict = paddle.load(path + '.pdema')
+ else:
+ ema_state_dict = None
+ param_state_dict = paddle.load(pdparam_path)
+
model_dict = model.state_dict()
model_weight = {}
incorrect_keys = 0
- for key in model_dict.keys():
+ for key, value in model_dict.items():
if key in param_state_dict.keys():
- model_weight[key] = param_state_dict[key]
+ if isinstance(param_state_dict[key], np.ndarray):
+ param_state_dict[key] = paddle.to_tensor(param_state_dict[key])
+ if value.dtype == param_state_dict[key].dtype:
+ model_weight[key] = param_state_dict[key]
+ else:
+ model_weight[key] = param_state_dict[key].astype(value.dtype)
else:
logger.info('Unmatched key: {}'.format(key))
incorrect_keys += 1
@@ -102,6 +114,11 @@ def load_weight(model, weight, optimizer=None):
last_epoch = optim_state_dict.pop('last_epoch')
optimizer.set_state_dict(optim_state_dict)
+ if ema_state_dict is not None:
+ # nested under the optimizer-restore branch above: resume the EMA
+ # weights together with the epoch recorded in the LR scheduler state
+ ema.resume(ema_state_dict,
+ optim_state_dict['LR_Scheduler']['last_epoch'])
+ elif ema_state_dict is not None:
+ # no optimizer state on disk: resume the EMA weights only
+ ema.resume(ema_state_dict)
return last_epoch
@@ -197,33 +214,52 @@ def load_pretrain_weight(model, pretrain_weight):
param_state_dict = paddle.load(weights_path)
param_state_dict = match_state_dict(model_dict, param_state_dict)
+ for k, v in param_state_dict.items():
+ if isinstance(v, np.ndarray):
+ v = paddle.to_tensor(v)
+ if model_dict[k].dtype != v.dtype:
+ param_state_dict[k] = v.astype(model_dict[k].dtype)
+
model.set_dict(param_state_dict)
logger.info('Finish loading model weights: {}'.format(weights_path))
-def save_model(model, optimizer, save_dir, save_name, last_epoch):
+def save_model(model,
+ optimizer,
+ save_dir,
+ save_name,
+ last_epoch,
+ ema_model=None):
"""
save model into disk.
Args:
- model (paddle.nn.Layer): the Layer instalce to save parameters.
+ model (dict): the model state_dict to be saved.
optimizer (paddle.optimizer.Optimizer): the Optimizer instance to
save optimizer states.
save_dir (str): the directory to be saved.
save_name (str): the path to be saved.
last_epoch (int): the epoch index.
+ ema_model (dict|None): the ema_model state_dict to be saved.
"""
if paddle.distributed.get_rank() != 0:
return
+ assert isinstance(model, dict), ("model is not a instance of dict, "
+ "please call model.state_dict() to get.")
if not os.path.exists(save_dir):
os.makedirs(save_dir)
save_path = os.path.join(save_dir, save_name)
- if isinstance(model, nn.Layer):
- paddle.save(model.state_dict(), save_path + ".pdparams")
- else:
- assert isinstance(model,
- dict), 'model is not a instance of nn.layer or dict'
+ # save model
+ if ema_model is None:
paddle.save(model, save_path + ".pdparams")
+ else:
+ assert isinstance(ema_model,
+ dict), ("ema_model is not an instance of dict; "
+ "please call model.state_dict() to get one.")
+ # Exchange model and ema_model to save
+ paddle.save(ema_model, save_path + ".pdparams")
+ paddle.save(model, save_path + ".pdema")
+ # save optimizer
state_dict = optimizer.state_dict()
state_dict['last_epoch'] = last_epoch
paddle.save(state_dict, save_path + ".pdopt")
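With the changes above, providing ema_model swaps the roles of the two files on disk: '.pdparams' holds the EMA weights (what evaluation and export consume) and '.pdema' holds the raw training weights, and load_weight(..., ema=...) reverses the exchange on resume. A minimal sketch of the save side, using a cloned state dict as a stand-in for a real EMA accumulator:

```python
import paddle
import paddle.nn as nn

net = nn.Linear(4, 2)
opt = paddle.optimizer.SGD(parameters=net.parameters())
# Stand-in for EMA weights; a real run would use its EMA helper's state dict.
ema_weights = {k: v.clone() for k, v in net.state_dict().items()}

save_model(
    model=net.state_dict(),   # raw weights  -> output/model_final.pdema
    optimizer=opt,
    save_dir='output',
    save_name='model_final',
    last_epoch=11,
    ema_model=ema_weights)    # EMA weights  -> output/model_final.pdparams
```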
diff --git a/paddlers/models/ppdet/utils/cli.py b/paddlers/models/ppdet/utils/cli.py
index afe5f3f..295757b 100644
--- a/paddlers/models/ppdet/utils/cli.py
+++ b/paddlers/models/ppdet/utils/cli.py
@@ -81,6 +81,13 @@ class ArgsParser(ArgumentParser):
return config
+def merge_args(config, args, exclude_args=['config', 'opt', 'slim_config']):
+ for k, v in vars(args).items():
+ if k not in exclude_args:
+ config[k] = v
+ return config
+
+
def print_total_cfg(config):
modules = get_registered_modules()
color_tty = ColorTTY()
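merge_args simply overlays the parsed CLI flags onto the config, skipping the bookkeeping keys in exclude_args. A minimal illustration with a stand-in args namespace:

```python
import argparse

cfg = {'use_gpu': True}
args = argparse.Namespace(
    config='cfg.yml', opt=None, slim_config=None, fp16=True)
cfg = merge_args(cfg, args)
# 'config', 'opt' and 'slim_config' are excluded; 'fp16' is merged in.
assert cfg == {'use_gpu': True, 'fp16': True}
```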
diff --git a/paddlers/models/ppdet/utils/download.py b/paddlers/models/ppdet/utils/download.py
index 2c00787..c36b236 100644
--- a/paddlers/models/ppdet/utils/download.py
+++ b/paddlers/models/ppdet/utils/download.py
@@ -96,8 +96,8 @@ DATASETS = {
'https://paddlemodels.bj.bcebos.com/object_detection/roadsign_coco.tar',
'49ce5a9b5ad0d6266163cd01de4b018e', ), ], ['annotations', 'images']),
'spine_coco': ([(
- 'https://paddledet.bj.bcebos.com/data/spine_coco.tar',
- '7ed69ae73f842cd2a8cf4f58dc3c5535', ), ], ['annotations', 'images']),
+ 'https://paddledet.bj.bcebos.com/data/spine.tar',
+ '8a3a353c2c54a2284ad7d2780b65f6a6', ), ], ['annotations', 'images']),
'mot': (),
'objects365': (),
'coco_ce': ([(
@@ -235,7 +235,7 @@ def create_voc_list(data_dir, devkit_subdir='VOCdevkit'):
years = ['2007', '2012']
# NOTE: since using auto download VOC
- # dataset, VOC default label list should be used,
+ # dataset, VOC default label list should be used,
# do not generate label_list.txt here. For default
# label, see ../data/source/voc.py
create_list(devkit_dir, years, data_dir)
@@ -387,13 +387,18 @@ def _download(url, path, md5sum=None):
if chunk:
f.write(chunk)
shutil.move(tmp_fullname, fullname)
- return fullname
+ return fullname
def _download_dist(url, path, md5sum=None):
env = os.environ
if 'PADDLE_TRAINERS_NUM' in env and 'PADDLE_TRAINER_ID' in env:
- trainer_id = int(env['PADDLE_TRAINER_ID'])
+ # In multi-machine training, each node downloads the data for
+ # itself, but within a node only one process downloads, so the
+ # data is fetched exactly once per machine.
+ # Reference https://github.com/PaddlePaddle/PaddleClas/blob/release/2.5/ppcls/utils/download.py#L108
+ rank_id_curr_node = int(os.environ.get("PADDLE_RANK_IN_NODE", 0))
num_trainers = int(env['PADDLE_TRAINERS_NUM'])
if num_trainers <= 1:
return _download(url, path, md5sum)
@@ -406,12 +411,9 @@ def _download_dist(url, path, md5sum=None):
os.makedirs(path)
if not osp.exists(fullname):
- from paddle.distributed import ParallelEnv
- unique_endpoints = _get_unique_endpoints(ParallelEnv()
- .trainer_endpoints[:])
- with open(lock_path, 'w'): # touch
+ with open(lock_path, 'w'): # touch
os.utime(lock_path, None)
- if ParallelEnv().current_endpoint in unique_endpoints:
+ if rank_id_curr_node == 0:
_download(url, path, md5sum)
os.remove(lock_path)
else:
@@ -423,7 +425,7 @@ def _download_dist(url, path, md5sum=None):
def _check_exist_file_md5(filename, md5sum, url):
- # if md5sum is None, and file to check is weights file,
+ # if md5sum is None, and file to check is weights file,
# read md5um from url and check, else check md5sum directly
return _md5check_from_url(filename, url) if md5sum is None \
and filename.endswith('pdparams') \
@@ -523,7 +525,7 @@ def _decompress_dist(fname):
# trainer pipeline in order
# **change this if you have more elegent methods**
if ParallelEnv().current_endpoint in unique_endpoints:
- with open(lock_path, 'w'): # touch
+ with open(lock_path, 'w'): # touch
os.utime(lock_path, None)
_decompress(fname)
os.remove(lock_path)
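Reduced to its essentials, the per-node gate introduced above works as follows: local rank 0 touches a lock file, downloads, then removes the lock, while the other ranks on the same machine wait for the lock to disappear. A sketch (the path and sleep interval are placeholders, and the real code first checks whether the file already exists):

```python
import os
import time

rank_id_curr_node = int(os.environ.get("PADDLE_RANK_IN_NODE", 0))
lock_path = "/tmp/dataset.download.lock"  # placeholder path

if rank_id_curr_node == 0:
    with open(lock_path, 'w'):  # touch
        os.utime(lock_path, None)
    # _download(url, path, md5sum) would run here
    os.remove(lock_path)
else:
    while os.path.exists(lock_path):  # wait for local rank 0 to finish
        time.sleep(0.5)
```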
diff --git a/paddlers/models/ppdet/utils/fuse_utils.py b/paddlers/models/ppdet/utils/fuse_utils.py
new file mode 100644
index 0000000..647fa99
--- /dev/null
+++ b/paddlers/models/ppdet/utils/fuse_utils.py
@@ -0,0 +1,179 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import copy
+import paddle
+import paddle.nn as nn
+
+__all__ = ['fuse_conv_bn']
+
+
+def fuse_conv_bn(model):
+ is_train = False
+ if model.training:
+ model.eval()
+ is_train = True
+ fuse_list = []
+ tmp_pair = [None, None]
+ for name, layer in model.named_sublayers():
+ if isinstance(layer, nn.Conv2D):
+ tmp_pair[0] = name
+ if isinstance(layer, nn.BatchNorm2D):
+ tmp_pair[1] = name
+
+ if tmp_pair[0] is not None and tmp_pair[1] is not None:
+ fuse_list.append(tmp_pair)
+ tmp_pair = [None, None]
+ model = fuse_layers(model, fuse_list)
+ if is_train:
+ model.train()
+ return model
+
+
+def find_parent_layer_and_sub_name(model, name):
+ """
+ Given the model and the name of a layer, find the parent layer and
+ the sub_name of the layer.
+ For example, if name is 'block_1.convbn_1.conv_1', the parent layer is
+ 'block_1.convbn_1' and the sub_name is 'conv_1'.
+ Args:
+ model(paddle.nn.Layer): the model that contains the layer.
+ name(string): the name of a layer
+
+ Returns:
+ parent_layer, subname
+ """
+ assert isinstance(model, nn.Layer), \
+ "The model must be the instance of paddle.nn.Layer."
+ assert len(name) > 0, "The input (name) should not be empty."
+
+ last_idx = 0
+ idx = 0
+ parent_layer = model
+ while idx < len(name):
+ if name[idx] == '.':
+ sub_name = name[last_idx:idx]
+ if hasattr(parent_layer, sub_name):
+ parent_layer = getattr(parent_layer, sub_name)
+ last_idx = idx + 1
+ idx += 1
+ sub_name = name[last_idx:idx]
+ return parent_layer, sub_name
+
+
+class Identity(nn.Layer):
+ '''a layer to replace bn or relu layers'''
+
+ def __init__(self, *args, **kwargs):
+ super(Identity, self).__init__()
+
+ def forward(self, input):
+ return input
+
+
+def fuse_layers(model, layers_to_fuse, inplace=False):
+ '''
+ fuse layers in layers_to_fuse
+
+ Args:
+ model(nn.Layer): The model to be fused.
+ layers_to_fuse(list): Names of the layers to fuse, grouped into
+ lists, e.g. [["conv1", "bn1"], ["conv2", "bn2"]].
+ inplace(bool): Whether apply fusing to the input model.
+ Default: False.
+
+ Return
+ fused_model(paddle.nn.Layer): The fused model.
+ '''
+ if not inplace:
+ model = copy.deepcopy(model)
+ for layers_list in layers_to_fuse:
+ layer_list = []
+ for layer_name in layers_list:
+ parent_layer, sub_name = find_parent_layer_and_sub_name(model,
+ layer_name)
+ layer_list.append(getattr(parent_layer, sub_name))
+ new_layers = _fuse_func(layer_list)
+ for i, item in enumerate(layers_list):
+ parent_layer, sub_name = find_parent_layer_and_sub_name(model, item)
+ setattr(parent_layer, sub_name, new_layers[i])
+ return model
+
+
+def _fuse_func(layer_list):
+ '''choose the fuser method and fuse layers'''
+ types = tuple(type(m) for m in layer_list)
+ fusion_method = types_to_fusion_method.get(types, None)
+ assert fusion_method is not None, \
+ "no fusion method registered for layer types {}".format(types)
+ new_layers = [None] * len(layer_list)
+ fused_layer = fusion_method(*layer_list)
+ for handle_id, pre_hook_fn in layer_list[0]._forward_pre_hooks.items():
+ fused_layer.register_forward_pre_hook(pre_hook_fn)
+ del layer_list[0]._forward_pre_hooks[handle_id]
+ for handle_id, hook_fn in layer_list[-1]._forward_post_hooks.items():
+ fused_layer.register_forward_post_hook(hook_fn)
+ del layer_list[-1]._forward_post_hooks[handle_id]
+ new_layers[0] = fused_layer
+ for i in range(1, len(layer_list)):
+ identity = Identity()
+ identity.training = layer_list[0].training
+ new_layers[i] = identity
+ return new_layers
+
+
+def _fuse_conv_bn(conv, bn):
+ '''fuse conv and bn for train or eval'''
+ assert(conv.training == bn.training),\
+ "Conv and BN both must be in the same mode (train or eval)."
+ if conv.training:
+ assert bn._num_features == conv._out_channels, 'Output channel of Conv2d must match num_features of BatchNorm2d'
+ raise NotImplementedError
+ else:
+ return _fuse_conv_bn_eval(conv, bn)
+
+
+def _fuse_conv_bn_eval(conv, bn):
+ '''fuse conv and bn for eval'''
+ assert (not (conv.training or bn.training)), "Fusion only for eval!"
+ fused_conv = copy.deepcopy(conv)
+
+ fused_weight, fused_bias = _fuse_conv_bn_weights(
+ fused_conv.weight, fused_conv.bias, bn._mean, bn._variance, bn._epsilon,
+ bn.weight, bn.bias)
+ fused_conv.weight.set_value(fused_weight)
+ if fused_conv.bias is None:
+ fused_conv.bias = paddle.create_parameter(
+ shape=[fused_conv._out_channels], is_bias=True, dtype=bn.bias.dtype)
+ fused_conv.bias.set_value(fused_bias)
+ return fused_conv
+
+
+def _fuse_conv_bn_weights(conv_w, conv_b, bn_rm, bn_rv, bn_eps, bn_w, bn_b):
+ '''fuse weights and bias of conv and bn'''
+ if conv_b is None:
+ conv_b = paddle.zeros_like(bn_rm)
+ if bn_w is None:
+ bn_w = paddle.ones_like(bn_rm)
+ if bn_b is None:
+ bn_b = paddle.zeros_like(bn_rm)
+ bn_var_rsqrt = paddle.rsqrt(bn_rv + bn_eps)
+ conv_w = conv_w * \
+ (bn_w * bn_var_rsqrt).reshape([-1] + [1] * (len(conv_w.shape) - 1))
+ conv_b = (conv_b - bn_rm) * bn_var_rsqrt * bn_w + bn_b
+ return conv_w, conv_b
+
+
+types_to_fusion_method = {(nn.Conv2D, nn.BatchNorm2D): _fuse_conv_bn, }
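A quick way to sanity-check the eval-mode fusion above: the fused conv must reproduce conv followed by bn to within floating-point tolerance.

```python
import paddle
import paddle.nn as nn

conv = nn.Conv2D(3, 8, 3, bias_attr=False)
bn = nn.BatchNorm2D(8)
conv.eval()
bn.eval()

x = paddle.randn([1, 3, 16, 16])
reference = bn(conv(x))
fused = _fuse_conv_bn_eval(conv, bn)  # creates the missing conv bias
assert paddle.allclose(reference, fused(x), atol=1e-5).item()
```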
diff --git a/paddlers/tasks/classifier.py b/paddlers/tasks/classifier.py
index c1074a3..dff79e7 100644
--- a/paddlers/tasks/classifier.py
+++ b/paddlers/tasks/classifier.py
@@ -167,13 +167,8 @@ class BaseClassifier(BaseModel):
weight_decay=paddle.regularizer.L2Decay(L2_coeff))
return optimizer
- def default_postprocess(self, class_id_map_file):
- default_config = {
- "name": "Topk",
- "topk": 1,
- "class_id_map_file": class_id_map_file
- }
- return build_postprocess(default_config)
+ def default_postprocess(self):
+ return self.build_postprocess_from_labels(topk=1)
def build_postprocess_from_labels(self, topk=1):
label_dict = dict()
@@ -250,7 +245,7 @@ class BaseClassifier(BaseModel):
if self.losses is None:
self.losses = self.default_loss()
self.metrics = self.default_metric()
- self.postprocess = self.default_postprocess(train_dataset.label_list)
+ self.postprocess = self.default_postprocess()
if optimizer is None:
num_steps_each_epoch = train_dataset.num_samples // train_batch_size