Merge pull request #40 from Bobholamovic/update_ppseg

[Feat] Update ppseg and Add CondenseNet V2
Authored by cc 2 years ago; committed by GitHub
commit ebceda8419
Changed files (number of changed lines in parentheses):

  1. docs/apis/train.md (2)
  2. docs/intro/model_zoo.md (25)
  3. examples/README.md (1)
  4. examples/rs_research/config_utils.py (1)
  5. examples/rs_research/run_task.py (7)
  6. paddlers/deploy/predictor.py (38)
  7. paddlers/models/hash.txt (1)
  8. paddlers/models/ppseg/__init__.py (2)
  9. paddlers/models/ppseg/core/infer.py (107)
  10. paddlers/models/ppseg/core/predict.py (30)
  11. paddlers/models/ppseg/core/train.py (71)
  12. paddlers/models/ppseg/core/val.py (102)
  13. paddlers/models/ppseg/cvlibs/config.py (205)
  14. paddlers/models/ppseg/cvlibs/param_init.py (26)
  15. paddlers/models/ppseg/datasets/__init__.py (1)
  16. paddlers/models/ppseg/datasets/ade.py (24)
  17. paddlers/models/ppseg/datasets/dataset.py (54)
  18. paddlers/models/ppseg/datasets/pssl.py (135)
  19. paddlers/models/ppseg/models/__init__.py (9)
  20. paddlers/models/ppseg/models/attention_unet.py (6)
  21. paddlers/models/ppseg/models/backbones/__init__.py (4)
  22. paddlers/models/ppseg/models/backbones/ghostnet.py (318)
  23. paddlers/models/ppseg/models/backbones/hrnet.py (4)
  24. paddlers/models/ppseg/models/backbones/lite_hrnet.py (974)
  25. paddlers/models/ppseg/models/backbones/mix_transformer.py (6)
  26. paddlers/models/ppseg/models/backbones/mobilenetv2.py (329)
  27. paddlers/models/ppseg/models/backbones/mobilenetv3.py (496)
  28. paddlers/models/ppseg/models/backbones/resnet_vd.py (7)
  29. paddlers/models/ppseg/models/backbones/shufflenetv2.py (315)
  30. paddlers/models/ppseg/models/backbones/stdcnet.py (180)
  31. paddlers/models/ppseg/models/backbones/swin_transformer.py (8)
  32. paddlers/models/ppseg/models/backbones/top_transformer.py (716)
  33. paddlers/models/ppseg/models/backbones/transformer_utils.py (4)
  34. paddlers/models/ppseg/models/backbones/vision_transformer.py (6)
  35. paddlers/models/ppseg/models/backbones/xception_deeplab.py (11)
  36. paddlers/models/ppseg/models/bisenet.py (18)
  37. paddlers/models/ppseg/models/ccnet.py (174)
  38. paddlers/models/ppseg/models/ddrnet.py (403)
  39. paddlers/models/ppseg/models/emanet.py (4)
  40. paddlers/models/ppseg/models/enet.py (4)
  41. paddlers/models/ppseg/models/fast_scnn.py (16)
  42. paddlers/models/ppseg/models/ginet.py (2)
  43. paddlers/models/ppseg/models/glore.py (198)
  44. paddlers/models/ppseg/models/hardnet.py (4)
  45. paddlers/models/ppseg/models/layers/__init__.py (3)
  46. paddlers/models/ppseg/models/layers/attention.py (126)
  47. paddlers/models/ppseg/models/layers/layer_libs.py (57)
  48. paddlers/models/ppseg/models/layers/tensor_fusion.py (285)
  49. paddlers/models/ppseg/models/layers/tensor_fusion_helper.py (133)
  50. paddlers/models/ppseg/models/losses/binary_cross_entropy_loss.py (2)
  51. paddlers/models/ppseg/models/losses/cross_entropy_loss.py (4)
  52. paddlers/models/ppseg/models/losses/decoupledsegnet_relax_boundary_loss.py (2)
  53. paddlers/models/ppseg/models/losses/detail_aggregate_loss.py (2)
  54. paddlers/models/ppseg/models/losses/dice_loss.py (65)
  55. paddlers/models/ppseg/models/losses/focal_loss.py (122)
  56. paddlers/models/ppseg/models/losses/l1_loss.py (22)
  57. paddlers/models/ppseg/models/losses/lovasz_loss.py (16)
  58. paddlers/models/ppseg/models/losses/ohem_cross_entropy_loss.py (2)
  59. paddlers/models/ppseg/models/losses/pixel_contrast_cross_entropy_loss.py (5)
  60. paddlers/models/ppseg/models/losses/semantic_connectivity_loss.py (12)
  61. paddlers/models/ppseg/models/lraspp.py (162)
  62. paddlers/models/ppseg/models/mla_transformer.py (2)
  63. paddlers/models/ppseg/models/mobileseg.py (289)
  64. paddlers/models/ppseg/models/pointrend.py (2)
  65. paddlers/models/ppseg/models/portraitnet.py (16)
  66. paddlers/models/ppseg/models/pp_liteseg.py (273)
  67. paddlers/models/ppseg/models/pphumanseg_lite.py (8)
  68. paddlers/models/ppseg/models/segformer.py (48)
  69. paddlers/models/ppseg/models/segnet.py (4)
  70. paddlers/models/ppseg/models/sinet.py (449)
  71. paddlers/models/ppseg/models/stdcseg.py (2)
  72. paddlers/models/ppseg/models/topformer.py (155)
  73. paddlers/models/ppseg/models/u2net.py (10)
  74. paddlers/models/ppseg/models/unet.py (10)
  75. paddlers/models/ppseg/models/unet_plusplus.py (4)
  76. paddlers/models/ppseg/models/upernet.py (173)
  77. paddlers/models/ppseg/transforms/functional.py (9)
  78. paddlers/models/ppseg/transforms/transforms.py (756)
  79. paddlers/models/ppseg/utils/__init__.py (1)
  80. paddlers/models/ppseg/utils/config_check.py (59)
  81. paddlers/models/ppseg/utils/env/__init__.py (2)
  82. paddlers/models/ppseg/utils/env/seg_env.py (2)
  83. paddlers/models/ppseg/utils/env/sys_env.py (12)
  84. paddlers/models/ppseg/utils/metrics.py (70)
  85. paddlers/models/ppseg/utils/train_profiler.py (2)
  86. paddlers/models/ppseg/utils/utils.py (2)
  87. paddlers/models/ppseg/utils/visualize.py (40)
  88. paddlers/rs_models/cd/losses/fccdn_loss.py (63)
  89. paddlers/rs_models/clas/__init__.py (2)
  90. paddlers/rs_models/clas/condensenetv2.py (884)
  91. paddlers/tasks/change_detector.py (2)
  92. paddlers/tasks/classifier.py (24)
  93. paddlers/tasks/restorer.py (2)
  94. paddlers/tasks/segmenter.py (32)
  95. paddlers/utils/checkpoint.py (9)
  96. test_tipc/README.md (4)
  97. test_tipc/config_utils.py (1)
  98. test_tipc/configs/cd/_base_/airchange.yaml (2)
  99. test_tipc/configs/cd/_base_/levircd.yaml (2)
  100. test_tipc/configs/cd/bit/bit.yaml (8)

Some files were not shown because too many files have changed in this diff.

@ -34,7 +34,7 @@
### Initializing a `BaseSegmenter` subclass object
- Most models support the `in_channels`, `num_classes`, and `use_mixed_loss` parameters, which set the number of input channels, the number of output classes, and whether to use the preset mixed loss, respectively. Some models, such as `FarSeg`, do not yet support the `in_channels` parameter.
- Most models support the `in_channels`, `num_classes`, and `use_mixed_loss` parameters, which set the number of input channels, the number of output classes, and whether to use the preset mixed loss, respectively.
- The `use_mixed_loss` parameter will be deprecated in the future, so its use is not recommended.
- The loss function used during training can be specified via the `losses` parameter. `losses` must be a dictionary whose `'types'` and `'coef'` keys map to two equal-length lists giving the loss function objects (callables) and the loss weights, respectively. For example, `losses={'types': [LossType1(), LossType2()], 'coef': [1.0, 0.5]}` is equivalent, during training, to computing the loss `1.0*LossType1()(logits, labels)+0.5*LossType2()(logits, labels)`, where `logits` and `labels` are the model output and the ground-truth labels, respectively (a minimal sketch follows this list).
- Each subclass supports model-specific input parameters. For details, see the [model definitions](https://github.com/PaddlePaddle/PaddleRS/blob/develop/paddlers/rs_models/seg) and [trainer definitions](https://github.com/PaddlePaddle/PaddleRS/blob/develop/paddlers/tasks/segmentor.py).
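A minimal sketch of the `losses` dictionary in use, assuming a `UNet` trainer exposed as `paddlers.tasks.seg.UNet` and loss classes importable from `paddlers.models.ppseg.models.losses` (both import paths are assumptions, not verified against this exact revision):

```python
# Hedged sketch: train-time loss = 1.0 * cross-entropy + 0.5 * Dice.
import paddlers
from paddlers.models.ppseg.models.losses import CrossEntropyLoss, DiceLoss

model = paddlers.tasks.seg.UNet(
    in_channels=3,
    num_classes=2,
    losses={
        'types': [CrossEntropyLoss(), DiceLoss()],  # callable loss objects
        'coef': [1.0, 0.5],                         # matching weights
    })
# Equivalent during training to
# 1.0 * CrossEntropyLoss()(logits, labels) + 0.5 * DiceLoss()(logits, labels).
```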

@ -20,18 +20,21 @@ All models currently supported by PaddleRS are listed below (models marked with \* are remote sensing-specific
| Change Detection | \*FCCDN | Yes |
| Change Detection | \*SNUNet | Yes |
| Change Detection | \*STANet | Yes |
| Scene Classification | CondenseNetV2 | Yes |
| Scene Classification | HRNet | |
| Scene Classification | MobileNetV3 | |
| Scene Classification | ResNet50-vd | |
| Scene Classification | CondenseNet V2 | Yes |
| Scene Classification | HRNet | |
| Scene Classification | MobileNetV3 | |
| Scene Classification | ResNet50-vd | |
| Image Restoration | DRN | No |
| Image Restoration | ESRGAN | |
| Image Restoration | ESRGAN | |
| Image Restoration | LESRCNN | No |
| Object Detection | Faster R-CNN | Yes |
| Object Detection | PP-YOLO | Yes |
| Object Detection | PP-YOLO Tiny | Yes |
| Object Detection | PP-YOLOv2 | Yes |
| Object Detection | YOLOv3 | Yes |
| Object Detection | Faster R-CNN | No |
| Object Detection | PP-YOLO | No |
| Object Detection | PP-YOLO Tiny | No |
| Object Detection | PP-YOLOv2 | No |
| Object Detection | YOLOv3 | No |
| Image Segmentation | BiSeNet V2 | Yes |
| Image Segmentation | DeepLab V3+ | Yes |
| Image Segmentation | \*FarSeg | No |
| Image Segmentation | \*FarSeg | Yes |
| Image Segmentation | Fast-SCNN | Yes |
| Image Segmentation | HRNet | Yes |
| Image Segmentation | UNet | Yes |

@ -53,3 +53,4 @@ PaddleRS provides rich examples ranging from scientific research to industrial applications, in the hope of helping remote sensing
|[[Official] The 11th China Software Cup, Baidu Remote Sensing Track: Object Detection](https://aistudio.baidu.com/aistudio/projectdetail/3792609)|古代飞|Competition|Object detection, contest baseline|
|[[11th Software Cup] Remote Sensing Interpretation Track: Change Detection Task, 4th-Place Preliminary-Round Solution](https://aistudio.baidu.com/aistudio/projectdetail/4116895)|lzzzzzm|Competition|Change detection, high-score solution|
|[[Solution Sharing] The 11th China Software Cup College Software Design Contest, Remote Sensing Interpretation Track](https://aistudio.baidu.com/aistudio/projectdetail/4146154)|trainer|Competition|Change detection, high-score solution|
|[Remote Sensing Change Detection for Construction Progress Monitoring in Credit Scenarios](https://aistudio.baidu.com/aistudio/projectdetail/4543160)|古代飞|Industrial case|Change detection, financial risk control|

@ -133,6 +133,7 @@ def parse_args(*args, **kwargs):
# Global settings
parser.add_argument('cmd', choices=['train', 'eval'])
parser.add_argument('task', choices=['cd', 'clas', 'det', 'res', 'seg'])
parser.add_argument('--seed', type=int, default=None)
# Data
parser.add_argument('--datasets', type=dict, default={})

@ -15,7 +15,9 @@
# limitations under the License.
import os
import random
import numpy as np
# Import cv2 and sklearn before paddlers to solve the
# "ImportError: dlopen: cannot load any more object with static TLS" issue.
import cv2
@ -62,6 +64,11 @@ if __name__ == '__main__':
cfg = parse_args()
print(format_cfg(cfg))
if cfg['seed'] is not None:
random.seed(cfg['seed'])
np.random.seed(cfg['seed'])
paddle.seed(cfg['seed'])
# Automatically download data
if cfg['download_on']:
paddlers.utils.download_and_decompress(

@ -103,11 +103,11 @@ class Predictor(object):
config.enable_use_gpu(200, gpu_id)
config.switch_ir_optim(True)
if use_trt:
if self._model.model_type == 'segmenter':
if self.model_type == 'segmenter':
logging.warning(
"Semantic segmentation models do not support TensorRT acceleration, "
"TensorRT is forcibly disabled.")
elif self._model.model_type == 'detector' and 'RCNN' in self._model.__class__.__name__:
elif self.model_type == 'detector' and 'RCNN' in self._model.__class__.__name__:
logging.warning(
"RCNN models do not support TensorRT acceleration, "
"TensorRT is forcibly disabled.")
@ -150,30 +150,29 @@ class Predictor(object):
def preprocess(self, images, transforms):
preprocessed_samples = self._model.preprocess(
images, transforms, to_tensor=False)
if self._model.model_type == 'classifier':
if self.model_type == 'classifier':
preprocessed_samples = {'image': preprocessed_samples[0]}
elif self._model.model_type == 'segmenter':
elif self.model_type == 'segmenter':
preprocessed_samples = {
'image': preprocessed_samples[0],
'ori_shape': preprocessed_samples[1]
}
elif self._model.model_type == 'detector':
elif self.model_type == 'detector':
pass
elif self._model.model_type == 'change_detector':
elif self.model_type == 'change_detector':
preprocessed_samples = {
'image': preprocessed_samples[0],
'image2': preprocessed_samples[1],
'ori_shape': preprocessed_samples[2]
}
elif self._model.model_type == 'restorer':
elif self.model_type == 'restorer':
preprocessed_samples = {
'image': preprocessed_samples[0],
'tar_shape': preprocessed_samples[1]
}
else:
logging.error(
"Invalid model type {}".format(self._model.model_type),
exit=True)
"Invalid model type {}".format(self.model_type), exit=True)
return preprocessed_samples
def postprocess(self,
@ -182,7 +181,7 @@ class Predictor(object):
ori_shape=None,
tar_shape=None,
transforms=None):
if self._model.model_type == 'classifier':
if self.model_type == 'classifier':
true_topk = min(self._model.num_classes, topk)
if self._model.postprocess is None:
self._model.build_postprocess_from_labels(topk)
@ -198,7 +197,7 @@ class Predictor(object):
'scores_map': s,
'label_names_map': n,
} for l, s, n in zip(class_ids, scores, label_names)]
elif self._model.model_type in ('segmenter', 'change_detector'):
elif self.model_type in ('segmenter', 'change_detector'):
label_map, score_map = self._model.postprocess(
net_outputs,
batch_origin_shape=ori_shape,
@ -207,13 +206,13 @@ class Predictor(object):
'label_map': l,
'score_map': s
} for l, s in zip(label_map, score_map)]
elif self._model.model_type == 'detector':
elif self.model_type == 'detector':
net_outputs = {
k: v
for k, v in zip(['bbox', 'bbox_num', 'mask'], net_outputs)
}
preds = self._model.postprocess(net_outputs)
elif self._model.model_type == 'restorer':
elif self.model_type == 'restorer':
res_maps = self._model.postprocess(
net_outputs[0],
batch_tar_shape=tar_shape,
@ -221,8 +220,7 @@ class Predictor(object):
preds = [{'res_map': res_map} for res_map in res_maps]
else:
logging.error(
"Invalid model type {}.".format(self._model.model_type),
exit=True)
"Invalid model type {}.".format(self.model_type), exit=True)
return preds
@ -360,6 +358,12 @@ class Predictor(object):
batch_size (int, optional): Batch size used in inference. Defaults to 1.
quiet (bool, optional): If True, disable the progress bar. Defaults to False.
"""
if self.model_type not in ('segmenter', 'change_detector'):
raise RuntimeError(
"Model type is {}, which does not support inference with sliding windows.".
format(self.model_type))
slider_predict(
partial(
self.predict, quiet=True),
@ -375,3 +379,7 @@ class Predictor(object):
def batch_predict(self, image_list, **params):
return self.predict(img_file=image_list, **params)
@property
def model_type(self):
return self._model.model_type

@ -0,0 +1 @@
ppseg f6c73b478cdf00f40ae69edd35bf6bce2a1687ef

@ -1,4 +1,4 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.

@ -21,88 +21,16 @@ import paddle
import paddle.nn.functional as F
def get_reverse_list(ori_shape, transforms):
"""
get reverse list of transform.
Args:
ori_shape (list): Origin shape of image.
transforms (list): List of transform.
Returns:
list: List of tuple, there are two format:
('resize', (h, w)) The image shape before resize,
('padding', (h, w)) The image shape before padding.
"""
reverse_list = []
h, w = ori_shape[0], ori_shape[1]
for op in transforms:
if op.__class__.__name__ in ['Resize']:
reverse_list.append(('resize', (h, w)))
h, w = op.target_size[0], op.target_size[1]
if op.__class__.__name__ in ['ResizeByLong']:
reverse_list.append(('resize', (h, w)))
long_edge = max(h, w)
short_edge = min(h, w)
short_edge = int(round(short_edge * op.long_size / long_edge))
long_edge = op.long_size
if h > w:
h = long_edge
w = short_edge
else:
w = long_edge
h = short_edge
if op.__class__.__name__ in ['ResizeByShort']:
reverse_list.append(('resize', (h, w)))
long_edge = max(h, w)
short_edge = min(h, w)
long_edge = int(round(long_edge * op.short_size / short_edge))
short_edge = op.short_size
if h > w:
h = long_edge
w = short_edge
else:
w = long_edge
h = short_edge
if op.__class__.__name__ in ['Pad']:
reverse_list.append(('padding', (h, w)))
w, h = op.target_size[0], op.target_size[1]
if op.__class__.__name__ in ['PadByAspectRatio']:
reverse_list.append(('padding', (h, w)))
ratio = w / h
if ratio == op.aspect_ratio:
pass
elif ratio > op.aspect_ratio:
h = int(w / op.aspect_ratio)
else:
w = int(h * op.aspect_ratio)
if op.__class__.__name__ in ['LimitLong']:
long_edge = max(h, w)
short_edge = min(h, w)
if ((op.max_long is not None) and (long_edge > op.max_long)):
reverse_list.append(('resize', (h, w)))
long_edge = op.max_long
short_edge = int(round(short_edge * op.max_long / long_edge))
elif ((op.min_long is not None) and (long_edge < op.min_long)):
reverse_list.append(('resize', (h, w)))
long_edge = op.min_long
short_edge = int(round(short_edge * op.min_long / long_edge))
if h > w:
h = long_edge
w = short_edge
else:
w = long_edge
h = short_edge
return reverse_list
def reverse_transform(pred, ori_shape, transforms, mode='nearest'):
def reverse_transform(pred, trans_info, mode='nearest'):
"""recover pred to origin shape"""
reverse_list = get_reverse_list(ori_shape, transforms)
intTypeList = [paddle.int8, paddle.int16, paddle.int32, paddle.int64]
dtype = pred.dtype
for item in reverse_list[::-1]:
if item[0] == 'resize':
for item in trans_info[::-1]:
if isinstance(item[0], list):
trans_mode = item[0][0]
else:
trans_mode = item[0]
if trans_mode == 'resize':
h, w = item[1][0], item[1][1]
if paddle.get_device() == 'cpu' and dtype in intTypeList:
pred = paddle.cast(pred, 'float32')
@ -110,7 +38,7 @@ def reverse_transform(pred, ori_shape, transforms, mode='nearest'):
pred = paddle.cast(pred, dtype)
else:
pred = F.interpolate(pred, (h, w), mode=mode)
elif item[0] == 'padding':
elif trans_mode == 'padding':
h, w = item[1][0], item[1][1]
pred = pred[:, :, 0:h, 0:w]
else:
@ -205,8 +133,7 @@ def slide_inference(model, im, crop_size, stride):
def inference(model,
im,
ori_shape=None,
transforms=None,
trans_info=None,
is_slide=False,
stride=None,
crop_size=None):
@ -216,8 +143,7 @@ def inference(model,
Args:
model (paddle.nn.Layer): model to get logits of image.
im (Tensor): the input image.
ori_shape (list): Origin shape of image.
transforms (list): Transforms for image.
trans_info (list): Records of how the image shape changed during preprocessing. Default: None.
is_slide (bool): Whether to infer by sliding window. Default: False.
crop_size (tuple|list). The size of sliding window, (w, h). It should be provided if is_slide is True.
stride (tuple|list). The size of stride, (w, h). It should be provided if is_slide is True.
@ -239,8 +165,8 @@ def inference(model,
logit = slide_inference(model, im, crop_size=crop_size, stride=stride)
if hasattr(model, 'data_format') and model.data_format == 'NHWC':
logit = logit.transpose((0, 3, 1, 2))
if ori_shape is not None:
logit = reverse_transform(logit, ori_shape, transforms, mode='bilinear')
if trans_info is not None:
logit = reverse_transform(logit, trans_info, mode='bilinear')
pred = paddle.argmax(logit, axis=1, keepdim=True, dtype='int32')
return pred, logit
else:
@ -249,8 +175,7 @@ def inference(model,
def aug_inference(model,
im,
ori_shape,
transforms,
trans_info,
scales=1.0,
flip_horizontal=False,
flip_vertical=False,
@ -263,8 +188,7 @@ def aug_inference(model,
Args:
model (paddle.nn.Layer): model to get logits of image.
im (Tensor): the input image.
ori_shape (list): Origin shape of image.
transforms (list): Transforms for image.
trans_info (list): Records of how the image shape changed during preprocessing.
scales (float|tuple|list): Scales for resize. Default: 1.
flip_horizontal (bool): Whether to flip horizontally. Default: False.
flip_vertical (bool): Whether to flip vertically. Default: False.
@ -302,8 +226,7 @@ def aug_inference(model,
logit = F.softmax(logit, axis=1)
final_logit = final_logit + logit
final_logit = reverse_transform(
final_logit, ori_shape, transforms, mode='bilinear')
final_logit = reverse_transform(final_logit, trans_info, mode='bilinear')
pred = paddle.argmax(final_logit, axis=1, keepdim=True, dtype='int32')
return pred, final_logit
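The hunks above retire `get_reverse_list(ori_shape, transforms)`: the transforms now record a `trans_info` list while preprocessing, and `reverse_transform` simply walks it backwards. A hedged illustration of the record format (shapes below are made up):

```python
# Each entry is (op, (h, w)), where (h, w) is the shape *before* that op ran.
import paddle
import paddle.nn.functional as F

pred = paddle.zeros([1, 1, 512, 512])  # network output after Resize then Pad
trans_info = [
    ('resize', (480, 640)),            # shape before a Resize to 500x500
    ('padding', (500, 500)),           # shape before a Pad to 512x512
]
for op, (h, w) in trans_info[::-1]:    # undo padding first, then the resize
    if op == 'resize':
        pred = F.interpolate(pred, (h, w), mode='bilinear')
    elif op == 'padding':
        pred = pred[:, :, :h, :w]
print(pred.shape)                      # [1, 1, 480, 640]
```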

@ -36,6 +36,15 @@ def partition_list(arr, m):
return [arr[i:i + n] for i in range(0, len(arr), n)]
def preprocess(im_path, transforms):
data = {}
data['img'] = im_path
data = transforms(data)
data['img'] = data['img'][np.newaxis, ...]
data['img'] = paddle.to_tensor(data['img'])
return data
def predict(model,
model_path,
transforms,
@ -89,18 +98,13 @@ def predict(model,
color_map = visualize.get_color_map_list(256, custom_color=custom_color)
with paddle.no_grad():
for i, im_path in enumerate(img_lists[local_rank]):
im = cv2.imread(im_path)
ori_shape = im.shape[:2]
im, _ = transforms(im)
im = im[np.newaxis, ...]
im = paddle.to_tensor(im)
data = preprocess(im_path, transforms)
if aug_pred:
pred, _ = infer.aug_inference(
model,
im,
ori_shape=ori_shape,
transforms=transforms.transforms,
data['img'],
trans_info=data['trans_info'],
scales=scales,
flip_horizontal=flip_horizontal,
flip_vertical=flip_vertical,
@ -110,9 +114,8 @@ def predict(model,
else:
pred, _ = infer.inference(
model,
im,
ori_shape=ori_shape,
transforms=transforms.transforms,
data['img'],
trans_info=data['trans_info'],
is_slide=is_slide,
stride=stride,
crop_size=crop_size)
@ -141,9 +144,4 @@ def predict(model,
mkdir(pred_saved_path)
pred_mask.save(pred_saved_path)
# pred_im = utils.visualize(im_path, pred, weight=0.0)
# pred_saved_path = os.path.join(pred_saved_dir, im_file)
# mkdir(pred_saved_path)
# cv2.imwrite(pred_saved_path, pred_im)
progbar_pred.update(i + 1)

@ -35,17 +35,15 @@ def check_logits_losses(logits_list, losses):
.format(len_logits, len_losses))
def loss_computation(logits_list, labels, losses, edges=None):
def loss_computation(logits_list, labels, edges, losses):
check_logits_losses(logits_list, losses)
loss_list = []
for i in range(len(logits_list)):
logits = logits_list[i]
loss_i = losses['types'][i]
coef_i = losses['coef'][i]
if loss_i.__class__.__name__ in ('BCELoss', 'FocalLoss'
) and loss_i.edge_label:
# If use edges as labels According to loss type.
if loss_i.__class__.__name__ in ('BCELoss', ) and loss_i.edge_label:
# Use edges as labels, according to the loss type.
loss_list.append(coef_i * loss_i(logits, edges))
elif loss_i.__class__.__name__ == 'MixedLoss':
mixed_loss_list = loss_i(logits, labels)
@ -75,13 +73,14 @@ def train(model,
keep_checkpoint_max=5,
test_config=None,
precision='fp32',
amp_level='O1',
profiler_options=None,
to_static_training=False):
"""
Launch training.
Args:
model (nn.Layer): A sementic segmentation model.
model (nn.Layer): A semantic segmentation model.
train_dataset (paddle.io.Dataset): Used to read and process training datasets.
val_dataset (paddle.io.Dataset, optional): Used to read and process validation datasets.
optimizer (paddle.optimizer.Optimizer): The optimizer.
@ -98,6 +97,9 @@ def train(model,
keep_checkpoint_max (int, optional): Maximum number of checkpoints to save. Default: 5.
test_config(dict, optional): Evaluation config.
precision (str, optional): Use AMP if precision='fp16'. If precision='fp32', the training is normal.
amp_level (str, optional): Auto mixed precision level. Accepted values are 'O1' and 'O2': with 'O1' (mixed precision),
the input data type of each operator is cast according to the white and black lists; with 'O2' (pure fp16), all operator
parameters and input data are cast to fp16, except for operators in the black list, operators without fp16 kernels, and batch norm. Default: 'O1'.
profiler_options (str, optional): The option of train profiler.
to_static_training (bool, optional): Whether to use @to_static for training.
"""
@ -112,7 +114,18 @@ def train(model,
if not os.path.isdir(save_dir):
if os.path.exists(save_dir):
os.remove(save_dir)
os.makedirs(save_dir)
os.makedirs(save_dir, exist_ok=True)
# use amp
if precision == 'fp16':
logger.info('use AMP to train. AMP level = {}'.format(amp_level))
scaler = paddle.amp.GradScaler(init_loss_scaling=1024)
if amp_level == 'O2':
model, optimizer = paddle.amp.decorate(
models=model,
optimizers=optimizer,
level='O2',
save_dtype='float32')
if nranks > 1:
paddle.distributed.fleet.init(is_collective=True)
@ -130,18 +143,13 @@ def train(model,
return_list=True,
worker_init_fn=worker_init_fn, )
# use amp
if precision == 'fp16':
logger.info('use amp to train')
scaler = paddle.amp.GradScaler(init_loss_scaling=1024)
if use_vdl:
from visualdl import LogWriter
log_writer = LogWriter(save_dir)
if to_static_training:
model = paddle.jit.to_static(model)
logger.info("Successfully to apply @to_static")
logger.info("Successfully applied @to_static")
avg_loss = 0.0
avg_loss_list = []
@ -164,30 +172,29 @@ def train(model,
else:
break
reader_cost_averager.record(time.time() - batch_start)
images = data[0]
labels = data[1].astype('int64')
images = data['img']
labels = data['label'].astype('int64')
edges = None
if len(data) == 3:
edges = data[2].astype('int64')
if 'edge' in data.keys():
edges = data['edge'].astype('int64')
if hasattr(model, 'data_format') and model.data_format == 'NHWC':
images = images.transpose((0, 2, 3, 1))
if precision == 'fp16':
with paddle.amp.auto_cast(
level=amp_level,
enable=True,
custom_white_list={
"elementwise_add", "batch_norm", "sync_batch_norm"
},
custom_black_list={'bilinear_interp_v2'}):
if nranks > 1:
logits_list = ddp_model(images)
else:
logits_list = model(images)
logits_list = ddp_model(images) if nranks > 1 else model(
images)
loss_list = loss_computation(
logits_list=logits_list,
labels=labels,
losses=losses,
edges=edges)
edges=edges,
losses=losses)
loss = sum(loss_list)
scaled = scaler.scale(loss) # scale the loss
@ -197,15 +204,12 @@ def train(model,
else:
scaler.minimize(optimizer, scaled) # update parameters
else:
if nranks > 1:
logits_list = ddp_model(images)
else:
logits_list = model(images)
logits_list = ddp_model(images) if nranks > 1 else model(images)
loss_list = loss_computation(
logits_list=logits_list,
labels=labels,
losses=losses,
edges=edges)
edges=edges,
losses=losses)
loss = sum(loss_list)
loss.backward()
# If the optimizer is ReduceOnPlateau, the loss is the one that has been passed into step().
@ -278,7 +282,12 @@ def train(model,
test_config = {}
mean_iou, acc, _, _, _ = evaluate(
model, val_dataset, num_workers=num_workers, **test_config)
model,
val_dataset,
num_workers=num_workers,
precision=precision,
amp_level=amp_level,
**test_config)
model.train()
@ -314,7 +323,7 @@ def train(model,
batch_start = time.time()
# Calculate flops.
if local_rank == 0:
if local_rank == 0 and not (precision == 'fp16' and amp_level == 'O2'):
_, c, h, w = images.shape
_ = paddle.flops(
model, [1, c, h, w],
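A condensed, self-contained sketch of the AMP flow wired into `train` above; the toy model, data, and loss are stand-ins, not PaddleSeg code:

```python
import paddle
import paddle.nn.functional as F

# Toy stand-ins so the sketch runs end to end.
model = paddle.nn.Conv2D(3, 2, 3, padding=1)   # pretend segmentation network
optimizer = paddle.optimizer.SGD(parameters=model.parameters())
images = paddle.randn([2, 3, 32, 32])
labels = paddle.randint(0, 2, [2, 32, 32])

scaler = paddle.amp.GradScaler(init_loss_scaling=1024)
amp_level = 'O1'
# For 'O2', parameters are additionally cast up front:
# model, optimizer = paddle.amp.decorate(
#     models=model, optimizers=optimizer, level='O2', save_dtype='float32')

with paddle.amp.auto_cast(
        level=amp_level,
        enable=True,
        custom_white_list={'elementwise_add', 'batch_norm', 'sync_batch_norm'},
        custom_black_list={'bilinear_interp_v2'}):
    logits = model(images)                     # NCHW logits
    loss = F.cross_entropy(logits, labels, axis=1)

scaled = scaler.scale(loss)          # scale the loss to avoid fp16 underflow
scaled.backward()
scaler.minimize(optimizer, scaled)   # unscale gradients and update parameters
```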

@ -34,6 +34,8 @@ def evaluate(model,
is_slide=False,
stride=None,
crop_size=None,
precision='fp32',
amp_level='O1',
num_workers=0,
print_detail=True,
auc_roc=False):
@ -41,7 +43,7 @@ def evaluate(model,
Launch evaluation.
Args:
model (nn.Layer): A sementic segmentation model.
model (nn.Layer): A semantic segmentation model.
eval_dataset (paddle.io.Dataset): Used to read and process validation datasets.
aug_eval (bool, optional): Whether to use multi-scale and flip augmentation for evaluation. Default: False.
scales (list|float, optional): Scales for augment. It is valid when `aug_eval` is True. Default: 1.0.
@ -52,6 +54,8 @@ def evaluate(model,
It should be provided when `is_slide` is True.
crop_size (tuple|list, optional): The crop size of sliding window, the first is width and the second is height.
It should be provided when `is_slide` is True.
precision (str, optional): Use AMP if precision='fp16'. If precision='fp32', the evaluation is normal.
amp_level (str, optional): Auto mixed precision level. Accepted values are 'O1' and 'O2': with 'O1' (mixed precision), the input data type of each operator is cast according to the white and black lists; with 'O2' (pure fp16), all operator parameters and input data are cast to fp16, except for operators in the black list, operators without fp16 kernels, and batch norm. Default: 'O1'.
num_workers (int, optional): Num workers for data loader. Default: 0.
print_detail (bool, optional): Whether to print detailed information about the evaluation process. Default: True.
auc_roc (bool, optional): Whether to add the AUC-ROC metric.
@ -93,32 +97,66 @@ def evaluate(model,
batch_cost_averager = TimeAverager()
batch_start = time.time()
with paddle.no_grad():
for iter, (im, label) in enumerate(loader):
for iter, data in enumerate(loader):
reader_cost_averager.record(time.time() - batch_start)
label = label.astype('int64')
label = data['label'].astype('int64')
ori_shape = label.shape[-2:]
if aug_eval:
pred, logits = infer.aug_inference(
model,
im,
ori_shape=ori_shape,
transforms=eval_dataset.transforms.transforms,
scales=scales,
flip_horizontal=flip_horizontal,
flip_vertical=flip_vertical,
is_slide=is_slide,
stride=stride,
crop_size=crop_size)
if precision == 'fp16':
with paddle.amp.auto_cast(
level=amp_level,
enable=True,
custom_white_list={
"elementwise_add", "batch_norm",
"sync_batch_norm"
},
custom_black_list={'bilinear_interp_v2'}):
pred, logits = infer.aug_inference(
model,
data['img'],
trans_info=data['trans_info'],
scales=scales,
flip_horizontal=flip_horizontal,
flip_vertical=flip_vertical,
is_slide=is_slide,
stride=stride,
crop_size=crop_size)
else:
pred, logits = infer.aug_inference(
model,
data['img'],
trans_info=data['trans_info'],
scales=scales,
flip_horizontal=flip_horizontal,
flip_vertical=flip_vertical,
is_slide=is_slide,
stride=stride,
crop_size=crop_size)
else:
pred, logits = infer.inference(
model,
im,
ori_shape=ori_shape,
transforms=eval_dataset.transforms.transforms,
is_slide=is_slide,
stride=stride,
crop_size=crop_size)
if precision == 'fp16':
with paddle.amp.auto_cast(
level=amp_level,
enable=True,
custom_white_list={
"elementwise_add", "batch_norm",
"sync_batch_norm"
},
custom_black_list={'bilinear_interp_v2'}):
pred, logits = infer.inference(
model,
data['img'],
trans_info=data['trans_info'],
is_slide=is_slide,
stride=stride,
crop_size=crop_size)
else:
pred, logits = infer.inference(
model,
data['img'],
trans_info=data['trans_info'],
is_slide=is_slide,
stride=stride,
crop_size=crop_size)
intersect_area, pred_area, label_area = metrics.calculate_area(
pred,
@ -175,12 +213,12 @@ def evaluate(model,
batch_cost_averager.reset()
batch_start = time.time()
class_iou, miou = metrics.mean_iou(intersect_area_all, pred_area_all,
label_area_all)
class_acc, acc = metrics.accuracy(intersect_area_all, pred_area_all)
kappa = metrics.kappa(intersect_area_all, pred_area_all, label_area_all)
class_dice, mdice = metrics.dice(intersect_area_all, pred_area_all,
label_area_all)
metrics_input = (intersect_area_all, pred_area_all, label_area_all)
class_iou, miou = metrics.mean_iou(*metrics_input)
acc, class_precision, class_recall = metrics.class_measurement(
*metrics_input)
kappa = metrics.kappa(*metrics_input)
class_dice, mdice = metrics.dice(*metrics_input)
if auc_roc:
auc_roc = metrics.auc_roc(
@ -193,5 +231,7 @@ def evaluate(model,
infor = infor + auc_infor if auc_roc else infor
logger.info(infor)
logger.info("[EVAL] Class IoU: \n" + str(np.round(class_iou, 4)))
logger.info("[EVAL] Class Acc: \n" + str(np.round(class_acc, 4)))
return miou, acc, class_iou, class_acc, kappa
logger.info("[EVAL] Class Precision: \n" + str(
np.round(class_precision, 4)))
logger.info("[EVAL] Class Recall: \n" + str(np.round(class_recall, 4)))
return miou, acc, class_iou, class_precision, kappa

@ -15,9 +15,12 @@
import codecs
import os
from typing import Any, Dict, Generic
import warnings
from ast import literal_eval
import paddle
import yaml
import six
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.utils import logger
@ -69,7 +72,8 @@ class Config(object):
path: str,
learning_rate: float=None,
batch_size: int=None,
iters: int=None):
iters: int=None,
opts: list=None):
if not path:
raise ValueError('Please specify the configuration file path.')
@ -84,7 +88,18 @@ class Config(object):
raise RuntimeError('Config file should be in yaml format!')
self.update(
learning_rate=learning_rate, batch_size=batch_size, iters=iters)
learning_rate=learning_rate,
batch_size=batch_size,
iters=iters,
opts=opts)
model_cfg = self.dic.get('model', None)
if model_cfg is None:
raise RuntimeError('No model specified in the configuration file.')
if (not self.train_dataset_config) and (not self.val_dataset_config):
raise ValueError(
'Either `train_dataset` or `val_dataset` should be given, but there are none.'
)
def _update_dic(self, dic, base_dic):
"""
@ -121,7 +136,8 @@ class Config(object):
def update(self,
learning_rate: float=None,
batch_size: int=None,
iters: int=None):
iters: int=None,
opts: list=None):
'''Update config'''
if learning_rate:
if 'lr_scheduler' in self.dic:
@ -135,6 +151,27 @@ class Config(object):
if iters:
self.dic['iters'] = iters
# Override parameters with the --opts command-line options.
if opts is not None:
if len(opts) % 2 != 0 or len(opts) == 0:
raise ValueError(
"Command line options config `--opts` format error! It should be even length like: k1 v1 k2 v2 ... Please check it: {}".
format(opts))
for key, value in zip(opts[0::2], opts[1::2]):
if isinstance(value, six.string_types):
try:
value = literal_eval(value)
except ValueError:
pass
except SyntaxError:
pass
key_list = key.split('.')
dic = self.dic
for subkey in key_list[:-1]:
dic.setdefault(subkey, dict())
dic = dic[subkey]
dic[key_list[-1]] = value
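A hedged usage sketch for the new `--opts` override: dotted keys address nested config entries, and string values are parsed with `ast.literal_eval` where possible (the config path below is hypothetical):

```python
from paddlers.models.ppseg.cvlibs import Config

cfg = Config(
    'configs/demo.yml',  # hypothetical config file
    opts=['batch_size', '4', 'model.num_classes', '2'])
# Equivalent to cfg.dic['batch_size'] = 4 and
# cfg.dic['model']['num_classes'] = 2.
```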
@property
def batch_size(self) -> int:
return self.dic.get('batch_size', 1)
@ -153,13 +190,32 @@ class Config(object):
'No `lr_scheduler` specified in the configuration file.')
params = self.dic.get('lr_scheduler')
use_warmup = False
if 'warmup_iters' in params:
use_warmup = True
warmup_iters = params.pop('warmup_iters')
assert 'warmup_start_lr' in params, \
"When use warmup, please set warmup_start_lr and warmup_iters in lr_scheduler"
warmup_start_lr = params.pop('warmup_start_lr')
end_lr = params['learning_rate']
lr_type = params.pop('type')
if lr_type == 'PolynomialDecay':
params.setdefault('decay_steps', self.iters)
iters = self.iters - warmup_iters if use_warmup else self.iters
iters = max(iters, 1)
params.setdefault('decay_steps', iters)
params.setdefault('end_lr', 0)
params.setdefault('power', 0.9)
lr_sche = getattr(paddle.optimizer.lr, lr_type)(**params)
if use_warmup:
lr_sche = paddle.optimizer.lr.LinearWarmup(
learning_rate=lr_sche,
warmup_steps=warmup_iters,
start_lr=warmup_start_lr,
end_lr=end_lr)
return getattr(paddle.optimizer.lr, lr_type)(**params)
return lr_sche
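With warmup configured, the decay scheduler now runs for `iters - warmup_iters` steps and is wrapped in `LinearWarmup`. A standalone sketch of the equivalent construction (numbers are illustrative):

```python
import paddle

iters, warmup_iters = 1000, 100
decay = paddle.optimizer.lr.PolynomialDecay(
    learning_rate=0.01,
    decay_steps=iters - warmup_iters,  # decay starts after warmup ends
    end_lr=0,
    power=0.9)
sched = paddle.optimizer.lr.LinearWarmup(
    learning_rate=decay,
    warmup_steps=warmup_iters,
    start_lr=1.0e-5,
    end_lr=0.01)  # warmup ends at the base learning rate
```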
@property
def learning_rate(self) -> paddle.optimizer.lr.LRScheduler:
@ -202,15 +258,33 @@ class Config(object):
args = self.optimizer_args
optimizer_type = args.pop('type')
params = self.model.parameters()
if 'backbone_lr_mult' in args:
if not hasattr(self.model, 'backbone'):
logger.warning('`backbone_lr_mult` is not effective because'
' the model does not have a backbone')
else:
backbone_lr_mult = args.pop('backbone_lr_mult')
backbone_params = self.model.backbone.parameters()
backbone_params_id = [id(x) for x in backbone_params]
other_params = [
x for x in params if id(x) not in backbone_params_id
]
params = [{
'params': backbone_params,
'learning_rate': backbone_lr_mult
}, {
'params': other_params
}]
if optimizer_type == 'sgd':
return paddle.optimizer.Momentum(
lr, parameters=self.model.parameters(), **args)
return paddle.optimizer.Momentum(lr, parameters=params, **args)
elif optimizer_type == 'adam':
return paddle.optimizer.Adam(
lr, parameters=self.model.parameters(), **args)
return paddle.optimizer.Adam(lr, parameters=params, **args)
elif optimizer_type in paddle.optimizer.__all__:
return getattr(paddle.optimizer, optimizer_type)(
lr, parameters=self.model.parameters(), **args)
return getattr(paddle.optimizer, optimizer_type)(lr,
parameters=params,
**args)
raise RuntimeError('Unknown optimizer type {}.'.format(optimizer_type))
@ -295,24 +369,6 @@ class Config(object):
@property
def model(self) -> paddle.nn.Layer:
model_cfg = self.dic.get('model').copy()
if not model_cfg:
raise RuntimeError('No model specified in the configuration file.')
if not 'num_classes' in model_cfg:
num_classes = None
if self.train_dataset_config:
if hasattr(self.train_dataset_class, 'NUM_CLASSES'):
num_classes = self.train_dataset_class.NUM_CLASSES
elif hasattr(self.train_dataset, 'num_classes'):
num_classes = self.train_dataset.num_classes
elif self.val_dataset_config:
if hasattr(self.val_dataset_class, 'NUM_CLASSES'):
num_classes = self.val_dataset_class.NUM_CLASSES
elif hasattr(self.val_dataset, 'num_classes'):
num_classes = self.val_dataset.num_classes
if num_classes is not None:
model_cfg['num_classes'] = num_classes
if not self._model:
self._model = self._load_object(model_cfg)
return self._model
@ -401,3 +457,94 @@ class Config(object):
def __str__(self) -> str:
return yaml.dump(self.dic)
@property
def val_transforms(self) -> list:
"""Get val_transform from val_dataset"""
_val_dataset = self.val_dataset_config
if not _val_dataset:
return []
_transforms = _val_dataset.get('transforms', [])
transforms = []
for i in _transforms:
transforms.append(self._load_object(i))
return transforms
def check_sync_info(self) -> None:
"""
Check and sync the info, such as num_classes and img_channels,
between the config of model, train_dataset and val_dataset.
"""
self._check_sync_num_classes()
self._check_sync_img_channels()
def _check_sync_num_classes(self):
num_classes_set = set()
if self.dic['model'].get('num_classes', None) is not None:
num_classes_set.add(self.dic['model'].get('num_classes'))
if self.train_dataset_config:
if hasattr(self.train_dataset_class, 'NUM_CLASSES'):
num_classes_set.add(self.train_dataset_class.NUM_CLASSES)
elif 'num_classes' in self.train_dataset_config:
num_classes_set.add(self.train_dataset_config['num_classes'])
if self.val_dataset_config:
if hasattr(self.val_dataset_class, 'NUM_CLASSES'):
num_classes_set.add(self.val_dataset_class.NUM_CLASSES)
elif 'num_classes' in self.val_dataset_config:
num_classes_set.add(self.val_dataset_config['num_classes'])
if len(num_classes_set) == 0:
raise ValueError(
'`num_classes` is not found. Please set it in model, train_dataset or val_dataset'
)
elif len(num_classes_set) > 1:
raise ValueError(
'`num_classes` is not consistent: {}. Please set it consistently in model, train_dataset, and val_dataset'
.format(num_classes_set))
num_classes = num_classes_set.pop()
self.dic['model']['num_classes'] = num_classes
if self.train_dataset_config and \
(not hasattr(self.train_dataset_class, 'NUM_CLASSES')):
self.dic['train_dataset']['num_classes'] = num_classes
if self.val_dataset_config and \
(not hasattr(self.val_dataset_class, 'NUM_CLASSES')):
self.dic['val_dataset']['num_classes'] = num_classes
def _check_sync_img_channels(self):
img_channels_set = set()
model_cfg = self.dic['model']
# If the model has a backbone, in_channels is an input parameter of the backbone.
# Otherwise, in_channels is an input parameter of the model.
if 'backbone' in model_cfg:
x = model_cfg['backbone'].get('in_channels', None)
if x is not None:
img_channels_set.add(x)
elif model_cfg.get('in_channels', None) is not None:
img_channels_set.add(model_cfg.get('in_channels'))
if self.train_dataset_config and \
('img_channels' in self.train_dataset_config):
img_channels_set.add(self.train_dataset_config['img_channels'])
if self.val_dataset_config and \
('img_channels' in self.val_dataset_config):
img_channels_set.add(self.val_dataset_config['img_channels'])
if len(img_channels_set) > 1:
raise ValueError(
'`img_channels` is not consistent: {}. Please set it consistently in model, train_dataset, and val_dataset'
.format(img_channels_set))
img_channels = 3 if len(img_channels_set) == 0 \
else img_channels_set.pop()
if 'backbone' in model_cfg:
self.dic['model']['backbone']['in_channels'] = img_channels
else:
self.dic['model']['in_channels'] = img_channels
if self.train_dataset_config and \
self.train_dataset_config['type'] == "Dataset":
self.dic['train_dataset']['img_channels'] = img_channels
if self.val_dataset_config and \
self.val_dataset_config['type'] == "Dataset":
self.dic['val_dataset']['img_channels'] = img_channels
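A hedged usage note for the new consistency check (config path hypothetical):

```python
from paddlers.models.ppseg.cvlibs import Config

cfg = Config('configs/demo.yml')  # hypothetical config file
# Raises ValueError if model, train_dataset, and val_dataset disagree on
# `num_classes` or `img_channels`; otherwise propagates the agreed values
# (img_channels falls back to 3 when unset everywhere).
cfg.check_sync_info()
```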

@ -118,3 +118,29 @@ def kaiming_uniform(param, **kwargs):
initializer = nn.initializer.KaimingUniform(**kwargs)
initializer(param, param.block)
def xavier_uniform(param, **kwargs):
r"""
This implements the Xavier weight initializer from the paper
`Understanding the difficulty of training deep feedforward neural
networks <http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf>`_
by Xavier Glorot and Yoshua Bengio.
This initializer is designed to keep the scale of the gradients
approximately same in all the layers. In case of Uniform distribution,
the range is [-x, x], where
.. math::
x = \sqrt{\frac{6.0}{fan\_in + fan\_out}}
Args:
param (Tensor): Tensor that needs to be initialized.
Examples:
from paddlers.models.ppseg.cvlibs import param_init
import paddle.nn as nn
linear = nn.Linear(2, 4)
param_init.xavier_uniform(linear.weight)
"""
initializer = nn.initializer.XavierUniform(**kwargs)
initializer(param, param.block)

@ -27,3 +27,4 @@ from .drive import DRIVE
from .hrf import HRF
from .chase_db1 import CHASEDB1
from .pp_humanseg14k import PPHumanSeg14K
from .pssl import PSSLDataset

@ -89,23 +89,31 @@ class ADE20K(Dataset):
self.file_list.append([img_path, label_path])
def __getitem__(self, idx):
data = {}
data['trans_info'] = []
image_path, label_path = self.file_list[idx]
data['img'] = image_path
data['gt_fields'] = [
] # Keys in gt_fields are transformed synchronously with the image.
if self.mode == 'val':
im, _ = self.transforms(im=image_path)
data = self.transforms(data)
label = np.asarray(Image.open(label_path))
# Class 0 is ignored, and it becomes 255 after
# subtracting 1 because the label dtype is uint8.
label = label - 1
label = label[np.newaxis, :, :]
return im, label
data['label'] = label
return data
else:
im, label = self.transforms(im=image_path, label=label_path)
label = label - 1
data['label'] = label_path
data['gt_fields'].append('label')
data = self.transforms(data)
data['label'] = data['label'] - 1
# Recover the ignore pixels added by the transforms
label[label == 254] = 255
data['label'][data['label'] == 254] = 255
if self.edge:
edge_mask = F.mask_to_binary_edge(
label, radius=2, num_classes=self.num_classes)
return im, label, edge_mask
else:
return im, label
data['edge'] = edge_mask
return data

@ -46,10 +46,10 @@ class Dataset(paddle.io.Dataset):
Examples:
import paddlers.models.ppseg.transforms as T
from paddlers.models.ppseg.datasets import Dataset
transforms = [T.RandomPadCrop(crop_size=(512,512)), T.Normalize()]
transforms = [T.RandomPaddingCrop(crop_size=(512,512)), T.Normalize()]
dataset_root = 'dataset_root_path'
train_path = 'train_path'
num_classes = 2
@ -62,10 +62,11 @@ class Dataset(paddle.io.Dataset):
"""
def __init__(self,
transforms,
mode,
dataset_root,
transforms,
num_classes,
mode='train',
img_channels=3,
train_path=None,
val_path=None,
test_path=None,
@ -73,10 +74,11 @@ class Dataset(paddle.io.Dataset):
ignore_index=255,
edge=False):
self.dataset_root = dataset_root
self.transforms = Compose(transforms)
self.transforms = Compose(transforms, img_channels=img_channels)
self.file_list = list()
self.mode = mode.lower()
self.num_classes = num_classes
self.img_channels = img_channels
self.ignore_index = ignore_index
self.edge = edge
@ -84,13 +86,18 @@ class Dataset(paddle.io.Dataset):
raise ValueError(
"mode should be 'train', 'val' or 'test', but got {}.".format(
self.mode))
if self.transforms is None:
raise ValueError("`transforms` is necessary, but it is None.")
if not os.path.exists(self.dataset_root):
raise FileNotFoundError('The `dataset_root` does not exist: {}.'.format(
self.dataset_root))
if self.transforms is None:
raise ValueError("`transforms` is necessary, but it is None.")
if num_classes < 1:
raise ValueError(
"`num_classes` should be greater than 1, but got {}".format(
num_classes))
if img_channels not in [1, 3]:
raise ValueError("`img_channels` should in [1, 3], but got {}".
format(img_channels))
if self.mode == 'train':
if train_path is None:
@ -139,24 +146,25 @@ class Dataset(paddle.io.Dataset):
self.file_list.append([image_path, label_path])
def __getitem__(self, idx):
data = {}
data['trans_info'] = []
image_path, label_path = self.file_list[idx]
if self.mode == 'test':
im, _ = self.transforms(im=image_path)
im = im[np.newaxis, ...]
return im, image_path
elif self.mode == 'val':
im, _ = self.transforms(im=image_path)
label = np.asarray(Image.open(label_path))
label = label[np.newaxis, :, :]
return im, label
data['img'] = image_path
data['label'] = label_path
# Keys in gt_fields are transformed synchronously with the image.
data['gt_fields'] = []
if self.mode == 'val':
data = self.transforms(data)
data['label'] = data['label'][np.newaxis, :, :]
else:
im, label = self.transforms(im=image_path, label=label_path)
data['gt_fields'].append('label')
data = self.transforms(data)
if self.edge:
edge_mask = F.mask_to_binary_edge(
label, radius=2, num_classes=self.num_classes)
return im, label, edge_mask
else:
return im, label
data['label'], radius=2, num_classes=self.num_classes)
data['edge'] = edge_mask
return data
def __len__(self):
return len(self.file_list)
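With the dict-based pipeline above, each sample is a dictionary rather than a tuple. A hedged sketch mirroring the docstring example (paths are placeholders):

```python
import paddlers.models.ppseg.transforms as T
from paddlers.models.ppseg.datasets import Dataset

dataset = Dataset(
    mode='train',
    dataset_root='dataset_root_path',   # placeholder directory
    transforms=[T.RandomPaddingCrop(crop_size=(512, 512)), T.Normalize()],
    num_classes=2,
    img_channels=3,
    train_path='train_path')            # placeholder file list
sample = dataset[0]
# sample['img']: transformed image; sample['label']: label mask;
# sample['trans_info']: shape-change records used by reverse_transform;
# sample['edge'] is present only when edge=True.
```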

@ -0,0 +1,135 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
from paddlers.models.ppseg.datasets import Dataset
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.transforms import Compose
@manager.DATASETS.add_component
class PSSLDataset(Dataset):
"""
The PSSL dataset for segmentation. PSSL is short for Pseudo Semantic Segmentation Labels, where the pseudo label
is computed by the Consensus explanation algorithm.
The PSSL refers to "Distilling Ensemble of Explanations for Weakly-Supervised Pre-Training of Image Segmentation
Models" (https://arxiv.org/abs/2207.03335).
The Consensus explanation refers to "Cross-Model Consensus of Explanations and Beyond for Image Classification
Models: An Empirical Study" (https://arxiv.org/abs/2109.00707).
To use this dataset, we need to additionally prepare the original ImageNet dataset, which has the folder structure
as follows:
imagenet_root
|
|--train
| |--n01440764
| | |--n01440764_10026.JPEG
| | |--...
| |--nxxxxxxxx
| |--...
where only the "train" set is needed.
The PSSL dataset has the folder structure as follows:
pssl_root
|
|--train
| |--n01440764
| | |--n01440764_10026.JPEG_eiseg.npz
| | |--...
| |--nxxxxxxxx
| |--...
|
|--imagenet_lsvrc_2015_synsets.txt
|--train.txt
where "train.txt" and "imagenet_lsvrc_2015_synsets.txt" are included in the PSSL dataset.
Args:
transforms (list): Transforms for image.
imagenet_root (str): The path to the original ImageNet dataset.
pssl_root (str): The path to the PSSL dataset.
mode (str, optional): Which part of the dataset to use. It is one of ('train', 'val', 'test'). Default: 'train'.
edge (bool, optional): Whether to compute edge while training. Default: False.
"""
ignore_index = 1001 # 0~999 is target class, 1000 is bg
NUM_CLASSES = 1001 # consider target class and bg
def __init__(self,
transforms,
imagenet_root,
pssl_root,
mode='train',
edge=False):
mode = mode.lower()
if mode not in ['train']:
raise ValueError("mode should be 'train', but got {}.".format(mode))
if transforms is None:
raise ValueError("`transforms` is necessary, but it is None.")
self.transforms = Compose(transforms)
self.mode = mode
self.edge = edge
self.num_classes = self.NUM_CLASSES
self.ignore_index = self.num_classes # 1001
self.file_list = []
self.class_id_dict = {}
if imagenet_root is None or not os.path.isdir(pssl_root):
raise ValueError(
"The dataset is not Found or the folder structure is nonconfoumance."
)
train_list_file = os.path.join(pssl_root, "train.txt")
if not os.path.exists(train_list_file):
raise ValueError("Train list file isn't exists.")
for idx, line in enumerate(open(train_list_file)):
# line: train/n04118776/n04118776_45912.JPEG_eiseg.npz
label_path = line.strip()
img_path = label_path.split('.JPEG')[0] + '.JPEG'
label_path = os.path.join(pssl_root, label_path)
img_path = os.path.join(imagenet_root, img_path)
self.file_list.append([img_path, label_path])
# mapping class name to class id.
class_id_file = os.path.join(pssl_root,
"imagenet_lsvrc_2015_synsets.txt")
if not os.path.exists(class_id_file):
raise ValueError("Class id file isn't exists.")
for idx, line in enumerate(open(class_id_file)):
class_name = line.strip()
self.class_id_dict[class_name] = idx
def __getitem__(self, idx):
image_path, label_path = self.file_list[idx]
# transform label
class_name = (image_path.split('/')[-1]).split('_')[0]
class_id = self.class_id_dict[class_name]
pssl_seg = np.load(label_path)['arr_0']
gt_semantic_seg = np.zeros_like(pssl_seg, dtype=np.int64) + 1000
# [0, 999] for ImageNet classes, 1000 for background; other values (-1) are ignored during training.
gt_semantic_seg[pssl_seg == 1] = class_id
im, label = self.transforms(im=image_path, label=gt_semantic_seg)
return im, label
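A tiny worked example of the pseudo-label mapping in `__getitem__` above:

```python
import numpy as np

pssl_seg = np.array([[1, 0], [1, 1]])  # consensus mask for one image
class_id = 7                           # hypothetical class index
gt = np.zeros_like(pssl_seg, dtype=np.int64) + 1000  # background everywhere
gt[pssl_seg == 1] = class_id
# gt -> [[7, 1000], [7, 7]]; 1001 serves as the ignore index in training.
```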

@ -49,9 +49,18 @@ from .segnet import SegNet
from .encnet import ENCNet
from .hrnet_contrast import HRNetW48Contrast
from .espnet import ESPNetV2
from .pp_liteseg import PPLiteSeg
from .dmnet import DMNet
from .espnetv1 import ESPNetV1
from .enet import ENet
from .bisenetv1 import BiseNetV1
from .fastfcn import FastFCN
from .pfpnnet import PFPNNet
from .glore import GloRe
from .ddrnet import DDRNet_23
from .ccnet import CCNet
from .mobileseg import MobileSeg
from .upernet import UPerNet
from .sinet import SINet
from .lraspp import LRASPP
from .topformer import TopFormer

@ -35,13 +35,13 @@ class AttentionUNet(nn.Layer):
Args:
num_classes (int): The unique number of target classes.
in_channels (int, optional): The channels of input image. Default: 3.
pretrained (str, optional): The path or url of pretrained model. Default: None.
"""
def __init__(self, num_classes, pretrained=None):
def __init__(self, num_classes, in_channels=3, pretrained=None):
super().__init__()
n_channels = 3
self.encoder = Encoder(n_channels, [64, 128, 256, 512])
self.encoder = Encoder(in_channels, [64, 128, 256, 512])
filters = np.array([64, 128, 256, 512, 1024])
self.up5 = UpConv(ch_in=filters[4], ch_out=filters[3])
self.att5 = AttentionBlock(

@ -21,3 +21,7 @@ from .swin_transformer import *
from .mobilenetv2 import *
from .mix_transformer import *
from .stdcnet import *
from .lite_hrnet import *
from .shufflenetv2 import *
from .ghostnet import *
from .top_transformer import *

@ -0,0 +1,318 @@
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Code was based on https://github.com/huawei-noah/CV-Backbones/tree/master/ghostnet_pytorch
import math
import paddle
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn import Conv2D, BatchNorm, AdaptiveAvgPool2D, Linear
from paddle.regularizer import L2Decay
from paddle.nn.initializer import Uniform, KaimingNormal
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.utils import utils, logger
__all__ = ["GhostNet_x0_5", "GhostNet_x1_0", "GhostNet_x1_3"]
class ConvBNLayer(nn.Layer):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
groups=1,
act="relu",
name=None):
super(ConvBNLayer, self).__init__()
self._conv = Conv2D(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=(kernel_size - 1) // 2,
groups=groups,
weight_attr=ParamAttr(
initializer=KaimingNormal(), name=name + "_weights"),
bias_attr=False)
bn_name = name + "_bn"
self._batch_norm = BatchNorm(
num_channels=out_channels,
act=act,
param_attr=ParamAttr(
name=bn_name + "_scale", regularizer=L2Decay(0.0)),
bias_attr=ParamAttr(
name=bn_name + "_offset", regularizer=L2Decay(0.0)),
moving_mean_name=bn_name + "_mean",
moving_variance_name=bn_name + "_variance")
def forward(self, inputs):
y = self._conv(inputs)
y = self._batch_norm(y)
return y
class SEBlock(nn.Layer):
def __init__(self, num_channels, reduction_ratio=4, name=None):
super(SEBlock, self).__init__()
self.pool2d_gap = AdaptiveAvgPool2D(1)
self._num_channels = num_channels
stdv = 1.0 / math.sqrt(num_channels * 1.0)
med_ch = num_channels // reduction_ratio
self.squeeze = Linear(
num_channels,
med_ch,
weight_attr=ParamAttr(
initializer=Uniform(-stdv, stdv), name=name + "_1_weights"),
bias_attr=ParamAttr(name=name + "_1_offset"))
stdv = 1.0 / math.sqrt(med_ch * 1.0)
self.excitation = Linear(
med_ch,
num_channels,
weight_attr=ParamAttr(
initializer=Uniform(-stdv, stdv), name=name + "_2_weights"),
bias_attr=ParamAttr(name=name + "_2_offset"))
def forward(self, inputs):
pool = self.pool2d_gap(inputs)
pool = paddle.squeeze(pool, axis=[2, 3])
squeeze = self.squeeze(pool)
squeeze = F.relu(squeeze)
excitation = self.excitation(squeeze)
excitation = paddle.clip(x=excitation, min=0, max=1)
excitation = paddle.unsqueeze(excitation, axis=[2, 3])
out = paddle.multiply(inputs, excitation)
return out
class GhostModule(nn.Layer):
def __init__(self,
in_channels,
output_channels,
kernel_size=1,
ratio=2,
dw_size=3,
stride=1,
relu=True,
name=None):
super(GhostModule, self).__init__()
init_channels = int(math.ceil(output_channels / ratio))
new_channels = int(init_channels * (ratio - 1))
self.primary_conv = ConvBNLayer(
in_channels=in_channels,
out_channels=init_channels,
kernel_size=kernel_size,
stride=stride,
groups=1,
act="relu" if relu else None,
name=name + "_primary_conv")
self.cheap_operation = ConvBNLayer(
in_channels=init_channels,
out_channels=new_channels,
kernel_size=dw_size,
stride=1,
groups=init_channels,
act="relu" if relu else None,
name=name + "_cheap_operation")
def forward(self, inputs):
x = self.primary_conv(inputs)
y = self.cheap_operation(x)
out = paddle.concat([x, y], axis=1)
return out
class GhostBottleneck(nn.Layer):
def __init__(self,
in_channels,
hidden_dim,
output_channels,
kernel_size,
stride,
use_se,
name=None):
super(GhostBottleneck, self).__init__()
self._stride = stride
self._use_se = use_se
self._num_channels = in_channels
self._output_channels = output_channels
self.ghost_module_1 = GhostModule(
in_channels=in_channels,
output_channels=hidden_dim,
kernel_size=1,
stride=1,
relu=True,
name=name + "_ghost_module_1")
if stride == 2:
self.depthwise_conv = ConvBNLayer(
in_channels=hidden_dim,
out_channels=hidden_dim,
kernel_size=kernel_size,
stride=stride,
groups=hidden_dim,
act=None,
name=name +
"_depthwise_depthwise" # looks strange due to an old typo, will be fixed later.
)
if use_se:
self.se_block = SEBlock(num_channels=hidden_dim, name=name + "_se")
self.ghost_module_2 = GhostModule(
in_channels=hidden_dim,
output_channels=output_channels,
kernel_size=1,
relu=False,
name=name + "_ghost_module_2")
if stride != 1 or in_channels != output_channels:
self.shortcut_depthwise = ConvBNLayer(
in_channels=in_channels,
out_channels=in_channels,
kernel_size=kernel_size,
stride=stride,
groups=in_channels,
act=None,
name=name +
"_shortcut_depthwise_depthwise" # looks strange due to an old typo, will be fixed later.
)
self.shortcut_conv = ConvBNLayer(
in_channels=in_channels,
out_channels=output_channels,
kernel_size=1,
stride=1,
groups=1,
act=None,
name=name + "_shortcut_conv")
def forward(self, inputs):
x = self.ghost_module_1(inputs)
if self._stride == 2:
x = self.depthwise_conv(x)
if self._use_se:
x = self.se_block(x)
x = self.ghost_module_2(x)
if self._stride == 1 and self._num_channels == self._output_channels:
shortcut = inputs
else:
shortcut = self.shortcut_depthwise(inputs)
shortcut = self.shortcut_conv(shortcut)
return paddle.add(x=x, y=shortcut)
class GhostNet(nn.Layer):
def __init__(self, scale, in_channels=3, pretrained=None):
super(GhostNet, self).__init__()
self.cfgs = [
# k, t, c, SE, s
[3, 16, 16, 0, 1],
[3, 48, 24, 0, 2],
[3, 72, 24, 0, 1], # x4
[5, 72, 40, 1, 2],
[5, 120, 40, 1, 1], # x8
[3, 240, 80, 0, 2],
[3, 200, 80, 0, 1],
[3, 184, 80, 0, 1],
[3, 184, 80, 0, 1],
[3, 480, 112, 1, 1],
[3, 672, 112, 1, 1], # x16
[5, 672, 160, 1, 2],
[5, 960, 160, 0, 1],
[5, 960, 160, 1, 1],
[5, 960, 160, 0, 1],
[5, 960, 160, 1, 1] # x32
]
self.scale = scale
self.pretrained = pretrained
output_channels = int(self._make_divisible(16 * self.scale, 4))
self.conv1 = ConvBNLayer(
in_channels=in_channels,
out_channels=output_channels,
kernel_size=3,
stride=2,
groups=1,
act="relu",
name="conv1")
# build inverted residual blocks
self.out_index = [2, 4, 10, 15]
self.feat_channels = []
self.ghost_bottleneck_list = []
for idx, (k, exp_size, c, use_se, s) in enumerate(self.cfgs):
in_channels = output_channels
output_channels = int(self._make_divisible(c * self.scale, 4))
hidden_dim = int(self._make_divisible(exp_size * self.scale, 4))
ghost_bottleneck = self.add_sublayer(
name="_ghostbottleneck_" + str(idx),
sublayer=GhostBottleneck(
in_channels=in_channels,
hidden_dim=hidden_dim,
output_channels=output_channels,
kernel_size=k,
stride=s,
use_se=use_se,
name="_ghostbottleneck_" + str(idx)))
self.ghost_bottleneck_list.append(ghost_bottleneck)
if idx in self.out_index:
self.feat_channels.append(output_channels)
self.init_weight()
def init_weight(self):
if self.pretrained is not None:
utils.load_entire_model(self, self.pretrained)
def forward(self, inputs):
feat_list = []
x = self.conv1(inputs)
for idx, ghost_bottleneck in enumerate(self.ghost_bottleneck_list):
x = ghost_bottleneck(x)
if idx in self.out_index:
feat_list.append(x)
return feat_list
def _make_divisible(self, v, divisor, min_value=None):
"""
This function is taken from the original tf repo.
    It ensures that all layers have a channel number that is divisible by 8.
It can be seen here:
https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
"""
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
# Make sure that round down does not go down by more than 10%.
if new_v < 0.9 * v:
new_v += divisor
return new_v
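# Worked example (illustrative addition): with scale=1.3 and divisor=4,
# _make_divisible(40 * 1.3, 4) evaluates to int(52.0 + 2) // 4 * 4 = 52;
# since 52 >= 0.9 * 52, no correction is needed and the block gets 52
# channels. A value that would round down by more than 10% is bumped up
# by one divisor instead.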
@manager.BACKBONES.add_component
def GhostNet_x0_5(**kwargs):
model = GhostNet(scale=0.5, **kwargs)
return model
@manager.BACKBONES.add_component
def GhostNet_x1_0(**kwargs):
model = GhostNet(scale=1.0, **kwargs)
return model
@manager.BACKBONES.add_component
def GhostNet_x1_3(**kwargs):
model = GhostNet(scale=1.3, **kwargs)
return model
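# --- Usage sketch (an illustrative addition, not part of the upstream file;
# assumes a working PaddlePaddle install). With out_index = [2, 4, 10, 15],
# the backbone returns features at strides 4, 8, 16 and 32; for scale=1.0 the
# channel counts are [24, 40, 112, 160] (see model.feat_channels).
if __name__ == "__main__":
    model = GhostNet_x1_0()
    x = paddle.randn([1, 3, 224, 224])
    feats = model(x)
    for feat, ch in zip(feats, model.feat_channels):
        assert feat.shape[1] == ch
        print(feat.shape)  # [1, 24, 56, 56] ... [1, 160, 7, 7]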

@@ -37,6 +37,7 @@ class HRNet(nn.Layer):
(https://arxiv.org/pdf/1908.07919.pdf).
Args:
in_channels (int, optional): The channels of input image. Default: 3.
pretrained (str, optional): The path of pretrained model.
stage1_num_modules (int, optional): Number of modules for stage1. Default 1.
stage1_num_blocks (list, optional): Number of blocks per module for stage1. Default (4).
@@ -56,6 +57,7 @@ class HRNet(nn.Layer):
"""
def __init__(self,
in_channels=3,
pretrained=None,
stage1_num_modules=1,
stage1_num_blocks=(4, ),
@@ -91,7 +93,7 @@ class HRNet(nn.Layer):
self.feat_channels = [sum(stage4_num_channels)]
self.conv_layer1_1 = layers.ConvBNReLU(
in_channels=3,
in_channels=in_channels,
out_channels=64,
kernel_size=3,
stride=2,

@@ -0,0 +1,974 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on
https://github.com/HRNet/Lite-HRNet/blob/hrnet/models/backbones/litehrnet.py
"""
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from numbers import Integral
from paddle import ParamAttr
from paddle.regularizer import L2Decay
from paddle.nn.initializer import Normal, Constant
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg import utils
__all__ = [
"Lite_HRNet_18", "Lite_HRNet_30", "Lite_HRNet_naive",
"Lite_HRNet_wider_naive", "LiteHRNet"
]
def Conv2d(in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
dilation=1,
groups=1,
bias=True,
weight_init=Normal(std=0.001),
bias_init=Constant(0.)):
weight_attr = paddle.framework.ParamAttr(initializer=weight_init)
if bias:
bias_attr = paddle.framework.ParamAttr(initializer=bias_init)
else:
bias_attr = False
conv = nn.Conv2D(
in_channels,
out_channels,
kernel_size,
stride,
padding,
dilation,
groups,
weight_attr=weight_attr,
bias_attr=bias_attr)
return conv
def channel_shuffle(x, groups):
x_shape = paddle.shape(x)
batch_size, height, width = x_shape[0], x_shape[2], x_shape[3]
num_channels = x.shape[1]
channels_per_group = num_channels // groups
x = paddle.reshape(
x=x, shape=[batch_size, groups, channels_per_group, height, width])
x = paddle.transpose(x=x, perm=[0, 2, 1, 3, 4])
x = paddle.reshape(x=x, shape=[batch_size, num_channels, height, width])
return x
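# Worked example (illustrative addition): for channels [c0, c1, c2, c3] and
# groups=2, the first reshape groups them as [[c0, c1], [c2, c3]], the
# transpose pairs them as [[c0, c2], [c1, c3]], and the final reshape flattens
# this to [c0, c2, c1, c3], interleaving the two groups so that information
# mixes across group boundaries in the next grouped operation.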
class ConvNormLayer(nn.Layer):
def __init__(self,
ch_in,
ch_out,
filter_size,
stride=1,
groups=1,
norm_type=None,
norm_groups=32,
norm_decay=0.,
freeze_norm=False,
act=None):
super(ConvNormLayer, self).__init__()
self.act = act
norm_lr = 0. if freeze_norm else 1.
if norm_type is not None:
assert norm_type in ['bn', 'sync_bn', 'gn'], \
"norm_type should be one of ['bn', 'sync_bn', 'gn'], but got {}".format(norm_type)
param_attr = ParamAttr(
initializer=Constant(1.0),
learning_rate=norm_lr,
regularizer=L2Decay(norm_decay), )
bias_attr = ParamAttr(
learning_rate=norm_lr, regularizer=L2Decay(norm_decay))
global_stats = True if freeze_norm else None
if norm_type in ['bn', 'sync_bn']:
self.norm = nn.BatchNorm2D(
ch_out,
weight_attr=param_attr,
bias_attr=bias_attr,
use_global_stats=global_stats, )
elif norm_type == 'gn':
self.norm = nn.GroupNorm(
num_groups=norm_groups,
num_channels=ch_out,
weight_attr=param_attr,
bias_attr=bias_attr)
norm_params = self.norm.parameters()
if freeze_norm:
for param in norm_params:
param.stop_gradient = True
conv_bias_attr = False
else:
conv_bias_attr = True
self.norm = None
self.conv = nn.Conv2D(
in_channels=ch_in,
out_channels=ch_out,
kernel_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=groups,
weight_attr=ParamAttr(initializer=Normal(
mean=0., std=0.001)),
bias_attr=conv_bias_attr)
def forward(self, inputs):
out = self.conv(inputs)
if self.norm is not None:
out = self.norm(out)
if self.act == 'relu':
out = F.relu(out)
elif self.act == 'sigmoid':
out = F.sigmoid(out)
return out
class DepthWiseSeparableConvNormLayer(nn.Layer):
def __init__(self,
ch_in,
ch_out,
filter_size,
stride=1,
dw_norm_type=None,
pw_norm_type=None,
norm_decay=0.,
freeze_norm=False,
dw_act=None,
pw_act=None):
super(DepthWiseSeparableConvNormLayer, self).__init__()
self.depthwise_conv = ConvNormLayer(
ch_in=ch_in,
ch_out=ch_in,
filter_size=filter_size,
stride=stride,
groups=ch_in,
norm_type=dw_norm_type,
act=dw_act,
norm_decay=norm_decay,
freeze_norm=freeze_norm, )
self.pointwise_conv = ConvNormLayer(
ch_in=ch_in,
ch_out=ch_out,
filter_size=1,
stride=1,
norm_type=pw_norm_type,
act=pw_act,
norm_decay=norm_decay,
freeze_norm=freeze_norm, )
def forward(self, x):
x = self.depthwise_conv(x)
x = self.pointwise_conv(x)
return x
class CrossResolutionWeightingModule(nn.Layer):
def __init__(self,
channels,
ratio=16,
norm_type='bn',
freeze_norm=False,
norm_decay=0.):
super(CrossResolutionWeightingModule, self).__init__()
self.channels = channels
total_channel = sum(channels)
self.conv1 = ConvNormLayer(
ch_in=total_channel,
ch_out=total_channel // ratio,
filter_size=1,
stride=1,
norm_type=norm_type,
act='relu',
freeze_norm=freeze_norm,
norm_decay=norm_decay)
self.conv2 = ConvNormLayer(
ch_in=total_channel // ratio,
ch_out=total_channel,
filter_size=1,
stride=1,
norm_type=norm_type,
act='sigmoid',
freeze_norm=freeze_norm,
norm_decay=norm_decay)
    def forward(self, x):
        out = []
        # Average-pool every branch down to the resolution of the smallest one.
        for idx, xi in enumerate(x[:-1]):
            kernel_size = stride = pow(2, len(x) - idx - 1)
            xi = F.avg_pool2d(xi, kernel_size=kernel_size, stride=stride)
            out.append(xi)
        out.append(x[-1])
        # Compute channel weights jointly across all resolutions: a 1x1 conv
        # bottleneck with ReLU, then a 1x1 conv with sigmoid.
        out = paddle.concat(out, 1)
        out = self.conv1(out)
        out = self.conv2(out)
        # Split the weights back per branch, upsample each to its branch's
        # resolution, and rescale the original features.
        out = paddle.split(out, self.channels, 1)
        out = [
            s * F.interpolate(
                a, paddle.shape(s)[-2:], mode='nearest') for s, a in zip(x, out)
        ]
        return out
class SpatialWeightingModule(nn.Layer):
def __init__(self, in_channel, ratio=16, freeze_norm=False, norm_decay=0.):
super(SpatialWeightingModule, self).__init__()
self.global_avgpooling = nn.AdaptiveAvgPool2D(1)
self.conv1 = ConvNormLayer(
ch_in=in_channel,
ch_out=in_channel // ratio,
filter_size=1,
stride=1,
act='relu',
freeze_norm=freeze_norm,
norm_decay=norm_decay)
self.conv2 = ConvNormLayer(
ch_in=in_channel // ratio,
ch_out=in_channel,
filter_size=1,
stride=1,
act='sigmoid',
freeze_norm=freeze_norm,
norm_decay=norm_decay)
def forward(self, x):
out = self.global_avgpooling(x)
out = self.conv1(out)
out = self.conv2(out)
return x * out
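# Note (illustrative addition): this is a squeeze-and-excitation style gate.
# Global average pooling followed by a 1x1 bottleneck (ReLU) and a 1x1
# expansion (sigmoid) yields per-channel weights in [0, 1] that rescale the
# input feature map.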
class ConditionalChannelWeightingBlock(nn.Layer):
def __init__(self,
in_channels,
stride,
reduce_ratio,
norm_type='bn',
freeze_norm=False,
norm_decay=0.):
super(ConditionalChannelWeightingBlock, self).__init__()
assert stride in [1, 2]
branch_channels = [channel // 2 for channel in in_channels]
self.cross_resolution_weighting = CrossResolutionWeightingModule(
branch_channels,
ratio=reduce_ratio,
norm_type=norm_type,
freeze_norm=freeze_norm,
norm_decay=norm_decay)
self.depthwise_convs = nn.LayerList([
ConvNormLayer(
channel,
channel,
filter_size=3,
stride=stride,
groups=channel,
norm_type=norm_type,
freeze_norm=freeze_norm,
norm_decay=norm_decay) for channel in branch_channels
])
self.spatial_weighting = nn.LayerList([
SpatialWeightingModule(
channel,
ratio=4,
freeze_norm=freeze_norm,
norm_decay=norm_decay) for channel in branch_channels
])
    def forward(self, x):
        # Split every branch channel-wise; one half passes through unchanged.
        x = [s.chunk(2, axis=1) for s in x]
        x1 = [s[0] for s in x]
        x2 = [s[1] for s in x]
        # The other half is reweighted across resolutions, depthwise-convolved,
        # and spatially gated before the halves are recombined and shuffled.
        x2 = self.cross_resolution_weighting(x2)
        x2 = [dw(s) for s, dw in zip(x2, self.depthwise_convs)]
        x2 = [sw(s) for s, sw in zip(x2, self.spatial_weighting)]
        out = [paddle.concat([s1, s2], axis=1) for s1, s2 in zip(x1, x2)]
        out = [channel_shuffle(s, groups=2) for s in out]
        return out
class ShuffleUnit(nn.Layer):
def __init__(self,
in_channel,
out_channel,
stride,
norm_type='bn',
freeze_norm=False,
norm_decay=0.):
super(ShuffleUnit, self).__init__()
branch_channel = out_channel // 2
self.stride = stride
if self.stride == 1:
            assert in_channel == branch_channel * 2, \
                "when stride=1, in_channel {} should equal branch_channel * 2 ({})".format(in_channel, branch_channel * 2)
if stride > 1:
self.branch1 = nn.Sequential(
ConvNormLayer(
ch_in=in_channel,
ch_out=in_channel,
filter_size=3,
stride=self.stride,
groups=in_channel,
norm_type=norm_type,
freeze_norm=freeze_norm,
norm_decay=norm_decay),
ConvNormLayer(
ch_in=in_channel,
ch_out=branch_channel,
filter_size=1,
stride=1,
norm_type=norm_type,
act='relu',
freeze_norm=freeze_norm,
norm_decay=norm_decay), )
self.branch2 = nn.Sequential(
ConvNormLayer(
ch_in=branch_channel if stride == 1 else in_channel,
ch_out=branch_channel,
filter_size=1,
stride=1,
norm_type=norm_type,
act='relu',
freeze_norm=freeze_norm,
norm_decay=norm_decay),
ConvNormLayer(
ch_in=branch_channel,
ch_out=branch_channel,
filter_size=3,
stride=self.stride,
groups=branch_channel,
norm_type=norm_type,
freeze_norm=freeze_norm,
norm_decay=norm_decay),
ConvNormLayer(
ch_in=branch_channel,
ch_out=branch_channel,
filter_size=1,
stride=1,
norm_type=norm_type,
act='relu',
freeze_norm=freeze_norm,
norm_decay=norm_decay), )
def forward(self, x):
if self.stride > 1:
x1 = self.branch1(x)
x2 = self.branch2(x)
else:
x1, x2 = x.chunk(2, axis=1)
x2 = self.branch2(x2)
out = paddle.concat([x1, x2], axis=1)
out = channel_shuffle(out, groups=2)
return out
class IterativeHead(nn.Layer):
def __init__(self,
in_channels,
norm_type='bn',
freeze_norm=False,
norm_decay=0.):
super(IterativeHead, self).__init__()
num_branches = len(in_channels)
self.in_channels = in_channels[::-1]
projects = []
for i in range(num_branches):
if i != num_branches - 1:
projects.append(
DepthWiseSeparableConvNormLayer(
ch_in=self.in_channels[i],
ch_out=self.in_channels[i + 1],
filter_size=3,
stride=1,
dw_act=None,
pw_act='relu',
dw_norm_type=norm_type,
pw_norm_type=norm_type,
freeze_norm=freeze_norm,
norm_decay=norm_decay))
else:
projects.append(
DepthWiseSeparableConvNormLayer(
ch_in=self.in_channels[i],
ch_out=self.in_channels[i],
filter_size=3,
stride=1,
dw_act=None,
pw_act='relu',
dw_norm_type=norm_type,
pw_norm_type=norm_type,
freeze_norm=freeze_norm,
norm_decay=norm_decay))
self.projects = nn.LayerList(projects)
def forward(self, x):
x = x[::-1]
y = []
last_x = None
for i, s in enumerate(x):
if last_x is not None:
last_x = F.interpolate(
last_x,
size=paddle.shape(s)[-2:],
mode='bilinear',
align_corners=True)
s = s + last_x
s = self.projects[i](s)
y.append(s)
last_x = s
return y[::-1]
class Stem(nn.Layer):
def __init__(self,
in_channel,
stem_channel,
out_channel,
expand_ratio,
norm_type='bn',
freeze_norm=False,
norm_decay=0.):
super(Stem, self).__init__()
self.conv1 = ConvNormLayer(
in_channel,
stem_channel,
filter_size=3,
stride=2,
norm_type=norm_type,
act='relu',
freeze_norm=freeze_norm,
norm_decay=norm_decay)
mid_channel = int(round(stem_channel * expand_ratio))
branch_channel = stem_channel // 2
if stem_channel == out_channel:
inc_channel = out_channel - branch_channel
else:
inc_channel = out_channel - stem_channel
self.branch1 = nn.Sequential(
ConvNormLayer(
ch_in=branch_channel,
ch_out=branch_channel,
filter_size=3,
stride=2,
groups=branch_channel,
norm_type=norm_type,
freeze_norm=freeze_norm,
norm_decay=norm_decay),
ConvNormLayer(
ch_in=branch_channel,
ch_out=inc_channel,
filter_size=1,
stride=1,
norm_type=norm_type,
act='relu',
freeze_norm=freeze_norm,
norm_decay=norm_decay), )
self.expand_conv = ConvNormLayer(
ch_in=branch_channel,
ch_out=mid_channel,
filter_size=1,
stride=1,
norm_type=norm_type,
act='relu',
freeze_norm=freeze_norm,
norm_decay=norm_decay)
self.depthwise_conv = ConvNormLayer(
ch_in=mid_channel,
ch_out=mid_channel,
filter_size=3,
stride=2,
groups=mid_channel,
norm_type=norm_type,
freeze_norm=freeze_norm,
norm_decay=norm_decay)
self.linear_conv = ConvNormLayer(
ch_in=mid_channel,
ch_out=branch_channel
if stem_channel == out_channel else stem_channel,
filter_size=1,
stride=1,
norm_type=norm_type,
act='relu',
freeze_norm=freeze_norm,
norm_decay=norm_decay)
def forward(self, x):
x = self.conv1(x)
x1, x2 = x.chunk(2, axis=1)
x1 = self.branch1(x1)
x2 = self.expand_conv(x2)
x2 = self.depthwise_conv(x2)
x2 = self.linear_conv(x2)
out = paddle.concat([x1, x2], axis=1)
out = channel_shuffle(out, groups=2)
return out
class LiteHRNetModule(nn.Layer):
def __init__(self,
num_branches,
num_blocks,
in_channels,
reduce_ratio,
module_type,
multiscale_output=False,
with_fuse=True,
norm_type='bn',
freeze_norm=False,
norm_decay=0.):
super(LiteHRNetModule, self).__init__()
        assert num_branches == len(in_channels),\
            "num_branches {} should equal len(in_channels) {}".format(num_branches, len(in_channels))
assert module_type in [
'LITE', 'NAIVE'
], "module_type should be one of ['LITE', 'NAIVE']"
self.num_branches = num_branches
self.in_channels = in_channels
self.multiscale_output = multiscale_output
self.with_fuse = with_fuse
self.norm_type = 'bn'
self.module_type = module_type
if self.module_type == 'LITE':
self.layers = self._make_weighting_blocks(
num_blocks,
reduce_ratio,
freeze_norm=freeze_norm,
norm_decay=norm_decay)
elif self.module_type == 'NAIVE':
self.layers = self._make_naive_branches(
num_branches,
num_blocks,
freeze_norm=freeze_norm,
norm_decay=norm_decay)
if self.with_fuse:
self.fuse_layers = self._make_fuse_layers(
freeze_norm=freeze_norm, norm_decay=norm_decay)
self.relu = nn.ReLU()
def _make_weighting_blocks(self,
num_blocks,
reduce_ratio,
stride=1,
freeze_norm=False,
norm_decay=0.):
layers = []
for i in range(num_blocks):
layers.append(
ConditionalChannelWeightingBlock(
self.in_channels,
stride=stride,
reduce_ratio=reduce_ratio,
norm_type=self.norm_type,
freeze_norm=freeze_norm,
norm_decay=norm_decay))
return nn.Sequential(*layers)
def _make_naive_branches(self,
num_branches,
num_blocks,
freeze_norm=False,
norm_decay=0.):
branches = []
for branch_idx in range(num_branches):
layers = []
for i in range(num_blocks):
layers.append(
ShuffleUnit(
self.in_channels[branch_idx],
self.in_channels[branch_idx],
stride=1,
norm_type=self.norm_type,
freeze_norm=freeze_norm,
norm_decay=norm_decay))
branches.append(nn.Sequential(*layers))
return nn.LayerList(branches)
def _make_fuse_layers(self, freeze_norm=False, norm_decay=0.):
if self.num_branches == 1:
return None
fuse_layers = []
num_out_branches = self.num_branches if self.multiscale_output else 1
for i in range(num_out_branches):
fuse_layer = []
for j in range(self.num_branches):
if j > i:
fuse_layer.append(
nn.Sequential(
Conv2d(
self.in_channels[j],
self.in_channels[i],
kernel_size=1,
stride=1,
padding=0,
bias=False, ),
nn.BatchNorm2D(self.in_channels[i]),
nn.Upsample(
scale_factor=2**(j - i), mode='nearest')))
elif j == i:
fuse_layer.append(None)
else:
conv_downsamples = []
for k in range(i - j):
if k == i - j - 1:
conv_downsamples.append(
nn.Sequential(
Conv2d(
self.in_channels[j],
self.in_channels[j],
kernel_size=3,
stride=2,
padding=1,
groups=self.in_channels[j],
bias=False, ),
nn.BatchNorm2D(self.in_channels[j]),
Conv2d(
self.in_channels[j],
self.in_channels[i],
kernel_size=1,
stride=1,
padding=0,
bias=False, ),
nn.BatchNorm2D(self.in_channels[i])))
else:
conv_downsamples.append(
nn.Sequential(
Conv2d(
self.in_channels[j],
self.in_channels[j],
kernel_size=3,
stride=2,
padding=1,
groups=self.in_channels[j],
bias=False, ),
nn.BatchNorm2D(self.in_channels[j]),
Conv2d(
self.in_channels[j],
self.in_channels[j],
kernel_size=1,
stride=1,
padding=0,
bias=False, ),
nn.BatchNorm2D(self.in_channels[j]),
nn.ReLU()))
fuse_layer.append(nn.Sequential(*conv_downsamples))
fuse_layers.append(nn.LayerList(fuse_layer))
return nn.LayerList(fuse_layers)
def forward(self, x):
if self.num_branches == 1:
return [self.layers[0](x[0])]
if self.module_type == 'LITE':
out = self.layers(x)
elif self.module_type == 'NAIVE':
for i in range(self.num_branches):
x[i] = self.layers[i](x[i])
out = x
if self.with_fuse:
out_fuse = []
for i in range(len(self.fuse_layers)):
y = out[0] if i == 0 else self.fuse_layers[i][0](out[0])
for j in range(self.num_branches):
                    if j == 0:
                        # Mirrors the reference implementation: this doubles the
                        # first branch's contribution rather than adding out[0] once.
                        y += y
elif i == j:
y += out[j]
else:
y += self.fuse_layers[i][j](out[j])
if i == 0:
out[i] = y
out_fuse.append(self.relu(y))
out = out_fuse
elif not self.multiscale_output:
out = [out[0]]
return out
class LiteHRNet(nn.Layer):
"""
@inproceedings{Yulitehrnet21,
title={Lite-HRNet: A Lightweight High-Resolution Network},
author={Yu, Changqian and Xiao, Bin and Gao, Changxin and Yuan, Lu and Zhang, Lei and Sang, Nong and Wang, Jingdong},
booktitle={CVPR},year={2021}
}
Args:
        network_type (str): The network type, which should be one of ["lite_18", "lite_30", "naive", "wider_naive"].
            "naive": Simply combines the shuffle block in ShuffleNet with the high-resolution design pattern in HRNet.
            "wider_naive": The naive network with wider channels in each block.
            "lite_18": Lite-HRNet-18, which replaces the pointwise convolution in a shuffle block with conditional channel weighting.
            "lite_30": Lite-HRNet-30, with more blocks than Lite-HRNet-18.
        in_channels (int, optional): The channels of input image. Default: 3.
        freeze_at (int): The stage to freeze.
        freeze_norm (bool): Whether to freeze the normalization layers in the network.
        norm_decay (float): The weight decay for normalization layer weights.
        return_idx (list): The indices of the stages to return.
"""
def __init__(self,
network_type,
in_channels=3,
freeze_at=0,
freeze_norm=True,
norm_decay=0.,
return_idx=[0, 1, 2, 3],
use_head=False,
pretrained=None):
super(LiteHRNet, self).__init__()
if isinstance(return_idx, Integral):
return_idx = [return_idx]
assert network_type in ["lite_18", "lite_30", "naive", "wider_naive"], \
"the network_type should be one of [lite_18, lite_30, naive, wider_naive]"
assert len(return_idx) > 0, "need one or more return index"
self.freeze_at = freeze_at
self.freeze_norm = freeze_norm
self.norm_decay = norm_decay
self.return_idx = return_idx
self.norm_type = 'bn'
self.use_head = use_head
self.pretrained = pretrained
self.module_configs = {
"lite_18": {
"num_modules": [2, 4, 2],
"num_branches": [2, 3, 4],
"num_blocks": [2, 2, 2],
"module_type": ["LITE", "LITE", "LITE"],
"reduce_ratios": [8, 8, 8],
"num_channels": [[40, 80], [40, 80, 160], [40, 80, 160, 320]],
},
"lite_30": {
"num_modules": [3, 8, 3],
"num_branches": [2, 3, 4],
"num_blocks": [2, 2, 2],
"module_type": ["LITE", "LITE", "LITE"],
"reduce_ratios": [8, 8, 8],
"num_channels": [[40, 80], [40, 80, 160], [40, 80, 160, 320]],
},
"naive": {
"num_modules": [2, 4, 2],
"num_branches": [2, 3, 4],
"num_blocks": [2, 2, 2],
"module_type": ["NAIVE", "NAIVE", "NAIVE"],
"reduce_ratios": [1, 1, 1],
"num_channels": [[30, 60], [30, 60, 120], [30, 60, 120, 240]],
},
"wider_naive": {
"num_modules": [2, 4, 2],
"num_branches": [2, 3, 4],
"num_blocks": [2, 2, 2],
"module_type": ["NAIVE", "NAIVE", "NAIVE"],
"reduce_ratios": [1, 1, 1],
"num_channels": [[40, 80], [40, 80, 160], [40, 80, 160, 320]],
},
}
self.stages_config = self.module_configs[network_type]
self.stem = Stem(in_channels, 32, 32, 1)
num_channels_pre_layer = [32]
for stage_idx in range(3):
num_channels = self.stages_config["num_channels"][stage_idx]
setattr(self, 'transition{}'.format(stage_idx),
self._make_transition_layer(num_channels_pre_layer,
num_channels, self.freeze_norm,
self.norm_decay))
stage, num_channels_pre_layer = self._make_stage(
self.stages_config, stage_idx, num_channels, True,
self.freeze_norm, self.norm_decay)
setattr(self, 'stage{}'.format(stage_idx), stage)
num_channels = self.stages_config["num_channels"][-1]
self.feat_channels = num_channels
if self.use_head:
self.head_layer = IterativeHead(num_channels_pre_layer, 'bn',
self.freeze_norm, self.norm_decay)
self.feat_channels = [num_channels[0]]
for i in range(1, len(num_channels)):
self.feat_channels.append(num_channels[i] // 2)
self.init_weight()
def init_weight(self):
if self.pretrained is not None:
utils.load_entire_model(self, self.pretrained)
def _make_transition_layer(self,
num_channels_pre_layer,
num_channels_cur_layer,
freeze_norm=False,
norm_decay=0.):
num_branches_pre = len(num_channels_pre_layer)
num_branches_cur = len(num_channels_cur_layer)
transition_layers = []
for i in range(num_branches_cur):
if i < num_branches_pre:
if num_channels_cur_layer[i] != num_channels_pre_layer[i]:
transition_layers.append(
nn.Sequential(
Conv2d(
num_channels_pre_layer[i],
num_channels_pre_layer[i],
kernel_size=3,
stride=1,
padding=1,
groups=num_channels_pre_layer[i],
bias=False),
nn.BatchNorm2D(num_channels_pre_layer[i]),
Conv2d(
num_channels_pre_layer[i],
num_channels_cur_layer[i],
kernel_size=1,
stride=1,
padding=0,
bias=False, ),
nn.BatchNorm2D(num_channels_cur_layer[i]),
nn.ReLU()))
else:
transition_layers.append(None)
else:
conv_downsamples = []
for j in range(i + 1 - num_branches_pre):
conv_downsamples.append(
nn.Sequential(
Conv2d(
num_channels_pre_layer[-1],
num_channels_pre_layer[-1],
groups=num_channels_pre_layer[-1],
kernel_size=3,
stride=2,
padding=1,
bias=False, ),
nn.BatchNorm2D(num_channels_pre_layer[-1]),
Conv2d(
num_channels_pre_layer[-1],
num_channels_cur_layer[i]
if j == i - num_branches_pre else
num_channels_pre_layer[-1],
kernel_size=1,
stride=1,
padding=0,
bias=False, ),
nn.BatchNorm2D(num_channels_cur_layer[i]
if j == i - num_branches_pre else
num_channels_pre_layer[-1]),
nn.ReLU()))
transition_layers.append(nn.Sequential(*conv_downsamples))
return nn.LayerList(transition_layers)
def _make_stage(self,
stages_config,
stage_idx,
in_channels,
multiscale_output,
freeze_norm=False,
norm_decay=0.):
num_modules = stages_config["num_modules"][stage_idx]
num_branches = stages_config["num_branches"][stage_idx]
num_blocks = stages_config["num_blocks"][stage_idx]
reduce_ratio = stages_config['reduce_ratios'][stage_idx]
module_type = stages_config['module_type'][stage_idx]
modules = []
for i in range(num_modules):
if not multiscale_output and i == num_modules - 1:
reset_multiscale_output = False
else:
reset_multiscale_output = True
modules.append(
LiteHRNetModule(
num_branches,
num_blocks,
in_channels,
reduce_ratio,
module_type,
multiscale_output=reset_multiscale_output,
with_fuse=True,
freeze_norm=freeze_norm,
norm_decay=norm_decay))
in_channels = modules[-1].in_channels
return nn.Sequential(*modules), in_channels
def forward(self, x):
x = self.stem(x)
y_list = [x]
for stage_idx in range(3):
x_list = []
transition = getattr(self, 'transition{}'.format(stage_idx))
for j in range(self.stages_config["num_branches"][stage_idx]):
if transition[j] is not None:
if j >= len(y_list):
x_list.append(transition[j](y_list[-1]))
else:
x_list.append(transition[j](y_list[j]))
else:
x_list.append(y_list[j])
y_list = getattr(self, 'stage{}'.format(stage_idx))(x_list)
if self.use_head:
y_list = self.head_layer(y_list)
res = []
for i, layer in enumerate(y_list):
if i == self.freeze_at:
layer.stop_gradient = True
if i in self.return_idx:
res.append(layer)
return res
@manager.BACKBONES.add_component
def Lite_HRNet_18(**kwargs):
model = LiteHRNet(network_type="lite_18", **kwargs)
return model
@manager.BACKBONES.add_component
def Lite_HRNet_30(**kwargs):
model = LiteHRNet(network_type="lite_30", **kwargs)
return model
@manager.BACKBONES.add_component
def Lite_HRNet_naive(**kwargs):
model = LiteHRNet(network_type="naive", **kwargs)
return model
@manager.BACKBONES.add_component
def Lite_HRNet_wider_naive(**kwargs):
model = LiteHRNet(network_type="wider_naive", **kwargs)
return model
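# --- Usage sketch (an illustrative addition, not part of the upstream file;
# assumes a working PaddlePaddle install). Lite-HRNet-18 with the default
# return_idx=[0, 1, 2, 3] and use_head=False returns four branches with
# channels [40, 80, 160, 320] at strides 4, 8, 16 and 32.
if __name__ == "__main__":
    model = Lite_HRNet_18()
    x = paddle.randn([1, 3, 224, 224])
    for feat in model(x):
        print(feat.shape)  # [1, 40, 56, 56] ... [1, 320, 7, 7]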

@@ -1,4 +1,4 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -260,7 +260,7 @@ class MixVisionTransformer(nn.Layer):
def __init__(self,
img_size=224,
patch_size=16,
in_chans=3,
in_channels=3,
num_classes=1000,
embed_dims=[64, 128, 256, 512],
num_heads=[1, 2, 4, 8],
@@ -284,7 +284,7 @@ class MixVisionTransformer(nn.Layer):
img_size=img_size,
patch_size=7,
stride=4,
in_chans=in_chans,
in_chans=in_channels,
embed_dim=embed_dims[0])
self.patch_embed2 = OverlapPatchEmbed(
img_size=img_size // 4,

@@ -1,4 +1,4 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -12,13 +12,26 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg import utils
__all__ = [
"MobileNetV2_x0_25",
"MobileNetV2_x0_5",
"MobileNetV2_x0_75",
"MobileNetV2_x1_0",
"MobileNetV2_x1_5",
"MobileNetV2_x2_0",
]
@manager.BACKBONES.add_component
class MobileNetV2(nn.Layer):
"""
The MobileNetV2 implementation based on PaddlePaddle.
@@ -29,69 +42,70 @@ class MobileNetV2(nn.Layer):
(https://arxiv.org/abs/1801.04381).
Args:
        channel_ratio (float, optional): The ratio of channels. Default: 1.0.
        min_channel (int, optional): The minimum channel number. Default: 16.
        scale (float, optional): The scale of channels. Default: 1.0.
        in_channels (int, optional): The channels of input image. Default: 3.
        pretrained (str, optional): The path or URL of the pretrained model. Default: None.
"""
def __init__(self, channel_ratio=1.0, min_channel=16, pretrained=None):
super(MobileNetV2, self).__init__()
self.channel_ratio = channel_ratio
self.min_channel = min_channel
def __init__(self, scale=1.0, in_channels=3, pretrained=None):
super().__init__()
self.scale = scale
self.pretrained = pretrained
prefix_name = ""
self.stage0 = conv_bn(3, self.depth(32), 3, 2)
self.stage1 = InvertedResidual(self.depth(32), self.depth(16), 1, 1)
self.stage2 = nn.Sequential(
InvertedResidual(self.depth(16), self.depth(24), 2, 6),
InvertedResidual(self.depth(24), self.depth(24), 1, 6), )
self.stage3 = nn.Sequential(
InvertedResidual(self.depth(24), self.depth(32), 2, 6),
InvertedResidual(self.depth(32), self.depth(32), 1, 6),
InvertedResidual(self.depth(32), self.depth(32), 1, 6), )
bottleneck_params_list = [
(1, 16, 1, 1),
(6, 24, 2, 2), # x4
(6, 32, 3, 2), # x8
(6, 64, 4, 2),
(6, 96, 3, 1), # x16
(6, 160, 3, 2),
(6, 320, 1, 1), # x32
]
self.out_index = [1, 2, 4, 6]
self.stage4 = nn.Sequential(
InvertedResidual(self.depth(32), self.depth(64), 2, 6),
InvertedResidual(self.depth(64), self.depth(64), 1, 6),
InvertedResidual(self.depth(64), self.depth(64), 1, 6),
InvertedResidual(self.depth(64), self.depth(64), 1, 6), )
self.conv1 = ConvBNLayer(
num_channels=in_channels,
num_filters=int(32 * scale),
filter_size=3,
stride=2,
padding=1,
name=prefix_name + "conv1_1")
self.stage5 = nn.Sequential(
InvertedResidual(self.depth(64), self.depth(96), 1, 6),
InvertedResidual(self.depth(96), self.depth(96), 1, 6),
InvertedResidual(self.depth(96), self.depth(96), 1, 6), )
self.block_list = []
i = 1
in_c = int(32 * scale)
for layer_setting in bottleneck_params_list:
t, c, n, s = layer_setting
i += 1
block = self.add_sublayer(
prefix_name + "conv" + str(i),
sublayer=InvresiBlocks(
in_c=in_c,
t=t,
c=int(c * scale),
n=n,
s=s,
name=prefix_name + "conv" + str(i)))
self.block_list.append(block)
in_c = int(c * scale)
self.stage6 = nn.Sequential(
InvertedResidual(self.depth(96), self.depth(160), 2, 6),
InvertedResidual(self.depth(160), self.depth(160), 1, 6),
InvertedResidual(self.depth(160), self.depth(160), 1, 6), )
self.stage7 = InvertedResidual(self.depth(160), self.depth(320), 1, 6)
out_channels = [
bottleneck_params_list[idx][1] for idx in self.out_index
]
self.feat_channels = [int(c * scale) for c in out_channels]
self.init_weight()
def depth(self, channels):
min_channel = min(channels, self.min_channel)
return max(min_channel, int(channels * self.channel_ratio))
def forward(self, x):
def forward(self, inputs):
feat_list = []
feature_1_2 = self.stage0(x)
feature_1_2 = self.stage1(feature_1_2)
feature_1_4 = self.stage2(feature_1_2)
feature_1_8 = self.stage3(feature_1_4)
feature_1_16 = self.stage4(feature_1_8)
feature_1_16 = self.stage5(feature_1_16)
feature_1_32 = self.stage6(feature_1_16)
feature_1_32 = self.stage7(feature_1_32)
feat_list.append(feature_1_4)
feat_list.append(feature_1_8)
feat_list.append(feature_1_16)
feat_list.append(feature_1_32)
y = self.conv1(inputs, if_act=True)
for idx, block in enumerate(self.block_list):
y = block(y)
if idx in self.out_index:
feat_list.append(y)
return feat_list
def init_weight(self):
@@ -99,66 +113,153 @@ class MobileNetV2(nn.Layer):
utils.load_entire_model(self, self.pretrained)
def conv_bn(inp, oup, kernel, stride):
return nn.Sequential(
nn.Conv2D(
in_channels=inp,
out_channels=oup,
kernel_size=kernel,
class ConvBNLayer(nn.Layer):
def __init__(self,
num_channels,
filter_size,
num_filters,
stride,
padding,
channels=None,
num_groups=1,
name=None,
use_cudnn=True):
super(ConvBNLayer, self).__init__()
self._conv = Conv2D(
in_channels=num_channels,
out_channels=num_filters,
kernel_size=filter_size,
stride=stride,
padding=padding,
groups=num_groups,
weight_attr=ParamAttr(name=name + "_weights"),
bias_attr=False)
self._batch_norm = BatchNorm(
num_filters,
param_attr=ParamAttr(name=name + "_bn_scale"),
bias_attr=ParamAttr(name=name + "_bn_offset"),
moving_mean_name=name + "_bn_mean",
moving_variance_name=name + "_bn_variance")
def forward(self, inputs, if_act=True):
y = self._conv(inputs)
y = self._batch_norm(y)
if if_act:
y = F.relu6(y)
return y
class InvertedResidualUnit(nn.Layer):
def __init__(self, num_channels, num_in_filter, num_filters, stride,
filter_size, padding, expansion_factor, name):
super(InvertedResidualUnit, self).__init__()
num_expfilter = int(round(num_in_filter * expansion_factor))
self._expand_conv = ConvBNLayer(
num_channels=num_channels,
num_filters=num_expfilter,
filter_size=1,
stride=1,
padding=0,
num_groups=1,
name=name + "_expand")
self._bottleneck_conv = ConvBNLayer(
num_channels=num_expfilter,
num_filters=num_expfilter,
filter_size=filter_size,
stride=stride,
padding=(kernel - 1) // 2,
bias_attr=False),
nn.BatchNorm2D(
num_features=oup, epsilon=1e-05, momentum=0.1),
nn.ReLU())
class InvertedResidual(nn.Layer):
def __init__(self, inp, oup, stride, expand_ratio, dilation=1):
super(InvertedResidual, self).__init__()
self.stride = stride
assert stride in [1, 2]
self.use_res_connect = self.stride == 1 and inp == oup
self.conv = nn.Sequential(
nn.Conv2D(
inp,
inp * expand_ratio,
kernel_size=1,
stride=1,
padding=0,
dilation=1,
groups=1,
bias_attr=False),
nn.BatchNorm2D(
num_features=inp * expand_ratio, epsilon=1e-05, momentum=0.1),
nn.ReLU(),
nn.Conv2D(
inp * expand_ratio,
inp * expand_ratio,
kernel_size=3,
stride=stride,
padding=dilation,
dilation=dilation,
groups=inp * expand_ratio,
bias_attr=False),
nn.BatchNorm2D(
num_features=inp * expand_ratio, epsilon=1e-05, momentum=0.1),
nn.ReLU(),
nn.Conv2D(
inp * expand_ratio,
oup,
kernel_size=1,
stride=1,
padding=0,
dilation=1,
groups=1,
bias_attr=False),
nn.BatchNorm2D(
num_features=oup, epsilon=1e-05, momentum=0.1), )
def forward(self, x):
if self.use_res_connect:
return x + self.conv(x)
else:
return self.conv(x)
padding=padding,
num_groups=num_expfilter,
use_cudnn=False,
name=name + "_dwise")
self._linear_conv = ConvBNLayer(
num_channels=num_expfilter,
num_filters=num_filters,
filter_size=1,
stride=1,
padding=0,
num_groups=1,
name=name + "_linear")
def forward(self, inputs, ifshortcut):
y = self._expand_conv(inputs, if_act=True)
y = self._bottleneck_conv(y, if_act=True)
y = self._linear_conv(y, if_act=False)
if ifshortcut:
y = paddle.add(inputs, y)
return y
class InvresiBlocks(nn.Layer):
def __init__(self, in_c, t, c, n, s, name):
super(InvresiBlocks, self).__init__()
self._first_block = InvertedResidualUnit(
num_channels=in_c,
num_in_filter=in_c,
num_filters=c,
stride=s,
filter_size=3,
padding=1,
expansion_factor=t,
name=name + "_1")
self._block_list = []
for i in range(1, n):
block = self.add_sublayer(
name + "_" + str(i + 1),
sublayer=InvertedResidualUnit(
num_channels=c,
num_in_filter=c,
num_filters=c,
stride=1,
filter_size=3,
padding=1,
expansion_factor=t,
name=name + "_" + str(i + 1)))
self._block_list.append(block)
def forward(self, inputs):
y = self._first_block(inputs, ifshortcut=False)
for block in self._block_list:
y = block(y, ifshortcut=True)
return y
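# Illustrative reading of the (t, c, n, s) settings consumed above: a row such
# as (6, 24, 2, 2) in bottleneck_params_list uses an expansion factor of 6,
# outputs int(24 * scale) channels, stacks n=2 inverted residual units, and
# applies stride 2 only in the first unit; the remaining units keep stride 1
# and use the additive shortcut.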
@manager.BACKBONES.add_component
def MobileNetV2_x0_25(**kwargs):
model = MobileNetV2(scale=0.25, **kwargs)
return model
@manager.BACKBONES.add_component
def MobileNetV2_x0_5(**kwargs):
model = MobileNetV2(scale=0.5, **kwargs)
return model
@manager.BACKBONES.add_component
def MobileNetV2_x0_75(**kwargs):
model = MobileNetV2(scale=0.75, **kwargs)
return model
@manager.BACKBONES.add_component
def MobileNetV2_x1_0(**kwargs):
model = MobileNetV2(scale=1.0, **kwargs)
return model
@manager.BACKBONES.add_component
def MobileNetV2_x1_5(**kwargs):
model = MobileNetV2(scale=1.5, **kwargs)
return model
@manager.BACKBONES.add_component
def MobileNetV2_x2_0(**kwargs):
model = MobileNetV2(scale=2.0, **kwargs)
return model
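# --- Usage sketch (an illustrative addition, not part of the upstream file;
# assumes a working PaddlePaddle install). With out_index = [1, 2, 4, 6], the
# backbone returns features at strides 4, 8, 16 and 32; for scale=1.0 the
# channel counts are [24, 32, 96, 320] (see model.feat_channels).
if __name__ == "__main__":
    model = MobileNetV2_x1_0()
    x = paddle.randn([1, 3, 224, 224])
    for feat in model(x):
        print(feat.shape)  # [1, 24, 56, 56] ... [1, 320, 7, 7]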

@@ -1,4 +1,4 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -14,10 +14,12 @@
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
from paddle.regularizer import L2Decay
from paddle.nn import AdaptiveAvgPool2D, BatchNorm, Conv2D, Dropout, Linear
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.utils import utils
from paddlers.models.ppseg.utils import utils, logger
from paddlers.models.ppseg.models import layers
__all__ = [
@@ -28,8 +30,92 @@ __all__ = [
"MobileNetV3_large_x1_0", "MobileNetV3_large_x1_25"
]
def make_divisible(v, divisor=8, min_value=None):
MODEL_STAGES_PATTERN = {
"MobileNetV3_small": ["blocks[0]", "blocks[2]", "blocks[7]", "blocks[10]"],
"MobileNetV3_large":
["blocks[0]", "blocks[2]", "blocks[5]", "blocks[11]", "blocks[14]"]
}
# "large", "small" is just for MobinetV3_large, MobileNetV3_small respectively.
# The type of "large" or "small" config is a list. Each element(list) represents a depthwise block, which is composed of k, exp, se, act, s.
# k: kernel_size
# exp: middle channel number in depthwise block
# c: output channel number in depthwise block
# se: whether to use SE block
# act: which activation to use
# s: stride in depthwise block
# d: dilation rate in depthwise block
NET_CONFIG = {
"large": [
# k, exp, c, se, act, s
[3, 16, 16, False, "relu", 1],
[3, 64, 24, False, "relu", 2],
[3, 72, 24, False, "relu", 1], # x4
[5, 72, 40, True, "relu", 2],
[5, 120, 40, True, "relu", 1],
[5, 120, 40, True, "relu", 1], # x8
[3, 240, 80, False, "hardswish", 2],
[3, 200, 80, False, "hardswish", 1],
[3, 184, 80, False, "hardswish", 1],
[3, 184, 80, False, "hardswish", 1],
[3, 480, 112, True, "hardswish", 1],
[3, 672, 112, True, "hardswish", 1], # x16
[5, 672, 160, True, "hardswish", 2],
[5, 960, 160, True, "hardswish", 1],
[5, 960, 160, True, "hardswish", 1], # x32
],
"small": [
# k, exp, c, se, act, s
[3, 16, 16, True, "relu", 2],
[3, 72, 24, False, "relu", 2],
[3, 88, 24, False, "relu", 1],
[5, 96, 40, True, "hardswish", 2],
[5, 240, 40, True, "hardswish", 1],
[5, 240, 40, True, "hardswish", 1],
[5, 120, 48, True, "hardswish", 1],
[5, 144, 48, True, "hardswish", 1],
[5, 288, 96, True, "hardswish", 2],
[5, 576, 96, True, "hardswish", 1],
[5, 576, 96, True, "hardswish", 1],
],
"large_os8": [
# k, exp, c, se, act, s, {d}
[3, 16, 16, False, "relu", 1],
[3, 64, 24, False, "relu", 2],
[3, 72, 24, False, "relu", 1], # x4
[5, 72, 40, True, "relu", 2],
[5, 120, 40, True, "relu", 1],
[5, 120, 40, True, "relu", 1], # x8
[3, 240, 80, False, "hardswish", 1],
[3, 200, 80, False, "hardswish", 1, 2],
[3, 184, 80, False, "hardswish", 1, 2],
[3, 184, 80, False, "hardswish", 1, 2],
[3, 480, 112, True, "hardswish", 1, 2],
[3, 672, 112, True, "hardswish", 1, 2],
[5, 672, 160, True, "hardswish", 1, 2],
[5, 960, 160, True, "hardswish", 1, 4],
[5, 960, 160, True, "hardswish", 1, 4],
],
"small_os8": [
# k, exp, c, se, act, s, {d}
[3, 16, 16, True, "relu", 2],
[3, 72, 24, False, "relu", 2],
[3, 88, 24, False, "relu", 1],
[5, 96, 40, True, "hardswish", 1],
[5, 240, 40, True, "hardswish", 1, 2],
[5, 240, 40, True, "hardswish", 1, 2],
[5, 120, 48, True, "hardswish", 1, 2],
[5, 144, 48, True, "hardswish", 1, 2],
[5, 288, 96, True, "hardswish", 1, 2],
[5, 576, 96, True, "hardswish", 1, 4],
[5, 576, 96, True, "hardswish", 1, 4],
]
}
OUT_INDEX = {"large": [2, 5, 11, 14], "small": [0, 2, 7, 10]}
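# Illustrative reading of one config row: [5, 72, 40, True, "relu", 2] in
# NET_CONFIG["large"] builds a depthwise block with a 5x5 kernel, 72 expansion
# channels, 40 output channels, an SE block, ReLU activation and stride 2; the
# "*_os8" rows may carry a seventh value, the dilation rate, e.g.
# [5, 960, 160, True, "hardswish", 1, 4].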
def _make_divisible(v, divisor=8, min_value=None):
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
@@ -38,156 +124,113 @@ def make_divisible(v, divisor=8, min_value=None):
return new_v
class MobileNetV3(nn.Layer):
"""
The MobileNetV3 implementation based on PaddlePaddle.
def _create_act(act):
if act == "hardswish":
return nn.Hardswish()
elif act == "relu":
return nn.ReLU()
elif act is None:
return None
else:
raise RuntimeError(
"The activation function is not supported: {}".format(act))
    The original article refers to
    Andrew Howard, et al. "Searching for MobileNetV3"
(https://arxiv.org/pdf/1905.02244.pdf).
class MobileNetV3(nn.Layer):
"""
MobileNetV3
Args:
pretrained (str, optional): The path of pretrained model.
        scale (float, optional): The scale of channels. Default: 1.0.
        model_name (str, optional): Model name. It determines the type of MobileNetV3. The value is 'small' or 'large'. Default: 'small'.
        output_stride (int, optional): The stride of output features compared to input images. The value should be one of (2, 4, 8, 16, 32). Default: None.
        config (list): The MobileNetV3 depthwise block config (see NET_CONFIG).
        in_channels (int, optional): The channels of input image. Default: 3.
        scale (float, optional): The coefficient that controls the size of network parameters. Default: 1.0.
    Returns:
        model: nn.Layer. A specific MobileNetV3 model, depending on the args.
"""
def __init__(self,
pretrained=None,
config,
stages_pattern,
out_index,
in_channels=3,
scale=1.0,
model_name="small",
output_stride=None):
super(MobileNetV3, self).__init__()
pretrained=None):
super().__init__()
self.cfg = config
self.out_index = out_index
self.scale = scale
self.pretrained = pretrained
inplanes = 16
if model_name == "large":
self.cfg = [
# k, exp, c, se, nl, s,
[3, 16, 16, False, "relu", 1],
[3, 64, 24, False, "relu", 2],
[3, 72, 24, False, "relu", 1], # output 1 -> out_index=2
[5, 72, 40, True, "relu", 2],
[5, 120, 40, True, "relu", 1],
[5, 120, 40, True, "relu", 1], # output 2 -> out_index=5
[3, 240, 80, False, "hard_swish", 2],
[3, 200, 80, False, "hard_swish", 1],
[3, 184, 80, False, "hard_swish", 1],
[3, 184, 80, False, "hard_swish", 1],
[3, 480, 112, True, "hard_swish", 1],
[3, 672, 112, True, "hard_swish",
1], # output 3 -> out_index=11
[5, 672, 160, True, "hard_swish", 2],
[5, 960, 160, True, "hard_swish", 1],
[5, 960, 160, True, "hard_swish",
1], # output 3 -> out_index=14
]
self.out_indices = [2, 5, 11, 14]
self.feat_channels = [
make_divisible(i * scale) for i in [24, 40, 112, 160]
]
self.cls_ch_squeeze = 960
self.cls_ch_expand = 1280
elif model_name == "small":
self.cfg = [
# k, exp, c, se, nl, s,
[3, 16, 16, True, "relu", 2], # output 1 -> out_index=0
[3, 72, 24, False, "relu", 2],
[3, 88, 24, False, "relu", 1], # output 2 -> out_index=3
[5, 96, 40, True, "hard_swish", 2],
[5, 240, 40, True, "hard_swish", 1],
[5, 240, 40, True, "hard_swish", 1],
[5, 120, 48, True, "hard_swish", 1],
[5, 144, 48, True, "hard_swish", 1], # output 3 -> out_index=7
[5, 288, 96, True, "hard_swish", 2],
[5, 576, 96, True, "hard_swish", 1],
[5, 576, 96, True, "hard_swish", 1], # output 4 -> out_index=10
]
self.out_indices = [0, 3, 7, 10]
self.feat_channels = [
make_divisible(i * scale) for i in [16, 24, 48, 96]
]
self.cls_ch_squeeze = 576
self.cls_ch_expand = 1280
else:
raise NotImplementedError(
"mode[{}_model] is not implemented!".format(model_name))
###################################################
# modify stride and dilation based on output_stride
self.dilation_cfg = [1] * len(self.cfg)
self.modify_bottle_params(output_stride=output_stride)
###################################################
self.conv1 = ConvBNLayer(
in_c=3,
out_c=make_divisible(inplanes * scale),
self.conv = ConvBNLayer(
in_c=in_channels,
out_c=_make_divisible(inplanes * self.scale),
filter_size=3,
stride=2,
padding=1,
num_groups=1,
if_act=True,
act="hard_swish")
self.block_list = []
inplanes = make_divisible(inplanes * scale)
for i, (k, exp, c, se, nl, s) in enumerate(self.cfg):
######################################
# add dilation rate
dilation_rate = self.dilation_cfg[i]
######################################
self.block_list.append(
ResidualUnit(
in_c=inplanes,
mid_c=make_divisible(scale * exp),
out_c=make_divisible(scale * c),
filter_size=k,
stride=s,
dilation=dilation_rate,
use_se=se,
act=nl,
name="conv" + str(i + 2)))
self.add_sublayer(
sublayer=self.block_list[-1], name="conv" + str(i + 2))
inplanes = make_divisible(scale * c)
self.pretrained = pretrained
act="hardswish")
self.blocks = nn.Sequential(*[
ResidualUnit(
in_c=_make_divisible(inplanes * self.scale if i == 0 else
self.cfg[i - 1][2] * self.scale),
mid_c=_make_divisible(self.scale * exp),
out_c=_make_divisible(self.scale * c),
filter_size=k,
stride=s,
use_se=se,
act=act,
dilation=td[0] if td else 1)
for i, (k, exp, c, se, act, s, *td) in enumerate(self.cfg)
])
out_channels = [config[idx][2] for idx in self.out_index]
self.feat_channels = [
_make_divisible(self.scale * c) for c in out_channels
]
self.init_res(stages_pattern)
self.init_weight()
def modify_bottle_params(self, output_stride=None):
if output_stride is not None and output_stride % 2 != 0:
raise ValueError("output stride must to be even number")
if output_stride is not None:
stride = 2
rate = 1
for i, _cfg in enumerate(self.cfg):
stride = stride * _cfg[-1]
if stride > output_stride:
rate = rate * _cfg[-1]
self.cfg[i][-1] = 1
def init_weight(self):
if self.pretrained is not None:
utils.load_entire_model(self, self.pretrained)
def init_res(self, stages_pattern, return_patterns=None,
return_stages=None):
if return_patterns and return_stages:
msg = f"The 'return_patterns' would be ignored when 'return_stages' is set."
logger.warning(msg)
return_stages = None
if return_stages is True:
return_patterns = stages_pattern
# return_stages is int or bool
if type(return_stages) is int:
return_stages = [return_stages]
if isinstance(return_stages, list):
if max(return_stages) > len(stages_pattern) or min(
return_stages) < 0:
msg = f"The 'return_stages' set error. Illegal value(s) have been ignored. The stages' pattern list is {stages_pattern}."
logger.warning(msg)
return_stages = [
val for val in return_stages
if val >= 0 and val < len(stages_pattern)
]
return_patterns = [stages_pattern[i] for i in return_stages]
self.dilation_cfg[i] = rate
def forward(self, x):
x = self.conv(x)
def forward(self, inputs, label=None):
x = self.conv1(inputs)
        # A feature list that collects the feature of each downsampling stage.
feat_list = []
for i, block in enumerate(self.block_list):
for idx, block in enumerate(self.blocks):
x = block(x)
if i in self.out_indices:
if idx in self.out_index:
feat_list.append(x)
return feat_list
def init_weight(self):
if self.pretrained is not None:
utils.load_pretrained_model(self, self.pretrained)
class ConvBNLayer(nn.Layer):
def __init__(self,
@@ -196,36 +239,34 @@ class ConvBNLayer(nn.Layer):
filter_size,
stride,
padding,
dilation=1,
num_groups=1,
if_act=True,
act=None):
super(ConvBNLayer, self).__init__()
self.if_act = if_act
self.act = act
act=None,
dilation=1):
super().__init__()
self.conv = nn.Conv2D(
self.conv = Conv2D(
in_channels=in_c,
out_channels=out_c,
kernel_size=filter_size,
stride=stride,
padding=padding,
dilation=dilation,
groups=num_groups,
bias_attr=False)
self.bn = layers.SyncBatchNorm(
num_features=out_c,
weight_attr=paddle.ParamAttr(
regularizer=paddle.regularizer.L2Decay(0.0)),
bias_attr=paddle.ParamAttr(
regularizer=paddle.regularizer.L2Decay(0.0)))
self._act_op = layers.Activation(act='hardswish')
bias_attr=False,
dilation=dilation)
self.bn = BatchNorm(
num_channels=out_c,
act=None,
param_attr=ParamAttr(regularizer=L2Decay(0.0)),
bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
self.if_act = if_act
self.act = _create_act(act)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
if self.if_act:
x = self._act_op(x)
x = self.act(x)
return x
@@ -237,10 +278,9 @@ class ResidualUnit(nn.Layer):
filter_size,
stride,
use_se,
dilation=1,
act=None,
name=''):
super(ResidualUnit, self).__init__()
dilation=1):
super().__init__()
self.if_shortcut = stride == 1 and in_c == out_c
self.if_se = use_se
@@ -252,19 +292,18 @@ class ResidualUnit(nn.Layer):
padding=0,
if_act=True,
act=act)
self.bottleneck_conv = ConvBNLayer(
in_c=mid_c,
out_c=mid_c,
filter_size=filter_size,
stride=stride,
padding='same',
dilation=dilation,
padding=int((filter_size - 1) // 2) * dilation,
num_groups=mid_c,
if_act=True,
act=act)
act=act,
dilation=dilation)
if self.if_se:
self.mid_se = SEModule(mid_c, name=name + "_se")
self.mid_se = SEModule(mid_c)
self.linear_conv = ConvBNLayer(
in_c=mid_c,
out_c=out_c,
@@ -273,92 +312,187 @@ class ResidualUnit(nn.Layer):
padding=0,
if_act=False,
act=None)
self.dilation = dilation
def forward(self, inputs):
x = self.expand_conv(inputs)
def forward(self, x):
identity = x
x = self.expand_conv(x)
x = self.bottleneck_conv(x)
if self.if_se:
x = self.mid_se(x)
x = self.linear_conv(x)
if self.if_shortcut:
x = inputs + x
x = paddle.add(identity, x)
return x
# nn.Hardsigmoid does not expose the "slope" and "offset" arguments that
# nn.functional.hardsigmoid accepts, so wrap the functional form in a Layer.
class Hardsigmoid(nn.Layer):
def __init__(self, slope=0.2, offset=0.5):
super().__init__()
self.slope = slope
self.offset = offset
def forward(self, x):
return nn.functional.hardsigmoid(
x, slope=self.slope, offset=self.offset)
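# Note (illustrative addition): with the defaults used here the gate computes
# y = clip(0.2 * x + 0.5, 0, 1), i.e. paddle's hardsigmoid with slope=0.2 and
# offset=0.5.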
class SEModule(nn.Layer):
def __init__(self, channel, reduction=4, name=""):
super(SEModule, self).__init__()
self.avg_pool = nn.AdaptiveAvgPool2D(1)
self.conv1 = nn.Conv2D(
def __init__(self, channel, reduction=4):
super().__init__()
self.avg_pool = AdaptiveAvgPool2D(1)
self.conv1 = Conv2D(
in_channels=channel,
out_channels=channel // reduction,
kernel_size=1,
stride=1,
padding=0)
self.conv2 = nn.Conv2D(
self.relu = nn.ReLU()
self.conv2 = Conv2D(
in_channels=channel // reduction,
out_channels=channel,
kernel_size=1,
stride=1,
padding=0)
self.hardsigmoid = Hardsigmoid(slope=0.2, offset=0.5)
def forward(self, inputs):
outputs = self.avg_pool(inputs)
outputs = self.conv1(outputs)
outputs = F.relu(outputs)
outputs = self.conv2(outputs)
outputs = F.hardsigmoid(outputs)
return paddle.multiply(x=inputs, y=outputs)
def forward(self, x):
identity = x
x = self.avg_pool(x)
x = self.conv1(x)
x = self.relu(x)
x = self.conv2(x)
x = self.hardsigmoid(x)
return paddle.multiply(x=identity, y=x)
@manager.BACKBONES.add_component
def MobileNetV3_small_x0_35(**kwargs):
model = MobileNetV3(model_name="small", scale=0.35, **kwargs)
model = MobileNetV3(
config=NET_CONFIG["small"],
scale=0.35,
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
out_index=OUT_INDEX["small"],
**kwargs)
return model
@manager.BACKBONES.add_component
def MobileNetV3_small_x0_5(**kwargs):
model = MobileNetV3(model_name="small", scale=0.5, **kwargs)
model = MobileNetV3(
config=NET_CONFIG["small"],
scale=0.5,
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
out_index=OUT_INDEX["small"],
**kwargs)
return model
@manager.BACKBONES.add_component
def MobileNetV3_small_x0_75(**kwargs):
model = MobileNetV3(model_name="small", scale=0.75, **kwargs)
model = MobileNetV3(
config=NET_CONFIG["small"],
scale=0.75,
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
out_index=OUT_INDEX["small"],
**kwargs)
return model
@manager.BACKBONES.add_component
def MobileNetV3_small_x1_0(**kwargs):
model = MobileNetV3(model_name="small", scale=1.0, **kwargs)
model = MobileNetV3(
config=NET_CONFIG["small"],
scale=1.0,
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
out_index=OUT_INDEX["small"],
**kwargs)
return model
@manager.BACKBONES.add_component
def MobileNetV3_small_x1_25(**kwargs):
model = MobileNetV3(model_name="small", scale=1.25, **kwargs)
model = MobileNetV3(
config=NET_CONFIG["small"],
scale=1.25,
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
out_index=OUT_INDEX["small"],
**kwargs)
return model
@manager.BACKBONES.add_component
def MobileNetV3_large_x0_35(**kwargs):
model = MobileNetV3(model_name="large", scale=0.35, **kwargs)
model = MobileNetV3(
config=NET_CONFIG["large"],
scale=0.35,
        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
out_index=OUT_INDEX["large"],
**kwargs)
return model
@manager.BACKBONES.add_component
def MobileNetV3_large_x0_5(**kwargs):
model = MobileNetV3(model_name="large", scale=0.5, **kwargs)
model = MobileNetV3(
config=NET_CONFIG["large"],
scale=0.5,
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
out_index=OUT_INDEX["large"],
**kwargs)
return model
@manager.BACKBONES.add_component
def MobileNetV3_large_x0_75(**kwargs):
model = MobileNetV3(model_name="large", scale=0.75, **kwargs)
model = MobileNetV3(
config=NET_CONFIG["large"],
scale=0.75,
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
out_index=OUT_INDEX["large"],
**kwargs)
return model
@manager.BACKBONES.add_component
def MobileNetV3_large_x1_0(**kwargs):
model = MobileNetV3(model_name="large", scale=1.0, **kwargs)
model = MobileNetV3(
config=NET_CONFIG["large"],
scale=1.0,
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
out_index=OUT_INDEX["large"],
**kwargs)
return model
@manager.BACKBONES.add_component
def MobileNetV3_large_x1_25(**kwargs):
model = MobileNetV3(model_name="large", scale=1.25, **kwargs)
model = MobileNetV3(
config=NET_CONFIG["large"],
scale=1.25,
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
out_index=OUT_INDEX["large"],
**kwargs)
return model
@manager.BACKBONES.add_component
def MobileNetV3_large_x1_0_os8(**kwargs):
model = MobileNetV3(
config=NET_CONFIG["large_os8"],
scale=1.0,
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
out_index=OUT_INDEX["large"],
**kwargs)
return model
@manager.BACKBONES.add_component
def MobileNetV3_small_x1_0_os8(**kwargs):
model = MobileNetV3(
config=NET_CONFIG["small_os8"],
scale=1.0,
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
out_index=OUT_INDEX["small"],
**kwargs)
return model
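# --- Usage sketch (an illustrative addition, not part of the upstream file;
# assumes a working PaddlePaddle install). With OUT_INDEX["large"] =
# [2, 5, 11, 14], the large model returns features at strides 4, 8, 16 and 32;
# the "*_os8" variants instead keep stride 8 from stage 3 onward by switching
# later blocks to dilated convolutions.
if __name__ == "__main__":
    model = MobileNetV3_large_x1_0()
    x = paddle.randn([1, 3, 224, 224])
    for feat in model(x):
        print(feat.shape)  # [1, 24, 56, 56] ... [1, 160, 7, 7]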

@@ -1,4 +1,4 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -206,15 +206,16 @@ class ResNet_vd(nn.Layer):
layers (int, optional): The layers of ResNet_vd. The supported layers are (18, 34, 50, 101, 152, 200). Default: 50.
output_stride (int, optional): The stride of output features compared to input images. It is 8 or 16. Default: 8.
multi_grid (tuple|list, optional): The grid of stage4. Defult: (1, 1, 1).
in_channels (int, optional): The channels of input image. Default: 3.
pretrained (str, optional): The path of pretrained model.
"""
def __init__(self,
input_channel=3,
layers=50,
output_stride=8,
multi_grid=(1, 1, 1),
in_channels=3,
pretrained=None,
data_format='NCHW'):
super(ResNet_vd, self).__init__()
@@ -252,7 +253,7 @@ class ResNet_vd(nn.Layer):
dilation_dict = {3: 2}
self.conv1_1 = ConvBNLayer(
in_channels=input_channel,
in_channels=in_channels,
out_channels=32,
kernel_size=3,
stride=2,

@@ -0,0 +1,315 @@
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
from paddle import ParamAttr, reshape, transpose, concat, split
from paddle.nn import Layer, Conv2D, MaxPool2D, AdaptiveAvgPool2D, BatchNorm, Linear
from paddle.nn.initializer import KaimingNormal
from paddle.nn.functional import swish
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.utils import utils, logger
__all__ = [
'ShuffleNetV2_x0_25', 'ShuffleNetV2_x0_33', 'ShuffleNetV2_x0_5',
'ShuffleNetV2_x1_0', 'ShuffleNetV2_x1_5', 'ShuffleNetV2_x2_0',
'ShuffleNetV2_swish'
]
def channel_shuffle(x, groups):
x_shape = paddle.shape(x)
batch_size, height, width = x_shape[0], x_shape[2], x_shape[3]
num_channels = x.shape[1]
channels_per_group = num_channels // groups
# reshape
x = reshape(
x=x, shape=[batch_size, groups, channels_per_group, height, width])
# transpose
x = transpose(x=x, perm=[0, 2, 1, 3, 4])
# flatten
x = reshape(x=x, shape=[batch_size, num_channels, height, width])
return x
class ConvBNLayer(Layer):
def __init__(
self,
in_channels,
out_channels,
kernel_size,
stride,
padding,
groups=1,
act=None,
name=None, ):
super(ConvBNLayer, self).__init__()
self._conv = Conv2D(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
groups=groups,
weight_attr=ParamAttr(
initializer=KaimingNormal(), name=name + "_weights"),
bias_attr=False)
self._batch_norm = BatchNorm(
out_channels,
param_attr=ParamAttr(name=name + "_bn_scale"),
bias_attr=ParamAttr(name=name + "_bn_offset"),
act=act,
moving_mean_name=name + "_bn_mean",
moving_variance_name=name + "_bn_variance")
def forward(self, inputs):
y = self._conv(inputs)
y = self._batch_norm(y)
return y
class InvertedResidual(Layer):
def __init__(self, in_channels, out_channels, stride, act="relu",
name=None):
super(InvertedResidual, self).__init__()
self._conv_pw = ConvBNLayer(
in_channels=in_channels // 2,
out_channels=out_channels // 2,
kernel_size=1,
stride=1,
padding=0,
groups=1,
act=act,
name='stage_' + name + '_conv1')
self._conv_dw = ConvBNLayer(
in_channels=out_channels // 2,
out_channels=out_channels // 2,
kernel_size=3,
stride=stride,
padding=1,
groups=out_channels // 2,
act=None,
name='stage_' + name + '_conv2')
self._conv_linear = ConvBNLayer(
in_channels=out_channels // 2,
out_channels=out_channels // 2,
kernel_size=1,
stride=1,
padding=0,
groups=1,
act=act,
name='stage_' + name + '_conv3')
def forward(self, inputs):
x1, x2 = split(
inputs,
num_or_sections=[inputs.shape[1] // 2, inputs.shape[1] // 2],
axis=1)
x2 = self._conv_pw(x2)
x2 = self._conv_dw(x2)
x2 = self._conv_linear(x2)
out = concat([x1, x2], axis=1)
return channel_shuffle(out, 2)
class InvertedResidualDS(Layer):
def __init__(self, in_channels, out_channels, stride, act="relu",
name=None):
super(InvertedResidualDS, self).__init__()
# branch1
self._conv_dw_1 = ConvBNLayer(
in_channels=in_channels,
out_channels=in_channels,
kernel_size=3,
stride=stride,
padding=1,
groups=in_channels,
act=None,
name='stage_' + name + '_conv4')
self._conv_linear_1 = ConvBNLayer(
in_channels=in_channels,
out_channels=out_channels // 2,
kernel_size=1,
stride=1,
padding=0,
groups=1,
act=act,
name='stage_' + name + '_conv5')
# branch2
self._conv_pw_2 = ConvBNLayer(
in_channels=in_channels,
out_channels=out_channels // 2,
kernel_size=1,
stride=1,
padding=0,
groups=1,
act=act,
name='stage_' + name + '_conv1')
self._conv_dw_2 = ConvBNLayer(
in_channels=out_channels // 2,
out_channels=out_channels // 2,
kernel_size=3,
stride=stride,
padding=1,
groups=out_channels // 2,
act=None,
name='stage_' + name + '_conv2')
self._conv_linear_2 = ConvBNLayer(
in_channels=out_channels // 2,
out_channels=out_channels // 2,
kernel_size=1,
stride=1,
padding=0,
groups=1,
act=act,
name='stage_' + name + '_conv3')
def forward(self, inputs):
x1 = self._conv_dw_1(inputs)
x1 = self._conv_linear_1(x1)
x2 = self._conv_pw_2(inputs)
x2 = self._conv_dw_2(x2)
x2 = self._conv_linear_2(x2)
out = concat([x1, x2], axis=1)
return channel_shuffle(out, 2)
class ShuffleNet(Layer):
def __init__(self, scale=1.0, act="relu", in_channels=3, pretrained=None):
super(ShuffleNet, self).__init__()
self.scale = scale
self.pretrained = pretrained
stage_repeats = [4, 8, 4]
if scale == 0.25:
stage_out_channels = [-1, 24, 24, 48, 96, 512]
elif scale == 0.33:
stage_out_channels = [-1, 24, 32, 64, 128, 512]
elif scale == 0.5:
stage_out_channels = [-1, 24, 48, 96, 192, 1024]
elif scale == 1.0:
stage_out_channels = [-1, 24, 116, 232, 464, 1024]
elif scale == 1.5:
stage_out_channels = [-1, 24, 176, 352, 704, 1024]
elif scale == 2.0:
stage_out_channels = [-1, 24, 224, 488, 976, 2048]
else:
raise NotImplementedError("This scale size:[" + str(scale) +
"] is not implemented!")
self.out_index = [3, 11, 15]
self.feat_channels = stage_out_channels[1:5]
# 1. conv1
self._conv1 = ConvBNLayer(
in_channels=in_channels,
out_channels=stage_out_channels[1],
kernel_size=3,
stride=2,
padding=1,
act=act,
name='stage1_conv')
self._max_pool = MaxPool2D(kernel_size=3, stride=2, padding=1)
# 2. bottleneck sequences
self._block_list = []
for stage_id, num_repeat in enumerate(stage_repeats):
for i in range(num_repeat):
if i == 0:
block = self.add_sublayer(
name=str(stage_id + 2) + '_' + str(i + 1),
sublayer=InvertedResidualDS(
in_channels=stage_out_channels[stage_id + 1],
out_channels=stage_out_channels[stage_id + 2],
stride=2,
act=act,
name=str(stage_id + 2) + '_' + str(i + 1)))
else:
block = self.add_sublayer(
name=str(stage_id + 2) + '_' + str(i + 1),
sublayer=InvertedResidual(
in_channels=stage_out_channels[stage_id + 2],
out_channels=stage_out_channels[stage_id + 2],
stride=1,
act=act,
name=str(stage_id + 2) + '_' + str(i + 1)))
self._block_list.append(block)
self.init_weight()
def init_weight(self):
if self.pretrained is not None:
utils.load_entire_model(self, self.pretrained)
def forward(self, inputs):
feat_list = []
y = self._conv1(inputs)
y = self._max_pool(y)
feat_list.append(y)
for idx, inv in enumerate(self._block_list):
y = inv(y)
if idx in self.out_index:
feat_list.append(y)
return feat_list
@manager.BACKBONES.add_component
def ShuffleNetV2_x0_25(**kwargs):
model = ShuffleNet(scale=0.25, **kwargs)
return model
@manager.BACKBONES.add_component
def ShuffleNetV2_x0_33(**kwargs):
model = ShuffleNet(scale=0.33, **kwargs)
return model
@manager.BACKBONES.add_component
def ShuffleNetV2_x0_5(**kwargs):
model = ShuffleNet(scale=0.5, **kwargs)
return model
@manager.BACKBONES.add_component
def ShuffleNetV2_x1_0(**kwargs):
model = ShuffleNet(scale=1.0, **kwargs)
return model
@manager.BACKBONES.add_component
def ShuffleNetV2_x1_5(**kwargs):
model = ShuffleNet(scale=1.5, **kwargs)
return model
@manager.BACKBONES.add_component
def ShuffleNetV2_x2_0(**kwargs):
model = ShuffleNet(scale=2.0, **kwargs)
return model
@manager.BACKBONES.add_component
def ShuffleNetV2_swish(**kwargs):
model = ShuffleNet(scale=1.0, act="swish", **kwargs)
return model
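# Minimal usage sketch (not part of the original file), assuming the
# module-level paddle import. The backbone is fully convolutional, so the
# input size below is only an example.
if __name__ == "__main__":
    model = ShuffleNetV2_x1_0()
    feats = model(paddle.randn([1, 3, 224, 224]))
    # Four feature maps at strides 4, 8, 16 and 32; their channel counts
    # follow model.feat_channels ([24, 116, 232, 464] for the x1_0 variant).
    print([f.shape for f in feats])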

@ -37,9 +37,9 @@ class STDCNet(nn.Layer):
        layers (list, optional): The number of STDC blocks in each of stages 3/4/5. Default: [4, 5, 3].
        block_num (int, optional): The number of convolution layers in each features block. Default: 4.
        type (str, optional): The feature fusion method, "cat" or "add". Default: "cat".
        num_classes (int, optional): The class number for image classification. Default: 1000.
        dropout (float, optional): The dropout ratio, applied only when greater than 0. Default: 0.20.
        use_conv_last (bool, optional): Whether to use the last ConvBNReLU layer. Default: False.
        relative_lr (float, optional): Parameters here receive a different learning rate when updating. The effective
            learning rate is the product of relative_lr and the global learning rate. Default: 1.0.
        in_channels (int, optional): The channels of input image. Default: 3.
        pretrained (str, optional): The path of pretrained model. Default: None.
"""
@ -48,34 +48,18 @@ class STDCNet(nn.Layer):
layers=[4, 5, 3],
block_num=4,
type="cat",
num_classes=1000,
dropout=0.20,
use_conv_last=False,
relative_lr=1.0,
in_channels=3,
pretrained=None):
super(STDCNet, self).__init__()
if type == "cat":
block = CatBottleneck
elif type == "add":
block = AddBottleneck
self.use_conv_last = use_conv_last
self.features = self._make_layers(base, layers, block_num, block)
self.conv_last = ConvBNRelu(base * 16, max(1024, base * 16), 1, 1)
if (layers == [4, 5, 3]): #stdc1446
self.x2 = nn.Sequential(self.features[:1])
self.x4 = nn.Sequential(self.features[1:2])
self.x8 = nn.Sequential(self.features[2:6])
self.x16 = nn.Sequential(self.features[6:11])
self.x32 = nn.Sequential(self.features[11:])
elif (layers == [2, 2, 2]): #stdc813
self.x2 = nn.Sequential(self.features[:1])
self.x4 = nn.Sequential(self.features[1:2])
self.x8 = nn.Sequential(self.features[2:4])
self.x16 = nn.Sequential(self.features[4:6])
self.x32 = nn.Sequential(self.features[6:])
else:
raise NotImplementedError(
"model with layers:{} is not implemented!".format(layers))
self.layers = layers
self.feat_channels = [base // 2, base, base * 4, base * 8, base * 16]
self.features = self._make_layers(in_channels, base, layers, block_num,
block, relative_lr)
self.pretrained = pretrained
self.init_weight()
@ -84,32 +68,42 @@ class STDCNet(nn.Layer):
"""
forward function for feature extract.
"""
feat2 = self.x2(x)
feat4 = self.x4(feat2)
feat8 = self.x8(feat4)
feat16 = self.x16(feat8)
feat32 = self.x32(feat16)
if self.use_conv_last:
feat32 = self.conv_last(feat32)
return feat2, feat4, feat8, feat16, feat32
def _make_layers(self, base, layers, block_num, block):
out_feats = []
x = self.features[0](x)
out_feats.append(x)
x = self.features[1](x)
out_feats.append(x)
idx = [[2, 2 + self.layers[0]],
[2 + self.layers[0], 2 + sum(self.layers[0:2])],
[2 + sum(self.layers[0:2]), 2 + sum(self.layers)]]
for start_idx, end_idx in idx:
for i in range(start_idx, end_idx):
x = self.features[i](x)
out_feats.append(x)
return out_feats
def _make_layers(self, in_channels, base, layers, block_num, block,
relative_lr):
features = []
features += [ConvBNRelu(3, base // 2, 3, 2)]
features += [ConvBNRelu(base // 2, base, 3, 2)]
features += [ConvBNRelu(in_channels, base // 2, 3, 2, relative_lr)]
features += [ConvBNRelu(base // 2, base, 3, 2, relative_lr)]
for i, layer in enumerate(layers):
for j in range(layer):
if i == 0 and j == 0:
features.append(block(base, base * 4, block_num, 2))
features.append(
block(base, base * 4, block_num, 2, relative_lr))
elif j == 0:
features.append(
block(base * int(math.pow(2, i + 1)), base * int(
math.pow(2, i + 2)), block_num, 2))
math.pow(2, i + 2)), block_num, 2, relative_lr))
else:
features.append(
block(base * int(math.pow(2, i + 2)), base * int(
math.pow(2, i + 2)), block_num, 1))
math.pow(2, i + 2)), block_num, 1, relative_lr))
return nn.Sequential(*features)
@ -125,16 +119,24 @@ class STDCNet(nn.Layer):
class ConvBNRelu(nn.Layer):
def __init__(self, in_planes, out_planes, kernel=3, stride=1):
def __init__(self,
in_planes,
out_planes,
kernel=3,
stride=1,
relative_lr=1.0):
super(ConvBNRelu, self).__init__()
param_attr = paddle.ParamAttr(learning_rate=relative_lr)
self.conv = nn.Conv2D(
in_planes,
out_planes,
kernel_size=kernel,
stride=stride,
padding=kernel // 2,
weight_attr=param_attr,
bias_attr=False)
self.bn = SyncBatchNorm(out_planes, data_format='NCHW')
self.bn = nn.BatchNorm2D(
out_planes, weight_attr=param_attr, bias_attr=param_attr)
self.relu = nn.ReLU()
def forward(self, x):
@ -143,11 +145,17 @@ class ConvBNRelu(nn.Layer):
class AddBottleneck(nn.Layer):
def __init__(self, in_planes, out_planes, block_num=3, stride=1):
def __init__(self,
in_planes,
out_planes,
block_num=3,
stride=1,
relative_lr=1.0):
super(AddBottleneck, self).__init__()
assert block_num > 1, "block number should be larger than 1."
self.conv_list = nn.LayerList()
self.stride = stride
param_attr = paddle.ParamAttr(learning_rate=relative_lr)
if stride == 2:
self.avd_layer = nn.Sequential(
nn.Conv2D(
@ -157,8 +165,12 @@ class AddBottleneck(nn.Layer):
stride=2,
padding=1,
groups=out_planes // 2,
weight_attr=param_attr,
bias_attr=False),
nn.BatchNorm2D(out_planes // 2), )
nn.BatchNorm2D(
out_planes // 2,
weight_attr=param_attr,
bias_attr=param_attr), )
self.skip = nn.Sequential(
nn.Conv2D(
in_planes,
@ -167,34 +179,53 @@ class AddBottleneck(nn.Layer):
stride=2,
padding=1,
groups=in_planes,
weight_attr=param_attr,
bias_attr=False),
nn.BatchNorm2D(in_planes),
nn.BatchNorm2D(
in_planes, weight_attr=param_attr, bias_attr=param_attr),
nn.Conv2D(
in_planes, out_planes, kernel_size=1, bias_attr=False),
nn.BatchNorm2D(out_planes), )
in_planes,
out_planes,
kernel_size=1,
bias_attr=False,
weight_attr=param_attr),
nn.BatchNorm2D(
out_planes, weight_attr=param_attr, bias_attr=param_attr), )
stride = 1
for idx in range(block_num):
if idx == 0:
self.conv_list.append(
ConvBNRelu(
in_planes, out_planes // 2, kernel=1))
in_planes,
out_planes // 2,
kernel=1,
relative_lr=relative_lr))
elif idx == 1 and block_num == 2:
self.conv_list.append(
ConvBNRelu(
out_planes // 2, out_planes // 2, stride=stride))
out_planes // 2,
out_planes // 2,
stride=stride,
relative_lr=relative_lr))
elif idx == 1 and block_num > 2:
self.conv_list.append(
ConvBNRelu(
out_planes // 2, out_planes // 4, stride=stride))
out_planes // 2,
out_planes // 4,
stride=stride,
relative_lr=relative_lr))
elif idx < block_num - 1:
self.conv_list.append(
ConvBNRelu(out_planes // int(math.pow(2, idx)), out_planes
// int(math.pow(2, idx + 1))))
ConvBNRelu(
out_planes // int(math.pow(2, idx)),
out_planes // int(math.pow(2, idx + 1)),
relative_lr=relative_lr))
else:
self.conv_list.append(
ConvBNRelu(out_planes // int(math.pow(2, idx)), out_planes
// int(math.pow(2, idx))))
                    ConvBNRelu(
                        out_planes // int(math.pow(2, idx)),
                        out_planes // int(math.pow(2, idx)),
                        relative_lr=relative_lr))
def forward(self, x):
out_list = []
@ -211,11 +242,17 @@ class AddBottleneck(nn.Layer):
class CatBottleneck(nn.Layer):
def __init__(self, in_planes, out_planes, block_num=3, stride=1):
def __init__(self,
in_planes,
out_planes,
block_num=3,
stride=1,
relative_lr=1.0):
super(CatBottleneck, self).__init__()
assert block_num > 1, "block number should be larger than 1."
self.conv_list = nn.LayerList()
self.stride = stride
param_attr = paddle.ParamAttr(learning_rate=relative_lr)
if stride == 2:
self.avd_layer = nn.Sequential(
nn.Conv2D(
@ -225,8 +262,12 @@ class CatBottleneck(nn.Layer):
stride=2,
padding=1,
groups=out_planes // 2,
weight_attr=param_attr,
bias_attr=False),
nn.BatchNorm2D(out_planes // 2), )
nn.BatchNorm2D(
out_planes // 2,
weight_attr=param_attr,
bias_attr=param_attr), )
self.skip = nn.AvgPool2D(kernel_size=3, stride=2, padding=1)
stride = 1
@ -234,23 +275,36 @@ class CatBottleneck(nn.Layer):
if idx == 0:
self.conv_list.append(
ConvBNRelu(
in_planes, out_planes // 2, kernel=1))
in_planes,
out_planes // 2,
kernel=1,
relative_lr=relative_lr))
elif idx == 1 and block_num == 2:
self.conv_list.append(
ConvBNRelu(
out_planes // 2, out_planes // 2, stride=stride))
out_planes // 2,
out_planes // 2,
stride=stride,
relative_lr=relative_lr))
elif idx == 1 and block_num > 2:
self.conv_list.append(
ConvBNRelu(
out_planes // 2, out_planes // 4, stride=stride))
out_planes // 2,
out_planes // 4,
stride=stride,
relative_lr=relative_lr))
elif idx < block_num - 1:
self.conv_list.append(
ConvBNRelu(out_planes // int(math.pow(2, idx)), out_planes
// int(math.pow(2, idx + 1))))
ConvBNRelu(
out_planes // int(math.pow(2, idx)),
out_planes // int(math.pow(2, idx + 1)),
relative_lr=relative_lr))
else:
self.conv_list.append(
ConvBNRelu(out_planes // int(math.pow(2, idx)), out_planes
// int(math.pow(2, idx))))
ConvBNRelu(
out_planes // int(math.pow(2, idx)),
out_planes // int(math.pow(2, idx)),
relative_lr=relative_lr))
def forward(self, x):
out_list = []

@ -1,4 +1,4 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -531,7 +531,7 @@ class SwinTransformer(nn.Layer):
Args:
pretrain_img_size (int): Input image size for training the pretrained model, used in absolute postion embedding. Default: 224.
patch_size (int | tuple(int)): Patch size. Default: 4.
in_chans (int): Number of input image channels. Default: 3.
in_channels (int): Number of input image channels. Default: 3.
embed_dim (int): Number of linear projection output channels. Default: 96.
depths (tuple[int]): Depths of each Swin Transformer stage.
num_heads (tuple[int]): Number of attention head of each stage.
@ -553,7 +553,7 @@ class SwinTransformer(nn.Layer):
def __init__(self,
pretrain_img_size=224,
patch_size=4,
in_chans=3,
in_channels=3,
embed_dim=96,
depths=[2, 2, 6, 2],
num_heads=[3, 6, 12, 24],
@ -583,7 +583,7 @@ class SwinTransformer(nn.Layer):
# split image into non-overlapping patches
self.patch_embed = PatchEmbed(
patch_size=patch_size,
in_chans=in_chans,
in_chans=in_channels,
embed_dim=embed_dim,
norm_layer=norm_layer if self.patch_norm else None)

@ -0,0 +1,716 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This file refers to https://github.com/hustvl/TopFormer and https://github.com/BR-IDL/PaddleViT
"""
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg import utils
from paddlers.models.ppseg.models.backbones.transformer_utils import Identity, DropPath
__all__ = ["TopTransformer_Base", "TopTransformer_Small", "TopTransformer_Tiny"]
def make_divisible(val, divisor, min_value=None):
"""
This function is taken from the original tf repo.
It ensures that all layers have a channel number that is divisible by 8
It can be seen here:
https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
"""
if min_value is None:
min_value = divisor
new_v = max(min_value, int(val + divisor / 2) // divisor * divisor)
# Make sure that round down does not go down by more than 10%.
if new_v < 0.9 * val:
new_v += divisor
return new_v
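# Worked examples (sketch) of the rounding rule above:
#   make_divisible(30, 8) -> 32   (rounded to the nearest multiple of 8)
#   make_divisible(22, 8) -> 24
#   make_divisible(7, 8)  -> 8    (clamped to min_value, which defaults to the divisor)
#   make_divisible(10, 8) -> 16   (8 would be a >10% drop from 10, so one divisor is added back)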
class HSigmoid(nn.Layer):
def __init__(self, inplace=True):
super().__init__()
self.relu = nn.ReLU6()
def forward(self, x):
return self.relu(x + 3) / 6
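# HSigmoid above is the piecewise-linear "hard" sigmoid used by
# MobileNetV3-style gates: relu6(x + 3) / 6, which is 0 for x <= -3, 1 for
# x >= 3, and linear in between. The `inplace` argument is accepted for API
# parity with other implementations and is unused here.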
class Conv2DBN(nn.Layer):
def __init__(self,
in_channels,
out_channels,
ks=1,
stride=1,
pad=0,
dilation=1,
groups=1,
bn_weight_init=1,
lr_mult=1.0):
super().__init__()
conv_weight_attr = paddle.ParamAttr(learning_rate=lr_mult)
self.c = nn.Conv2D(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=ks,
stride=stride,
padding=pad,
dilation=dilation,
groups=groups,
weight_attr=conv_weight_attr,
bias_attr=False)
bn_weight_attr = paddle.ParamAttr(
initializer=nn.initializer.Constant(bn_weight_init),
learning_rate=lr_mult)
bn_bias_attr = paddle.ParamAttr(
initializer=nn.initializer.Constant(0), learning_rate=lr_mult)
self.bn = nn.BatchNorm2D(
out_channels, weight_attr=bn_weight_attr, bias_attr=bn_bias_attr)
def forward(self, inputs):
out = self.c(inputs)
out = self.bn(out)
return out
class ConvBNAct(nn.Layer):
def __init__(self,
in_channels,
out_channels,
kernel_size=1,
stride=1,
padding=0,
groups=1,
norm=nn.BatchNorm2D,
act=None,
bias_attr=False,
lr_mult=1.0):
super(ConvBNAct, self).__init__()
param_attr = paddle.ParamAttr(learning_rate=lr_mult)
self.conv = nn.Conv2D(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
groups=groups,
weight_attr=param_attr,
bias_attr=param_attr if bias_attr else False)
self.act = act() if act is not None else Identity()
self.bn = norm(out_channels, weight_attr=param_attr, bias_attr=param_attr) \
if norm is not None else Identity()
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.act(x)
return x
class MLP(nn.Layer):
def __init__(self,
in_features,
hidden_features=None,
out_features=None,
act_layer=nn.ReLU,
drop=0.,
lr_mult=1.0):
super().__init__()
out_features = out_features or in_features
hidden_features = hidden_features or in_features
self.fc1 = Conv2DBN(in_features, hidden_features, lr_mult=lr_mult)
param_attr = paddle.ParamAttr(learning_rate=lr_mult)
self.dwconv = nn.Conv2D(
hidden_features,
hidden_features,
3,
1,
1,
groups=hidden_features,
weight_attr=param_attr,
bias_attr=param_attr)
self.act = act_layer()
self.fc2 = Conv2DBN(hidden_features, out_features, lr_mult=lr_mult)
self.drop = nn.Dropout(drop)
def forward(self, x):
x = self.fc1(x)
x = self.dwconv(x)
x = self.act(x)
x = self.drop(x)
x = self.fc2(x)
x = self.drop(x)
return x
class InvertedResidual(nn.Layer):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride,
expand_ratio,
activations=None,
lr_mult=1.0):
super(InvertedResidual, self).__init__()
assert stride in [1, 2], "The stride should be 1 or 2."
if activations is None:
activations = nn.ReLU
hidden_dim = int(round(in_channels * expand_ratio))
self.use_res_connect = stride == 1 and in_channels == out_channels
layers = []
if expand_ratio != 1:
layers.append(
Conv2DBN(
in_channels, hidden_dim, ks=1, lr_mult=lr_mult))
layers.append(activations())
layers.extend([
Conv2DBN(
hidden_dim,
hidden_dim,
ks=kernel_size,
stride=stride,
pad=kernel_size // 2,
groups=hidden_dim,
lr_mult=lr_mult), activations(), Conv2DBN(
hidden_dim, out_channels, ks=1, lr_mult=lr_mult)
])
self.conv = nn.Sequential(*layers)
self.out_channels = out_channels
def forward(self, x):
if self.use_res_connect:
return x + self.conv(x)
else:
return self.conv(x)
class TokenPyramidModule(nn.Layer):
def __init__(self,
cfgs,
out_indices,
in_channels=3,
inp_channel=16,
activation=nn.ReLU,
width_mult=1.,
lr_mult=1.):
super().__init__()
self.out_indices = out_indices
self.stem = nn.Sequential(
Conv2DBN(
in_channels, inp_channel, 3, 2, 1, lr_mult=lr_mult),
activation())
self.layers = []
for i, (k, t, c, s) in enumerate(cfgs):
output_channel = make_divisible(c * width_mult, 8)
exp_size = t * inp_channel
exp_size = make_divisible(exp_size * width_mult, 8)
layer_name = 'layer{}'.format(i + 1)
layer = InvertedResidual(
inp_channel,
output_channel,
kernel_size=k,
stride=s,
expand_ratio=t,
activations=activation,
lr_mult=lr_mult)
self.add_sublayer(layer_name, layer)
self.layers.append(layer_name)
inp_channel = output_channel
def forward(self, x):
outs = []
x = self.stem(x)
for i, layer_name in enumerate(self.layers):
layer = getattr(self, layer_name)
x = layer(x)
if i in self.out_indices:
outs.append(x)
return outs
class Attention(nn.Layer):
def __init__(self,
dim,
key_dim,
num_heads,
attn_ratio=4,
activation=None,
lr_mult=1.0):
super().__init__()
self.num_heads = num_heads
self.scale = key_dim**-0.5
self.key_dim = key_dim
self.nh_kd = nh_kd = key_dim * num_heads
self.d = int(attn_ratio * key_dim)
self.dh = int(attn_ratio * key_dim) * num_heads
self.attn_ratio = attn_ratio
self.to_q = Conv2DBN(dim, nh_kd, 1, lr_mult=lr_mult)
self.to_k = Conv2DBN(dim, nh_kd, 1, lr_mult=lr_mult)
self.to_v = Conv2DBN(dim, self.dh, 1, lr_mult=lr_mult)
self.proj = nn.Sequential(
activation(),
Conv2DBN(
self.dh, dim, bn_weight_init=0, lr_mult=lr_mult))
def forward(self, x):
x_shape = paddle.shape(x)
H, W = x_shape[2], x_shape[3]
qq = self.to_q(x).reshape(
[0, self.num_heads, self.key_dim, -1]).transpose([0, 1, 3, 2])
kk = self.to_k(x).reshape([0, self.num_heads, self.key_dim, -1])
vv = self.to_v(x).reshape([0, self.num_heads, self.d, -1]).transpose(
[0, 1, 3, 2])
attn = paddle.matmul(qq, kk)
attn = F.softmax(attn, axis=-1)
xx = paddle.matmul(attn, vv)
xx = xx.transpose([0, 1, 3, 2]).reshape([0, self.dh, H, W])
xx = self.proj(xx)
return xx
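# Shape walk-through for Attention.forward (sketch), with B batches and
# N = H * W spatial tokens:
#   qq: [B, num_heads, N, key_dim]       kk: [B, num_heads, key_dim, N]
#   vv: [B, num_heads, N, d]             attn = softmax(qq @ kk): [B, num_heads, N, N]
#   xx = attn @ vv: [B, num_heads, N, d] -> transposed/reshaped to [B, dh, H, W]
# The 0 entries in the reshape targets keep the corresponding input dimension
# (Paddle's reshape convention), so the batch size stays symbolic. Note that
# self.scale (key_dim ** -0.5) is defined but not applied to the attention
# logits in this implementation.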
class Block(nn.Layer):
def __init__(self,
dim,
key_dim,
num_heads,
mlp_ratios=4.,
attn_ratio=2.,
drop=0.,
drop_path=0.,
act_layer=nn.ReLU,
lr_mult=1.0):
super().__init__()
self.dim = dim
self.num_heads = num_heads
self.mlp_ratios = mlp_ratios
self.attn = Attention(
dim,
key_dim=key_dim,
num_heads=num_heads,
attn_ratio=attn_ratio,
activation=act_layer,
lr_mult=lr_mult)
# NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
self.drop_path = DropPath(drop_path) if drop_path > 0. else Identity()
mlp_hidden_dim = int(dim * mlp_ratios)
self.mlp = MLP(in_features=dim,
hidden_features=mlp_hidden_dim,
act_layer=act_layer,
drop=drop,
lr_mult=lr_mult)
def forward(self, x):
h = x
x = self.attn(x)
x = self.drop_path(x)
x = h + x
h = x
x = self.mlp(x)
x = self.drop_path(x)
x = x + h
return x
class BasicLayer(nn.Layer):
def __init__(self,
block_num,
embedding_dim,
key_dim,
num_heads,
mlp_ratios=4.,
attn_ratio=2.,
drop=0.,
attn_drop=0.,
drop_path=0.,
act_layer=None,
lr_mult=1.0):
super().__init__()
self.block_num = block_num
self.transformer_blocks = nn.LayerList()
for i in range(self.block_num):
self.transformer_blocks.append(
Block(
embedding_dim,
key_dim=key_dim,
num_heads=num_heads,
mlp_ratios=mlp_ratios,
attn_ratio=attn_ratio,
drop=drop,
drop_path=drop_path[i]
if isinstance(drop_path, list) else drop_path,
act_layer=act_layer,
lr_mult=lr_mult))
def forward(self, x):
# token * N
for i in range(self.block_num):
x = self.transformer_blocks[i](x)
return x
class PyramidPoolAgg(nn.Layer):
def __init__(self, stride):
super().__init__()
self.stride = stride
self.tmp = Identity() # avoid the error of paddle.flops
def forward(self, inputs):
'''
# The F.adaptive_avg_pool2d does not support the (H, W) be Tensor,
# so exporting the inference model will raise error.
_, _, H, W = inputs[-1].shape
H = (H - 1) // self.stride + 1
W = (W - 1) // self.stride + 1
return paddle.concat(
[F.adaptive_avg_pool2d(inp, (H, W)) for inp in inputs], axis=1)
'''
out = []
ks = 2**len(inputs)
stride = self.stride**len(inputs)
for x in inputs:
x = F.avg_pool2d(x, int(ks), int(stride))
ks /= 2
stride /= 2
out.append(x)
out = paddle.concat(out, axis=1)
return out
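# Sketch of the fixed pooling schedule above for the default 4-stage input
# and c2t_stride=2: the token pyramid arrives at strides 4/8/16/32 w.r.t. the
# image and is average-pooled with kernel and stride 16/8/4/2 respectively,
# so every map lands on the same stride-64 grid before channel-wise
# concatenation. The static schedule replaces F.adaptive_avg_pool2d to keep
# the graph exportable, as the commented-out block explains.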
class InjectionMultiSum(nn.Layer):
def __init__(self, in_channels, out_channels, activations=None,
lr_mult=1.0):
super(InjectionMultiSum, self).__init__()
self.local_embedding = ConvBNAct(
in_channels, out_channels, kernel_size=1, lr_mult=lr_mult)
self.global_embedding = ConvBNAct(
in_channels, out_channels, kernel_size=1, lr_mult=lr_mult)
self.global_act = ConvBNAct(
in_channels, out_channels, kernel_size=1, lr_mult=lr_mult)
self.act = HSigmoid()
def forward(self, x_low, x_global):
xl_hw = paddle.shape(x_low)[2:]
local_feat = self.local_embedding(x_low)
global_act = self.global_act(x_global)
sig_act = F.interpolate(
self.act(global_act), xl_hw, mode='bilinear', align_corners=False)
global_feat = self.global_embedding(x_global)
global_feat = F.interpolate(
global_feat, xl_hw, mode='bilinear', align_corners=False)
out = local_feat * sig_act + global_feat
return out
class InjectionMultiSumCBR(nn.Layer):
def __init__(self, in_channels, out_channels, activations=None):
'''
local_embedding: conv-bn-relu
global_embedding: conv-bn-relu
global_act: conv
'''
super(InjectionMultiSumCBR, self).__init__()
self.local_embedding = ConvBNAct(
in_channels, out_channels, kernel_size=1)
self.global_embedding = ConvBNAct(
in_channels, out_channels, kernel_size=1)
self.global_act = ConvBNAct(
in_channels, out_channels, kernel_size=1, norm=None, act=None)
self.act = HSigmoid()
def forward(self, x_low, x_global):
        xl_hw = paddle.shape(x_low)[2:]
local_feat = self.local_embedding(x_low)
# kernel
global_act = self.global_act(x_global)
global_act = F.interpolate(
self.act(global_act), xl_hw, mode='bilinear', align_corners=False)
# feat_h
global_feat = self.global_embedding(x_global)
global_feat = F.interpolate(
global_feat, xl_hw, mode='bilinear', align_corners=False)
out = local_feat * global_act + global_feat
return out
class FuseBlockSum(nn.Layer):
def __init__(self, in_channels, out_channels, activations=None):
super(FuseBlockSum, self).__init__()
self.fuse1 = ConvBNAct(
in_channels, out_channels, kernel_size=1, act=None)
self.fuse2 = ConvBNAct(
in_channels, out_channels, kernel_size=1, act=None)
def forward(self, x_low, x_high):
        xl_hw = paddle.shape(x_low)[2:]
inp = self.fuse1(x_low)
kernel = self.fuse2(x_high)
feat_h = F.interpolate(
kernel, xl_hw, mode='bilinear', align_corners=False)
out = inp + feat_h
return out
class FuseBlockMulti(nn.Layer):
def __init__(
self,
in_channels,
out_channels,
stride=1,
activations=None, ):
super(FuseBlockMulti, self).__init__()
assert stride in [1, 2], "The stride should be 1 or 2."
self.fuse1 = ConvBNAct(
in_channels, out_channels, kernel_size=1, act=None)
self.fuse2 = ConvBNAct(
in_channels, out_channels, kernel_size=1, act=None)
self.act = HSigmoid()
def forward(self, x_low, x_high):
        xl_hw = paddle.shape(x_low)[2:]
inp = self.fuse1(x_low)
sig_act = self.fuse2(x_high)
sig_act = F.interpolate(
self.act(sig_act), xl_hw, mode='bilinear', align_corners=False)
out = inp * sig_act
return out
SIM_BLOCK = {
"fuse_sum": FuseBlockSum,
"fuse_multi": FuseBlockMulti,
"multi_sum": InjectionMultiSum,
"multi_sum_cbr": InjectionMultiSumCBR,
}
class TopTransformer(nn.Layer):
def __init__(self,
cfgs,
injection_out_channels,
encoder_out_indices,
trans_out_indices=[1, 2, 3],
depths=4,
key_dim=16,
num_heads=8,
attn_ratios=2,
mlp_ratios=2,
c2t_stride=2,
drop_path_rate=0.,
act_layer=nn.ReLU6,
injection_type="muli_sum",
injection=True,
lr_mult=1.0,
in_channels=3,
pretrained=None):
super().__init__()
self.feat_channels = [
c[2] for i, c in enumerate(cfgs) if i in encoder_out_indices
]
self.injection_out_channels = injection_out_channels
self.injection = injection
self.embed_dim = sum(self.feat_channels)
self.trans_out_indices = trans_out_indices
self.tpm = TokenPyramidModule(
cfgs=cfgs,
out_indices=encoder_out_indices,
in_channels=in_channels,
lr_mult=lr_mult)
self.ppa = PyramidPoolAgg(stride=c2t_stride)
dpr = [x.item() for x in \
paddle.linspace(0, drop_path_rate, depths)]
self.trans = BasicLayer(
block_num=depths,
embedding_dim=self.embed_dim,
key_dim=key_dim,
num_heads=num_heads,
mlp_ratios=mlp_ratios,
attn_ratio=attn_ratios,
drop=0,
attn_drop=0,
drop_path=dpr,
act_layer=act_layer,
lr_mult=lr_mult)
self.SIM = nn.LayerList()
inj_module = SIM_BLOCK[injection_type]
if self.injection:
for i in range(len(self.feat_channels)):
if i in trans_out_indices:
self.SIM.append(
inj_module(
self.feat_channels[i],
injection_out_channels[i],
activations=act_layer,
lr_mult=lr_mult))
else:
self.SIM.append(Identity())
self.pretrained = pretrained
self.init_weight()
def init_weight(self):
if self.pretrained is not None:
utils.load_entire_model(self, self.pretrained)
    def forward(self, x):
        outputs = self.tpm(x)
        out = self.ppa(outputs)
        out = self.trans(out)
        if self.injection:
            xx = out.split(self.feat_channels, axis=1)
            results = []
            for i in range(len(self.feat_channels)):
                if i in self.trans_out_indices:
                    local_tokens = outputs[i]
                    global_semantics = xx[i]
                    out_ = self.SIM[i](local_tokens, global_semantics)
                    results.append(out_)
            return results
        else:
            outputs.append(out)
            return outputs
@manager.BACKBONES.add_component
def TopTransformer_Base(**kwargs):
cfgs = [
# k, t, c, s
[3, 1, 16, 1], # 1/2
[3, 4, 32, 2], # 1/4 1
[3, 3, 32, 1], #
[5, 3, 64, 2], # 1/8 3
[5, 3, 64, 1], #
[3, 3, 128, 2], # 1/16 5
[3, 3, 128, 1], #
[5, 6, 160, 2], # 1/32 7
[5, 6, 160, 1], #
[3, 6, 160, 1], #
]
model = TopTransformer(
cfgs=cfgs,
injection_out_channels=[None, 256, 256, 256],
encoder_out_indices=[2, 4, 6, 9],
trans_out_indices=[1, 2, 3],
depths=4,
key_dim=16,
num_heads=8,
attn_ratios=2,
mlp_ratios=2,
c2t_stride=2,
drop_path_rate=0.,
act_layer=nn.ReLU6,
injection_type="multi_sum",
injection=True,
**kwargs)
return model
@manager.BACKBONES.add_component
def TopTransformer_Small(**kwargs):
cfgs = [
# k, t, c, s
[3, 1, 16, 1], # 1/2
[3, 4, 24, 2], # 1/4 1
[3, 3, 24, 1], #
[5, 3, 48, 2], # 1/8 3
[5, 3, 48, 1], #
[3, 3, 96, 2], # 1/16 5
[3, 3, 96, 1], #
[5, 6, 128, 2], # 1/32 7
[5, 6, 128, 1], #
[3, 6, 128, 1], #
]
model = TopTransformer(
cfgs=cfgs,
injection_out_channels=[None, 192, 192, 192],
encoder_out_indices=[2, 4, 6, 9],
trans_out_indices=[1, 2, 3],
depths=4,
key_dim=16,
num_heads=6,
attn_ratios=2,
mlp_ratios=2,
c2t_stride=2,
drop_path_rate=0.,
act_layer=nn.ReLU6,
injection_type="multi_sum",
injection=True,
**kwargs)
return model
@manager.BACKBONES.add_component
def TopTransformer_Tiny(**kwargs):
cfgs = [
# k, t, c, s
[3, 1, 16, 1], # 1/2
[3, 4, 16, 2], # 1/4 1
[3, 3, 16, 1], #
[5, 3, 32, 2], # 1/8 3
[5, 3, 32, 1], #
[3, 3, 64, 2], # 1/16 5
[3, 3, 64, 1], #
[5, 6, 96, 2], # 1/32 7
[5, 6, 96, 1], #
]
model = TopTransformer(
cfgs=cfgs,
injection_out_channels=[None, 128, 128, 128],
encoder_out_indices=[2, 4, 6, 8],
trans_out_indices=[1, 2, 3],
depths=4,
key_dim=16,
num_heads=4,
attn_ratios=2,
mlp_ratios=2,
c2t_stride=2,
drop_path_rate=0.,
act_layer=nn.ReLU6,
injection_type="multi_sum",
injection=True,
**kwargs)
return model
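# Minimal usage sketch (not part of the original file), assuming the
# module-level paddle import. Input sides should be divisible by 64 so the
# fixed pooling in PyramidPoolAgg tiles the deepest feature map exactly.
if __name__ == "__main__":
    backbone = TopTransformer_Tiny()
    outs = backbone(paddle.randn([1, 3, 512, 512]))
    # With injection=True, three injected feature maps are returned for
    # trans_out_indices [1, 2, 3], i.e. strides 8/16/32 with 128 channels.
    print([o.shape for o in outs])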

@ -1,4 +1,4 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -35,7 +35,7 @@ def drop_path(x, drop_prob=0., training=False):
return x
keep_prob = paddle.to_tensor(1 - drop_prob)
shape = (paddle.shape(x)[0], ) + (1, ) * (x.ndim - 1)
random_tensor = keep_prob + paddle.rand(shape, dtype=x.dtype)
random_tensor = keep_prob + paddle.rand(shape).astype(x.dtype)
random_tensor = paddle.floor(random_tensor) # binarize
output = x.divide(keep_prob) * random_tensor
return output
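# Behavioural sketch of drop_path (stochastic depth): in training, each
# sample's residual branch is zeroed with probability drop_prob and the
# survivors are rescaled by 1 / keep_prob so the expectation is unchanged.
#   x = paddle.ones([4, 8])
#   y = drop_path(x, drop_prob=0.5, training=True)
#   # each row of y is, independently, either all 0.0 or all 2.0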

@ -1,4 +1,4 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -154,7 +154,7 @@ class VisionTransformer(nn.Layer):
def __init__(self,
img_size=224,
patch_size=16,
in_chans=3,
in_channels=3,
embed_dim=768,
depth=12,
num_heads=12,
@ -176,7 +176,7 @@ class VisionTransformer(nn.Layer):
self.patch_embed = PatchEmbed(
img_size=img_size,
patch_size=patch_size,
in_chans=in_chans,
in_chans=in_channels,
embed_dim=embed_dim)
self.pos_w = self.patch_embed.num_patches_in_w
self.pos_h = self.patch_embed.num_patches_in_h

@ -1,4 +1,4 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -255,12 +255,17 @@ class XceptionDeeplab(nn.Layer):
Args:
backbone (str): Which type of Xception_DeepLab to select. It should be one of ('xception_41', 'xception_65', 'xception_71').
in_channels (int, optional): The channels of input image. Default: 3.
pretrained (str, optional): The path of pretrained model.
output_stride (int, optional): The stride of output features compared to input images. It is 8 or 16. Default: 16.
"""
def __init__(self, backbone, pretrained=None, output_stride=16):
def __init__(self,
backbone,
in_channels=3,
pretrained=None,
output_stride=16):
super(XceptionDeeplab, self).__init__()
@ -269,7 +274,7 @@ class XceptionDeeplab(nn.Layer):
self.feat_channels = [128, 2048]
self._conv1 = ConvBNLayer(
3,
in_channels,
32,
3,
stride=2,

@ -35,6 +35,7 @@ class BiSeNetV2(nn.Layer):
Args:
num_classes (int): The unique number of target classes.
lambd (float, optional): A factor for controlling the size of semantic branch channels. Default: 0.25.
in_channels (int, optional): The channels of input image. Default: 3.
pretrained (str, optional): The path or url of pretrained model. Default: None.
"""
@ -42,6 +43,7 @@ class BiSeNetV2(nn.Layer):
num_classes,
lambd=0.25,
align_corners=False,
in_channels=3,
pretrained=None):
super().__init__()
@ -51,8 +53,8 @@ class BiSeNetV2(nn.Layer):
sb_channels = (C1, C3, C4, C5)
mid_channels = 128
self.db = DetailBranch(db_channels)
self.sb = SemanticBranch(sb_channels)
self.db = DetailBranch(in_channels, db_channels)
self.sb = SemanticBranch(in_channels, sb_channels)
self.bga = BGA(mid_channels, align_corners)
self.aux_head1 = SegHead(C1, C1, num_classes)
@ -189,15 +191,15 @@ class GatherAndExpansionLayer2(nn.Layer):
class DetailBranch(nn.Layer):
"""The detail branch of BiSeNet, which has wide channels but shallow layers."""
def __init__(self, in_channels):
def __init__(self, in_channels, feature_channels):
super().__init__()
C1, C2, C3 = in_channels
C1, C2, C3 = feature_channels
self.convs = nn.Sequential(
# stage 1
layers.ConvBNReLU(
3, C1, 3, stride=2),
in_channels, C1, 3, stride=2),
layers.ConvBNReLU(C1, C1, 3),
# stage 2
layers.ConvBNReLU(
@ -217,11 +219,11 @@ class DetailBranch(nn.Layer):
class SemanticBranch(nn.Layer):
"""The semantic branch of BiSeNet, which has narrow channels but deep layers."""
def __init__(self, in_channels):
def __init__(self, in_channels, feature_channels):
super().__init__()
C1, C3, C4, C5 = in_channels
C1, C3, C4, C5 = feature_channels
self.stem = StemBlock(3, C1)
self.stem = StemBlock(in_channels, C1)
self.stage3 = nn.Sequential(
GatherAndExpansionLayer2(C1, C3, 6),

@ -0,0 +1,174 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.models import layers
from paddlers.models.ppseg.utils import utils
@manager.MODELS.add_component
class CCNet(nn.Layer):
"""
The CCNet implementation based on PaddlePaddle.
The original article refers to
Zilong Huang, et al. "CCNet: Criss-Cross Attention for Semantic Segmentation"
(https://arxiv.org/abs/1811.11721)
Args:
num_classes (int): The unique number of target classes.
backbone (paddle.nn.Layer): Backbone network, currently support Resnet18_vd/Resnet34_vd/Resnet50_vd/Resnet101_vd.
backbone_indices (tuple, list, optional): Two values in the tuple indicate the indices of output of backbone. Default: (2, 3).
enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss. Default: True.
dropout_prob (float, optional): The probability of dropout. Default: 0.0.
        recurrence (int, optional): The number of recurrent criss-cross attention operations. Default: 1.
align_corners (bool, optional): An argument of F.interpolate. It should be set to False when the feature size is even,
e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False.
pretrained (str, optional): The path or url of pretrained model. Default: None.
"""
def __init__(self,
num_classes,
backbone,
backbone_indices=(2, 3),
enable_auxiliary_loss=True,
dropout_prob=0.0,
recurrence=1,
align_corners=False,
pretrained=None):
super().__init__()
self.enable_auxiliary_loss = enable_auxiliary_loss
self.recurrence = recurrence
self.align_corners = align_corners
self.backbone = backbone
self.backbone_indices = backbone_indices
backbone_channels = [
backbone.feat_channels[i] for i in backbone_indices
]
if enable_auxiliary_loss:
self.aux_head = layers.AuxLayer(
backbone_channels[0],
512,
num_classes,
dropout_prob=dropout_prob)
self.head = RCCAModule(
backbone_channels[1],
512,
num_classes,
dropout_prob=dropout_prob,
recurrence=recurrence)
        self.pretrained = pretrained
        self.init_weight()
def init_weight(self):
if self.pretrained is not None:
utils.load_entire_model(self, self.pretrained)
def forward(self, x):
feat_list = self.backbone(x)
logit_list = []
output = self.head(feat_list[self.backbone_indices[-1]])
logit_list.append(output)
if self.training and self.enable_auxiliary_loss:
aux_out = self.aux_head(feat_list[self.backbone_indices[-2]])
logit_list.append(aux_out)
return [
F.interpolate(
logit,
paddle.shape(x)[2:],
mode='bilinear',
align_corners=self.align_corners) for logit in logit_list
]
class RCCAModule(nn.Layer):
def __init__(self,
in_channels,
out_channels,
num_classes,
dropout_prob=0.1,
recurrence=1):
super().__init__()
inter_channels = in_channels // 4
self.recurrence = recurrence
self.conva = layers.ConvBNLeakyReLU(
in_channels, inter_channels, 3, padding=1, bias_attr=False)
self.cca = CrissCrossAttention(inter_channels)
self.convb = layers.ConvBNLeakyReLU(
inter_channels, inter_channels, 3, padding=1, bias_attr=False)
self.out = layers.AuxLayer(
in_channels + inter_channels,
out_channels,
num_classes,
dropout_prob=dropout_prob)
def forward(self, x):
feat = self.conva(x)
for i in range(self.recurrence):
feat = self.cca(feat)
feat = self.convb(feat)
output = self.out(paddle.concat([x, feat], axis=1))
return output
class CrissCrossAttention(nn.Layer):
def __init__(self, in_channels):
super().__init__()
self.q_conv = nn.Conv2D(in_channels, in_channels // 8, kernel_size=1)
self.k_conv = nn.Conv2D(in_channels, in_channels // 8, kernel_size=1)
self.v_conv = nn.Conv2D(in_channels, in_channels, kernel_size=1)
self.softmax = nn.Softmax(axis=3)
self.gamma = self.create_parameter(
shape=(1, ), default_initializer=nn.initializer.Constant(0))
self.inf_tensor = paddle.full(shape=(1, ), fill_value=float('inf'))
def forward(self, x):
b, c, h, w = paddle.shape(x)
proj_q = self.q_conv(x)
proj_q_h = proj_q.transpose([0, 3, 1, 2]).reshape(
[b * w, -1, h]).transpose([0, 2, 1])
proj_q_w = proj_q.transpose([0, 2, 1, 3]).reshape(
[b * h, -1, w]).transpose([0, 2, 1])
proj_k = self.k_conv(x)
proj_k_h = proj_k.transpose([0, 3, 1, 2]).reshape([b * w, -1, h])
proj_k_w = proj_k.transpose([0, 2, 1, 3]).reshape([b * h, -1, w])
proj_v = self.v_conv(x)
proj_v_h = proj_v.transpose([0, 3, 1, 2]).reshape([b * w, -1, h])
proj_v_w = proj_v.transpose([0, 2, 1, 3]).reshape([b * h, -1, w])
energy_h = (paddle.bmm(proj_q_h, proj_k_h) + self.Inf(b, h, w)).reshape(
[b, w, h, h]).transpose([0, 2, 1, 3])
energy_w = paddle.bmm(proj_q_w, proj_k_w).reshape([b, h, w, w])
concate = self.softmax(paddle.concat([energy_h, energy_w], axis=3))
attn_h = concate[:, :, :, 0:h].transpose([0, 2, 1, 3]).reshape(
[b * w, h, h])
attn_w = concate[:, :, :, h:h + w].reshape([b * h, w, w])
out_h = paddle.bmm(proj_v_h, attn_h.transpose([0, 2, 1])).reshape(
[b, w, -1, h]).transpose([0, 2, 3, 1])
out_w = paddle.bmm(proj_v_w, attn_w.transpose([0, 2, 1])).reshape(
[b, h, -1, w]).transpose([0, 2, 1, 3])
return self.gamma * (out_h + out_w) + x
def Inf(self, B, H, W):
return -paddle.tile(
paddle.diag(paddle.tile(self.inf_tensor, [H]), 0).unsqueeze(0),
[B * W, 1, 1])
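# Usage sketch (not part of the original file): criss-cross attention gathers
# context only along each position's row and column, costing
# O(H * W * (H + W)) instead of the O((H * W) ** 2) of full non-local
# attention; setting recurrence=2 in RCCAModule lets information reach every
# position in two hops.
#   cca = CrissCrossAttention(64)
#   y = cca(paddle.randn([2, 64, 32, 32]))
#   # y.shape == [2, 64, 32, 32]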

@ -0,0 +1,403 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg.cvlibs import manager, param_init
from paddlers.models.ppseg.models import layers
from paddlers.models.ppseg.utils import utils
class DualResNet(nn.Layer):
"""
The DDRNet implementation based on PaddlePaddle.
The original article refers to
Yuanduo Hong, Huihui Pan, Weichao Sun, et al. "Deep Dual-resolution Networks for Real-time and Accurate Semantic Segmentation of Road Scenes"
(https://arxiv.org/abs/2101.06085)
Args:
num_classes (int): The unique number of target classes.
in_channels (int, optional): Number of input channels. Default: 3.
block_layers (list, tuple): The numbers of layers in different blocks. Default: [2, 2, 2, 2].
planes (int): Base channels in network. Default: 64.
spp_planes (int): Branch channels for DAPPM. Default: 128.
head_planes (int): Mid channels of segmentation head. Default: 128.
        enable_auxiliary_loss (bool, optional): Whether to use the auxiliary segmentation head on stage 3. Default: False.
pretrained (str, optional): The path or url of pretrained model. Default: None.
"""
def __init__(self,
num_classes,
in_channels=3,
block_layers=[2, 2, 2, 2],
planes=64,
spp_planes=128,
head_planes=128,
enable_auxiliary_loss=False,
pretrained=None):
super().__init__()
highres_planes = planes * 2
self.enable_auxiliary_loss = enable_auxiliary_loss
self.conv1 = nn.Sequential(
layers.ConvBNReLU(
in_channels, planes, kernel_size=3, stride=2, padding=1),
layers.ConvBNReLU(
planes, planes, kernel_size=3, stride=2, padding=1), )
self.relu = nn.ReLU()
self.layer1 = self._make_layers(BasicBlock, planes, planes,
block_layers[0])
self.layer2 = self._make_layers(
BasicBlock, planes, planes * 2, block_layers[1], stride=2)
self.layer3 = self._make_layers(
BasicBlock, planes * 2, planes * 4, block_layers[2], stride=2)
self.layer4 = self._make_layers(
BasicBlock, planes * 4, planes * 8, block_layers[3], stride=2)
self.compression3 = layers.ConvBN(
planes * 4, highres_planes, kernel_size=1, bias_attr=False)
self.compression4 = layers.ConvBN(
planes * 8, highres_planes, kernel_size=1, bias_attr=False)
self.down3 = layers.ConvBN(
highres_planes,
planes * 4,
kernel_size=3,
stride=2,
bias_attr=False)
self.down4 = nn.Sequential(
layers.ConvBNReLU(
highres_planes,
planes * 4,
kernel_size=3,
stride=2,
padding=1,
bias_attr=False),
layers.ConvBN(
planes * 4,
planes * 8,
kernel_size=3,
stride=2,
padding=1,
bias_attr=False))
self.layer3_ = self._make_layers(BasicBlock, planes * 2, highres_planes,
2)
self.layer4_ = self._make_layers(BasicBlock, highres_planes,
highres_planes, 2)
self.layer5_ = self._make_layers(Bottleneck, highres_planes,
highres_planes, 1)
self.layer5 = self._make_layers(
Bottleneck, planes * 8, planes * 8, 1, stride=2)
self.spp = DAPPM(planes * 16, spp_planes, planes * 4)
if self.enable_auxiliary_loss:
self.aux_head = DDRNetHead(highres_planes, head_planes, num_classes)
self.head = DDRNetHead(planes * 4, head_planes, num_classes)
self.pretrained = pretrained
self.init_weight()
def init_weight(self):
if self.pretrained is not None:
utils.load_entire_model(self, self.pretrained)
else:
for m in self.sublayers():
if isinstance(m, nn.Conv2D):
param_init.kaiming_normal_init(m.weight)
elif isinstance(m, nn.BatchNorm2D):
param_init.constant_init(m.weight, value=1)
param_init.constant_init(m.bias, value=0)
def _make_layers(self, block, inplanes, planes, blocks, stride=1):
downsample = None
if stride != 1 or inplanes != planes * block.expansion:
downsample = nn.Sequential(
nn.Conv2D(
inplanes,
planes * block.expansion,
kernel_size=1,
stride=stride,
bias_attr=False),
nn.BatchNorm2D(planes * block.expansion), )
layers = []
layers.append(block(inplanes, planes, stride, downsample))
inplanes = planes * block.expansion
for i in range(1, blocks):
if i == (blocks - 1):
layers.append(block(inplanes, planes, stride=1, no_relu=True))
else:
layers.append(block(inplanes, planes, stride=1, no_relu=False))
return nn.Sequential(*layers)
def forward(self, x):
n, c, h, w = paddle.shape(x)
width_output = w // 8
height_output = h // 8
x = self.conv1(x)
stage1_out = self.layer1(x)
stage2_out = self.layer2(self.relu(stage1_out))
stage3_out = self.layer3(self.relu(stage2_out))
stage3_out_dual = self.layer3_(self.relu(stage2_out))
x = stage3_out + self.down3(self.relu(stage3_out_dual))
stage3_merge = stage3_out_dual + F.interpolate(
self.compression3(self.relu(stage3_out)),
size=[height_output, width_output],
mode='bilinear')
stage4_out = self.layer4(self.relu(x))
stage4_out_dual = self.layer4_(self.relu(stage3_merge))
x = stage4_out + self.down4(self.relu(stage4_out_dual))
stage4_merge = stage4_out_dual + F.interpolate(
self.compression4(self.relu(stage4_out)),
size=[height_output, width_output],
mode='bilinear')
stage5_out_dual = self.layer5_(self.relu(stage4_merge))
x = F.interpolate(
self.spp(self.layer5(self.relu(x))),
size=[height_output, width_output],
mode='bilinear')
output = self.head(x + stage5_out_dual)
logit_list = []
logit_list.append(output)
if self.enable_auxiliary_loss:
aux_out = self.aux_head(stage3_merge)
logit_list.append(aux_out)
return [
F.interpolate(
logit, [h, w], mode='bilinear') for logit in logit_list
]
class BasicBlock(nn.Layer):
expansion = 1
def __init__(self,
inplanes,
planes,
stride=1,
downsample=None,
no_relu=False):
super().__init__()
self.conv_bn_relu = layers.ConvBNReLU(
inplanes,
planes,
kernel_size=3,
stride=stride,
padding=1,
bias_attr=False)
self.relu = nn.ReLU()
self.conv_bn = layers.ConvBN(
planes, planes, kernel_size=3, stride=1, padding=1, bias_attr=False)
self.downsample = downsample
self.stride = stride
self.no_relu = no_relu
def forward(self, x):
residual = x
out = self.conv_bn_relu(x)
out = self.conv_bn(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
if self.no_relu:
return out
else:
return self.relu(out)
class Bottleneck(nn.Layer):
expansion = 2
def __init__(self,
inplanes,
planes,
stride=1,
downsample=None,
no_relu=True):
super().__init__()
self.conv_bn_relu1 = layers.ConvBNReLU(
inplanes, planes, kernel_size=1, bias_attr=False)
self.conv_bn_relu2 = layers.ConvBNReLU(
planes,
planes,
kernel_size=3,
stride=stride,
padding=1,
bias_attr=False)
self.conv_bn = layers.ConvBN(
planes, planes * self.expansion, kernel_size=1, bias_attr=False)
self.relu = nn.ReLU()
self.downsample = downsample
self.stride = stride
self.no_relu = no_relu
def forward(self, x):
residual = x
out = self.conv_bn_relu1(x)
out = self.conv_bn_relu2(out)
out = self.conv_bn(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
if self.no_relu:
return out
else:
return self.relu(out)
class DAPPM(nn.Layer):
def __init__(self, inplanes, branch_planes, outplanes):
super().__init__()
self.scale1 = nn.Sequential(
nn.AvgPool2D(
kernel_size=5, stride=2, padding=2),
layers.SyncBatchNorm(inplanes),
nn.ReLU(),
nn.Conv2D(
inplanes, branch_planes, kernel_size=1, bias_attr=False), )
self.scale2 = nn.Sequential(
nn.AvgPool2D(
kernel_size=9, stride=4, padding=4),
layers.SyncBatchNorm(inplanes),
nn.ReLU(),
nn.Conv2D(
inplanes, branch_planes, kernel_size=1, bias_attr=False), )
self.scale3 = nn.Sequential(
nn.AvgPool2D(
kernel_size=17, stride=8, padding=8),
layers.SyncBatchNorm(inplanes),
nn.ReLU(),
nn.Conv2D(
inplanes, branch_planes, kernel_size=1, bias_attr=False), )
self.scale4 = nn.Sequential(
nn.AdaptiveAvgPool2D((1, 1)),
layers.SyncBatchNorm(inplanes),
nn.ReLU(),
nn.Conv2D(
inplanes, branch_planes, kernel_size=1, bias_attr=False), )
self.scale0 = nn.Sequential(
layers.SyncBatchNorm(inplanes),
nn.ReLU(),
nn.Conv2D(
inplanes, branch_planes, kernel_size=1, bias_attr=False), )
self.process1 = nn.Sequential(
layers.SyncBatchNorm(branch_planes),
nn.ReLU(),
nn.Conv2D(
branch_planes,
branch_planes,
kernel_size=3,
padding=1,
bias_attr=False), )
self.process2 = nn.Sequential(
layers.SyncBatchNorm(branch_planes),
nn.ReLU(),
nn.Conv2D(
branch_planes,
branch_planes,
kernel_size=3,
padding=1,
bias_attr=False), )
self.process3 = nn.Sequential(
layers.SyncBatchNorm(branch_planes),
nn.ReLU(),
nn.Conv2D(
branch_planes,
branch_planes,
kernel_size=3,
padding=1,
bias_attr=False), )
self.process4 = nn.Sequential(
layers.SyncBatchNorm(branch_planes),
nn.ReLU(),
nn.Conv2D(
branch_planes,
branch_planes,
kernel_size=3,
padding=1,
bias_attr=False), )
self.compression = nn.Sequential(
layers.SyncBatchNorm(branch_planes * 5),
nn.ReLU(),
nn.Conv2D(
branch_planes * 5, outplanes, kernel_size=1, bias_attr=False))
self.shortcut = nn.Sequential(
layers.SyncBatchNorm(inplanes),
nn.ReLU(),
nn.Conv2D(
inplanes, outplanes, kernel_size=1, bias_attr=False))
def forward(self, x):
n, c, h, w = paddle.shape(x)
x0 = self.scale0(x)
x1 = self.process1(
F.interpolate(
self.scale1(x), size=[h, w], mode='bilinear') + x0)
x2 = self.process2(
F.interpolate(
self.scale2(x), size=[h, w], mode='bilinear') + x1)
x3 = self.process3(
F.interpolate(
self.scale3(x), size=[h, w], mode='bilinear') + x2)
x4 = self.process4(
F.interpolate(
self.scale4(x), size=[h, w], mode='bilinear') + x3)
out = self.compression(paddle.concat([x0, x1, x2, x3, x4],
1)) + self.shortcut(x)
return out
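# DAPPM summary (sketch): scale1-4 pool the input with growing windows
# (kernel 5/9/17, then global average pooling); each pooled branch is
# upsampled back to the input size, added to the previous branch and refined
# by a 3x3 "process" conv, and the five branches are concatenated, compressed
# to `outplanes` channels, and summed with a 1x1 shortcut of the input.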
class DDRNetHead(nn.Layer):
def __init__(self, inplanes, interplanes, outplanes, scale_factor=None):
super().__init__()
self.bn1 = nn.BatchNorm2D(inplanes)
self.relu = nn.ReLU()
self.conv_bn_relu = layers.ConvBNReLU(
inplanes, interplanes, kernel_size=3, padding=1, bias_attr=False)
self.conv = nn.Conv2D(
interplanes, outplanes, kernel_size=1, padding=0, bias_attr=True)
self.scale_factor = scale_factor
def forward(self, x):
x = self.bn1(x)
x = self.relu(x)
x = self.conv_bn_relu(x)
out = self.conv(x)
if self.scale_factor is not None:
out = F.interpolate(
out, scale_factor=self.scale_factor, mode='bilinear')
return out
@manager.MODELS.add_component
def DDRNet_23(**kwargs):
return DualResNet(
block_layers=[2, 2, 2, 2],
planes=64,
spp_planes=128,
head_planes=128,
**kwargs)
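# Minimal usage sketch (not part of the original file), assuming the
# module-level paddle import; input sides should be divisible by 8.
if __name__ == "__main__":
    model = DDRNet_23(num_classes=19)
    logits = model(paddle.randn([1, 3, 512, 1024]))
    # A list with one logit map (two when enable_auxiliary_loss=True),
    # upsampled back to the input resolution.
    print([l.shape for l in logits])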

@ -209,7 +209,9 @@ class EMAU(nn.Layer):
mu = F.normalize(mu, axis=1, p=2)
mu = self.mu * (1 - self.momentum) + mu * self.momentum
if paddle.distributed.get_world_size() > 1:
mu = paddle.distributed.all_reduce(mu)
out = paddle.distributed.all_reduce(mu)
if out is not None:
mu = out
mu /= paddle.distributed.get_world_size()
self.mu = mu

@ -34,6 +34,7 @@ class ENet(nn.Layer):
Args:
num_classes (int): The unique number of target classes.
in_channels (int, optional): The channels of input image. Default: 3.
pretrained (str, optional): The path or url of pretrained model. Default: None.
encoder_relu (bool, optional): When ``True`` ReLU is used as the activation
function; otherwise, PReLU is used. Default: False.
@ -43,13 +44,14 @@ class ENet(nn.Layer):
def __init__(self,
num_classes,
in_channels=3,
pretrained=None,
encoder_relu=False,
decoder_relu=True):
super(ENet, self).__init__()
self.numclasses = num_classes
self.initial_block = InitialBlock(3, 16, relu=encoder_relu)
self.initial_block = InitialBlock(in_channels, 16, relu=encoder_relu)
self.downsample1_0 = DownsamplingBottleneck(
16, 64, return_indices=True, dropout_prob=0.01, relu=encoder_relu)

@ -34,6 +34,7 @@ class FastSCNN(nn.Layer):
(https://arxiv.org/pdf/1902.04502.pdf).
Args:
num_classes (int): The unique number of target classes.
in_channels (int, optional): The channels of input image. Default: 3.
enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss.
If true, auxiliary loss will be added after LearningToDownsample module. Default: False.
align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature
@ -43,13 +44,15 @@ class FastSCNN(nn.Layer):
def __init__(self,
num_classes,
in_channels=3,
enable_auxiliary_loss=True,
align_corners=False,
pretrained=None):
super().__init__()
self.learning_to_downsample = LearningToDownsample(32, 48, 64)
self.learning_to_downsample = LearningToDownsample(in_channels, 32, 48,
64)
self.global_feature_extractor = GlobalFeatureExtractor(
in_channels=64,
block_channels=[64, 96, 128],
@ -108,11 +111,18 @@ class LearningToDownsample(nn.Layer):
out_channels (int, optional): The output channels of LearningToDownsample module. Default: 64.
"""
def __init__(self, dw_channels1=32, dw_channels2=48, out_channels=64):
def __init__(self,
in_channels=3,
dw_channels1=32,
dw_channels2=48,
out_channels=64):
super(LearningToDownsample, self).__init__()
self.conv_bn_relu = layers.ConvBNReLU(
in_channels=3, out_channels=dw_channels1, kernel_size=3, stride=2)
in_channels=in_channels,
out_channels=dw_channels1,
kernel_size=3,
stride=2)
self.dsconv_bn_relu1 = layers.SeparableConvBNReLU(
in_channels=dw_channels1,
out_channels=dw_channels2,

@ -92,7 +92,7 @@ class GINet(nn.Layer):
return [
F.interpolate(
logit, (h, w),
logit, [h, w],
mode='bilinear',
align_corners=self.align_corners) for logit in logit_list
]

@ -0,0 +1,198 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.models import layers
from paddlers.models.ppseg.utils import utils
@manager.MODELS.add_component
class GloRe(nn.Layer):
"""
The GloRe implementation based on PaddlePaddle.
The original article refers to:
Chen, Yunpeng, et al. "Graph-Based Global Reasoning Networks"
(https://arxiv.org/pdf/1811.12814.pdf)
Args:
num_classes (int): The unique number of target classes.
        backbone (paddle.nn.Layer): Backbone network, currently support Resnet50/101.
backbone_indices (tuple, optional): Two values in the tuple indicate the indices of output of backbone.
gru_channels (int, optional): The number of input channels in GloRe Unit. Default: 512.
gru_num_state (int, optional): The number of states in GloRe Unit. Default: 128.
        gru_num_node (int, optional): The number of nodes in GloRe Unit. Default: 64.
enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss. Default: True.
align_corners (bool, optional): An argument of F.interpolate. It should be set to False when the feature size is even,
e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False.
pretrained (str, optional): The path or url of pretrained model. Default: None.
"""
def __init__(self,
num_classes,
backbone,
backbone_indices=(2, 3),
gru_channels=512,
gru_num_state=128,
gru_num_node=64,
enable_auxiliary_loss=True,
align_corners=False,
pretrained=None):
super().__init__()
self.backbone = backbone
backbone_channels = [
backbone.feat_channels[i] for i in backbone_indices
]
self.head = GloReHead(num_classes, backbone_indices, backbone_channels,
gru_channels, gru_num_state, gru_num_node,
enable_auxiliary_loss)
self.align_corners = align_corners
self.pretrained = pretrained
self.init_weight()
def forward(self, x):
feat_list = self.backbone(x)
logit_list = self.head(feat_list)
return [
F.interpolate(
logit,
paddle.shape(x)[2:],
mode='bilinear',
align_corners=self.align_corners) for logit in logit_list
]
def init_weight(self):
if self.pretrained is not None:
utils.load_entire_model(self, self.pretrained)
class GloReHead(nn.Layer):
def __init__(self,
num_classes,
backbone_indices,
backbone_channels,
gru_channels=512,
gru_num_state=128,
gru_num_node=64,
enable_auxiliary_loss=True):
super().__init__()
in_channels = backbone_channels[1]
self.conv_bn_relu = layers.ConvBNReLU(
in_channels, gru_channels, 1, bias_attr=False)
self.gru_module = GruModule(
num_input=gru_channels,
num_state=gru_num_state,
num_node=gru_num_node)
self.dropout = nn.Dropout(0.1)
        self.classifier = nn.Conv2D(gru_channels, num_classes, kernel_size=1)
self.auxlayer = layers.AuxLayer(
in_channels=backbone_channels[0],
inter_channels=backbone_channels[0] // 4,
out_channels=num_classes)
self.backbone_indices = backbone_indices
self.enable_auxiliary_loss = enable_auxiliary_loss
def forward(self, feat_list):
logit_list = []
x = feat_list[self.backbone_indices[1]]
feature = self.conv_bn_relu(x)
gru_output = self.gru_module(feature)
output = self.dropout(gru_output)
logit = self.classifier(output)
logit_list.append(logit)
if self.enable_auxiliary_loss:
low_level_feat = feat_list[self.backbone_indices[0]]
auxiliary_logit = self.auxlayer(low_level_feat)
logit_list.append(auxiliary_logit)
return logit_list
class GCN(nn.Layer):
def __init__(self, num_state, num_node, bias=False):
super(GCN, self).__init__()
self.conv1 = nn.Conv1D(num_node, num_node, kernel_size=1)
self.relu = nn.ReLU()
self.conv2 = nn.Conv1D(
num_state, num_state, kernel_size=1, bias_attr=bias)
def forward(self, x):
h = self.conv1(paddle.transpose(x, perm=(0, 2, 1)))
h = paddle.transpose(h, perm=(0, 2, 1))
h = h + x
h = self.relu(self.conv2(h))
return h
class GruModule(nn.Layer):
def __init__(self,
num_input=512,
num_state=128,
num_node=64,
normalize=False):
super(GruModule, self).__init__()
self.normalize = normalize
self.num_state = num_state
self.num_node = num_node
self.reduction_dim = nn.Conv2D(num_input, num_state, kernel_size=1)
self.projection_mat = nn.Conv2D(num_input, num_node, kernel_size=1)
self.gcn = GCN(num_state=self.num_state, num_node=self.num_node)
self.extend_dim = nn.Conv2D(
self.num_state, num_input, kernel_size=1, bias_attr=False)
self.extend_bn = layers.SyncBatchNorm(num_input, epsilon=1e-4)
def forward(self, input):
n, c, h, w = input.shape
# B, C, H, W
reduction_dim = self.reduction_dim(input)
# B, N, H, W
mat_B = self.projection_mat(input)
# B, C, H*W
reshaped_reduction = paddle.reshape(
reduction_dim, shape=[n, self.num_state, h * w])
# B, N, H*W
reshaped_B = paddle.reshape(mat_B, shape=[n, self.num_node, h * w])
# B, N, H*W
reproject = reshaped_B
# B, C, N
node_state_V = paddle.matmul(
reshaped_reduction, paddle.transpose(
reshaped_B, perm=[0, 2, 1]))
if self.normalize:
node_state_V = node_state_V * (1. / reshaped_reduction.shape[2])
# B, C, N
gcn_out = self.gcn(node_state_V)
# B, C, H*W
Y = paddle.matmul(gcn_out, reproject)
# B, C, H, W
Y = paddle.reshape(Y, shape=[n, self.num_state, h, w])
Y_extend = self.extend_dim(Y)
Y_extend = self.extend_bn(Y_extend)
out = input + Y_extend
return out
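# A minimal shape sketch of the global reasoning unit above; a sketch only,
# assuming paddle is installed and GruModule is in scope, with hypothetical sizes.
import paddle

feat = paddle.rand([2, 512, 16, 16])  # batch 2, 512 channels, 16x16 map
module = GruModule(num_input=512, num_state=128, num_node=64)
out = module(feat)
# The unit is residual (out = input + Y_extend), so the input shape is preserved.
print(out.shape)  # [2, 512, 16, 16]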

@ -31,6 +31,7 @@ class HarDNet(nn.Layer):
Args:
num_classes (int): The unique number of target classes.
in_channels (int, optional): The channels of input image. Default: 3.
stem_channels (tuple|list, optional): The number of channels before the encoder. Default: (16, 24, 32, 48).
ch_list (tuple|list, optional): The number of channels at each block in the encoder. Default: (64, 96, 160, 224, 320).
grmul (float, optional): The channel multiplying factor in HarDBlock, which is m in the paper. Default: 1.7.
@ -43,6 +44,7 @@ class HarDNet(nn.Layer):
def __init__(self,
num_classes,
in_channels=3,
stem_channels=(16, 24, 32, 48),
ch_list=(64, 96, 160, 224, 320),
grmul=1.7,
@ -60,7 +62,7 @@ class HarDNet(nn.Layer):
self.stem = nn.Sequential(
layers.ConvBNReLU(
3, stem_channels[0], kernel_size=3, bias_attr=False),
in_channels, stem_channels[0], kernel_size=3, bias_attr=False),
layers.ConvBNReLU(
stem_channels[0],
stem_channels[1],

@ -12,9 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from .layer_libs import ConvBNReLU, ConvBN, SeparableConvBNReLU, DepthwiseConvBN, AuxLayer, SyncBatchNorm, JPU, ConvBNPReLU
from .layer_libs import ConvBNReLU, ConvBN, SeparableConvBNReLU, DepthwiseConvBN, AuxLayer, SyncBatchNorm, JPU, ConvBNPReLU, ConvBNAct, ConvBNLeakyReLU
from .activation import Activation
from .pyramid_pool import ASPPModule, PPModule
from .attention import AttentionBlock
from .nonlocal2d import NonLocal2D
from .wrap_functions import *
from .tensor_fusion import UAFM_SpAtten, UAFM_SpAtten_S, UAFM_ChAtten, UAFM_ChAtten_S, UAFM, UAFMMobile, UAFMMobile_SpAtten

@ -144,3 +144,129 @@ class AttentionBlock(nn.Layer):
if self.out_project is not None:
context = self.out_project(context)
return context
class DualAttentionModule(nn.Layer):
"""
Dual attention module.
Args:
in_channels (int): The number of input channels.
out_channels (int): The number of output channels.
"""
def __init__(self, in_channels, out_channels):
super().__init__()
inter_channels = in_channels // 4
self.channel_conv = layers.ConvBNReLU(in_channels, inter_channels, 1)
self.position_conv = layers.ConvBNReLU(in_channels, inter_channels, 1)
self.pam = PAM(inter_channels)
self.cam = CAM(inter_channels)
self.conv1 = layers.ConvBNReLU(inter_channels, inter_channels, 3)
self.conv2 = layers.ConvBNReLU(inter_channels, inter_channels, 3)
self.conv3 = layers.ConvBNReLU(inter_channels, out_channels, 3)
def forward(self, feats):
channel_feats = self.channel_conv(feats)
channel_feats = self.cam(channel_feats)
channel_feats = self.conv1(channel_feats)
position_feats = self.position_conv(feats)
position_feats = self.pam(position_feats)
position_feats = self.conv2(position_feats)
feats_sum = position_feats + channel_feats
out = self.conv3(feats_sum)
return out
class PAM(nn.Layer):
"""
Position attention module.
Args:
in_channels (int): The number of input channels.
"""
def __init__(self, in_channels):
super().__init__()
mid_channels = in_channels // 8
self.mid_channels = mid_channels
self.in_channels = in_channels
self.query_conv = nn.Conv2D(in_channels, mid_channels, 1, 1)
self.key_conv = nn.Conv2D(in_channels, mid_channels, 1, 1)
self.value_conv = nn.Conv2D(in_channels, in_channels, 1, 1)
self.gamma = self.create_parameter(
shape=[1],
dtype='float32',
default_initializer=nn.initializer.Constant(0))
def forward(self, x):
x_shape = paddle.shape(x)
# query: n, h * w, c1
query = self.query_conv(x)
query = paddle.reshape(query, (0, self.mid_channels, -1))
query = paddle.transpose(query, (0, 2, 1))
# key: n, c1, h * w
key = self.key_conv(x)
key = paddle.reshape(key, (0, self.mid_channels, -1))
# sim: n, h * w, h * w
sim = paddle.bmm(query, key)
sim = F.softmax(sim, axis=-1)
value = self.value_conv(x)
value = paddle.reshape(value, (0, self.in_channels, -1))
sim = paddle.transpose(sim, (0, 2, 1))
# feat: from (n, c2, h * w) -> (n, c2, h, w)
feat = paddle.bmm(value, sim)
feat = paddle.reshape(feat,
(0, self.in_channels, x_shape[2], x_shape[3]))
out = self.gamma * feat + x
return out
class CAM(nn.Layer):
"""
Channel attention module.
Args:
in_channels (int): The number of input channels.
"""
def __init__(self, channels):
super().__init__()
self.channels = channels
self.gamma = self.create_parameter(
shape=[1],
dtype='float32',
default_initializer=nn.initializer.Constant(0))
def forward(self, x):
x_shape = paddle.shape(x)
# query: n, c, h * w
query = paddle.reshape(x, (0, self.channels, -1))
# key: n, h * w, c
key = paddle.reshape(x, (0, self.channels, -1))
key = paddle.transpose(key, (0, 2, 1))
# sim: n, c, c
sim = paddle.bmm(query, key)
# The DANet authors claim this trick avoids gradient divergence
sim = paddle.max(sim, axis=-1, keepdim=True).tile(
[1, 1, self.channels]) - sim
sim = F.softmax(sim, axis=-1)
# feat: from (n, c, h * w) to (n, c, h, w)
value = paddle.reshape(x, (0, self.channels, -1))
feat = paddle.bmm(sim, value)
feat = paddle.reshape(feat, (0, self.channels, x_shape[2], x_shape[3]))
out = self.gamma * feat + x
return out
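# Both attention blocks above are residual with gamma initialized to zero, so at
# initialization they act as the identity. A quick sanity check (a sketch,
# assuming PAM and CAM are in scope):
import paddle

x = paddle.rand([2, 64, 32, 32])
pam, cam = PAM(64), CAM(64)
# gamma starts at 0, hence out = 0 * feat + x == x right after construction.
assert pam(x).shape == x.shape
assert cam(x).shape == x.shape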

@ -56,6 +56,37 @@ class ConvBNReLU(nn.Layer):
return x
class ConvBNAct(nn.Layer):
def __init__(self,
in_channels,
out_channels,
kernel_size,
padding='same',
act_type=None,
**kwargs):
super().__init__()
self._conv = nn.Conv2D(
in_channels, out_channels, kernel_size, padding=padding, **kwargs)
if 'data_format' in kwargs:
data_format = kwargs['data_format']
else:
data_format = 'NCHW'
self._batch_norm = SyncBatchNorm(out_channels, data_format=data_format)
self._act_type = act_type
if act_type is not None:
self._act = layers.Activation(act_type)
def forward(self, x):
x = self._conv(x)
x = self._batch_norm(x)
if self._act_type is not None:
x = self._act(x)
return x
class ConvBN(nn.Layer):
def __init__(self,
in_channels,
@ -293,3 +324,29 @@ class ConvBNPReLU(nn.Layer):
x = self._batch_norm(x)
x = self._prelu(x)
return x
class ConvBNLeakyReLU(nn.Layer):
def __init__(self,
in_channels,
out_channels,
kernel_size,
padding='same',
**kwargs):
super().__init__()
self._conv = nn.Conv2D(
in_channels, out_channels, kernel_size, padding=padding, **kwargs)
if 'data_format' in kwargs:
data_format = kwargs['data_format']
else:
data_format = 'NCHW'
self._batch_norm = SyncBatchNorm(out_channels, data_format=data_format)
self._relu = layers.Activation("leakyrelu")
def forward(self, x):
x = self._conv(x)
x = self._batch_norm(x)
x = self._relu(x)
return x

@ -0,0 +1,285 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
from paddle.nn.initializer import Constant
from paddlers.models.ppseg.models import layers
from paddlers.models.ppseg.models.layers import tensor_fusion_helper as helper
class UAFM(nn.Layer):
"""
The base of Unified Attention Fusion Module.
Args:
x_ch (int): The channel of x tensor, which is the low level feature.
y_ch (int): The channel of y tensor, which is the high level feature.
out_ch (int): The channel of output tensor.
ksize (int, optional): The kernel size of the conv for x tensor. Default: 3.
resize_mode (str, optional): The resize mode for upsampling the y tensor. Default: bilinear.
"""
def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
super().__init__()
self.conv_x = layers.ConvBNReLU(
x_ch, y_ch, kernel_size=ksize, padding=ksize // 2, bias_attr=False)
self.conv_out = layers.ConvBNReLU(
y_ch, out_ch, kernel_size=3, padding=1, bias_attr=False)
self.resize_mode = resize_mode
def check(self, x, y):
assert x.ndim == 4 and y.ndim == 4
x_h, x_w = x.shape[2:]
y_h, y_w = y.shape[2:]
assert x_h >= y_h and x_w >= y_w
def prepare(self, x, y):
x = self.prepare_x(x, y)
y = self.prepare_y(x, y)
return x, y
def prepare_x(self, x, y):
x = self.conv_x(x)
return x
def prepare_y(self, x, y):
y_up = F.interpolate(y, paddle.shape(x)[2:], mode=self.resize_mode)
return y_up
def fuse(self, x, y):
out = x + y
out = self.conv_out(out)
return out
def forward(self, x, y):
"""
Args:
x (Tensor): The low level feature.
y (Tensor): The high level feature.
"""
self.check(x, y)
x, y = self.prepare(x, y)
out = self.fuse(x, y)
return out
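# The base class fixes the fusion template: check that x is the higher-resolution
# input, project x to y's channel count, upsample y to x's spatial size, then fuse.
# A minimal sketch with hypothetical sizes (assuming UAFM is in scope):
import paddle

x = paddle.rand([1, 32, 64, 64])  # low-level feature, higher resolution
y = paddle.rand([1, 64, 32, 32])  # high-level feature, lower resolution
fusion = UAFM(x_ch=32, y_ch=64, out_ch=64)
out = fusion(x, y)
print(out.shape)  # [1, 64, 64, 64]: fused at the low-level resolution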
class UAFM_ChAtten(UAFM):
"""
The UAFM with channel attention, which uses mean and max values.
Args:
x_ch (int): The channel of x tensor, which is the low level feature.
y_ch (int): The channel of y tensor, which is the high level feature.
out_ch (int): The channel of output tensor.
ksize (int, optional): The kernel size of the conv for x tensor. Default: 3.
resize_mode (str, optional): The resize mode for upsampling the y tensor. Default: bilinear.
"""
def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode)
self.conv_xy_atten = nn.Sequential(
layers.ConvBNAct(
4 * y_ch,
y_ch // 2,
kernel_size=1,
bias_attr=False,
act_type="leakyrelu"),
layers.ConvBN(
y_ch // 2, y_ch, kernel_size=1, bias_attr=False))
def fuse(self, x, y):
"""
Args:
x (Tensor): The low level feature.
y (Tensor): The high level feature.
"""
atten = helper.avg_max_reduce_hw([x, y], self.training)
atten = F.sigmoid(self.conv_xy_atten(atten))
out = x * atten + y * (1 - atten)
out = self.conv_out(out)
return out
class UAFM_ChAtten_S(UAFM):
"""
The UAFM with channel attention, which uses mean values.
Args:
x_ch (int): The channel of x tensor, which is the low level feature.
y_ch (int): The channel of y tensor, which is the high level feature.
out_ch (int): The channel of output tensor.
ksize (int, optional): The kernel size of the conv for x tensor. Default: 3.
resize_mode (str, optional): The resize mode for upsampling the y tensor. Default: bilinear.
"""
def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode)
self.conv_xy_atten = nn.Sequential(
layers.ConvBNAct(
2 * y_ch,
y_ch // 2,
kernel_size=1,
bias_attr=False,
act_type="leakyrelu"),
layers.ConvBN(
y_ch // 2, y_ch, kernel_size=1, bias_attr=False))
def fuse(self, x, y):
"""
Args:
x (Tensor): The low level feature.
y (Tensor): The high level feature.
"""
atten = helper.avg_reduce_hw([x, y])
atten = F.sigmoid(self.conv_xy_atten(atten))
out = x * atten + y * (1 - atten)
out = self.conv_out(out)
return out
class UAFM_SpAtten(UAFM):
"""
The UAFM with spatial attention, which uses mean and max values.
Args:
x_ch (int): The channel of x tensor, which is the low level feature.
y_ch (int): The channel of y tensor, which is the high level feature.
out_ch (int): The channel of output tensor.
ksize (int, optional): The kernel size of the conv for x tensor. Default: 3.
resize_mode (str, optional): The resize mode for upsampling the y tensor. Default: bilinear.
"""
def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode)
self.conv_xy_atten = nn.Sequential(
layers.ConvBNReLU(
4, 2, kernel_size=3, padding=1, bias_attr=False),
layers.ConvBN(
2, 1, kernel_size=3, padding=1, bias_attr=False))
self._scale = self.create_parameter(
shape=[1],
attr=ParamAttr(initializer=Constant(value=1.)),
dtype="float32")
self._scale.stop_gradient = True
def fuse(self, x, y):
"""
Args:
x (Tensor): The low level feature.
y (Tensor): The high level feature.
"""
atten = helper.avg_max_reduce_channel([x, y])
atten = F.sigmoid(self.conv_xy_atten(atten))
out = x * atten + y * (self._scale - atten)
out = self.conv_out(out)
return out
class UAFM_SpAtten_S(UAFM):
"""
The UAFM with spatial attention, which uses mean values.
Args:
x_ch (int): The channel of x tensor, which is the low level feature.
y_ch (int): The channel of y tensor, which is the high level feature.
out_ch (int): The channel of output tensor.
ksize (int, optional): The kernel size of the conv for x tensor. Default: 3.
resize_mode (str, optional): The resize mode for upsampling the y tensor. Default: bilinear.
"""
def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode)
self.conv_xy_atten = nn.Sequential(
layers.ConvBNReLU(
2, 2, kernel_size=3, padding=1, bias_attr=False),
layers.ConvBN(
2, 1, kernel_size=3, padding=1, bias_attr=False))
def fuse(self, x, y):
"""
Args:
x (Tensor): The low level feature.
y (Tensor): The high level feature.
"""
atten = helper.avg_reduce_channel([x, y])
atten = F.sigmoid(self.conv_xy_atten(atten))
out = x * atten + y * (1 - atten)
out = self.conv_out(out)
return out
class UAFMMobile(UAFM):
"""
Unified Attention Fusion Module for mobile.
Args:
x_ch (int): The channel of x tensor, which is the low level feature.
y_ch (int): The channel of y tensor, which is the high level feature.
out_ch (int): The channel of output tensor.
ksize (int, optional): The kernel size of the conv for x tensor. Default: 3.
resize_mode (str, optional): The resize mode for upsampling the y tensor. Default: bilinear.
"""
def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode)
self.conv_x = layers.SeparableConvBNReLU(
x_ch, y_ch, kernel_size=ksize, padding=ksize // 2, bias_attr=False)
self.conv_out = layers.SeparableConvBNReLU(
y_ch, out_ch, kernel_size=3, padding=1, bias_attr=False)
class UAFMMobile_SpAtten(UAFM):
"""
Unified Attention Fusion Module with spatial attention for mobile.
Args:
x_ch (int): The channel of x tensor, which is the low level feature.
y_ch (int): The channel of y tensor, which is the high level feature.
out_ch (int): The channel of output tensor.
ksize (int, optional): The kernel size of the conv for x tensor. Default: 3.
resize_mode (str, optional): The resize mode for upsampling the y tensor. Default: bilinear.
"""
def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode)
self.conv_x = layers.SeparableConvBNReLU(
x_ch, y_ch, kernel_size=ksize, padding=ksize // 2, bias_attr=False)
self.conv_out = layers.SeparableConvBNReLU(
y_ch, out_ch, kernel_size=3, padding=1, bias_attr=False)
self.conv_xy_atten = nn.Sequential(
layers.ConvBNReLU(
4, 2, kernel_size=3, padding=1, bias_attr=False),
layers.ConvBN(
2, 1, kernel_size=3, padding=1, bias_attr=False))
def fuse(self, x, y):
"""
Args:
x (Tensor): The low level feature.
y (Tensor): The high level feature.
"""
atten = helper.avg_max_reduce_channel([x, y])
atten = F.sigmoid(self.conv_xy_atten(atten))
out = x * atten + y * (1 - atten)
out = self.conv_out(out)
return out

@ -0,0 +1,133 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
def avg_reduce_hw(x):
# Reduce hw by avg
# Return cat([avg_pool_0, avg_pool_1, ...])
if not isinstance(x, (list, tuple)):
return F.adaptive_avg_pool2d(x, 1)
elif len(x) == 1:
return F.adaptive_avg_pool2d(x[0], 1)
else:
res = []
for xi in x:
res.append(F.adaptive_avg_pool2d(xi, 1))
return paddle.concat(res, axis=1)
def avg_max_reduce_hw_helper(x, is_training, use_concat=True):
assert not isinstance(x, (list, tuple))
avg_pool = F.adaptive_avg_pool2d(x, 1)
# TODO(pjc): when axis=[2, 3], the paddle.max API has a bug during training.
if is_training:
max_pool = F.adaptive_max_pool2d(x, 1)
else:
max_pool = paddle.max(x, axis=[2, 3], keepdim=True)
if use_concat:
res = paddle.concat([avg_pool, max_pool], axis=1)
else:
res = [avg_pool, max_pool]
return res
def avg_max_reduce_hw(x, is_training):
# Reduce hw by avg and max
# Return cat([avg_pool_0, avg_pool_1, ..., max_pool_0, max_pool_1, ...])
if not isinstance(x, (list, tuple)):
return avg_max_reduce_hw_helper(x, is_training)
elif len(x) == 1:
return avg_max_reduce_hw_helper(x[0], is_training)
else:
res_avg = []
res_max = []
for xi in x:
avg, max = avg_max_reduce_hw_helper(xi, is_training, False)
res_avg.append(avg)
res_max.append(max)
res = res_avg + res_max
return paddle.concat(res, axis=1)
def avg_reduce_channel(x):
# Reduce channel by avg
# Return cat([avg_ch_0, avg_ch_1, ...])
if not isinstance(x, (list, tuple)):
return paddle.mean(x, axis=1, keepdim=True)
elif len(x) == 1:
return paddle.mean(x[0], axis=1, keepdim=True)
else:
res = []
for xi in x:
res.append(paddle.mean(xi, axis=1, keepdim=True))
return paddle.concat(res, axis=1)
def max_reduce_channel(x):
# Reduce channel by max
# Return cat([max_ch_0, max_ch_1, ...])
if not isinstance(x, (list, tuple)):
return paddle.max(x, axis=1, keepdim=True)
elif len(x) == 1:
return paddle.max(x[0], axis=1, keepdim=True)
else:
res = []
for xi in x:
res.append(paddle.max(xi, axis=1, keepdim=True))
return paddle.concat(res, axis=1)
def avg_max_reduce_channel_helper(x, use_concat=True):
# Reduce channel by avg and max; only supports a single input
assert not isinstance(x, (list, tuple))
mean_value = paddle.mean(x, axis=1, keepdim=True)
max_value = paddle.max(x, axis=1, keepdim=True)
if use_concat:
res = paddle.concat([mean_value, max_value], axis=1)
else:
res = [mean_value, max_value]
return res
def avg_max_reduce_channel(x):
# Reduce channel by avg and max
# Return cat([avg_ch_0, max_ch_0, avg_ch_1, max_ch_1, ...])
if not isinstance(x, (list, tuple)):
return avg_max_reduce_channel_helper(x)
elif len(x) == 1:
return avg_max_reduce_channel_helper(x[0])
else:
res = []
for xi in x:
res.extend(avg_max_reduce_channel_helper(xi, False))
return paddle.concat(res, axis=1)
def cat_avg_max_reduce_channel(x):
# Reduce channel by cat + avg + max
assert isinstance(x, (list, tuple)) and len(x) > 1
x = paddle.concat(x, axis=1)
mean_value = paddle.mean(x, axis=1, keepdim=True)
max_value = paddle.max(x, axis=1, keepdim=True)
res = paddle.concat([mean_value, max_value], axis=1)
return res
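# A shape sketch of the reduce helpers above (hypothetical sizes). Note that
# avg_max_reduce_hw groups all averages before all maxima, while
# avg_max_reduce_channel interleaves avg/max per input.
import paddle

a = paddle.rand([2, 16, 32, 32])
b = paddle.rand([2, 16, 32, 32])
print(avg_reduce_hw([a, b]).shape)               # [2, 32, 1, 1]
print(avg_max_reduce_hw([a, b], True).shape)     # [2, 64, 1, 1]
print(avg_reduce_channel([a, b]).shape)          # [2, 2, 32, 32]
print(avg_max_reduce_channel([a, b]).shape)      # [2, 4, 32, 32]
print(cat_avg_max_reduce_channel([a, b]).shape)  # [2, 2, 32, 32]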

@ -99,7 +99,7 @@ class BCELoss(nn.Layer):
raise ValueError(
"if type of `weight` is str, it should equal to 'dynamic', but it is {}"
.format(self.weight))
elif isinstance(self.weight, paddle.VarBase):
elif not isinstance(self.weight, paddle.Tensor):
raise TypeError(
'The type of `weight` is wrong, it should be Tensor or str, but it is {}'
.format(type(self.weight)))

@ -78,8 +78,6 @@ class CrossEntropyLoss(nn.Layer):
logit = paddle.transpose(logit, [0, 2, 3, 1])
label = label.astype('int64')
# In F.cross_entropy, the ignore_index is invalid, which needs to be fixed.
# When there is 255 in the label and paddle version <= 2.1.3, the cross_entropy OP will report an error, which is fixed in paddle develop version.
loss = F.cross_entropy(
logit,
label,
@ -121,7 +119,7 @@ class CrossEntropyLoss(nn.Layer):
loss = loss * semantic_weights
if self.weight is not None:
_one_hot = F.one_hot(label, logit.shape[-1])
_one_hot = F.one_hot(label * mask, logit.shape[-1])
coef = paddle.sum(_one_hot * self.weight, axis=-1)
else:
coef = paddle.ones_like(label)

@ -16,7 +16,7 @@ import numpy as np
import paddle
from paddle import nn
import paddle.nn.functional as F
from scipy.ndimage.interpolation import shift
from scipy.ndimage import shift
from paddlers.models.ppseg.cvlibs import manager

@ -1,4 +1,4 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.

@ -19,38 +19,59 @@ from paddlers.models.ppseg.cvlibs import manager
@manager.LOSSES.add_component
class DiceLoss(nn.Layer):
"""
Implements the dice loss function.
The implementation of the dice loss.
Args:
ignore_index (int64): Specifies a target value that is ignored
and does not contribute to the input gradient. Default ``255``.
smooth (float32): laplace smoothing,
to smooth dice loss and accelerate convergence. following:
https://github.com/pytorch/pytorch/issues/1249#issuecomment-337999895
weight (list[float], optional): The weight for each class. Default: None.
ignore_index (int64, optional): Specifies a target value that
is ignored and does not contribute to the input gradient. Default ``255``.
smooth (float32, optional): Laplace smoothing to smooth the dice loss and accelerate convergence.
Default: 1.0.
"""
def __init__(self, ignore_index=255, smooth=0.):
super(DiceLoss, self).__init__()
def __init__(self, weight=None, ignore_index=255, smooth=1.0):
super().__init__()
self.weight = weight
self.ignore_index = ignore_index
self.eps = 1e-5
self.smooth = smooth
self.eps = 1e-8
def forward(self, logits, labels):
labels = paddle.cast(labels, dtype='int32')
labels_one_hot = F.one_hot(labels, num_classes=logits.shape[1])
labels_one_hot = paddle.transpose(labels_one_hot, [0, 3, 1, 2])
labels_one_hot = paddle.cast(labels_one_hot, dtype='float32')
num_class = logits.shape[1]
if self.weight is not None:
assert num_class == len(self.weight), \
"The length of weight should be equal to the number of classes"
mask = labels != self.ignore_index
mask = paddle.cast(paddle.unsqueeze(mask, 1), 'float32')
labels[labels == self.ignore_index] = 0
labels_one_hot = F.one_hot(labels, num_class)
labels_one_hot = paddle.transpose(labels_one_hot, [0, 3, 1, 2])
logits = F.softmax(logits, axis=1)
mask = (paddle.unsqueeze(labels, 1) != self.ignore_index)
logits = logits * mask
labels_one_hot = labels_one_hot * mask
dice_loss = 0.0
for i in range(num_class):
dice_loss_i = dice_loss_helper(logits[:, i], labels_one_hot[:, i],
mask, self.smooth, self.eps)
if self.weight is not None:
dice_loss_i *= self.weight[i]
dice_loss += dice_loss_i
dice_loss = dice_loss / num_class
return dice_loss
dims = (0, ) + tuple(range(2, labels.ndimension() + 1))
intersection = paddle.sum(logits * labels_one_hot, dims)
cardinality = paddle.sum(logits + labels_one_hot, dims)
dice_loss = ((2. * intersection + self.smooth) /
(cardinality + self.eps + self.smooth)).mean()
return 1 - dice_loss
def dice_loss_helper(logit, label, mask, smooth, eps):
assert logit.shape == label.shape, \
"The shape of logit and label should be the same"
logit = paddle.reshape(logit, [0, -1])
label = paddle.reshape(label, [0, -1])
mask = paddle.reshape(mask, [0, -1])
logit *= mask
label *= mask
intersection = paddle.sum(logit * label, axis=1)
cardinality = paddle.sum(logit + label, axis=1)
dice_loss = 1 - (2 * intersection + smooth) / (cardinality + smooth + eps)
dice_loss = dice_loss.mean()
return dice_loss
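# A worked toy case for dice_loss_helper (a sketch: one sample, four pixels,
# a perfect prediction, and no ignored pixels).
import paddle

logit = paddle.to_tensor([[1., 0., 1., 0.]])  # per-class probabilities
label = paddle.to_tensor([[1., 0., 1., 0.]])  # matching one-hot slice
mask = paddle.ones([1, 4])
# intersection = 2 and cardinality = 4, so with smooth = 1:
# loss = 1 - (2 * 2 + 1) / (4 + 1) = 0
print(float(dice_loss_helper(logit, label, mask, smooth=1.0, eps=1e-8)))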

@ -23,38 +23,110 @@ from paddlers.models.ppseg.cvlibs import manager
@manager.LOSSES.add_component
class FocalLoss(nn.Layer):
"""
Focal Loss.
The implementation of focal loss.
Code referenced from:
https://github.com/clcarwin/focal_loss_pytorch/blob/master/focalloss.py
For now, this focal loss requires the labels to be 0 or 1.
Args:
gamma (float): the coefficient of Focal Loss.
ignore_index (int64): Specifies a target value that is ignored
alpha (float, list, optional): The alpha of focal loss. alpha is the weight
of class 1, 1-alpha is the weight of class 0. Default: 0.25
gamma (float, optional): The gamma of Focal Loss. Default: 2.0
ignore_index (int64, optional): Specifies a target value that is ignored
and does not contribute to the input gradient. Default ``255``.
"""
def __init__(self, gamma=2.0, ignore_index=255, edge_label=False):
super(FocalLoss, self).__init__()
def __init__(self, alpha=0.25, gamma=2.0, ignore_index=255):
super().__init__()
self.alpha = alpha
self.gamma = gamma
self.ignore_index = ignore_index
self.edge_label = edge_label
self.EPS = 1e-10
def forward(self, logit, label):
logit = paddle.reshape(
logit, [logit.shape[0], logit.shape[1], -1]) # N,C,H,W => N,C,H*W
logit = paddle.transpose(logit, [0, 2, 1]) # N,C,H*W => N,H*W,C
logit = paddle.reshape(logit,
[-1, logit.shape[2]]) # N,H*W,C => N*H*W,C
label = paddle.reshape(label, [-1, 1])
range_ = paddle.arange(0, label.shape[0])
range_ = paddle.unsqueeze(range_, axis=-1)
label = paddle.cast(label, dtype='int64')
label = paddle.concat([range_, label], axis=-1)
logpt = F.log_softmax(logit)
logpt = paddle.gather_nd(logpt, label)
pt = paddle.exp(logpt.detach())
loss = -1 * (1 - pt)**self.gamma * logpt
loss = paddle.mean(loss)
return loss
"""
Forward computation.
Args:
logit (Tensor): Logit tensor, the data type is float32, float64. Shape is
(N, C, H, W), where C is number of classes.
label (Tensor): Label tensor, the data type is int64. Shape is (N, H, W),
where each value is 0 <= label[i] <= C-1.
Returns:
(Tensor): The average loss.
"""
assert logit.ndim == 4, "The ndim of logit should be 4."
assert logit.shape[1] == 2, "The channel of logit should be 2."
assert label.ndim == 3, "The ndim of label should be 3."
class_num = logit.shape[1] # class num is 2
logit = paddle.transpose(logit, [0, 2, 3, 1]) # N,C,H,W => N,H,W,C
mask = label != self.ignore_index # N,H,W
mask = paddle.unsqueeze(mask, 3)
mask = paddle.cast(mask, 'float32')
mask.stop_gradient = True
label = F.one_hot(label, class_num) # N,H,W,C
label = paddle.cast(label, logit.dtype)
label.stop_gradient = True
loss = F.sigmoid_focal_loss(
logit=logit,
label=label,
alpha=self.alpha,
gamma=self.gamma,
reduction='none')
loss = loss * mask
avg_loss = paddle.sum(loss) / (
paddle.sum(paddle.cast(mask != 0., 'int32')) * class_num + self.EPS)
return avg_loss
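# A minimal usage sketch for the binary focal loss above (random data,
# hypothetical sizes); the channel dimension of logit must be 2.
import paddle

logit = paddle.rand([2, 2, 8, 8])
label = paddle.randint(0, 2, [2, 8, 8], dtype='int64')
loss = FocalLoss(alpha=0.25, gamma=2.0)(logit, label)
print(float(loss))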
@manager.LOSSES.add_component
class MultiClassFocalLoss(nn.Layer):
"""
The implementation of focal loss for multiple classes.
Args:
num_class (int): The number of classes.
alpha (float, optional): The scaling factor of the focal loss. Default: 1.0.
gamma (float, optional): The gamma of Focal Loss. Default: 2.0.
ignore_index (int64, optional): Specifies a target value that is ignored
and does not contribute to the input gradient. Default ``255``.
"""
def __init__(self, num_class, alpha=1.0, gamma=2.0, ignore_index=255):
super().__init__()
self.num_class = num_class
self.alpha = alpha
self.gamma = gamma
self.ignore_index = ignore_index
self.EPS = 1e-10
def forward(self, logit, label):
"""
Forward computation.
Args:
logit (Tensor): Logit tensor, the data type is float32, float64. Shape is
(N, C, H, W), where C is number of classes.
label (Tensor): Label tensor, the data type is int64. Shape is (N, H, W),
where each value is 0 <= label[i] <= C-1.
Returns:
(Tensor): The average loss.
"""
assert logit.ndim == 4, "The ndim of logit should be 4."
assert label.ndim == 3, "The ndim of label should be 3."
logit = paddle.transpose(logit, [0, 2, 3, 1])
label = label.astype('int64')
ce_loss = F.cross_entropy(
logit, label, ignore_index=self.ignore_index, reduction='none')
pt = paddle.exp(-ce_loss)
focal_loss = self.alpha * ((1 - pt)**self.gamma) * ce_loss
mask = paddle.cast(label != self.ignore_index, 'float32')
focal_loss *= mask
avg_loss = paddle.mean(focal_loss) / (paddle.mean(mask) + self.EPS)
return avg_loss
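# And a sketch for the multi-class variant, here with 19 classes as in
# Cityscapes; num_class must match the channel dimension of logit.
import paddle

logit = paddle.rand([2, 19, 8, 8])
label = paddle.randint(0, 19, [2, 8, 8], dtype='int64')
loss = MultiClassFocalLoss(num_class=19)(logit, label)
print(float(loss))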

@ -74,3 +74,25 @@ class L1Loss(nn.L1Loss):
def __init__(self, reduction='mean', ignore_index=255):
super().__init__(reduction=reduction)
self.ignore_index = ignore_index
self.EPS = 1e-10
def forward(self, input, label):
mask = label != self.ignore_index
mask = paddle.cast(mask, "float32")
label.stop_gradient = True
mask.stop_gradient = True
output = paddle.nn.functional.l1_loss(
input, label, "none", name=self.name) * mask
if self.reduction == "mean":
return paddle.mean(output) / (paddle.mean(mask) + self.EPS)
elif self.reduction == "none":
return output
elif self.reduction == "sum":
return paddle.sum(output)
else:
raise ValueError(
"The value of 'reduction' in L1Loss should be 'sum', 'mean' or 'none', but "
"received %s, which is not allowed." % self.reduction)

@ -124,8 +124,12 @@ def lovasz_hinge_flat(logits, labels):
signs = 2. * labels - 1.
signs.stop_gradient = True
errors = 1. - logits * signs
errors_sorted, perm = paddle.fluid.core.ops.argsort(errors, 'axis', 0,
'descending', True)
if hasattr(paddle, "_legacy_C_ops"):
errors_sorted, perm = paddle._legacy_C_ops.argsort(errors, 'axis', 0,
'descending', True)
else:
errors_sorted, perm = paddle._C_ops.argsort(errors, 'axis', 0,
'descending', True)
errors_sorted.stop_gradient = False
gt_sorted = paddle.gather(labels, perm)
grad = lovasz_grad(gt_sorted)
@ -181,8 +185,12 @@ def lovasz_softmax_flat(probas, labels, classes='present'):
else:
class_pred = probas[:, c]
errors = paddle.abs(fg - class_pred)
errors_sorted, perm = paddle.fluid.core.ops.argsort(errors, 'axis', 0,
'descending', True)
if hasattr(paddle, "_legacy_C_ops"):
errors_sorted, perm = paddle._legacy_C_ops.argsort(
errors, 'axis', 0, 'descending', True)
else:
errors_sorted, perm = paddle._C_ops.argsort(errors, 'axis', 0,
'descending', True)
errors_sorted.stop_gradient = False
fg_sorted = paddle.gather(fg, perm)
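# The two hunks above pick between the legacy and the new C-ops argsort so the
# Lovász losses keep working across Paddle releases. On recent versions the same
# sorted errors and permutation are also available via the public API, e.g.:
import paddle

errors = paddle.to_tensor([0.2, 1.5, -0.3])
perm = paddle.argsort(errors, axis=0, descending=True)
errors_sorted = paddle.gather(errors, perm)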

@ -55,7 +55,7 @@ class OhemCrossEntropyLoss(nn.Layer):
# get the label after ohem
n, c, h, w = logit.shape
label = label.reshape((-1, ))
label = label.reshape((-1, )).astype('int64')
valid_mask = (label != self.ignore_index).astype('int64')
num_valid = valid_mask.sum()
label = label * valid_mask

@ -101,9 +101,12 @@ class PixelContrastCrossEntropyLoss(nn.Layer):
elif num_hard >= n_view / 2:
num_easy_keep = num_easy
num_hard_keep = n_view - num_easy_keep
else:
elif num_easy >= n_view / 2:
num_hard_keep = num_hard
num_easy_keep = n_view - num_hard_keep
else:
num_hard_keep = num_hard
num_easy_keep = num_easy
indices = None
if num_hard > 0:

@ -92,6 +92,7 @@ class SemanticConnectivityLoss(nn.Layer):
label_num_conn, label_conn = cv2.connectedComponents(
labels_np_class.astype(np.uint8))
origin_pred_num_conn = pred_num_conn
if pred_num_conn > 2 * label_num_conn:
pred_num_conn = min(pred_num_conn, self.max_pred_num_conn)
real_pred_num = pred_num_conn - 1
@ -100,8 +101,9 @@ class SemanticConnectivityLoss(nn.Layer):
# Connected Components Matching and SC Loss Calculation
if real_label_num > 0 and real_pred_num > 0:
img_connectivity = compute_class_connectiveity(
pred_conn, label_conn, pred_num_conn, label_num_conn,
pred_i, real_label_num, real_pred_num, zero)
pred_conn, label_conn, pred_num_conn,
origin_pred_num_conn, label_num_conn, pred_i,
real_label_num, real_pred_num, zero)
sc_loss += 1 - img_connectivity
elif real_label_num == 0 and real_pred_num == 0:
# if no connected component, SC Loss = 0, so pass
@ -122,12 +124,12 @@ class SemanticConnectivityLoss(nn.Layer):
def compute_class_connectiveity(pred_conn, label_conn, pred_num_conn,
label_num_conn, pred, real_label_num,
real_pred_num, zero):
origin_pred_num_conn, label_num_conn, pred,
real_label_num, real_pred_num, zero):
pred_conn = paddle.to_tensor(pred_conn)
label_conn = paddle.to_tensor(label_conn)
pred_conn = F.one_hot(pred_conn, pred_num_conn)
pred_conn = F.one_hot(pred_conn, origin_pred_num_conn)
label_conn = F.one_hot(label_conn, label_num_conn)
ious = paddle.zeros((real_label_num, real_pred_num))

@ -0,0 +1,162 @@
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from functools import partial
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg import utils
from paddlers.models.ppseg.models import layers
from paddlers.models.ppseg.cvlibs import manager
@manager.MODELS.add_component
class LRASPP(nn.Layer):
"""
Semantic segmentation model with a light R-ASPP head.
The original article refers to
Howard, Andrew, et al. "Searching for MobileNetV3."
(https://arxiv.org/pdf/1905.02244.pdf)
Args:
num_classes (int): The number of target classes.
backbone(nn.Layer): Backbone network, such as stdc1net and resnet18. The backbone must
have a feat_channels attribute of length 5.
backbone_indices (List(int), optional): The values indicate the indices of backbone output
used as the input of the LR-ASPP head.
Default: [0, 1, 3].
lraspp_head_inter_chs (List(int), optional): The intermediate channels of LR-ASPP head.
Default: [32, 64].
lraspp_head_out_ch (int, optional): The output channels of each ASPP branch in the LR-ASPP head.
Default: 128
resize_mode (str, optional): The resize mode for the upsampling operation in the LR-ASPP head.
Default: bilinear.
use_gap (bool, optional): If true, use global average pooling in the LR-ASPP head; otherwise, use
a 49x49 kernel for average pooling.
Default: True.
pretrained (str, optional): The path or url of pretrained model. Default: None.
"""
def __init__(self,
num_classes,
backbone,
backbone_indices=[0, 1, 3],
lraspp_head_inter_chs=[32, 64],
lraspp_head_out_ch=128,
resize_mode='bilinear',
use_gap=True,
pretrained=None):
super().__init__()
# backbone
assert hasattr(backbone, 'feat_channels'), \
"The backbone should have feat_channels."
assert len(backbone.feat_channels) >= len(backbone_indices), \
f"The length of input backbone_indices ({len(backbone_indices)}) should not be " \
f"greater than the length of feat_channels ({len(backbone.feat_channels)})."
assert len(backbone.feat_channels) > max(backbone_indices), \
f"The max value ({max(backbone_indices)}) of backbone_indices should be " \
f"less than the length of feat_channels ({len(backbone.feat_channels)})."
self.backbone = backbone
assert len(backbone_indices) >= 1, "The length of backbone_indices " \
"should not be less than 1"
# head
assert len(backbone_indices) == len(
lraspp_head_inter_chs
) + 1, "The length of backbone_indices should be one greater than the length of lraspp_head_inter_chs."
self.backbone_indices = backbone_indices
self.lraspp_head = LRASPPHead(backbone_indices, backbone.feat_channels,
lraspp_head_inter_chs, lraspp_head_out_ch,
num_classes, resize_mode, use_gap)
# pretrained
self.pretrained = pretrained
self.init_weight()
def forward(self, x):
x_hw = paddle.shape(x)[2:]
feats_backbone = self.backbone(x)
assert len(feats_backbone) >= len(self.backbone_indices), \
f"The number of backbone feats ({len(feats_backbone)}) should be greater than or " \
f"equal to the number of backbone_indices ({len(self.backbone_indices)})"
y = self.lraspp_head(feats_backbone)
y = F.interpolate(y, x_hw, mode='bilinear', align_corners=False)
logit_list = [y]
return logit_list
def init_weight(self):
if self.pretrained is not None:
utils.load_entire_model(self, self.pretrained)
class LRASPPHead(nn.Layer):
def __init__(self,
indices,
in_chs,
mid_chs,
out_ch,
n_classes,
resize_mode,
use_gap,
align_corners=False):
super().__init__()
self.indices = indices[-2::-1]
self.in_chs = [in_chs[i] for i in indices[::-1]]
self.mid_chs = mid_chs[::-1]
self.convs = nn.LayerList()
self.conv_ups = nn.LayerList()
for in_ch, mid_ch in zip(self.in_chs[1:], self.mid_chs):
self.convs.append(
nn.Conv2D(
in_ch, mid_ch, kernel_size=1, bias_attr=False))
self.conv_ups.append(layers.ConvBNReLU(out_ch + mid_ch, out_ch, 1))
self.conv_w = nn.Sequential(
nn.AvgPool2D(
kernel_size=(49, 49), stride=(16, 20))
if not use_gap else nn.AdaptiveAvgPool2D(1),
nn.Conv2D(
self.in_chs[0], out_ch, 1, bias_attr=False),
nn.Sigmoid())
self.conv_v = layers.ConvBNReLU(self.in_chs[0], out_ch, 1)
self.conv_t = nn.Conv2D(out_ch, out_ch, kernel_size=1, bias_attr=False)
self.conv_out = nn.Conv2D(
out_ch, n_classes, kernel_size=1, bias_attr=False)
self.interp = partial(
F.interpolate, mode=resize_mode, align_corners=align_corners)
def forward(self, in_feat_list):
x = in_feat_list[-1]
x = self.conv_v(x) * self.interp(self.conv_w(x), paddle.shape(x)[2:])
y = self.conv_t(x)
for idx, conv, conv_up in zip(self.indices, self.convs, self.conv_ups):
feat = in_feat_list[idx]
y = self.interp(y, paddle.shape(feat)[2:])
y = paddle.concat([y, conv(feat)], axis=1)
y = conv_up(y)
y = self.conv_out(y)
return y

@ -1,4 +1,4 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.

@ -0,0 +1,289 @@
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg import utils
from paddlers.models.ppseg.models import layers
from paddlers.models.ppseg.cvlibs import manager
@manager.MODELS.add_component
class MobileSeg(nn.Layer):
"""
The semantic segmentation models for mobile devices.
Args:
num_classes (int): The number of target classes.
backbone(nn.Layer): Backbone network, such as stdc1net and resnet18. The backbone must
have a feat_channels attribute of length 5.
backbone_indices (List(int), optional): The values indicate the indices of output of backbone.
Default: [1, 2, 3].
cm_bin_sizes (List(int), optional): The bin sizes of the context module. Default: [1, 2].
cm_out_ch (int, optional): The output channel of the last context module. Default: 64.
arm_type (str, optional): The type of attention refinement module. Default: 'UAFMMobile'.
arm_out_chs (List(int), optional): The out channels of each arm module. Default: [32, 48, 64].
seg_head_inter_chs (List(int), optional): The intermediate channels of segmentation head.
Default: [32, 32, 32].
resize_mode (str, optional): The resize mode for the upsampling operation in decoder.
Default: bilinear.
use_last_fuse (bool, optional): Whether use fusion in the last. Default: False.
pretrained (str, optional): The path or url of pretrained model. Default: None.
"""
def __init__(self,
num_classes,
backbone,
backbone_indices=[1, 2, 3],
cm_bin_sizes=[1, 2],
cm_out_ch=64,
arm_type='UAFMMobile',
arm_out_chs=[32, 48, 64],
seg_head_inter_chs=[32, 32, 32],
resize_mode='bilinear',
use_last_fuse=False,
pretrained=None):
super().__init__()
# backbone
assert hasattr(backbone, 'feat_channels'), \
"The backbone should have feat_channels."
assert len(backbone.feat_channels) >= len(backbone_indices), \
f"The length of input backbone_indices ({len(backbone_indices)}) should not be " \
f"greater than the length of feat_channels ({len(backbone.feat_channels)})."
assert len(backbone.feat_channels) > max(backbone_indices), \
f"The max value ({max(backbone_indices)}) of backbone_indices should be " \
f"less than the length of feat_channels ({len(backbone.feat_channels)})."
self.backbone = backbone
assert len(backbone_indices) >= 1, "The length of backbone_indices " \
"should not be less than 1"
self.backbone_indices = backbone_indices # [..., x16_id, x32_id]
backbone_out_chs = [backbone.feat_channels[i] for i in backbone_indices]
# head
if len(arm_out_chs) == 1:
arm_out_chs = arm_out_chs * len(backbone_indices)
assert len(arm_out_chs) == len(backbone_indices), "The length of " \
"arm_out_chs and backbone_indices should be equal"
self.ppseg_head = MobileSegHead(backbone_out_chs, arm_out_chs,
cm_bin_sizes, cm_out_ch, arm_type,
resize_mode, use_last_fuse)
if len(seg_head_inter_chs) == 1:
seg_head_inter_chs = seg_head_inter_chs * len(backbone_indices)
assert len(seg_head_inter_chs) == len(backbone_indices), "The length of " \
"seg_head_inter_chs and backbone_indices should be equal"
self.seg_heads = nn.LayerList() # [..., head_16, head32]
for in_ch, mid_ch in zip(arm_out_chs, seg_head_inter_chs):
self.seg_heads.append(SegHead(in_ch, mid_ch, num_classes))
# pretrained
self.pretrained = pretrained
self.init_weight()
def forward(self, x):
x_hw = paddle.shape(x)[2:]
feats_backbone = self.backbone(x) # [x4, x8, x16, x32]
assert len(feats_backbone) >= len(self.backbone_indices), \
f"The number of backbone feats ({len(feats_backbone)}) should be greater than or " \
f"equal to the number of backbone_indices ({len(self.backbone_indices)})"
feats_selected = [feats_backbone[i] for i in self.backbone_indices]
feats_head = self.ppseg_head(feats_selected) # [..., x8, x16, x32]
if self.training:
logit_list = []
for x, seg_head in zip(feats_head, self.seg_heads):
x = seg_head(x)
logit_list.append(x)
logit_list = [
F.interpolate(
x, x_hw, mode='bilinear', align_corners=False)
for x in logit_list
]
else:
x = self.seg_heads[0](feats_head[0])
x = F.interpolate(x, x_hw, mode='bilinear', align_corners=False)
logit_list = [x]
return logit_list
def init_weight(self):
if self.pretrained is not None:
utils.load_entire_model(self, self.pretrained)
class MobileSegHead(nn.Layer):
"""
The head of MobileSeg.
Args:
backbone_out_chs (List(int)): The channels of the output tensors in the backbone.
arm_out_chs (List(int)): The out channels of each arm module.
cm_bin_sizes (List(int)): The bin size of context module.
cm_out_ch (int): The output channel of the last context module.
arm_type (str): The type of attention refinement module.
resize_mode (str): The resize mode for the upsampling operation in decoder.
"""
def __init__(self, backbone_out_chs, arm_out_chs, cm_bin_sizes, cm_out_ch,
arm_type, resize_mode, use_last_fuse):
super().__init__()
self.cm = MobileContextModule(backbone_out_chs[-1], cm_out_ch,
cm_out_ch, cm_bin_sizes)
assert hasattr(layers, arm_type), \
"Unsupported arm_type ({})".format(arm_type)
arm_class = getattr(layers, arm_type)
self.arm_list = nn.LayerList() # [..., arm8, arm16, arm32]
for i in range(len(backbone_out_chs)):
low_chs = backbone_out_chs[i]
high_ch = cm_out_ch if i == len(
backbone_out_chs) - 1 else arm_out_chs[i + 1]
out_ch = arm_out_chs[i]
arm = arm_class(
low_chs, high_ch, out_ch, ksize=3, resize_mode=resize_mode)
self.arm_list.append(arm)
self.use_last_fuse = use_last_fuse
if self.use_last_fuse:
self.fuse_convs = nn.LayerList()
for i in range(1, len(arm_out_chs)):
conv = layers.SeparableConvBNReLU(
arm_out_chs[i],
arm_out_chs[0],
kernel_size=3,
bias_attr=False)
self.fuse_convs.append(conv)
self.last_conv = layers.SeparableConvBNReLU(
len(arm_out_chs) * arm_out_chs[0],
arm_out_chs[0],
kernel_size=3,
bias_attr=False)
def forward(self, in_feat_list):
"""
Args:
in_feat_list (List(Tensor)): Such as [x2, x4, x8, x16, x32].
x2, x4 and x8 are optional.
Returns:
out_feat_list (List(Tensor)): Such as [x2, x4, x8, x16, x32].
x2, x4 and x8 are optional.
The length of in_feat_list and out_feat_list are the same.
"""
high_feat = self.cm(in_feat_list[-1])
out_feat_list = []
for i in reversed(range(len(in_feat_list))):
low_feat = in_feat_list[i]
arm = self.arm_list[i]
high_feat = arm(low_feat, high_feat)
out_feat_list.insert(0, high_feat)
if self.use_last_fuse:
x_list = [out_feat_list[0]]
size = paddle.shape(out_feat_list[0])[2:]
for i, (x, conv
) in enumerate(zip(out_feat_list[1:], self.fuse_convs)):
x = conv(x)
x = F.interpolate(
x, size=size, mode='bilinear', align_corners=False)
x_list.append(x)
x = paddle.concat(x_list, axis=1)
x = self.last_conv(x)
out_feat_list[0] = x
return out_feat_list
class MobileContextModule(nn.Layer):
"""
Context Module for Mobile Model.
Args:
in_channels (int): The number of input channels to pyramid pooling module.
inter_channels (int): The number of inter channels to pyramid pooling module.
out_channels (int): The number of output channels after pyramid pooling module.
bin_sizes (tuple): The output sizes of the pooled feature maps, e.g. (1, 3).
align_corners (bool): An argument of F.interpolate. It should be set to False
when the output size of feature is even, e.g. 1024x512, otherwise it is True, e.g. 769x769.
"""
def __init__(self,
in_channels,
inter_channels,
out_channels,
bin_sizes,
align_corners=False):
super().__init__()
self.stages = nn.LayerList([
self._make_stage(in_channels, inter_channels, size)
for size in bin_sizes
])
self.conv_out = layers.SeparableConvBNReLU(
in_channels=inter_channels,
out_channels=out_channels,
kernel_size=3,
bias_attr=False)
self.align_corners = align_corners
def _make_stage(self, in_channels, out_channels, size):
prior = nn.AdaptiveAvgPool2D(output_size=size)
conv = layers.ConvBNReLU(
in_channels=in_channels, out_channels=out_channels, kernel_size=1)
return nn.Sequential(prior, conv)
def forward(self, input):
out = None
input_shape = paddle.shape(input)[2:]
for stage in self.stages:
x = stage(input)
x = F.interpolate(
x,
input_shape,
mode='bilinear',
align_corners=self.align_corners)
if out is None:
out = x
else:
out += x
out = self.conv_out(out)
return out
class SegHead(nn.Layer):
def __init__(self, in_chan, mid_chan, n_classes):
super().__init__()
self.conv = layers.SeparableConvBNReLU(
in_chan, mid_chan, kernel_size=3, bias_attr=False)
self.conv_out = nn.Conv2D(
mid_chan, n_classes, kernel_size=1, bias_attr=False)
def forward(self, x):
x = self.conv(x)
x = self.conv_out(x)
return x
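# A shape sketch for the context module above (hypothetical sizes): pooled
# context at each bin size is upsampled, summed, then projected back.
import paddle

cm = MobileContextModule(
    in_channels=64, inter_channels=32, out_channels=64, bin_sizes=[1, 2, 4])
x = paddle.rand([1, 64, 16, 16])
print(cm(x).shape)  # [1, 64, 16, 16]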

@ -1,4 +1,4 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.

@ -1,4 +1,4 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -50,19 +50,9 @@ class PortraitNet(nn.Layer):
self.init_weight()
def forward(self, x):
img = x[:, :3, :, :]
img_ori = x[:, 3:, :, :]
feat_list = self.backbone(img)
feat_list = self.backbone(x)
logits_list = self.head(feat_list)
feat_list = self.backbone(img_ori)
logits_ori_list = self.head(feat_list)
return [
logits_list[0], logits_ori_list[0], logits_list[1],
logits_ori_list[1]
]
return [logits_list]
def init_weight(self):
if self.pretrained is not None:

@ -0,0 +1,273 @@
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg import utils
from paddlers.models.ppseg.models import layers
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.utils import utils
@manager.MODELS.add_component
class PPLiteSeg(nn.Layer):
"""
The PP_LiteSeg implementation based on PaddlePaddle.
The original article refers to "Juncai Peng, Yi Liu, Shiyu Tang, Yuying Hao, Lutao Chu,
Guowei Chen, Zewu Wu, Zeyu Chen, Zhiliang Yu, Yuning Du, Qingqing Dang, Baohua Lai,
Qiwen Liu, Xiaoguang Hu, Dianhai Yu, Yanjun Ma. PP-LiteSeg: A Superior Real-Time Semantic
Segmentation Model. https://arxiv.org/abs/2204.02681".
Args:
num_classes (int): The number of target classes.
backbone(nn.Layer): Backbone network, such as stdc1net and resnet18. The backbone must
have a feat_channels attribute of length 5.
backbone_indices (List(int), optional): The values indicate the indices of output of backbone.
Default: [2, 3, 4].
arm_type (str, optional): The type of attention refinement module. Default: 'UAFM_SpAtten'.
cm_bin_sizes (List(int), optional): The bin size of context module. Default: [1,2,4].
cm_out_ch (int, optional): The output channel of the last context module. Default: 128.
arm_out_chs (List(int), optional): The out channels of each arm module. Default: [64, 96, 128].
seg_head_inter_chs (List(int), optional): The intermediate channels of segmentation head.
Default: [64, 64, 64].
resize_mode (str, optional): The resize mode for the upsampling operation in decoder.
Default: bilinear.
pretrained (str, optional): The path or url of pretrained model. Default: None.
"""
def __init__(self,
num_classes,
backbone,
backbone_indices=[2, 3, 4],
arm_type='UAFM_SpAtten',
cm_bin_sizes=[1, 2, 4],
cm_out_ch=128,
arm_out_chs=[64, 96, 128],
seg_head_inter_chs=[64, 64, 64],
resize_mode='bilinear',
pretrained=None):
super().__init__()
# backbone
assert hasattr(backbone, 'feat_channels'), \
"The backbone should have feat_channels."
assert len(backbone.feat_channels) >= len(backbone_indices), \
f"The length of input backbone_indices ({len(backbone_indices)}) should not be " \
f"greater than the length of feat_channels ({len(backbone.feat_channels)})."
assert len(backbone.feat_channels) > max(backbone_indices), \
f"The max value ({max(backbone_indices)}) of backbone_indices should be " \
f"less than the length of feat_channels ({len(backbone.feat_channels)})."
self.backbone = backbone
assert len(backbone_indices) > 1, "The length of backbone_indices " \
"should be greater than 1"
self.backbone_indices = backbone_indices # [..., x16_id, x32_id]
backbone_out_chs = [backbone.feat_channels[i] for i in backbone_indices]
# head
if len(arm_out_chs) == 1:
arm_out_chs = arm_out_chs * len(backbone_indices)
assert len(arm_out_chs) == len(backbone_indices), "The length of " \
"arm_out_chs and backbone_indices should be equal"
self.ppseg_head = PPLiteSegHead(backbone_out_chs, arm_out_chs,
cm_bin_sizes, cm_out_ch, arm_type,
resize_mode)
if len(seg_head_inter_chs) == 1:
seg_head_inter_chs = seg_head_inter_chs * len(backbone_indices)
assert len(seg_head_inter_chs) == len(backbone_indices), "The length of " \
"seg_head_inter_chs and backbone_indices should be equal"
self.seg_heads = nn.LayerList() # [..., head_16, head32]
for in_ch, mid_ch in zip(arm_out_chs, seg_head_inter_chs):
self.seg_heads.append(SegHead(in_ch, mid_ch, num_classes))
# pretrained
self.pretrained = pretrained
self.init_weight()
def forward(self, x):
x_hw = paddle.shape(x)[2:]
feats_backbone = self.backbone(x) # [x2, x4, x8, x16, x32]
assert len(feats_backbone) >= len(self.backbone_indices), \
f"The number of backbone feats ({len(feats_backbone)}) should be greater than or " \
f"equal to the number of backbone_indices ({len(self.backbone_indices)})"
feats_selected = [feats_backbone[i] for i in self.backbone_indices]
feats_head = self.ppseg_head(feats_selected) # [..., x8, x16, x32]
if self.training:
logit_list = []
for x, seg_head in zip(feats_head, self.seg_heads):
x = seg_head(x)
logit_list.append(x)
logit_list = [
F.interpolate(
x, x_hw, mode='bilinear', align_corners=False)
for x in logit_list
]
else:
x = self.seg_heads[0](feats_head[0])
x = F.interpolate(x, x_hw, mode='bilinear', align_corners=False)
logit_list = [x]
return logit_list
def init_weight(self):
if self.pretrained is not None:
utils.load_entire_model(self, self.pretrained)
class PPLiteSegHead(nn.Layer):
"""
The head of PPLiteSeg.
Args:
backbone_out_chs (List(int)): The channels of the output tensors in the backbone.
arm_out_chs (List(int)): The out channels of each arm module.
cm_bin_sizes (List(int)): The bin size of context module.
cm_out_ch (int): The output channel of the last context module.
arm_type (str): The type of attention refinement module.
resize_mode (str): The resize mode for the upsampling operation in decoder.
"""
def __init__(self, backbone_out_chs, arm_out_chs, cm_bin_sizes, cm_out_ch,
arm_type, resize_mode):
super().__init__()
self.cm = PPContextModule(backbone_out_chs[-1], cm_out_ch, cm_out_ch,
cm_bin_sizes)
assert hasattr(layers, arm_type), \
"Unsupported arm_type ({})".format(arm_type)
arm_class = getattr(layers, arm_type)
self.arm_list = nn.LayerList() # [..., arm8, arm16, arm32]
for i in range(len(backbone_out_chs)):
low_chs = backbone_out_chs[i]
high_ch = cm_out_ch if i == len(
backbone_out_chs) - 1 else arm_out_chs[i + 1]
out_ch = arm_out_chs[i]
arm = arm_class(
low_chs, high_ch, out_ch, ksize=3, resize_mode=resize_mode)
self.arm_list.append(arm)
def forward(self, in_feat_list):
"""
Args:
in_feat_list (List(Tensor)): Such as [x2, x4, x8, x16, x32].
x2, x4 and x8 are optional.
Returns:
out_feat_list (List(Tensor)): Such as [x2, x4, x8, x16, x32].
x2, x4 and x8 are optional.
The length of in_feat_list and out_feat_list are the same.
"""
high_feat = self.cm(in_feat_list[-1])
out_feat_list = []
for i in reversed(range(len(in_feat_list))):
low_feat = in_feat_list[i]
arm = self.arm_list[i]
high_feat = arm(low_feat, high_feat)
out_feat_list.insert(0, high_feat)
return out_feat_list
class PPContextModule(nn.Layer):
"""
Simple Context module.
Args:
in_channels (int): The number of input channels to pyramid pooling module.
inter_channels (int): The number of inter channels to pyramid pooling module.
out_channels (int): The number of output channels after pyramid pooling module.
bin_sizes (tuple): The output sizes of the pooled feature maps, e.g. (1, 3).
align_corners (bool): An argument of F.interpolate. It should be set to False
when the output size of feature is even, e.g. 1024x512, otherwise it is True, e.g. 769x769.
"""
def __init__(self,
in_channels,
inter_channels,
out_channels,
bin_sizes,
align_corners=False):
super().__init__()
self.stages = nn.LayerList([
self._make_stage(in_channels, inter_channels, size)
for size in bin_sizes
])
self.conv_out = layers.ConvBNReLU(
in_channels=inter_channels,
out_channels=out_channels,
kernel_size=3,
padding=1)
self.align_corners = align_corners
def _make_stage(self, in_channels, out_channels, size):
prior = nn.AdaptiveAvgPool2D(output_size=size)
conv = layers.ConvBNReLU(
in_channels=in_channels, out_channels=out_channels, kernel_size=1)
return nn.Sequential(prior, conv)
def forward(self, input):
out = None
input_shape = paddle.shape(input)[2:]
for stage in self.stages:
x = stage(input)
x = F.interpolate(
x,
input_shape,
mode='bilinear',
align_corners=self.align_corners)
if out is None:
out = x
else:
out += x
out = self.conv_out(out)
return out
class SegHead(nn.Layer):
def __init__(self, in_chan, mid_chan, n_classes):
super().__init__()
self.conv = layers.ConvBNReLU(
in_chan,
mid_chan,
kernel_size=3,
stride=1,
padding=1,
bias_attr=False)
self.conv_out = nn.Conv2D(
mid_chan, n_classes, kernel_size=1, bias_attr=False)
def forward(self, x):
x = self.conv(x)
x = self.conv_out(x)
return x
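As a side note, the PPContextModule above fuses multi-scale context by adaptively pooling the input to each bin size, projecting, upsampling back, and summing. A minimal standalone sketch of that idea (shapes and bin sizes hypothetical, not part of this diff):
import paddle
import paddle.nn.functional as F
x = paddle.rand([1, 8, 32, 32])  # NCHW feature map
out = None
for size in (1, 3):  # cm_bin_sizes
    pooled = F.adaptive_avg_pool2d(x, size)
    up = F.interpolate(pooled, x.shape[2:], mode='bilinear', align_corners=False)
    out = up if out is None else out + up  # element-wise fusion, as in forward()
print(out.shape)  # [1, 8, 32, 32]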

@ -27,13 +27,17 @@ __all__ = ['PPHumanSegLite']
class PPHumanSegLite(nn.Layer):
"A self-developed ultra lightweight model from paddlers.models.ppseg, is suitable for real-time scene segmentation on web or mobile terminals."
def __init__(self, num_classes, pretrained=None, align_corners=False):
def __init__(self,
num_classes,
in_channels=3,
pretrained=None,
align_corners=False):
super().__init__()
self.pretrained = pretrained
self.num_classes = num_classes
self.align_corners = align_corners
self.conv_bn0 = _ConvBNReLU(3, 36, 3, 2, 1)
self.conv_bn0 = _ConvBNReLU(in_channels, 36, 3, 2, 1)
self.conv_bn1 = _ConvBNReLU(36, 18, 1, 1, 0)
self.block1 = nn.Sequential(

@ -127,51 +127,3 @@ class SegFormer(nn.Layer):
mode='bilinear',
align_corners=self.align_corners)
]
@manager.MODELS.add_component
def SegFormer_B0(**kwargs):
return SegFormer(
backbone=manager.BACKBONES['MixVisionTransformer_B0'](),
embedding_dim=256,
**kwargs)
@manager.MODELS.add_component
def SegFormer_B1(**kwargs):
return SegFormer(
backbone=manager.BACKBONES['MixVisionTransformer_B1'](),
embedding_dim=256,
**kwargs)
@manager.MODELS.add_component
def SegFormer_B2(**kwargs):
return SegFormer(
backbone=manager.BACKBONES['MixVisionTransformer_B2'](),
embedding_dim=768,
**kwargs)
@manager.MODELS.add_component
def SegFormer_B3(**kwargs):
return SegFormer(
backbone=manager.BACKBONES['MixVisionTransformer_B3'](),
embedding_dim=768,
**kwargs)
@manager.MODELS.add_component
def SegFormer_B4(**kwargs):
return SegFormer(
backbone=manager.BACKBONES['MixVisionTransformer_B4'](),
embedding_dim=768,
**kwargs)
@manager.MODELS.add_component
def SegFormer_B5(**kwargs):
return SegFormer(
backbone=manager.BACKBONES['MixVisionTransformer_B5'](),
embedding_dim=768,
**kwargs)

@ -32,14 +32,14 @@ class SegNet(nn.Layer):
num_classes (int): The unique number of target classes.
"""
def __init__(self, num_classes, pretrained=None):
def __init__(self, num_classes, in_channels=3, pretrained=None):
super().__init__()
# Encoder Module
self.enco1 = nn.Sequential(
layers.ConvBNReLU(
3, 64, 3, padding=1),
in_channels, 64, 3, padding=1),
layers.ConvBNReLU(
64, 64, 3, padding=1))

@ -0,0 +1,449 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Refer to the origin implementation: https://github.com/clovaai/c3_sinet/blob/master/models/SINet.py
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg.models import layers
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.utils import utils
CFG = [[[3, 1], [5, 1]], [[3, 1], [3, 1]], [[3, 1], [5, 1]], [[3, 1], [3, 1]],
[[5, 1], [3, 2]], [[5, 2], [3, 4]], [[3, 1], [3, 1]], [[5, 1], [5, 1]],
[[3, 2], [3, 4]], [[3, 1], [5, 2]]]
@manager.MODELS.add_component
class SINet(nn.Layer):
"""
The SINet implementation based on PaddlePaddle.
The original article refers to
Hyojin Park, Lars Lowe Sjösund, YoungJoon Yoo, Nicolas Monet, Jihwan Bang, Nojun Kwak
"SINet: Extreme Lightweight Portrait Segmentation Networks with Spatial Squeeze Modules
and Information Blocking Decoder", (https://arxiv.org/abs/1911.09099).
Args:
num_classes (int): The unique number of target classes.
config (List, optional): The config for SINet. Default: CFG.
stage2_blocks (int, optional): The number of blocks in stage2. Default: 2.
stage3_blocks (int, optional): The number of blocks in stage3. Default: 8.
in_channels (int, optional): The channels of input image. Default: 3.
pretrained (str, optional): The path or url of pretrained model. Default: None.
"""
def __init__(self,
num_classes=2,
config=CFG,
stage2_blocks=2,
stage3_blocks=8,
in_channels=3,
pretrained=None):
super().__init__()
dim1 = 16
dim2 = 48
dim3 = 96
self.encoder = SINetEncoder(config, in_channels, num_classes,
stage2_blocks, stage3_blocks)
self.up = nn.UpsamplingBilinear2D(scale_factor=2)
self.bn_3 = nn.BatchNorm(num_classes)
self.level2_C = CBR(dim2, num_classes, 1, 1)
self.bn_2 = nn.BatchNorm(num_classes)
self.classifier = nn.Sequential(
nn.UpsamplingBilinear2D(scale_factor=2),
nn.Conv2D(
num_classes, num_classes, 3, 1, 1, bias_attr=False))
self.pretrained = pretrained
self.init_weight()
def init_weight(self):
if self.pretrained is not None:
utils.load_entire_model(self, self.pretrained)
def forward(self, input):
output1 = self.encoder.level1(input) # x2
output2_0 = self.encoder.level2_0(output1) # x4
for i, layer in enumerate(self.encoder.level2):
if i == 0:
output2 = layer(output2_0)
else:
output2 = layer(output2)
output2_cat = self.encoder.BR2(paddle.concat([output2_0, output2], 1))
output3_0 = self.encoder.level3_0(output2_cat) # x8
for i, layer in enumerate(self.encoder.level3):
if i == 0:
output3 = layer(output3_0)
else:
output3 = layer(output3)
output3_cat = self.encoder.BR3(paddle.concat([output3_0, output3], 1))
enc_final = self.encoder.classifier(output3_cat) # x8
dec_stage1 = self.bn_3(self.up(enc_final)) # x4
stage1_confidence = paddle.max(F.softmax(dec_stage1, axis=1), axis=1)
stage1_gate = (1 - stage1_confidence).unsqueeze(1)
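# Information blocking decoder: the gate (1 - confidence) passes fine low-level features only where stage 1 is uncertain.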
dec_stage2_0 = self.level2_C(output2) # x4
dec_stage2 = self.bn_2(
self.up(dec_stage2_0 * stage1_gate + dec_stage1)) # x2
out = self.classifier(dec_stage2) # x
return [out]
def channel_shuffle(x, groups):
x_shape = paddle.shape(x)
batch_size, height, width = x_shape[0], x_shape[2], x_shape[3]
num_channels = x.shape[1]
channels_per_group = num_channels // groups
# reshape
x = paddle.reshape(
x=x, shape=[batch_size, groups, channels_per_group, height, width])
# transpose
x = paddle.transpose(x=x, perm=[0, 2, 1, 3, 4])
# flatten
x = paddle.reshape(x=x, shape=[batch_size, num_channels, height, width])
return x
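channel_shuffle above interleaves channels across convolution groups so that grouped convolutions can exchange information. A tiny sanity check (a sketch, not part of the diff):
import paddle
x = paddle.arange(6, dtype='float32').reshape([1, 6, 1, 1])  # channels 0..5
y = channel_shuffle(x, groups=2)
print(y.flatten().tolist())  # [0.0, 3.0, 1.0, 4.0, 2.0, 5.0]: the two groups are interleaved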
class CBR(nn.Layer):
'''
This class defines the convolution layer with batch normalization and PReLU activation
'''
def __init__(self, nIn, nOut, kSize, stride=1):
super().__init__()
padding = int((kSize - 1) / 2)
self.conv = nn.Conv2D(
nIn,
nOut, (kSize, kSize),
stride=stride,
padding=(padding, padding),
bias_attr=False)
self.bn = nn.BatchNorm(nOut)
self.act = nn.PReLU(nOut)
def forward(self, input):
output = self.conv(input)
output = self.bn(output)
output = self.act(output)
return output
class SeparableCBR(nn.Layer):
'''
This class defines the depthwise separable convolution layer with batch normalization and PReLU activation
'''
def __init__(self, nIn, nOut, kSize, stride=1):
super().__init__()
padding = int((kSize - 1) / 2)
self.conv = nn.Sequential(
nn.Conv2D(
nIn,
nIn, (kSize, kSize),
stride=stride,
padding=(padding, padding),
groups=nIn,
bias_attr=False),
nn.Conv2D(
nIn, nOut, kernel_size=1, stride=1, bias_attr=False), )
self.bn = nn.BatchNorm(nOut)
self.act = nn.PReLU(nOut)
def forward(self, input):
output = self.conv(input)
output = self.bn(output)
output = self.act(output)
return output
class SqueezeBlock(nn.Layer):
def __init__(self, exp_size, divide=4.0):
super(SqueezeBlock, self).__init__()
if divide > 1:
self.dense = nn.Sequential(
nn.Linear(exp_size, int(exp_size / divide)),
nn.PReLU(int(exp_size / divide)),
nn.Linear(int(exp_size / divide), exp_size),
nn.PReLU(exp_size), )
else:
self.dense = nn.Sequential(
nn.Linear(exp_size, exp_size), nn.PReLU(exp_size))
def forward(self, x):
alpha = F.adaptive_avg_pool2d(x, [1, 1])
alpha = paddle.squeeze(alpha, axis=[2, 3])
alpha = self.dense(alpha)
alpha = paddle.unsqueeze(alpha, axis=[2, 3])
out = x * alpha
return out
class SESeparableCBR(nn.Layer):
'''
This class defines the squeeze-and-excitation depthwise separable convolution layer with batch normalization and PReLU activation
'''
def __init__(self, nIn, nOut, kSize, stride=1, divide=2.0):
super().__init__()
padding = int((kSize - 1) / 2)
self.conv = nn.Sequential(
nn.Conv2D(
nIn,
nIn, (kSize, kSize),
stride=stride,
padding=(padding, padding),
groups=nIn,
bias_attr=False),
SqueezeBlock(
nIn, divide=divide),
nn.Conv2D(
nIn, nOut, kernel_size=1, stride=1, bias_attr=False), )
self.bn = nn.BatchNorm(nOut)
self.act = nn.PReLU(nOut)
def forward(self, input):
output = self.conv(input)
output = self.bn(output)
output = self.act(output)
return output
class BR(nn.Layer):
'''
This class groups the batch normalization and PReLU activation
'''
def __init__(self, nOut):
super().__init__()
self.bn = nn.BatchNorm(nOut)
self.act = nn.PReLU(nOut)
def forward(self, input):
output = self.bn(input)
output = self.act(output)
return output
class CB(nn.Layer):
'''
This class groups the convolution and batch normalization
'''
def __init__(self, nIn, nOut, kSize, stride=1):
super().__init__()
padding = int((kSize - 1) / 2)
self.conv = nn.Conv2D(
nIn,
nOut, (kSize, kSize),
stride=stride,
padding=(padding, padding),
bias_attr=False)
self.bn = nn.BatchNorm(nOut)
def forward(self, input):
output = self.conv(input)
output = self.bn(output)
return output
class C(nn.Layer):
'''
This class is for a convolutional layer.
'''
def __init__(self, nIn, nOut, kSize, stride=1, group=1):
super().__init__()
padding = int((kSize - 1) / 2)
self.conv = nn.Conv2D(
nIn,
nOut, (kSize, kSize),
stride=stride,
padding=(padding, padding),
bias_attr=False,
groups=group)
def forward(self, input):
output = self.conv(input)
return output
class S2block(nn.Layer):
'''
This class defines the S2 block: optional average pooling followed by a depthwise convolution and a pointwise projection.
'''
def __init__(self, nIn, nOut, kSize, avgsize):
super().__init__()
self.resolution_down = False
if avgsize > 1:
self.resolution_down = True
self.down_res = nn.AvgPool2D(avgsize, avgsize)
self.up_res = nn.UpsamplingBilinear2D(scale_factor=avgsize)
self.avgsize = avgsize
padding = int((kSize - 1) / 2)
self.conv = nn.Sequential(
nn.Conv2D(
nIn,
nIn,
kernel_size=(kSize, kSize),
stride=1,
padding=(padding, padding),
groups=nIn,
bias_attr=False),
nn.BatchNorm(nIn))
self.act_conv1x1 = nn.Sequential(
nn.PReLU(nIn),
nn.Conv2D(
nIn, nOut, kernel_size=1, stride=1, bias_attr=False), )
self.bn = nn.BatchNorm(nOut)
def forward(self, input):
if self.resolution_down:
input = self.down_res(input)
output = self.conv(input)
output = self.act_conv1x1(output)
if self.resolution_down:
output = self.up_res(output)
return self.bn(output)
class S2module(nn.Layer):
'''
This class defines the ESP block, which is based on the following principle:
Reduce ---> Split ---> Transform ---> Merge
'''
def __init__(self, nIn, nOut, add=True, config=[[3, 1], [5, 1]]):
super().__init__()
group_n = len(config)
assert group_n == 2
n = int(nOut / group_n)
n1 = nOut - group_n * n
self.c1 = C(nIn, n, 1, 1, group=group_n)
# self.c1 = C(nIn, n, 1, 1)
for i in range(group_n):
if i == 0:
self.layer_0 = S2block(
n, n + n1, kSize=config[i][0], avgsize=config[i][1])
else:
self.layer_1 = S2block(
n, n, kSize=config[i][0], avgsize=config[i][1])
self.BR = BR(nOut)
self.add = add
self.group_n = group_n
def forward(self, input):
output1 = self.c1(input)
output1 = channel_shuffle(output1, self.group_n)
res_0 = self.layer_0(output1)
res_1 = self.layer_1(output1)
combine = paddle.concat([res_0, res_1], 1)
if self.add:
combine = input + combine
output = self.BR(combine)
return output
class SINetEncoder(nn.Layer):
def __init__(self,
config,
in_channels=3,
num_classes=2,
stage2_blocks=2,
stage3_blocks=8):
super().__init__()
assert stage2_blocks == 2
dim1 = 16
dim2 = 48
dim3 = 96
self.level1 = CBR(in_channels, 12, 3, 2)
self.level2_0 = SESeparableCBR(12, dim1, 3, 2, divide=1)
self.level2 = nn.LayerList()
for i in range(0, stage2_blocks):
if i == 0:
self.level2.append(
S2module(
dim1, dim2, config=config[i], add=False))
else:
self.level2.append(S2module(dim2, dim2, config=config[i]))
self.BR2 = BR(dim2 + dim1)
self.level3_0 = SESeparableCBR(dim2 + dim1, dim2, 3, 2, divide=2)
self.level3 = nn.LayerList()
for i in range(0, stage3_blocks):
if i == 0:
self.level3.append(
S2module(
dim2, dim3, config=config[2 + i], add=False))
else:
self.level3.append(S2module(dim3, dim3, config=config[2 + i]))
self.BR3 = BR(dim3 + dim2)
self.classifier = C(dim3 + dim2, num_classes, 1, 1)
def forward(self, input):
output1 = self.level1(input) # x2
output2_0 = self.level2_0(output1) # x4
for i, layer in enumerate(self.level2):
if i == 0:
output2 = layer(output2_0)
else:
output2 = layer(output2)
output3_0 = self.level3_0(
self.BR2(paddle.concat([output2_0, output2], 1))) # x8
for i, layer in enumerate(self.level3):
if i == 0:
output3 = layer(output3_0)
else:
output3 = layer(output3)
output3_cat = self.BR3(paddle.concat([output3_0, output3], 1))
classifier = self.classifier(output3_cat)
return classifier
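A quick smoke test for the model defined above (input size hypothetical; it only needs to be divisible by 8 so the three stride-2 stages and the decoder upsampling line up):
import paddle
model = SINet(num_classes=2)
out = model(paddle.rand([1, 3, 224, 224]))[0]
print(out.shape)  # [1, 2, 224, 224]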

@ -1,4 +1,4 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.

@ -0,0 +1,155 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import warnings
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.models import layers
from paddlers.models.ppseg.utils import utils
from paddlers.models.ppseg.models.backbones.top_transformer import ConvBNAct
@manager.MODELS.add_component
class TopFormer(nn.Layer):
"""
The Token Pyramid Transformer(TopFormer) implementation based on PaddlePaddle.
The original article refers to
Zhang, Wenqiang, Zilong Huang, Guozhong Luo, Tao Chen, Xinggang Wang, Wenyu Liu, Gang Yu,
and Chunhua Shen. "TopFormer: Token Pyramid Transformer for Mobile Semantic Segmentation."
In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition,
pp. 12083-12093. 2022.
This model refers to https://github.com/hustvl/TopFormer.
Args:
num_classes (int): The unique number of target classes.
backbone (nn.Layer): Backbone network.
head_use_dw (bool, optional): Whether the head uses depthwise convolutions. Default: False.
align_corners (bool, optional): Set the align_corners in resizing. Default: False.
pretrained (str, optional): The path or url of pretrained model. Default: None.
"""
def __init__(self,
num_classes,
backbone,
head_use_dw=False,
align_corners=False,
pretrained=None):
super().__init__()
self.backbone = backbone
head_in_channels = [
i for i in backbone.injection_out_channels if i is not None
]
self.decode_head = TopFormerHead(
num_classes=num_classes,
in_channels=head_in_channels,
use_dw=head_use_dw,
align_corners=align_corners)
self.align_corners = align_corners
self.pretrained = pretrained
self.init_weight()
def init_weight(self):
if self.pretrained is not None:
utils.load_entire_model(self, self.pretrained)
def forward(self, x):
x_hw = paddle.shape(x)[2:]
x = self.backbone(x) # len=3, 1/8,1/16,1/32
x = self.decode_head(x)
x = F.interpolate(
x, x_hw, mode='bilinear', align_corners=self.align_corners)
return [x]
class TopFormerHead(nn.Layer):
def __init__(self,
num_classes,
in_channels,
in_index=[0, 1, 2],
in_transform='multiple_select',
use_dw=False,
dropout_ratio=0.1,
align_corners=False):
super().__init__()
self.in_index = in_index
self.in_transform = in_transform
self.align_corners = align_corners
self._init_inputs(in_channels, in_index, in_transform)
self.linear_fuse = ConvBNAct(
in_channels=self.last_channels,
out_channels=self.last_channels,
kernel_size=1,
stride=1,
groups=self.last_channels if use_dw else 1,
act=nn.ReLU)
self.dropout = nn.Dropout2D(dropout_ratio)
self.conv_seg = nn.Conv2D(
self.last_channels, num_classes, kernel_size=1)
def _init_inputs(self, in_channels, in_index, in_transform):
assert in_transform in [None, 'resize_concat', 'multiple_select']
if in_transform is not None:
assert len(in_channels) == len(in_index)
if in_transform == 'resize_concat':
self.last_channels = sum(in_channels)
else:
self.last_channels = in_channels[0]
else:
assert isinstance(in_channels, int)
assert isinstance(in_index, int)
self.last_channels = in_channels
def _transform_inputs(self, inputs):
if self.in_transform == 'resize_concat':
inputs = [inputs[i] for i in self.in_index]
inputs = [
F.interpolate(
input_data=x,
size=paddle.shape(inputs[0])[2:],
mode='bilinear',
align_corners=self.align_corners) for x in inputs
]
inputs = paddle.concat(inputs, axis=1)
elif self.in_transform == 'multiple_select':
inputs_tmp = [inputs[i] for i in self.in_index]
inputs = inputs_tmp[0]
for x in inputs_tmp[1:]:
x = F.interpolate(
x,
size=paddle.shape(inputs)[2:],
mode='bilinear',
align_corners=self.align_corners)
inputs += x
else:
inputs = inputs[self.in_index]
return inputs
def forward(self, x):
x = self._transform_inputs(x)
x = self.linear_fuse(x)
x = self.dropout(x)
x = self.conv_seg(x)
return x
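For reference, a sketch of what the 'multiple_select' branch of _transform_inputs computes (shapes hypothetical): every selected feature is resized to the first one's resolution and the results are summed before the 1x1 fuse convolution.
import paddle
import paddle.nn.functional as F
feats = [paddle.rand([1, 4, 28, 28]), paddle.rand([1, 4, 14, 14]), paddle.rand([1, 4, 7, 7])]
fused = feats[0]
for f in feats[1:]:
    fused = fused + F.interpolate(f, size=feats[0].shape[2:], mode='bilinear')
print(fused.shape)  # [1, 4, 28, 28]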

@ -34,15 +34,15 @@ class U2Net(nn.Layer):
Args:
num_classes (int): The unique number of target classes.
in_ch (int, optional): Input channels. Default: 3.
in_channels (int, optional): Input channels. Default: 3.
pretrained (str, optional): The path or url of pretrained model for fine tuning. Default: None.
"""
def __init__(self, num_classes, in_ch=3, pretrained=None):
def __init__(self, num_classes, in_channels=3, pretrained=None):
super(U2Net, self).__init__()
self.stage1 = RSU7(in_ch, 32, 64)
self.stage1 = RSU7(in_channels, 32, 64)
self.pool12 = nn.MaxPool2D(2, stride=2, ceil_mode=True)
self.stage2 = RSU6(64, 32, 128)
@ -153,10 +153,10 @@ class U2Net(nn.Layer):
class U2Netp(nn.Layer):
"""Please Refer to U2Net above."""
def __init__(self, num_classes, in_ch=3, pretrained=None):
def __init__(self, num_classes, in_channels=3, pretrained=None):
super(U2Netp, self).__init__()
self.stage1 = RSU7(in_ch, 16, 64)
self.stage1 = RSU7(in_channels, 16, 64)
self.pool12 = nn.MaxPool2D(2, stride=2, ceil_mode=True)
self.stage2 = RSU6(64, 16, 64)

@ -36,18 +36,19 @@ class UNet(nn.Layer):
is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False.
use_deconv (bool, optional): A bool value indicates whether using deconvolution in upsampling.
If False, use resize_bilinear. Default: False.
in_channels (int, optional): The channels of input image. Default: 3.
pretrained (str, optional): The path or url of pretrained model for fine tuning. Default: None.
"""
def __init__(self,
num_classes,
input_channel=3,
align_corners=False,
use_deconv=False,
in_channels=3,
pretrained=None):
super().__init__()
self.encode = Encoder(input_channel)
self.encode = Encoder(in_channels)
self.decode = Decoder(align_corners, use_deconv=use_deconv)
self.cls = self.conv = nn.Conv2D(
in_channels=64,
@ -73,12 +74,11 @@ class UNet(nn.Layer):
class Encoder(nn.Layer):
def __init__(self, input_channel=3):
def __init__(self, in_channels=3):
super().__init__()
self.double_conv = nn.Sequential(
layers.ConvBNReLU(input_channel, 64, 3),
layers.ConvBNReLU(64, 64, 3))
layers.ConvBNReLU(in_channels, 64, 3), layers.ConvBNReLU(64, 64, 3))
down_channels = [[64, 128], [128, 256], [256, 512], [512, 512]]
self.down_sample_list = nn.LayerList([
self.down_sampling(channel[0], channel[1])

@ -31,8 +31,8 @@ class UNetPlusPlus(nn.Layer):
(https://arxiv.org/abs/1807.10165).
Args:
in_channels (int): The channel number of input image.
num_classes (int): The unique number of target classes.
in_channels (int, optional): The channel number of input image. Default: 3.
use_deconv (bool, optional): A bool value indicates whether using deconvolution in upsampling.
If False, use resize_bilinear. Default: False.
align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature
@ -42,8 +42,8 @@ class UNetPlusPlus(nn.Layer):
"""
def __init__(self,
in_channels,
num_classes,
in_channels=3,
use_deconv=False,
align_corners=False,
pretrained=None,

@ -0,0 +1,173 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg import utils
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.models import layers
@manager.MODELS.add_component
class UPerNet(nn.Layer):
"""
The UPerNet implementation based on PaddlePaddle.
The original article refers to
Tete Xiao, et, al. "Unified Perceptual Parsing for Scene Understanding"
(https://arxiv.org/abs/1807.10221).
Args:
num_classes (int): The unique number of target classes.
backbone (Paddle.nn.Layer): Backbone network, currently supporting ResNet50/101.
backbone_indices (tuple): Four values indicating the indices of the backbone outputs.
channels (int): The channels of intermediate layers. Default: 512.
enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss. Default: False.
align_corners (bool, optional): An argument of F.interpolate. It should be set to False when the feature size is even,
e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False.
dropout_prob (float): Dropout ratio for upernet head. Default: 0.1.
pretrained (str, optional): The path or url of pretrained model. Default: None.
"""
def __init__(self,
num_classes,
backbone,
backbone_indices,
channels=512,
enable_auxiliary_loss=False,
align_corners=False,
dropout_prob=0.1,
pretrained=None):
super().__init__()
self.backbone = backbone
self.backbone_indices = backbone_indices
self.in_channels = [
self.backbone.feat_channels[i] for i in backbone_indices
]
self.align_corners = align_corners
self.pretrained = pretrained
self.enable_auxiliary_loss = enable_auxiliary_loss
fpn_inplanes = [
self.backbone.feat_channels[i] for i in backbone_indices
]
self.head = UPerNetHead(
num_classes=num_classes,
fpn_inplanes=fpn_inplanes,
dropout_prob=dropout_prob,
channels=channels,
enable_auxiliary_loss=self.enable_auxiliary_loss)
self.init_weight()
def forward(self, x):
feats = self.backbone(x)
feats = [feats[i] for i in self.backbone_indices]
logit_list = self.head(feats)
logit_list = [
F.interpolate(
logit,
paddle.shape(x)[2:],
mode='bilinear',
align_corners=self.align_corners) for logit in logit_list
]
return logit_list
def init_weight(self):
if self.pretrained is not None:
utils.load_entire_model(self, self.pretrained)
class UPerNetHead(nn.Layer):
def __init__(self,
num_classes,
fpn_inplanes,
channels,
dropout_prob=0.1,
enable_auxiliary_loss=False,
align_corners=True):
super(UPerNetHead, self).__init__()
self.align_corners = align_corners
self.ppm = layers.PPModule(
in_channels=fpn_inplanes[-1],
out_channels=channels,
bin_sizes=(1, 2, 3, 6),
dim_reduction=True,
align_corners=True)
self.enable_auxiliary_loss = enable_auxiliary_loss
self.lateral_convs = nn.LayerList()
self.fpn_convs = nn.LayerList()
for fpn_inplane in fpn_inplanes[:-1]:
self.lateral_convs.append(
layers.ConvBNReLU(fpn_inplane, channels, 1))
self.fpn_convs.append(
layers.ConvBNReLU(
channels, channels, 3, bias_attr=False))
if self.enable_auxiliary_loss:
self.aux_head = layers.AuxLayer(
fpn_inplanes[2],
fpn_inplanes[2],
num_classes,
dropout_prob=dropout_prob)
self.fpn_bottleneck = layers.ConvBNReLU(
len(fpn_inplanes) * channels, channels, 3, padding=1)
self.conv_last = nn.Sequential(
layers.ConvBNReLU(
len(fpn_inplanes) * channels, channels, 3, bias_attr=False),
nn.Conv2D(
channels, num_classes, kernel_size=1))
self.conv_seg = nn.Conv2D(channels, num_classes, kernel_size=1)
def forward(self, inputs):
laterals = []
for i, lateral_conv in enumerate(self.lateral_convs):
laterals.append(lateral_conv(inputs[i]))
laterals.append(self.ppm(inputs[-1]))
fpn_levels = len(laterals)
for i in range(fpn_levels - 1, 0, -1):
prev_shape = paddle.shape(laterals[i - 1])
laterals[i - 1] = laterals[i - 1] + F.interpolate(
laterals[i],
size=prev_shape[2:],
mode='bilinear',
align_corners=self.align_corners)
fpn_outs = []
for i in range(fpn_levels - 1):
fpn_outs.append(self.fpn_convs[i](laterals[i]))
fpn_outs.append(laterals[-1])
for i in range(fpn_levels - 1, 0, -1):
fpn_outs[i] = F.interpolate(
fpn_outs[i],
size=paddle.shape(fpn_outs[0])[2:],
mode='bilinear',
align_corners=self.align_corners)
fuse_out = paddle.concat(fpn_outs, axis=1)
x = self.fpn_bottleneck(fuse_out)
x = self.conv_seg(x)
logits_list = [x]
if self.enable_auxiliary_loss:
aux_out = self.aux_head(inputs[2])
logits_list.append(aux_out)
return logits_list
else:
return logits_list

@ -15,7 +15,14 @@
import cv2
import numpy as np
from PIL import Image, ImageEnhance
from scipy.ndimage.morphology import distance_transform_edt
from scipy.ndimage import distance_transform_edt
def rescale_size(img_size, target_size):
scale = min(
max(target_size) / max(img_size), min(target_size) / min(img_size))
rescaled_size = [round(i * scale) for i in img_size]
return rescaled_size, scale
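rescale_size picks a single scale so that the longer side fits max(target_size) and the shorter side fits min(target_size), whichever is tighter. For example (values hypothetical):
print(rescale_size((500, 600), (800, 1200)))  # ([800, 960], 1.6): limited by 800/500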
def normalize(im, mean, std):

File diff suppressed because it is too large

@ -19,5 +19,4 @@ from .env import seg_env, get_sys_env
from .utils import *
from .timer import TimeAverager, calculate_eta
from . import visualize
from .config_check import config_check
from .ema import EMA

@ -1,59 +0,0 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
def config_check(cfg, train_dataset=None, val_dataset=None):
"""
To check config
Args:
cfg (paddleseg.cvlibs.Config): An object of paddleseg.cvlibs.Config.
train_dataset (paddle.io.Dataset): Used to read and process training datasets.
val_dataset (paddle.io.Dataset, optional): Used to read and process validation datasets.
"""
num_classes_check(cfg, train_dataset, val_dataset)
def num_classes_check(cfg, train_dataset, val_dataset):
""""
Check that the num_classes in model, train_dataset and val_dataset is consistent.
"""
num_classes_set = set()
if train_dataset and hasattr(train_dataset, 'num_classes'):
num_classes_set.add(train_dataset.num_classes)
if val_dataset and hasattr(val_dataset, 'num_classes'):
num_classes_set.add(val_dataset.num_classes)
if cfg.dic.get('model', None) and cfg.dic['model'].get('num_classes', None):
num_classes_set.add(cfg.dic['model'].get('num_classes'))
if (not cfg.train_dataset) and (not cfg.val_dataset):
raise ValueError(
'One of `train_dataset` or `val_dataset` should be given, but both are None.'
)
if len(num_classes_set) == 0:
raise ValueError(
'`num_classes` is not found. Please set it in model, train_dataset or val_dataset'
)
elif len(num_classes_set) > 1:
raise ValueError(
'`num_classes` is not consistent: {}. Please set it consistently in model or train_dataset or val_dataset'
.format(num_classes_set))
else:
num_classes = num_classes_set.pop()
if train_dataset:
train_dataset.num_classes = num_classes
if val_dataset:
val_dataset.num_classes = num_classes

@ -1,4 +1,4 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.

@ -1,4 +1,4 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.

@ -1,4 +1,4 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -20,6 +20,7 @@ import sys
import cv2
import paddle
import paddlers.models.ppseg as ppseg
IS_WINDOWS = sys.platform == 'win32'
@ -57,8 +58,12 @@ def _get_nvcc_info(cuda_home):
if cuda_home is not None and os.path.isdir(cuda_home):
try:
nvcc = os.path.join(cuda_home, 'bin/nvcc')
nvcc = subprocess.check_output(
"{} -V".format(nvcc), shell=True).decode()
if not IS_WINDOWS:
nvcc = subprocess.check_output(
"{} -V".format(nvcc), shell=True).decode()
else:
nvcc = subprocess.check_output(
"\"{}\" -V".format(nvcc), shell=True).decode()
nvcc = nvcc.strip().split('\n')[-1]
except subprocess.SubprocessError:
nvcc = "Not Available"
@ -116,6 +121,7 @@ def get_sys_env():
except:
pass
env_info['PaddleSeg'] = ppseg.__version__
env_info['PaddlePaddle'] = paddle.__version__
env_info['OpenCV'] = cv2.__version__

@ -135,37 +135,6 @@ def mean_iou(intersect_area, pred_area, label_area):
return np.array(class_iou), miou
def fwiou(intersect_area, pred_area, label_area):
"""
Calculate frequency weighted IoU (FWIoU).
Args:
intersect_area (Tensor): The intersection area of prediction and ground truth on all classes.
pred_area (Tensor): The prediction area on all classes.
label_area (Tensor): The ground truth area on all classes.
Returns:
np.ndarray: iou on all classes.
float: Frequency Weighted iou of all classes.
np.ndarray: Frequency of all classes.
"""
intersect_area = intersect_area.numpy()
pred_area = pred_area.numpy()
label_area = label_area.numpy()
union = pred_area + label_area - intersect_area
class_iou = []
for i in range(len(intersect_area)):
if union[i] == 0:
iou = 0
else:
iou = intersect_area[i] / union[i]
class_iou.append(iou)
fw = label_area / np.sum(label_area)
fwious = np.array(fw) * np.array(class_iou)
fwiou = np.sum(fwious)
return np.array(class_iou), fwiou, fw
def dice(intersect_area, pred_area, label_area):
"""
Calculate DICE.
@ -194,6 +163,7 @@ def dice(intersect_area, pred_area, label_area):
return np.array(class_dice), mdice
# This is a deprecated function, please use class_measurement function.
def accuracy(intersect_area, pred_area):
"""
Calculate accuracy
@ -219,6 +189,38 @@ def accuracy(intersect_area, pred_area):
return np.array(class_acc), macc
def class_measurement(intersect_area, pred_area, label_area):
"""
Calculate accuracy, class precision and class recall.
Args:
intersect_area (Tensor): The intersection area of prediction and ground truth on all classes.
pred_area (Tensor): The prediction area on all classes.
label_area (Tensor): The ground truth area on all classes.
Returns:
float: The mean accuracy.
np.ndarray: The precision of all classes.
np.ndarray: The recall of all classes.
"""
intersect_area = intersect_area.numpy()
pred_area = pred_area.numpy()
label_area = label_area.numpy()
mean_acc = np.sum(intersect_area) / np.sum(pred_area)
class_precision = []
class_recall = []
for i in range(len(intersect_area)):
precision = 0 if pred_area[i] == 0 \
else intersect_area[i] / pred_area[i]
recall = 0 if label_area[i] == 0 \
else intersect_area[i] / label_area[i]
class_precision.append(precision)
class_recall.append(recall)
return mean_acc, np.array(class_precision), np.array(class_recall)
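A small worked example for class_measurement (numbers hypothetical):
import paddle
inter = paddle.to_tensor([8., 5.])
pred = paddle.to_tensor([10., 6.])
label = paddle.to_tensor([9., 7.])
acc, precision, recall = class_measurement(inter, pred, label)
# acc = 13/16 = 0.8125, precision = [0.8, 0.833], recall = [0.889, 0.714]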
def kappa(intersect_area, pred_area, label_area):
"""
Calculate kappa coefficient
@ -231,9 +233,9 @@ def kappa(intersect_area, pred_area, label_area):
Returns:
float: kappa coefficient.
"""
intersect_area = intersect_area.numpy()
pred_area = pred_area.numpy()
label_area = label_area.numpy()
intersect_area = intersect_area.numpy().astype(np.float64)
pred_area = pred_area.numpy().astype(np.float64)
label_area = label_area.numpy().astype(np.float64)
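# The float64 casts above keep the pred_area * label_area product below from overflowing or losing precision on large images.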
total_area = np.sum(label_area)
po = np.sum(intersect_area) / total_area
pe = np.sum(pred_area * label_area) / (total_area * total_area)

@ -1,4 +1,4 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.

@ -160,6 +160,8 @@ def get_image_list(image_path):
for f in files:
if '.ipynb_checkpoints' in root:
continue
if f.startswith('.'):
continue
if os.path.splitext(f)[-1] in valid_suffix:
image_list.append(os.path.join(root, f))
else:

@ -63,7 +63,7 @@ def get_pseudo_color_map(pred, color_map=None):
pred (numpy.ndarray): the original predicted image.
color_map (list, optional): the palette color map. Default: None,
use paddleseg's default color map.
Returns:
(numpy.ndarray): the pseudo color image.
"""
@ -103,3 +103,41 @@ def get_color_map_list(num_classes, custom_color=None):
if custom_color:
color_map[:len(custom_color)] = custom_color
return color_map
def paste_images(image_list):
"""
Paste all images into one image.
Args:
image_list (List or Tuple): The images to be pasted; their sizes must be the same.
Returns:
result_img (PIL.Image): The pasted image.
"""
assert isinstance(image_list,
(list, tuple)), "image_list should be a list or tuple"
assert len(
image_list) > 1, "The length of image_list should be greater than 1"
pil_img_list = []
for img in image_list:
if isinstance(img, str):
assert os.path.exists(img), "The image does not exist: {}".format(
img)
img = PILImage.open(img)
img = np.array(img)
elif isinstance(img, np.ndarray):
img = PILImage.fromarray(img)
pil_img_list.append(img)
sample_img = pil_img_list[0]
size = sample_img.size
for img in pil_img_list:
assert size == img.size, "The image sizes in image_list should be the same"
width, height = sample_img.size
result_img = PILImage.new(sample_img.mode,
(width * len(pil_img_list), height))
for i, img in enumerate(pil_img_list):
result_img.paste(img, box=(width * i, 0))
return result_img
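Hypothetical usage of paste_images, placing two same-sized predictions side by side:
combined = paste_images(['pred_a.png', 'pred_b.png'])  # paths are placeholders
combined.save('side_by_side.png')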

@ -43,42 +43,13 @@ class DiceLoss(nn.Layer):
return self.soft_dice_loss(y_pred.astype(paddle.float32), y_true)
class MultiClassDiceLoss(nn.Layer):
def __init__(
self,
weight,
batch=True,
ignore_index=-1,
do_softmax=False,
**kwargs, ):
super(MultiClassDiceLoss, self).__init__()
self.ignore_index = ignore_index
self.weight = weight
self.do_softmax = do_softmax
self.binary_diceloss = DiceLoss(batch)
def forward(self, y_pred, y_true):
if self.do_softmax:
y_pred = paddle.nn.functional.softmax(y_pred, axis=1)
y_true = F.one_hot(y_true.long(), y_pred.shape[1]).permute(0, 3, 1, 2)
total_loss = 0.0
tmp_i = 0.0
for i in range(y_pred.shape[1]):
if i != self.ignore_index:
diceloss = self.binary_diceloss(y_pred[:, i, :, :],
y_true[:, i, :, :])
total_loss += paddle.multiply(diceloss, self.weight[i])
tmp_i += 1.0
return total_loss / tmp_i
class DiceBCELoss(nn.Layer):
"""Binary change detection task loss"""
def __init__(self):
super(DiceBCELoss, self).__init__()
self.bce_loss = nn.BCELoss()
self.binnary_dice = DiceLoss()
self.binary_dice = DiceLoss()
def forward(self, scores, labels, do_sigmoid=True):
if len(scores.shape) > 3:
@ -87,29 +58,11 @@ class DiceBCELoss(nn.Layer):
labels = labels.squeeze(1)
if do_sigmoid:
scores = paddle.nn.functional.sigmoid(scores.clone())
diceloss = self.binnary_dice(scores, labels)
diceloss = self.binary_dice(scores, labels)
bceloss = self.bce_loss(scores, labels)
return diceloss + bceloss
class McDiceBCELoss(nn.Layer):
"""Multi-class change detection task loss"""
def __init__(self, weight, do_sigmoid=True):
super(McDiceBCELoss, self).__init__()
self.ce_loss = nn.CrossEntropyLoss(weight)
self.dice = MultiClassDiceLoss(weight, do_sigmoid)
def forward(self, scores, labels):
if len(scores.shape) < 4:
scores = scores.unsqueeze(1)
if len(labels.shape) < 4:
labels = labels.unsqueeze(1)
diceloss = self.dice(scores, labels)
bceloss = self.ce_loss(scores, labels)
return diceloss + bceloss
def fccdn_ssl_loss(logits_list, labels):
"""
Self-supervised learning loss for change detection.
@ -160,11 +113,11 @@ def fccdn_ssl_loss(logits_list, labels):
# Seg loss
labels_downsample = labels_downsample.astype(paddle.float32)
loss_aux = 0.2 * criterion_ssl(out1, pred_seg_post_tmp1, False)
loss_aux += 0.2 * criterion_ssl(out2, pred_seg_pre_tmp1, False)
loss_aux += 0.2 * criterion_ssl(
out3, labels_downsample - pred_seg_post_tmp2, False)
loss_aux += 0.2 * criterion_ssl(out4, labels_downsample - pred_seg_pre_tmp2,
False)
loss_aux = criterion_ssl(out1, pred_seg_post_tmp1, False)
loss_aux += criterion_ssl(out2, pred_seg_pre_tmp1, False)
loss_aux += criterion_ssl(out3, labels_downsample - pred_seg_post_tmp2,
False)
loss_aux += criterion_ssl(out4, labels_downsample - pred_seg_pre_tmp2,
False)
return loss_aux

@ -12,4 +12,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from .condensenet_v2 import CondenseNetV2_a, CondenseNetV2_b, CondenseNetV2_c
from .condensenetv2 import CondenseNetV2_A, CondenseNetV2_B, CondenseNetV2_C

@ -1,442 +1,442 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on https://github.com/AgentMaker/Paddle-Image-Models
The copyright of AgentMaker/Paddle-Image-Models is as follows:
Apache License [see LICENSE for details]
"""
import paddle
import paddle.nn as nn
__all__ = ["CondenseNetV2_a", "CondenseNetV2_b", "CondenseNetV2_c"]
class SELayer(nn.Layer):
def __init__(self, inplanes, reduction=16):
super(SELayer, self).__init__()
self.avg_pool = nn.AdaptiveAvgPool2D(1)
self.fc = nn.Sequential(
nn.Linear(
inplanes, inplanes // reduction, bias_attr=False),
nn.ReLU(),
nn.Linear(
inplanes // reduction, inplanes, bias_attr=False),
nn.Sigmoid(), )
def forward(self, x):
b, c, _, _ = x.shape
y = self.avg_pool(x).reshape((b, c))
y = self.fc(y).reshape((b, c, 1, 1))
return x * paddle.expand(y, shape=x.shape)
class HS(nn.Layer):
def __init__(self):
super(HS, self).__init__()
self.relu6 = nn.ReLU6()
def forward(self, inputs):
return inputs * self.relu6(inputs + 3) / 6
class Conv(nn.Sequential):
def __init__(
self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
groups=1,
activation="ReLU",
bn_momentum=0.9, ):
super(Conv, self).__init__()
self.add_sublayer(
"norm", nn.BatchNorm2D(
in_channels, momentum=bn_momentum))
if activation == "ReLU":
self.add_sublayer("activation", nn.ReLU())
elif activation == "HS":
self.add_sublayer("activation", HS())
else:
raise NotImplementedError
self.add_sublayer(
"conv",
nn.Conv2D(
in_channels,
out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
bias_attr=False,
groups=groups, ), )
def ShuffleLayer(x, groups):
batchsize, num_channels, height, width = x.shape
channels_per_group = num_channels // groups
# Reshape
x = x.reshape((batchsize, groups, channels_per_group, height, width))
# Transpose
x = x.transpose((0, 2, 1, 3, 4))
# Reshape
x = x.reshape((batchsize, groups * channels_per_group, height, width))
return x
def ShuffleLayerTrans(x, groups):
batchsize, num_channels, height, width = x.shape
channels_per_group = num_channels // groups
# Reshape
x = x.reshape((batchsize, channels_per_group, groups, height, width))
# Transpose
x = x.transpose((0, 2, 1, 3, 4))
# Reshape
x = x.reshape((batchsize, channels_per_group * groups, height, width))
return x
class CondenseLGC(nn.Layer):
def __init__(
self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
groups=1,
activation="ReLU", ):
super(CondenseLGC, self).__init__()
self.in_channels = in_channels
self.out_channels = out_channels
self.groups = groups
self.norm = nn.BatchNorm2D(self.in_channels)
if activation == "ReLU":
self.activation = nn.ReLU()
elif activation == "HS":
self.activation = HS()
else:
raise NotImplementedError
self.conv = nn.Conv2D(
self.in_channels,
self.out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
groups=self.groups,
bias_attr=False, )
self.register_buffer(
"index", paddle.zeros(
(self.in_channels, ), dtype="int64"))
def forward(self, x):
x = paddle.index_select(x, self.index, axis=1)
x = self.norm(x)
x = self.activation(x)
x = self.conv(x)
x = ShuffleLayer(x, self.groups)
return x
class CondenseSFR(nn.Layer):
def __init__(
self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
groups=1,
activation="ReLU", ):
super(CondenseSFR, self).__init__()
self.in_channels = in_channels
self.out_channels = out_channels
self.groups = groups
self.norm = nn.BatchNorm2D(self.in_channels)
if activation == "ReLU":
self.activation = nn.ReLU()
elif activation == "HS":
self.activation = HS()
else:
raise NotImplementedError
self.conv = nn.Conv2D(
self.in_channels,
self.out_channels,
kernel_size=kernel_size,
padding=padding,
groups=self.groups,
bias_attr=False,
stride=stride, )
self.register_buffer("index",
paddle.zeros(
(self.out_channels, self.out_channels)))
def forward(self, x):
x = self.norm(x)
x = self.activation(x)
x = ShuffleLayerTrans(x, self.groups)
x = self.conv(x) # SIZE: N, C, H, W
N, C, H, W = x.shape
x = x.reshape((N, C, H * W))
x = x.transpose((0, 2, 1)) # SIZE: N, HW, C
# x SIZE: N, HW, C; self.index SIZE: C, C; OUTPUT SIZE: N, HW, C
x = paddle.matmul(x, self.index)
x = x.transpose((0, 2, 1)) # SIZE: N, C, HW
x = x.reshape((N, C, H, W))  # SIZE: N, C, H, W
return x
class _SFR_DenseLayer(nn.Layer):
def __init__(
self,
in_channels,
growth_rate,
group_1x1,
group_3x3,
group_trans,
bottleneck,
activation,
use_se=False, ):
super(_SFR_DenseLayer, self).__init__()
self.group_1x1 = group_1x1
self.group_3x3 = group_3x3
self.group_trans = group_trans
self.use_se = use_se
# 1x1 conv i --> b*k
self.conv_1 = CondenseLGC(
in_channels,
bottleneck * growth_rate,
kernel_size=1,
groups=self.group_1x1,
activation=activation, )
# 3x3 conv b*k --> k
self.conv_2 = Conv(
bottleneck * growth_rate,
growth_rate,
kernel_size=3,
padding=1,
groups=self.group_3x3,
activation=activation, )
# 1x1 res conv k(8-16-32)--> i (k*l)
self.sfr = CondenseSFR(
growth_rate,
in_channels,
kernel_size=1,
groups=self.group_trans,
activation=activation, )
if self.use_se:
self.se = SELayer(inplanes=growth_rate, reduction=1)
def forward(self, x):
x_ = x
x = self.conv_1(x)
x = self.conv_2(x)
if self.use_se:
x = self.se(x)
sfr_feature = self.sfr(x)
y = x_ + sfr_feature
return paddle.concat([y, x], 1)
class _SFR_DenseBlock(nn.Sequential):
def __init__(
self,
num_layers,
in_channels,
growth_rate,
group_1x1,
group_3x3,
group_trans,
bottleneck,
activation,
use_se, ):
super(_SFR_DenseBlock, self).__init__()
for i in range(num_layers):
layer = _SFR_DenseLayer(
in_channels + i * growth_rate,
growth_rate,
group_1x1,
group_3x3,
group_trans,
bottleneck,
activation,
use_se, )
self.add_sublayer("denselayer_%d" % (i + 1), layer)
class _Transition(nn.Layer):
def __init__(self):
super(_Transition, self).__init__()
self.pool = nn.AvgPool2D(kernel_size=2, stride=2)
def forward(self, x):
x = self.pool(x)
return x
class CondenseNetV2(nn.Layer):
def __init__(
self,
stages,
growth,
HS_start_block,
SE_start_block,
fc_channel,
group_1x1,
group_3x3,
group_trans,
bottleneck,
last_se_reduction,
in_channels=3,
class_num=1000, ):
super(CondenseNetV2, self).__init__()
self.stages = stages
self.growth = growth
self.in_channels = in_channels
self.class_num = class_num
self.last_se_reduction = last_se_reduction
assert len(self.stages) == len(self.growth)
self.progress = 0.0
self.init_stride = 2
self.pool_size = 7
self.features = nn.Sequential()
# Initial nChannels should be 3
self.num_features = 2 * self.growth[0]
# Dense-block 1 (224x224)
self.features.add_sublayer(
"init_conv",
nn.Conv2D(
in_channels,
self.num_features,
kernel_size=3,
stride=self.init_stride,
padding=1,
bias_attr=False, ), )
for i in range(len(self.stages)):
activation = "HS" if i >= HS_start_block else "ReLU"
use_se = True if i >= SE_start_block else False
# Dense-block i
self.add_block(i, group_1x1, group_3x3, group_trans, bottleneck,
activation, use_se)
self.fc = nn.Linear(self.num_features, fc_channel)
self.fc_act = HS()
# Classifier layer
if class_num > 0:
self.classifier = nn.Linear(fc_channel, class_num)
self._initialize()
def add_block(self, i, group_1x1, group_3x3, group_trans, bottleneck,
activation, use_se):
# Check if ith is the last one
last = i == len(self.stages) - 1
block = _SFR_DenseBlock(
num_layers=self.stages[i],
in_channels=self.num_features,
growth_rate=self.growth[i],
group_1x1=group_1x1,
group_3x3=group_3x3,
group_trans=group_trans,
bottleneck=bottleneck,
activation=activation,
use_se=use_se, )
self.features.add_sublayer("denseblock_%d" % (i + 1), block)
self.num_features += self.stages[i] * self.growth[i]
if not last:
trans = _Transition()
self.features.add_sublayer("transition_%d" % (i + 1), trans)
else:
self.features.add_sublayer("norm_last",
nn.BatchNorm2D(self.num_features))
self.features.add_sublayer("relu_last", nn.ReLU())
self.features.add_sublayer("pool_last",
nn.AvgPool2D(self.pool_size))
# if useSE:
self.features.add_sublayer(
"se_last",
SELayer(
self.num_features, reduction=self.last_se_reduction))
def forward(self, x):
features = self.features(x)
out = features.reshape((features.shape[0], features.shape[1] *
features.shape[2] * features.shape[3]))
out = self.fc(out)
out = self.fc_act(out)
if self.class_num > 0:
out = self.classifier(out)
return out
def _initialize(self):
# Initialize
for m in self.sublayers():
if isinstance(m, nn.Conv2D):
nn.initializer.KaimingNormal()(m.weight)
elif isinstance(m, nn.BatchNorm2D):
nn.initializer.Constant(value=1.0)(m.weight)
nn.initializer.Constant(value=0.0)(m.bias)
def CondenseNetV2_a(**kwargs):
model = CondenseNetV2(
stages=[1, 1, 4, 6, 8],
growth=[8, 8, 16, 32, 64],
HS_start_block=2,
SE_start_block=3,
fc_channel=828,
group_1x1=8,
group_3x3=8,
group_trans=8,
bottleneck=4,
last_se_reduction=16,
**kwargs)
return model
def CondenseNetV2_b(**kwargs):
model = CondenseNetV2(
stages=[2, 4, 6, 8, 6],
growth=[6, 12, 24, 48, 96],
HS_start_block=2,
SE_start_block=3,
fc_channel=1024,
group_1x1=6,
group_3x3=6,
group_trans=6,
bottleneck=4,
last_se_reduction=16,
**kwargs)
return model
def CondenseNetV2_c(**kwargs):
model = CondenseNetV2(
stages=[4, 6, 8, 10, 8],
growth=[8, 16, 32, 64, 128],
HS_start_block=2,
SE_start_block=3,
fc_channel=1024,
group_1x1=8,
group_3x3=8,
group_trans=8,
bottleneck=4,
last_se_reduction=16,
**kwargs)
return model
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on https://github.com/AgentMaker/Paddle-Image-Models
The copyright of AgentMaker/Paddle-Image-Models is as follows:
Apache License [see LICENSE for details]
"""
import paddle
import paddle.nn as nn
__all__ = ["CondenseNetV2_A", "CondenseNetV2_B", "CondenseNetV2_C"]
class SELayer(nn.Layer):
def __init__(self, inplanes, reduction=16):
super(SELayer, self).__init__()
self.avg_pool = nn.AdaptiveAvgPool2D(1)
self.fc = nn.Sequential(
nn.Linear(
inplanes, inplanes // reduction, bias_attr=False),
nn.ReLU(),
nn.Linear(
inplanes // reduction, inplanes, bias_attr=False),
nn.Sigmoid(), )
def forward(self, x):
b, c, _, _ = x.shape
y = self.avg_pool(x).reshape((b, c))
y = self.fc(y).reshape((b, c, 1, 1))
return x * paddle.expand(y, shape=x.shape)
class HS(nn.Layer):
def __init__(self):
super(HS, self).__init__()
self.relu6 = nn.ReLU6()
def forward(self, inputs):
return inputs * self.relu6(inputs + 3) / 6
class Conv(nn.Sequential):
def __init__(
self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
groups=1,
activation="ReLU",
bn_momentum=0.9, ):
super(Conv, self).__init__()
self.add_sublayer(
"norm", nn.BatchNorm2D(
in_channels, momentum=bn_momentum))
if activation == "ReLU":
self.add_sublayer("activation", nn.ReLU())
elif activation == "HS":
self.add_sublayer("activation", HS())
else:
raise NotImplementedError
self.add_sublayer(
"conv",
nn.Conv2D(
in_channels,
out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
bias_attr=False,
groups=groups, ), )
def ShuffleLayer(x, groups):
batchsize, num_channels, height, width = x.shape
channels_per_group = num_channels // groups
# Reshape
x = x.reshape((batchsize, groups, channels_per_group, height, width))
# Transpose
x = x.transpose((0, 2, 1, 3, 4))
# Reshape
x = x.reshape((batchsize, groups * channels_per_group, height, width))
return x
def ShuffleLayerTrans(x, groups):
batchsize, num_channels, height, width = x.shape
channels_per_group = num_channels // groups
# Reshape
x = x.reshape((batchsize, channels_per_group, groups, height, width))
# Transpose
x = x.transpose((0, 2, 1, 3, 4))
# Reshape
x = x.reshape((batchsize, channels_per_group * groups, height, width))
return x
class CondenseLGC(nn.Layer):
def __init__(
self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
groups=1,
activation="ReLU", ):
super(CondenseLGC, self).__init__()
self.in_channels = in_channels
self.out_channels = out_channels
self.groups = groups
self.norm = nn.BatchNorm2D(self.in_channels)
if activation == "ReLU":
self.activation = nn.ReLU()
elif activation == "HS":
self.activation = HS()
else:
raise NotImplementedError
self.conv = nn.Conv2D(
self.in_channels,
self.out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
groups=self.groups,
bias_attr=False, )
self.register_buffer(
"index", paddle.zeros(
(self.in_channels, ), dtype="int64"))
def forward(self, x):
x = paddle.index_select(x, self.index, axis=1)
x = self.norm(x)
x = self.activation(x)
x = self.conv(x)
x = ShuffleLayer(x, self.groups)
return x
class CondenseSFR(nn.Layer):
def __init__(
self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
groups=1,
activation="ReLU", ):
super(CondenseSFR, self).__init__()
self.in_channels = in_channels
self.out_channels = out_channels
self.groups = groups
self.norm = nn.BatchNorm2D(self.in_channels)
if activation == "ReLU":
self.activation = nn.ReLU()
elif activation == "HS":
self.activation = HS()
else:
raise NotImplementedError
self.conv = nn.Conv2D(
self.in_channels,
self.out_channels,
kernel_size=kernel_size,
padding=padding,
groups=self.groups,
bias_attr=False,
stride=stride, )
self.register_buffer("index",
paddle.zeros(
(self.out_channels, self.out_channels)))
def forward(self, x):
x = self.norm(x)
x = self.activation(x)
x = ShuffleLayerTrans(x, self.groups)
x = self.conv(x) # SIZE: N, C, H, W
N, C, H, W = x.shape
x = x.reshape((N, C, H * W))
x = x.transpose((0, 2, 1)) # SIZE: N, HW, C
# x SIZE: N, HW, C; self.index SIZE: C, C; OUTPUT SIZE: N, HW, C
x = paddle.matmul(x, self.index)
x = x.transpose((0, 2, 1)) # SIZE: N, C, HW
x = x.reshape((N, C, H, W))  # SIZE: N, C, H, W
return x
class _SFR_DenseLayer(nn.Layer):
def __init__(
self,
in_channels,
growth_rate,
group_1x1,
group_3x3,
group_trans,
bottleneck,
activation,
use_se=False, ):
super(_SFR_DenseLayer, self).__init__()
self.group_1x1 = group_1x1
self.group_3x3 = group_3x3
self.group_trans = group_trans
self.use_se = use_se
# 1x1 conv i --> b*k
self.conv_1 = CondenseLGC(
in_channels,
bottleneck * growth_rate,
kernel_size=1,
groups=self.group_1x1,
activation=activation, )
# 3x3 conv b*k --> k
self.conv_2 = Conv(
bottleneck * growth_rate,
growth_rate,
kernel_size=3,
padding=1,
groups=self.group_3x3,
activation=activation, )
# 1x1 res conv k(8-16-32)--> i (k*l)
self.sfr = CondenseSFR(
growth_rate,
in_channels,
kernel_size=1,
groups=self.group_trans,
activation=activation, )
if self.use_se:
self.se = SELayer(inplanes=growth_rate, reduction=1)
def forward(self, x):
x_ = x
x = self.conv_1(x)
        x = self.conv_2(x)
        if self.use_se:
            x = self.se(x)
        # Sparse feature reactivation: refresh the reused input features
        # with the newly computed ones, then concatenate both paths.
        sfr_feature = self.sfr(x)
        y = x_ + sfr_feature
        return paddle.concat([y, x], 1)


class _SFR_DenseBlock(nn.Sequential):
    def __init__(
            self,
            num_layers,
            in_channels,
            growth_rate,
            group_1x1,
            group_3x3,
            group_trans,
            bottleneck,
            activation,
            use_se, ):
        super(_SFR_DenseBlock, self).__init__()
        for i in range(num_layers):
            layer = _SFR_DenseLayer(
                in_channels + i * growth_rate,
                growth_rate,
                group_1x1,
                group_3x3,
                group_trans,
                bottleneck,
                activation,
                use_se, )
            self.add_sublayer("denselayer_%d" % (i + 1), layer)


class _Transition(nn.Layer):
    def __init__(self):
        super(_Transition, self).__init__()
        self.pool = nn.AvgPool2D(kernel_size=2, stride=2)

    def forward(self, x):
        x = self.pool(x)
        return x


class CondenseNetV2(nn.Layer):
    def __init__(
            self,
            stages,
            growth,
            HS_start_block,
            SE_start_block,
            fc_channel,
            group_1x1,
            group_3x3,
            group_trans,
            bottleneck,
            last_se_reduction,
            in_channels=3,
            class_num=1000, ):
        super(CondenseNetV2, self).__init__()
        self.stages = stages
        self.growth = growth
        self.in_channels = in_channels
        self.class_num = class_num
        self.last_se_reduction = last_se_reduction
        assert len(self.stages) == len(self.growth)
        self.progress = 0.0
        self.init_stride = 2
        self.pool_size = 7

        self.features = nn.Sequential()
        # Initial nChannels should be 3
        self.num_features = 2 * self.growth[0]
        # Dense-block 1 (224x224)
        self.features.add_sublayer(
            "init_conv",
            nn.Conv2D(
                in_channels,
                self.num_features,
                kernel_size=3,
                stride=self.init_stride,
                padding=1,
                bias_attr=False, ), )
        for i in range(len(self.stages)):
            activation = "HS" if i >= HS_start_block else "ReLU"
            use_se = True if i >= SE_start_block else False
            # Dense-block i
            self.add_block(i, group_1x1, group_3x3, group_trans, bottleneck,
                           activation, use_se)

        self.fc = nn.Linear(self.num_features, fc_channel)
        self.fc_act = HS()

        # Classifier layer
        if class_num > 0:
            self.classifier = nn.Linear(fc_channel, class_num)
        self._initialize()

    def add_block(self, i, group_1x1, group_3x3, group_trans, bottleneck,
                  activation, use_se):
        # Check if the i-th block is the last one
        last = i == len(self.stages) - 1
        block = _SFR_DenseBlock(
            num_layers=self.stages[i],
            in_channels=self.num_features,
            growth_rate=self.growth[i],
            group_1x1=group_1x1,
            group_3x3=group_3x3,
            group_trans=group_trans,
            bottleneck=bottleneck,
            activation=activation,
            use_se=use_se, )
        self.features.add_sublayer("denseblock_%d" % (i + 1), block)
        self.num_features += self.stages[i] * self.growth[i]
        if not last:
            trans = _Transition()
            self.features.add_sublayer("transition_%d" % (i + 1), trans)
        else:
            self.features.add_sublayer("norm_last",
                                       nn.BatchNorm2D(self.num_features))
            self.features.add_sublayer("relu_last", nn.ReLU())
            self.features.add_sublayer("pool_last",
                                       nn.AvgPool2D(self.pool_size))
            # if useSE:
            self.features.add_sublayer(
                "se_last",
                SELayer(
                    self.num_features, reduction=self.last_se_reduction))

    def forward(self, x):
        features = self.features(x)
        # Flatten NCHW features to (N, C*H*W) before the fully connected head.
        out = features.reshape((features.shape[0], features.shape[1] *
                                features.shape[2] * features.shape[3]))
        out = self.fc(out)
        out = self.fc_act(out)
        if self.class_num > 0:
            out = self.classifier(out)
        return out

    def _initialize(self):
        # Kaiming-normal init for conv kernels; constant init for BN affine params.
        for m in self.sublayers():
            if isinstance(m, nn.Conv2D):
                nn.initializer.KaimingNormal()(m.weight)
            elif isinstance(m, nn.BatchNorm2D):
                nn.initializer.Constant(value=1.0)(m.weight)
                nn.initializer.Constant(value=0.0)(m.bias)


def CondenseNetV2_A(**kwargs):
    model = CondenseNetV2(
        stages=[1, 1, 4, 6, 8],
        growth=[8, 8, 16, 32, 64],
        HS_start_block=2,
        SE_start_block=3,
        fc_channel=828,
        group_1x1=8,
        group_3x3=8,
        group_trans=8,
        bottleneck=4,
        last_se_reduction=16,
        **kwargs)
    return model


def CondenseNetV2_B(**kwargs):
    model = CondenseNetV2(
        stages=[2, 4, 6, 8, 6],
        growth=[6, 12, 24, 48, 96],
        HS_start_block=2,
        SE_start_block=3,
        fc_channel=1024,
        group_1x1=6,
        group_3x3=6,
        group_trans=6,
        bottleneck=4,
        last_se_reduction=16,
        **kwargs)
    return model


def CondenseNetV2_C(**kwargs):
    model = CondenseNetV2(
        stages=[4, 6, 8, 10, 8],
        growth=[8, 16, 32, 64, 128],
        HS_start_block=2,
        SE_start_block=3,
        fc_channel=1024,
        group_1x1=8,
        group_3x3=8,
        group_trans=8,
        bottleneck=4,
        last_se_reduction=16,
        **kwargs)
    return model
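A quick smoke test for these presets can be sketched as follows (assumes PaddlePaddle is installed and the definitions above, plus HS and SELayer from earlier in this file, are importable; the 10-class head is arbitrary):

import paddle

model = CondenseNetV2_A(class_num=10)  # arbitrary toy head via **kwargs
model.eval()
x = paddle.randn([1, 3, 224, 224])  # NCHW; 224x224 matches the stride/pool math above
with paddle.no_grad():
    logits = model(x)
print(logits.shape)  # expected: [1, 10]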

@@ -1067,7 +1067,7 @@ class FCCDN(BaseChangeDetector):
             return {
                 'types':
                 [seg_losses.CrossEntropyLoss(), cmcd.losses.fccdn_ssl_loss],
-                'coef': [1.0, 1.0]
+                'coef': [1.0, 0.2]
             }
         else:
             raise ValueError(
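For readers unfamiliar with the losses spec: a {'types': [...], 'coef': [...]} pair is reduced to a weighted sum, so this hunk down-weights the FCCDN self-supervised term to 0.2. A minimal sketch of that reduction (illustrative names, not the PaddleRS internals):

def weighted_total_loss(spec, logits, labels):
    # Evaluate each loss and sum the results scaled by their coefficients.
    total = 0.0
    for loss_fn, coef in zip(spec['types'], spec['coef']):
        total = total + coef * loss_fn(logits, labels)
    return total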

@@ -34,9 +34,7 @@ from paddlers.utils.checkpoint import cls_pretrain_weights_dict
 from paddlers.transforms import Resize, decode_image
 from .base import BaseModel

-__all__ = [
-    "ResNet50_vd", "MobileNetV3_small_x1_0", "HRNet_W18_C", "CondenseNetV2_b"
-]
+__all__ = ["ResNet50_vd", "MobileNetV3", "HRNet", "CondenseNetV2"]


 class BaseClassifier(BaseModel):
@@ -600,13 +598,13 @@ class ResNet50_vd(BaseClassifier):
             **params)


-class MobileNetV3_small_x1_0(BaseClassifier):
+class MobileNetV3(BaseClassifier):
     def __init__(self,
                  num_classes=2,
                  use_mixed_loss=False,
                  losses=None,
                  **params):
-        super(MobileNetV3_small_x1_0, self).__init__(
+        super(MobileNetV3, self).__init__(
             model_name='MobileNetV3_small_x1_0',
             num_classes=num_classes,
             use_mixed_loss=use_mixed_loss,
@@ -614,13 +612,13 @@ class MobileNetV3_small_x1_0(BaseClassifier):
             **params)


-class HRNet_W18_C(BaseClassifier):
+class HRNet(BaseClassifier):
     def __init__(self,
                  num_classes=2,
                  use_mixed_loss=False,
                  losses=None,
                  **params):
-        super(HRNet_W18_C, self).__init__(
+        super(HRNet, self).__init__(
             model_name='HRNet_W18_C',
             num_classes=num_classes,
             use_mixed_loss=use_mixed_loss,
@@ -628,15 +626,21 @@ class HRNet_W18_C(BaseClassifier):
             **params)


-class CondenseNetV2_b(BaseClassifier):
+class CondenseNetV2(BaseClassifier):
     def __init__(self,
                  num_classes=2,
                  use_mixed_loss=False,
                  losses=None,
+                 in_channels=3,
+                 arch='A',
                  **params):
-        super(CondenseNetV2_b, self).__init__(
-            model_name='CondenseNetV2_b',
+        if arch not in ('A', 'B', 'C'):
+            raise ValueError("{} is not a supported architecture.".format(arch))
+        model_name = 'CondenseNetV2_' + arch
+        super(CondenseNetV2, self).__init__(
+            model_name=model_name,
             num_classes=num_classes,
             use_mixed_loss=use_mixed_loss,
             losses=losses,
+            in_channels=in_channels,
             **params)
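With this change a single task class covers all three variants; usage might look like the following (import path assumed, not confirmed by this diff):

from paddlers.tasks.classifier import CondenseNetV2  # assumed import path

clf = CondenseNetV2(num_classes=21, arch='B')  # resolves to model_name 'CondenseNetV2_B'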

@@ -773,7 +773,7 @@ class LESRCNN(BaseRestorer):
                  group=1,
                  **params):
         params.update({
-            'scale': sr_factor,
+            'scale': sr_factor if sr_factor is not None else 1,
             'multi_scale': multi_scale,
             'group': group
         })
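The guard above makes a None sr_factor fall back to a scale of 1; a quick pure-Python check of the expression:

for sr_factor in (None, 2, 4):
    scale = sr_factor if sr_factor is not None else 1
    print(sr_factor, '->', scale)  # None -> 1, 2 -> 2, 4 -> 4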

@@ -185,14 +185,7 @@ class BaseSegmenter(BaseModel):
                 )
             losses = [getattr(seg_losses, loss)() for loss in losses]
             loss_type = [seg_losses.MixedLoss(losses=losses, coef=list(coef))]
-            if self.model_name == 'FastSCNN':
-                loss_type *= 2
-                loss_coef = [1.0, 0.4]
-            elif self.model_name == 'BiSeNetV2':
-                loss_type *= 5
-                loss_coef = [1.0] * 5
-            else:
-                loss_coef = [1.0]
+            loss_coef = [1.0]
             losses = {'types': loss_type, 'coef': loss_coef}
         return losses
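The per-model branches removed here reappear below as default_loss overrides on the models themselves. The pattern, as an illustrative subclass (ToySegmenter is hypothetical):

class ToySegmenter(BaseSegmenter):  # hypothetical example
    def default_loss(self):
        losses = super(ToySegmenter, self).default_loss()
        losses['types'] *= 2  # one copy of the mixed loss per output head
        losses['coef'] = [1.0, 0.4]  # main head weighted 1.0, auxiliary head 0.4
        return losses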
@@ -761,7 +754,7 @@ class UNet(BaseSegmenter):
             })
         super(UNet, self).__init__(
             model_name='UNet',
-            input_channel=in_channels,
+            in_channels=in_channels,
             num_classes=num_classes,
             use_mixed_loss=use_mixed_loss,
             losses=losses,
@@ -789,7 +782,7 @@ class DeepLabV3P(BaseSegmenter):
         if params.get('with_net', True):
             with DisablePrint():
                 backbone = getattr(ppseg.models, backbone)(
-                    input_channel=in_channels, output_stride=output_stride)
+                    in_channels=in_channels, output_stride=output_stride)
         else:
             backbone = None
         params.update({
@@ -809,6 +802,7 @@ class DeepLabV3P(BaseSegmenter):
 class FastSCNN(BaseSegmenter):
     def __init__(self,
+                 in_channels=3,
                  num_classes=2,
                  use_mixed_loss=False,
                  losses=None,
@@ -817,14 +811,22 @@ class FastSCNN(BaseSegmenter):
         params.update({'align_corners': align_corners})
         super(FastSCNN, self).__init__(
             model_name='FastSCNN',
+            in_channels=in_channels,
             num_classes=num_classes,
             use_mixed_loss=use_mixed_loss,
             losses=losses,
             **params)

+    def default_loss(self):
+        losses = super(FastSCNN, self).default_loss()
+        losses['types'] *= 2
+        losses['coef'] = [1.0, 0.4]
+        return losses
+

 class HRNet(BaseSegmenter):
     def __init__(self,
+                 in_channels=3,
                  num_classes=2,
                  width=48,
                  use_mixed_loss=False,
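Note that losses['types'] *= 2 relies on Python list repetition, so both entries reference the same MixedLoss object; that is harmless here because the loss holds no per-head state. A toy illustration of the idiom:

spec = {'types': ['mixed'], 'coef': [1.0]}
spec['types'] *= 2
spec['coef'] = [1.0, 0.4]
print(spec)  # {'types': ['mixed', 'mixed'], 'coef': [1.0, 0.4]}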
@@ -839,7 +841,7 @@ class HRNet(BaseSegmenter):
         if params.get('with_net', True):
             with DisablePrint():
                 backbone = getattr(ppseg.models, self.backbone_name)(
-                    align_corners=align_corners)
+                    in_channels=in_channels, align_corners=align_corners)
         else:
             backbone = None
@@ -855,6 +857,7 @@ class HRNet(BaseSegmenter):
 class BiSeNetV2(BaseSegmenter):
     def __init__(self,
+                 in_channels=3,
                  num_classes=2,
                  use_mixed_loss=False,
                  losses=None,
@@ -863,11 +866,18 @@ class BiSeNetV2(BaseSegmenter):
         params.update({'align_corners': align_corners})
         super(BiSeNetV2, self).__init__(
             model_name='BiSeNetV2',
+            in_channels=in_channels,
             num_classes=num_classes,
             use_mixed_loss=use_mixed_loss,
             losses=losses,
             **params)

+    def default_loss(self):
+        losses = super(BiSeNetV2, self).default_loss()
+        losses['types'] *= 5
+        losses['coef'] = [1.0] * 5
+        return losses
+

 class FarSeg(BaseSegmenter):
     def __init__(self,

@@ -493,11 +493,12 @@ def load_pretrain_weights(model, pretrain_weights=None, model_name=None):
             num_params_loaded = 0
             for k in model_state_dict:
                 if k not in param_state_dict:
-                    logging.warning("{} is not in pretrained model".format(k))
+                    logging.warning("{} is not in the pretrained model.".format(
+                        k))
                 elif list(param_state_dict[k].shape) != list(model_state_dict[k]
                                                              .shape):
                     logging.warning(
-                        "[SKIP] Shape of pretrained params {} doesn't match.(Pretrained: {}, Actual: {})"
+                        "[SKIP] Shape of parameters {} do not match. (pretrained: {} vs actual: {})"
                         .format(k, param_state_dict[k].shape, model_state_dict[
                             k].shape))
                 else:
@@ -507,11 +508,11 @@ def load_pretrain_weights(model, pretrain_weights=None, model_name=None):
             logging.info("There are {}/{} variables loaded into {}.".format(
                 num_params_loaded, len(model_state_dict), model_name))
         else:
-            raise ValueError('The pretrained model directory is not Found: {}'.
+            raise ValueError('The pretrained model directory is not found: {}'.
                              format(pretrain_weights))
     else:
         logging.info(
-            'No pretrained model to load, {} will be trained from scratch.'.
+            'No pretrained model to load. {} will be trained from scratch.'.
             format(model_name))
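The loading policy these messages describe, keeping only same-name, same-shape tensors, can be sketched as follows (illustrative helper, not the actual PaddleRS function):

def filter_matching_params(model_state_dict, param_state_dict):
    # Keep a pretrained tensor only if the model has a parameter with the
    # same name and shape; everything else is skipped with a warning upstream.
    matched = {}
    for k, v in model_state_dict.items():
        if k in param_state_dict and list(param_state_dict[k].shape) == list(v.shape):
            matched[k] = param_state_dict[k]
    return matched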

@@ -32,6 +32,7 @@
 | Change Detection | FC-Siam-conc | Supported | - | - | - |
 | Change Detection | FC-Siam-diff | Supported | - | - | - |
 | Change Detection | ChangeFormer | Supported | - | - | - |
+| Scene Classification | CondenseNet V2 | Supported | - | - | - |
 | Scene Classification | HRNet | Supported | - | - | - |
 | Scene Classification | MobileNetV3 | Supported | - | - | - |
 | Scene Classification | ResNet50-vd | Supported | - | - | - |
@@ -43,8 +44,11 @@
 | Object Detection | PP-YOLO Tiny | Supported | - | - | - |
 | Object Detection | PP-YOLOv2 | Supported | - | - | - |
 | Object Detection | YOLOv3 | Supported | - | - | - |
+| Image Segmentation | BiSeNet V2 | Supported | - | - | - |
 | Image Segmentation | DeepLab V3+ | Supported | - | - | - |
+| Image Segmentation | FarSeg | Supported | - | - | - |
 | Image Segmentation | Fast-SCNN | Supported | - | - | - |
+| Image Segmentation | HRNet | Supported | - | - | - |
 | Image Segmentation | UNet | Supported | - | - | - |

 ## 3 Introduction to the Testing Tools

@@ -119,6 +119,7 @@ def parse_args(*args, **kwargs):
     # Global settings
     parser.add_argument('cmd', choices=['train', 'eval'])
     parser.add_argument('task', choices=['cd', 'clas', 'det', 'res', 'seg'])
+    parser.add_argument('--seed', type=int, default=None)

     # Data
     parser.add_argument('--datasets', type=dict, default={})
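The new --seed flag presumably feeds a seeding routine along these lines (illustrative; the actual wiring lives elsewhere in the script):

import random

import numpy as np
import paddle

def seed_everything(seed):
    # Seed the three RNG sources commonly used in a Paddle training run.
    random.seed(seed)
    np.random.seed(seed)
    paddle.seed(seed)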

@@ -1,5 +1,7 @@
 # Basic configurations of AirChange dataset

+seed: 1024
+
 datasets:
     train: !Node
         type: CDDataset

@@ -1,5 +1,7 @@
 # Basic configurations of LEVIR-CD dataset

+seed: 1024
+
 datasets:
     train: !Node
         type: CDDataset

@@ -1,8 +0,0 @@
-# Basic configurations of BIT
-
-_base_: ../_base_/airchange.yaml
-
-save_dir: ./test_tipc/output/cd/bit/
-
-model: !Node
-    type: BIT

Some files were not shown because too many files have changed in this diff.