Update ppseg

own
Bobholamovic 3 years ago
parent 19cf02c1c2
commit 5834df2fad
  1. 2
      paddlers/models/ppseg/__init__.py
  2. 107
      paddlers/models/ppseg/core/infer.py
  3. 36
      paddlers/models/ppseg/core/predict.py
  4. 78
      paddlers/models/ppseg/core/train.py
  5. 106
      paddlers/models/ppseg/core/val.py
  6. 4
      paddlers/models/ppseg/cvlibs/callbacks.py
  7. 211
      paddlers/models/ppseg/cvlibs/config.py
  8. 4
      paddlers/models/ppseg/cvlibs/manager.py
  9. 34
      paddlers/models/ppseg/cvlibs/param_init.py
  10. 1
      paddlers/models/ppseg/datasets/__init__.py
  11. 36
      paddlers/models/ppseg/datasets/ade.py
  12. 10
      paddlers/models/ppseg/datasets/chase_db1.py
  13. 6
      paddlers/models/ppseg/datasets/cityscapes.py
  14. 6
      paddlers/models/ppseg/datasets/cocostuff.py
  15. 62
      paddlers/models/ppseg/datasets/dataset.py
  16. 10
      paddlers/models/ppseg/datasets/drive.py
  17. 12
      paddlers/models/ppseg/datasets/eg1800.py
  18. 10
      paddlers/models/ppseg/datasets/hrf.py
  19. 8
      paddlers/models/ppseg/datasets/mini_deep_globe_road_extraction.py
  20. 8
      paddlers/models/ppseg/datasets/optic_disc_seg.py
  21. 6
      paddlers/models/ppseg/datasets/pascal_context.py
  22. 4
      paddlers/models/ppseg/datasets/pp_humanseg14k.py
  23. 135
      paddlers/models/ppseg/datasets/pssl.py
  24. 10
      paddlers/models/ppseg/datasets/stare.py
  25. 12
      paddlers/models/ppseg/datasets/supervisely.py
  26. 10
      paddlers/models/ppseg/datasets/voc.py
  27. 9
      paddlers/models/ppseg/models/__init__.py
  28. 6
      paddlers/models/ppseg/models/ann.py
  29. 12
      paddlers/models/ppseg/models/attention_unet.py
  30. 4
      paddlers/models/ppseg/models/backbones/__init__.py
  31. 318
      paddlers/models/ppseg/models/backbones/ghostnet.py
  32. 10
      paddlers/models/ppseg/models/backbones/hrnet.py
  33. 974
      paddlers/models/ppseg/models/backbones/lite_hrnet.py
  34. 12
      paddlers/models/ppseg/models/backbones/mix_transformer.py
  35. 333
      paddlers/models/ppseg/models/backbones/mobilenetv2.py
  36. 500
      paddlers/models/ppseg/models/backbones/mobilenetv3.py
  37. 13
      paddlers/models/ppseg/models/backbones/resnet_vd.py
  38. 315
      paddlers/models/ppseg/models/backbones/shufflenetv2.py
  39. 186
      paddlers/models/ppseg/models/backbones/stdcnet.py
  40. 14
      paddlers/models/ppseg/models/backbones/swin_transformer.py
  41. 716
      paddlers/models/ppseg/models/backbones/top_transformer.py
  42. 4
      paddlers/models/ppseg/models/backbones/transformer_utils.py
  43. 12
      paddlers/models/ppseg/models/backbones/vision_transformer.py
  44. 17
      paddlers/models/ppseg/models/backbones/xception_deeplab.py
  45. 24
      paddlers/models/ppseg/models/bisenet.py
  46. 6
      paddlers/models/ppseg/models/bisenetv1.py
  47. 174
      paddlers/models/ppseg/models/ccnet.py
  48. 6
      paddlers/models/ppseg/models/danet.py
  49. 403
      paddlers/models/ppseg/models/ddrnet.py
  50. 10
      paddlers/models/ppseg/models/decoupled_segnet.py
  51. 6
      paddlers/models/ppseg/models/deeplab.py
  52. 6
      paddlers/models/ppseg/models/dmnet.py
  53. 6
      paddlers/models/ppseg/models/dnlnet.py
  54. 10
      paddlers/models/ppseg/models/emanet.py
  55. 6
      paddlers/models/ppseg/models/encnet.py
  56. 10
      paddlers/models/ppseg/models/enet.py
  57. 6
      paddlers/models/ppseg/models/espnet.py
  58. 6
      paddlers/models/ppseg/models/espnetv1.py
  59. 22
      paddlers/models/ppseg/models/fast_scnn.py
  60. 6
      paddlers/models/ppseg/models/fastfcn.py
  61. 6
      paddlers/models/ppseg/models/fcn.py
  62. 6
      paddlers/models/ppseg/models/gcnet.py
  63. 8
      paddlers/models/ppseg/models/ginet.py
  64. 198
      paddlers/models/ppseg/models/glore.py
  65. 10
      paddlers/models/ppseg/models/gscnn.py
  66. 10
      paddlers/models/ppseg/models/hardnet.py
  67. 6
      paddlers/models/ppseg/models/hrnet_contrast.py
  68. 6
      paddlers/models/ppseg/models/isanet.py
  69. 3
      paddlers/models/ppseg/models/layers/__init__.py
  70. 2
      paddlers/models/ppseg/models/layers/activation.py
  71. 128
      paddlers/models/ppseg/models/layers/attention.py
  72. 59
      paddlers/models/ppseg/models/layers/layer_libs.py
  73. 2
      paddlers/models/ppseg/models/layers/nonlocal2d.py
  74. 2
      paddlers/models/ppseg/models/layers/pyramid_pool.py
  75. 285
      paddlers/models/ppseg/models/layers/tensor_fusion.py
  76. 133
      paddlers/models/ppseg/models/layers/tensor_fusion_helper.py
  77. 4
      paddlers/models/ppseg/models/losses/binary_cross_entropy_loss.py
  78. 2
      paddlers/models/ppseg/models/losses/bootstrapped_cross_entropy.py
  79. 6
      paddlers/models/ppseg/models/losses/cross_entropy_loss.py
  80. 4
      paddlers/models/ppseg/models/losses/decoupledsegnet_relax_boundary_loss.py
  81. 4
      paddlers/models/ppseg/models/losses/detail_aggregate_loss.py
  82. 67
      paddlers/models/ppseg/models/losses/dice_loss.py
  83. 4
      paddlers/models/ppseg/models/losses/edge_attention_loss.py
  84. 124
      paddlers/models/ppseg/models/losses/focal_loss.py
  85. 2
      paddlers/models/ppseg/models/losses/gscnn_dual_task_loss.py
  86. 2
      paddlers/models/ppseg/models/losses/kl_loss.py
  87. 24
      paddlers/models/ppseg/models/losses/l1_loss.py
  88. 18
      paddlers/models/ppseg/models/losses/lovasz_loss.py
  89. 2
      paddlers/models/ppseg/models/losses/mean_square_error_loss.py
  90. 2
      paddlers/models/ppseg/models/losses/mixed_loss.py
  91. 4
      paddlers/models/ppseg/models/losses/ohem_cross_entropy_loss.py
  92. 4
      paddlers/models/ppseg/models/losses/ohem_edge_attention_loss.py
  93. 7
      paddlers/models/ppseg/models/losses/pixel_contrast_cross_entropy_loss.py
  94. 2
      paddlers/models/ppseg/models/losses/point_cross_entropy_loss.py
  95. 2
      paddlers/models/ppseg/models/losses/rmi_loss.py
  96. 14
      paddlers/models/ppseg/models/losses/semantic_connectivity_loss.py
  97. 2
      paddlers/models/ppseg/models/losses/semantic_encode_cross_entropy_loss.py
  98. 162
      paddlers/models/ppseg/models/lraspp.py
  99. 8
      paddlers/models/ppseg/models/mla_transformer.py
  100. 289
      paddlers/models/ppseg/models/mobileseg.py
  101. Some files were not shown because too many files have changed in this diff Show More

@ -1,4 +1,4 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.

@ -21,88 +21,16 @@ import paddle
import paddle.nn.functional as F
def get_reverse_list(ori_shape, transforms):
    """
    Get the list of shape-changing steps that must be undone to recover
    the original image shape.

    The transforms are walked in order while tracking the (h, w) each op
    would produce. For every op that resizes or pads, the shape the image
    had *before* that op is recorded.

    Args:
        ori_shape (list): Origin shape of image; index 0 is read as the
            height and index 1 as the width.
        transforms (list): List of transform objects. Ops are recognized
            by class name ('Resize', 'ResizeByLong', 'ResizeByShort',
            'Pad', 'PadByAspectRatio', 'LimitLong'); any other op is
            ignored.

    Returns:
        list: List of tuple, there are two format:
            ('resize', (h, w)) The image shape before resize,
            ('padding', (h, w)) The image shape before padding.
    """
    reverse_list = []
    h, w = ori_shape[0], ori_shape[1]
    for op in transforms:
        op_name = op.__class__.__name__
        if op_name == 'Resize':
            reverse_list.append(('resize', (h, w)))
            h, w = op.target_size[0], op.target_size[1]
        elif op_name == 'ResizeByLong':
            reverse_list.append(('resize', (h, w)))
            long_edge = max(h, w)
            short_edge = min(h, w)
            short_edge = int(round(short_edge * op.long_size / long_edge))
            long_edge = op.long_size
            h, w = _edges_to_hw(h, w, long_edge, short_edge)
        elif op_name == 'ResizeByShort':
            reverse_list.append(('resize', (h, w)))
            long_edge = max(h, w)
            short_edge = min(h, w)
            long_edge = int(round(long_edge * op.short_size / short_edge))
            short_edge = op.short_size
            h, w = _edges_to_hw(h, w, long_edge, short_edge)
        elif op_name == 'Pad':
            reverse_list.append(('padding', (h, w)))
            # NOTE(review): target_size is unpacked as (w, h) here but as
            # (h, w) in the 'Resize' branch -- confirm which order the
            # transform classes actually use.
            w, h = op.target_size[0], op.target_size[1]
        elif op_name == 'PadByAspectRatio':
            reverse_list.append(('padding', (h, w)))
            ratio = w / h
            if ratio > op.aspect_ratio:
                h = int(w / op.aspect_ratio)
            elif ratio < op.aspect_ratio:
                w = int(h * op.aspect_ratio)
        elif op_name == 'LimitLong':
            long_edge = max(h, w)
            short_edge = min(h, w)
            # The short edge must be scaled with the ORIGINAL long edge as
            # the divisor, so it is computed before long_edge is replaced
            # by the limit. (Overwriting long_edge first made the scale
            # factor always 1 and left the short edge unchanged.)
            if (op.max_long is not None) and (long_edge > op.max_long):
                reverse_list.append(('resize', (h, w)))
                short_edge = int(round(short_edge * op.max_long / long_edge))
                long_edge = op.max_long
            elif (op.min_long is not None) and (long_edge < op.min_long):
                reverse_list.append(('resize', (h, w)))
                short_edge = int(round(short_edge * op.min_long / long_edge))
                long_edge = op.min_long
            h, w = _edges_to_hw(h, w, long_edge, short_edge)
    return reverse_list


def _edges_to_hw(h, w, long_edge, short_edge):
    """Map (long_edge, short_edge) back to (h, w), keeping the orientation of the current h and w."""
    if h > w:
        return long_edge, short_edge
    return short_edge, long_edge
def reverse_transform(pred, ori_shape, transforms, mode='nearest'):
def reverse_transform(pred, trans_info, mode='nearest'):
"""recover pred to origin shape"""
reverse_list = get_reverse_list(ori_shape, transforms)
intTypeList = [paddle.int8, paddle.int16, paddle.int32, paddle.int64]
dtype = pred.dtype
for item in reverse_list[::-1]:
if item[0] == 'resize':
for item in trans_info[::-1]:
if isinstance(item[0], list):
trans_mode = item[0][0]
else:
trans_mode = item[0]
if trans_mode == 'resize':
h, w = item[1][0], item[1][1]
if paddle.get_device() == 'cpu' and dtype in intTypeList:
pred = paddle.cast(pred, 'float32')
@ -110,7 +38,7 @@ def reverse_transform(pred, ori_shape, transforms, mode='nearest'):
pred = paddle.cast(pred, dtype)
else:
pred = F.interpolate(pred, (h, w), mode=mode)
elif item[0] == 'padding':
elif trans_mode == 'padding':
h, w = item[1][0], item[1][1]
pred = pred[:, :, 0:h, 0:w]
else:
@ -205,8 +133,7 @@ def slide_inference(model, im, crop_size, stride):
def inference(model,
im,
ori_shape=None,
transforms=None,
trans_info=None,
is_slide=False,
stride=None,
crop_size=None):
@ -216,8 +143,7 @@ def inference(model,
Args:
model (paddle.nn.Layer): model to get logits of image.
im (Tensor): the input image.
ori_shape (list): Origin shape of image.
transforms (list): Transforms for image.
trans_info (list): Information recording how the image shape was changed during preprocessing. Default: None.
is_slide (bool): Whether to infer by sliding window. Default: False.
crop_size (tuple|list). The size of sliding window, (w, h). It should be provided if is_slide is True.
stride (tuple|list). The size of stride, (w, h). It should be provided if is_slide is True.
@ -239,8 +165,8 @@ def inference(model,
logit = slide_inference(model, im, crop_size=crop_size, stride=stride)
if hasattr(model, 'data_format') and model.data_format == 'NHWC':
logit = logit.transpose((0, 3, 1, 2))
if ori_shape is not None:
logit = reverse_transform(logit, ori_shape, transforms, mode='bilinear')
if trans_info is not None:
logit = reverse_transform(logit, trans_info, mode='bilinear')
pred = paddle.argmax(logit, axis=1, keepdim=True, dtype='int32')
return pred, logit
else:
@ -249,8 +175,7 @@ def inference(model,
def aug_inference(model,
im,
ori_shape,
transforms,
trans_info,
scales=1.0,
flip_horizontal=False,
flip_vertical=False,
@ -263,8 +188,7 @@ def aug_inference(model,
Args:
model (paddle.nn.Layer): model to get logits of image.
im (Tensor): the input image.
ori_shape (list): Origin shape of image.
transforms (list): Transforms for image.
trans_info (list): Information recording how the image shape was changed during preprocessing.
scales (float|tuple|list): Scales for resize. Default: 1.
flip_horizontal (bool): Whether to flip horizontally. Default: False.
flip_vertical (bool): Whether to flip vertically. Default: False.
@ -302,8 +226,7 @@ def aug_inference(model,
logit = F.softmax(logit, axis=1)
final_logit = final_logit + logit
final_logit = reverse_transform(
final_logit, ori_shape, transforms, mode='bilinear')
final_logit = reverse_transform(final_logit, trans_info, mode='bilinear')
pred = paddle.argmax(final_logit, axis=1, keepdim=True, dtype='int32')
return pred, final_logit

@ -19,9 +19,9 @@ import cv2
import numpy as np
import paddle
from paddlers.models.ppseg import utils
from paddlers.models.ppseg.core import infer
from paddlers.models.ppseg.utils import logger, progbar, visualize
from paddleseg import utils
from paddleseg.core import infer
from paddleseg.utils import logger, progbar, visualize
def mkdir(path):
@ -36,6 +36,15 @@ def partition_list(arr, m):
return [arr[i:i + n] for i in range(0, len(arr), n)]
def preprocess(im_path, transforms):
    """
    Run `transforms` on a single image and turn the result into a batched tensor.

    Args:
        im_path: Value stored under the 'img' key of the dict handed to
            `transforms` (the caller passes an image path).
        transforms (callable): Called with ``{'img': im_path}``; it must
            return a mapping whose 'img' entry supports numpy-style indexing.

    Returns:
        The mapping returned by `transforms`, with its 'img' entry replaced
        by a paddle tensor that has a new leading axis of size 1.
    """
    sample = transforms({'img': im_path})
    # Prepend a batch axis before converting to a tensor.
    batched_img = sample['img'][np.newaxis, ...]
    sample['img'] = paddle.to_tensor(batched_img)
    return sample
def predict(model,
model_path,
transforms,
@ -89,18 +98,13 @@ def predict(model,
color_map = visualize.get_color_map_list(256, custom_color=custom_color)
with paddle.no_grad():
for i, im_path in enumerate(img_lists[local_rank]):
im = cv2.imread(im_path)
ori_shape = im.shape[:2]
im, _ = transforms(im)
im = im[np.newaxis, ...]
im = paddle.to_tensor(im)
data = preprocess(im_path, transforms)
if aug_pred:
pred, _ = infer.aug_inference(
model,
im,
ori_shape=ori_shape,
transforms=transforms.transforms,
data['img'],
trans_info=data['trans_info'],
scales=scales,
flip_horizontal=flip_horizontal,
flip_vertical=flip_vertical,
@ -110,9 +114,8 @@ def predict(model,
else:
pred, _ = infer.inference(
model,
im,
ori_shape=ori_shape,
transforms=transforms.transforms,
data['img'],
trans_info=data['trans_info'],
is_slide=is_slide,
stride=stride,
crop_size=crop_size)
@ -141,9 +144,4 @@ def predict(model,
mkdir(pred_saved_path)
pred_mask.save(pred_saved_path)
# pred_im = utils.visualize(im_path, pred, weight=0.0)
# pred_saved_path = os.path.join(pred_saved_dir, im_file)
# mkdir(pred_saved_path)
# cv2.imwrite(pred_saved_path, pred_im)
progbar_pred.update(i + 1)

@ -20,10 +20,9 @@ import shutil
import paddle
import paddle.nn.functional as F
from paddlers.models.ppseg.utils import (TimeAverager, calculate_eta, resume,
logger, worker_init_fn, train_profiler,
op_flops_funs)
from paddlers.models.ppseg.core.val import evaluate
from paddleseg.utils import (TimeAverager, calculate_eta, resume, logger,
worker_init_fn, train_profiler, op_flops_funs)
from paddleseg.core.val import evaluate
def check_logits_losses(logits_list, losses):
@ -35,17 +34,15 @@ def check_logits_losses(logits_list, losses):
.format(len_logits, len_losses))
def loss_computation(logits_list, labels, losses, edges=None):
def loss_computation(logits_list, labels, edges, losses):
check_logits_losses(logits_list, losses)
loss_list = []
for i in range(len(logits_list)):
logits = logits_list[i]
loss_i = losses['types'][i]
coef_i = losses['coef'][i]
if loss_i.__class__.__name__ in ('BCELoss', 'FocalLoss'
) and loss_i.edge_label:
# If use edges as labels According to loss type.
if loss_i.__class__.__name__ in ('BCELoss', ) and loss_i.edge_label:
# Use edges as labels According to loss type.
loss_list.append(coef_i * loss_i(logits, edges))
elif loss_i.__class__.__name__ == 'MixedLoss':
mixed_loss_list = loss_i(logits, labels)
@ -75,13 +72,14 @@ def train(model,
keep_checkpoint_max=5,
test_config=None,
precision='fp32',
amp_level='O1',
profiler_options=None,
to_static_training=False):
"""
Launch training.
Args:
modelnn.Layer): A sementic segmentation model.
modelnn.Layer): A semantic segmentation model.
train_dataset (paddle.io.Dataset): Used to read and process training datasets.
val_dataset (paddle.io.Dataset, optional): Used to read and process validation datasets.
optimizer (paddle.optimizer.Optimizer): The optimizer.
@ -98,6 +96,9 @@ def train(model,
keep_checkpoint_max (int, optional): Maximum number of checkpoints to save. Default: 5.
test_config(dict, optional): Evaluation config.
precision (str, optional): Use AMP if precision='fp16'. If precision='fp32', the training is normal.
amp_level (str, optional): Auto mixed precision level. Accepted values are O1 and O2: O1 represent mixed precision,
the input data type of each operator will be casted by white_list and black_list; O2 represent Pure fp16, all operators
parameters and input data will be casted to fp16, except operators in black_list, dont support fp16 kernel and batchnorm. Default is O1(amp)
profiler_options (str, optional): The option of train profiler.
to_static_training (bool, optional): Whether to use @to_static for training.
"""
@ -112,7 +113,18 @@ def train(model,
if not os.path.isdir(save_dir):
if os.path.exists(save_dir):
os.remove(save_dir)
os.makedirs(save_dir)
os.makedirs(save_dir, exist_ok=True)
# use amp
if precision == 'fp16':
logger.info('use AMP to train. AMP level = {}'.format(amp_level))
scaler = paddle.amp.GradScaler(init_loss_scaling=1024)
if amp_level == 'O2':
model, optimizer = paddle.amp.decorate(
models=model,
optimizers=optimizer,
level='O2',
save_dtype='float32')
if nranks > 1:
paddle.distributed.fleet.init(is_collective=True)
@ -130,18 +142,13 @@ def train(model,
return_list=True,
worker_init_fn=worker_init_fn, )
# use amp
if precision == 'fp16':
logger.info('use amp to train')
scaler = paddle.amp.GradScaler(init_loss_scaling=1024)
if use_vdl:
from visualdl import LogWriter
log_writer = LogWriter(save_dir)
if to_static_training:
model = paddle.jit.to_static(model)
logger.info("Successfully to apply @to_static")
logger.info("Successfully applied @to_static")
avg_loss = 0.0
avg_loss_list = []
@ -164,30 +171,29 @@ def train(model,
else:
break
reader_cost_averager.record(time.time() - batch_start)
images = data[0]
labels = data[1].astype('int64')
images = data['img']
labels = data['label'].astype('int64')
edges = None
if len(data) == 3:
edges = data[2].astype('int64')
if 'edge' in data.keys():
edges = data['edge'].astype('int64')
if hasattr(model, 'data_format') and model.data_format == 'NHWC':
images = images.transpose((0, 2, 3, 1))
if precision == 'fp16':
with paddle.amp.auto_cast(
level=amp_level,
enable=True,
custom_white_list={
"elementwise_add", "batch_norm", "sync_batch_norm"
},
custom_black_list={'bilinear_interp_v2'}):
if nranks > 1:
logits_list = ddp_model(images)
else:
logits_list = model(images)
logits_list = ddp_model(images) if nranks > 1 else model(
images)
loss_list = loss_computation(
logits_list=logits_list,
labels=labels,
losses=losses,
edges=edges)
edges=edges,
losses=losses)
loss = sum(loss_list)
scaled = scaler.scale(loss) # scale the loss
@ -197,15 +203,12 @@ def train(model,
else:
scaler.minimize(optimizer, scaled) # update parameters
else:
if nranks > 1:
logits_list = ddp_model(images)
else:
logits_list = model(images)
logits_list = ddp_model(images) if nranks > 1 else model(images)
loss_list = loss_computation(
logits_list=logits_list,
labels=labels,
losses=losses,
edges=edges)
edges=edges,
losses=losses)
loss = sum(loss_list)
loss.backward()
# if the optimizer is ReduceOnPlateau, the loss is the one which has been pass into step.
@ -278,7 +281,12 @@ def train(model,
test_config = {}
mean_iou, acc, _, _, _ = evaluate(
model, val_dataset, num_workers=num_workers, **test_config)
model,
val_dataset,
num_workers=num_workers,
precision=precision,
amp_level=amp_level,
**test_config)
model.train()
@ -314,7 +322,7 @@ def train(model,
batch_start = time.time()
# Calculate flops.
if local_rank == 0:
if local_rank == 0 and not (precision == 'fp16' and amp_level == 'O2'):
_, c, h, w = images.shape
_ = paddle.flops(
model, [1, c, h, w],

@ -19,8 +19,8 @@ import time
import paddle
import paddle.nn.functional as F
from paddlers.models.ppseg.utils import metrics, TimeAverager, calculate_eta, logger, progbar
from paddlers.models.ppseg.core import infer
from paddleseg.utils import metrics, TimeAverager, calculate_eta, logger, progbar
from paddleseg.core import infer
np.set_printoptions(suppress=True)
@ -34,6 +34,8 @@ def evaluate(model,
is_slide=False,
stride=None,
crop_size=None,
precision='fp32',
amp_level='O1',
num_workers=0,
print_detail=True,
auc_roc=False):
@ -41,7 +43,7 @@ def evaluate(model,
Launch evaluation.
Args:
modelnn.Layer): A sementic segmentation model.
modelnn.Layer): A semantic segmentation model.
eval_dataset (paddle.io.Dataset): Used to read and process validation datasets.
aug_eval (bool, optional): Whether to use multi-scale and flip augmentation for evaluation. Default: False.
scales (list|float, optional): Scales for augment. It is valid when `aug_eval` is True. Default: 1.0.
@ -52,6 +54,8 @@ def evaluate(model,
It should be provided when `is_slide` is True.
crop_size (tuple|list, optional): The crop size of sliding window, the first is width and the second is height.
It should be provided when `is_slide` is True.
precision (str, optional): Use AMP if precision='fp16'. If precision='fp32', the evaluation is normal.
amp_level (str, optional): Auto mixed precision level. Accepted values are O1 and O2: O1 represent mixed precision, the input data type of each operator will be casted by white_list and black_list; O2 represent Pure fp16, all operators parameters and input data will be casted to fp16, except operators in black_list, dont support fp16 kernel and batchnorm. Default is O1(amp)
num_workers (int, optional): Num workers for data loader. Default: 0.
print_detail (bool, optional): Whether to print detailed information about the evaluation process. Default: True.
auc_roc(bool, optional): whether add auc_roc metric
@ -93,32 +97,66 @@ def evaluate(model,
batch_cost_averager = TimeAverager()
batch_start = time.time()
with paddle.no_grad():
for iter, (im, label) in enumerate(loader):
for iter, data in enumerate(loader):
reader_cost_averager.record(time.time() - batch_start)
label = label.astype('int64')
label = data['label'].astype('int64')
ori_shape = label.shape[-2:]
if aug_eval:
pred, logits = infer.aug_inference(
model,
im,
ori_shape=ori_shape,
transforms=eval_dataset.transforms.transforms,
scales=scales,
flip_horizontal=flip_horizontal,
flip_vertical=flip_vertical,
is_slide=is_slide,
stride=stride,
crop_size=crop_size)
if precision == 'fp16':
with paddle.amp.auto_cast(
level=amp_level,
enable=True,
custom_white_list={
"elementwise_add", "batch_norm",
"sync_batch_norm"
},
custom_black_list={'bilinear_interp_v2'}):
pred, logits = infer.aug_inference(
model,
data['img'],
trans_info=data['trans_info'],
scales=scales,
flip_horizontal=flip_horizontal,
flip_vertical=flip_vertical,
is_slide=is_slide,
stride=stride,
crop_size=crop_size)
else:
pred, logits = infer.aug_inference(
model,
data['img'],
trans_info=data['trans_info'],
scales=scales,
flip_horizontal=flip_horizontal,
flip_vertical=flip_vertical,
is_slide=is_slide,
stride=stride,
crop_size=crop_size)
else:
pred, logits = infer.inference(
model,
im,
ori_shape=ori_shape,
transforms=eval_dataset.transforms.transforms,
is_slide=is_slide,
stride=stride,
crop_size=crop_size)
if precision == 'fp16':
with paddle.amp.auto_cast(
level=amp_level,
enable=True,
custom_white_list={
"elementwise_add", "batch_norm",
"sync_batch_norm"
},
custom_black_list={'bilinear_interp_v2'}):
pred, logits = infer.inference(
model,
data['img'],
trans_info=data['trans_info'],
is_slide=is_slide,
stride=stride,
crop_size=crop_size)
else:
pred, logits = infer.inference(
model,
data['img'],
trans_info=data['trans_info'],
is_slide=is_slide,
stride=stride,
crop_size=crop_size)
intersect_area, pred_area, label_area = metrics.calculate_area(
pred,
@ -175,12 +213,12 @@ def evaluate(model,
batch_cost_averager.reset()
batch_start = time.time()
class_iou, miou = metrics.mean_iou(intersect_area_all, pred_area_all,
label_area_all)
class_acc, acc = metrics.accuracy(intersect_area_all, pred_area_all)
kappa = metrics.kappa(intersect_area_all, pred_area_all, label_area_all)
class_dice, mdice = metrics.dice(intersect_area_all, pred_area_all,
label_area_all)
metrics_input = (intersect_area_all, pred_area_all, label_area_all)
class_iou, miou = metrics.mean_iou(*metrics_input)
acc, class_precision, class_recall = metrics.class_measurement(
*metrics_input)
kappa = metrics.kappa(*metrics_input)
class_dice, mdice = metrics.dice(*metrics_input)
if auc_roc:
auc_roc = metrics.auc_roc(
@ -193,5 +231,7 @@ def evaluate(model,
infor = infor + auc_infor if auc_roc else infor
logger.info(infor)
logger.info("[EVAL] Class IoU: \n" + str(np.round(class_iou, 4)))
logger.info("[EVAL] Class Acc: \n" + str(np.round(class_acc, 4)))
return miou, acc, class_iou, class_acc, kappa
logger.info("[EVAL] Class Precision: \n" + str(
np.round(class_precision, 4)))
logger.info("[EVAL] Class Recall: \n" + str(np.round(class_recall, 4)))
return miou, acc, class_iou, class_precision, kappa

@ -19,8 +19,8 @@ import numpy as np
import paddle
from paddle.distributed.parallel import ParallelEnv
from visualdl import LogWriter
from paddlers.models.ppseg.utils.progbar import Progbar
import paddlers.models.ppseg.utils.logger as logger
from paddleseg.utils.progbar import Progbar
import paddleseg.utils.logger as logger
class CallbackList(object):

@ -15,12 +15,15 @@
import codecs
import os
from typing import Any, Dict, Generic
import warnings
from ast import literal_eval
import paddle
import yaml
import six
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.utils import logger
from paddleseg.cvlibs import manager
from paddleseg.utils import logger
class Config(object):
@ -51,7 +54,7 @@ class Config(object):
Examples:
from paddlers.models.ppseg.cvlibs.config import Config
from paddleseg.cvlibs.config import Config
# Create a cfg object with yaml file path.
cfg = Config(yaml_cfg_path)
@ -69,7 +72,8 @@ class Config(object):
path: str,
learning_rate: float=None,
batch_size: int=None,
iters: int=None):
iters: int=None,
opts: list=None):
if not path:
raise ValueError('Please specify the configuration file path.')
@ -84,7 +88,18 @@ class Config(object):
raise RuntimeError('Config file should in yaml format!')
self.update(
learning_rate=learning_rate, batch_size=batch_size, iters=iters)
learning_rate=learning_rate,
batch_size=batch_size,
iters=iters,
opts=opts)
model_cfg = self.dic.get('model', None)
if model_cfg is None:
raise RuntimeError('No model specified in the configuration file.')
if (not self.train_dataset_config) and (not self.val_dataset_config):
raise ValueError(
'One of `train_dataset` or `val_dataset should be given, but there are none.'
)
def _update_dic(self, dic, base_dic):
"""
@ -121,7 +136,8 @@ class Config(object):
def update(self,
learning_rate: float=None,
batch_size: int=None,
iters: int=None):
iters: int=None,
opts: list=None):
'''Update config'''
if learning_rate:
if 'lr_scheduler' in self.dic:
@ -135,6 +151,27 @@ class Config(object):
if iters:
self.dic['iters'] = iters
# fix parameters by --opts of command
if opts is not None:
if len(opts) % 2 != 0 or len(opts) == 0:
raise ValueError(
"Command line options config `--opts` format error! It should be even length like: k1 v1 k2 v2 ... Please check it: {}".
format(opts))
for key, value in zip(opts[0::2], opts[1::2]):
if isinstance(value, six.string_types):
try:
value = literal_eval(value)
except ValueError:
pass
except SyntaxError:
pass
key_list = key.split('.')
dic = self.dic
for subkey in key_list[:-1]:
dic.setdefault(subkey, dict())
dic = dic[subkey]
dic[key_list[-1]] = value
@property
def batch_size(self) -> int:
return self.dic.get('batch_size', 1)
@ -153,13 +190,32 @@ class Config(object):
'No `lr_scheduler` specified in the configuration file.')
params = self.dic.get('lr_scheduler')
use_warmup = False
if 'warmup_iters' in params:
use_warmup = True
warmup_iters = params.pop('warmup_iters')
assert 'warmup_start_lr' in params, \
"When use warmup, please set warmup_start_lr and warmup_iters in lr_scheduler"
warmup_start_lr = params.pop('warmup_start_lr')
end_lr = params['learning_rate']
lr_type = params.pop('type')
if lr_type == 'PolynomialDecay':
params.setdefault('decay_steps', self.iters)
iters = self.iters - warmup_iters if use_warmup else self.iters
iters = max(iters, 1)
params.setdefault('decay_steps', iters)
params.setdefault('end_lr', 0)
params.setdefault('power', 0.9)
lr_sche = getattr(paddle.optimizer.lr, lr_type)(**params)
if use_warmup:
lr_sche = paddle.optimizer.lr.LinearWarmup(
learning_rate=lr_sche,
warmup_steps=warmup_iters,
start_lr=warmup_start_lr,
end_lr=end_lr)
return getattr(paddle.optimizer.lr, lr_type)(**params)
return lr_sche
@property
def learning_rate(self) -> paddle.optimizer.lr.LRScheduler:
@ -202,15 +258,33 @@ class Config(object):
args = self.optimizer_args
optimizer_type = args.pop('type')
params = self.model.parameters()
if 'backbone_lr_mult' in args:
if not hasattr(self.model, 'backbone'):
logger.warning('The backbone_lr_mult is not effective because'
' the model does not have backbone')
else:
backbone_lr_mult = args.pop('backbone_lr_mult')
backbone_params = self.model.backbone.parameters()
backbone_params_id = [id(x) for x in backbone_params]
other_params = [
x for x in params if id(x) not in backbone_params_id
]
params = [{
'params': backbone_params,
'learning_rate': backbone_lr_mult
}, {
'params': other_params
}]
if optimizer_type == 'sgd':
return paddle.optimizer.Momentum(
lr, parameters=self.model.parameters(), **args)
return paddle.optimizer.Momentum(lr, parameters=params, **args)
elif optimizer_type == 'adam':
return paddle.optimizer.Adam(
lr, parameters=self.model.parameters(), **args)
return paddle.optimizer.Adam(lr, parameters=params, **args)
elif optimizer_type in paddle.optimizer.__all__:
return getattr(paddle.optimizer, optimizer_type)(
lr, parameters=self.model.parameters(), **args)
return getattr(paddle.optimizer, optimizer_type)(lr,
parameters=params,
**args)
raise RuntimeError('Unknown optimizer type {}.'.format(optimizer_type))
@ -295,24 +369,6 @@ class Config(object):
@property
def model(self) -> paddle.nn.Layer:
    """
    Build (once) and return the model described by the 'model' section of the config.

    If the section has no 'num_classes', the value is taken from the
    training dataset (class constant ``NUM_CLASSES`` first, then the
    instance attribute ``num_classes``); the validation dataset is
    consulted only when no training dataset is configured.

    Returns:
        paddle.nn.Layer: The object cached in ``self._model``; it is created
            with ``self._load_object`` on first access.

    Raises:
        RuntimeError: If the config has no (or an empty) 'model' section.
    """
    model_cfg = self.dic.get('model')
    # Validate before copying: calling .copy() on a missing section would
    # raise AttributeError on None and hide the intended error below.
    if not model_cfg:
        raise RuntimeError('No model specified in the configuration file.')
    # Work on a copy so the inferred num_classes is not written into self.dic.
    model_cfg = model_cfg.copy()

    if 'num_classes' not in model_cfg:
        num_classes = None
        if self.train_dataset_config:
            if hasattr(self.train_dataset_class, 'NUM_CLASSES'):
                num_classes = self.train_dataset_class.NUM_CLASSES
            elif hasattr(self.train_dataset, 'num_classes'):
                num_classes = self.train_dataset.num_classes
        elif self.val_dataset_config:
            if hasattr(self.val_dataset_class, 'NUM_CLASSES'):
                num_classes = self.val_dataset_class.NUM_CLASSES
            elif hasattr(self.val_dataset, 'num_classes'):
                num_classes = self.val_dataset.num_classes

        if num_classes is not None:
            model_cfg['num_classes'] = num_classes

    if not self._model:
        self._model = self._load_object(model_cfg)
    return self._model
@ -401,3 +457,94 @@ class Config(object):
def __str__(self) -> str:
return yaml.dump(self.dic)
@property
def val_transforms(self) -> list:
    """
    Instantiate the transforms listed in the validation dataset config.

    Returns:
        list: One object per entry of the 'transforms' list, each created
            with ``self._load_object``. Empty when there is no validation
            dataset config or it has no 'transforms' entry.
    """
    dataset_cfg = self.val_dataset_config
    if not dataset_cfg:
        return []
    return [
        self._load_object(transform_cfg)
        for transform_cfg in dataset_cfg.get('transforms', [])
    ]
def check_sync_info(self) -> None:
    """
    Validate and synchronize settings shared by the model, train_dataset
    and val_dataset sections of the config.

    Runs the `num_classes` check first, then the `img_channels` check;
    whatever either helper raises propagates to the caller.
    """
    # Class count first, then input channel count.
    self._check_sync_num_classes()
    self._check_sync_img_channels()
def _check_sync_num_classes(self):
num_classes_set = set()
if self.dic['model'].get('num_classes', None) is not None:
num_classes_set.add(self.dic['model'].get('num_classes'))
if self.train_dataset_config:
if hasattr(self.train_dataset_class, 'NUM_CLASSES'):
num_classes_set.add(self.train_dataset_class.NUM_CLASSES)
elif 'num_classes' in self.train_dataset_config:
num_classes_set.add(self.train_dataset_config['num_classes'])
if self.val_dataset_config:
if hasattr(self.val_dataset_class, 'NUM_CLASSES'):
num_classes_set.add(self.val_dataset_class.NUM_CLASSES)
elif 'num_classes' in self.val_dataset_config:
num_classes_set.add(self.val_dataset_config['num_classes'])
if len(num_classes_set) == 0:
raise ValueError(
'`num_classes` is not found. Please set it in model, train_dataset or val_dataset'
)
elif len(num_classes_set) > 1:
raise ValueError(
'`num_classes` is not consistent: {}. Please set it consistently in model or train_dataset or val_dataset'
.format(num_classes_set))
num_classes = num_classes_set.pop()
self.dic['model']['num_classes'] = num_classes
if self.train_dataset_config and \
(not hasattr(self.train_dataset_class, 'NUM_CLASSES')):
self.dic['train_dataset']['num_classes'] = num_classes
if self.val_dataset_config and \
(not hasattr(self.val_dataset_class, 'NUM_CLASSES')):
self.dic['val_dataset']['num_classes'] = num_classes
def _check_sync_img_channels(self):
    """Collect every declared image-channel count and write one value back.

    Sources inspected: ``in_channels`` of the backbone config when the
    model config has a ``backbone`` entry, otherwise ``in_channels`` of the
    model config itself; plus ``img_channels`` of the train and val dataset
    configs when present. If nothing is declared the value defaults to 3.

    The resulting value is stored as ``in_channels`` on the backbone (or on
    the model when there is no backbone) and as ``img_channels`` on every
    present dataset config whose ``type`` is ``"Dataset"``.

    Raises:
        ValueError: If the declared values differ.
    """
    dataset_keys = ('train_dataset', 'val_dataset')
    model_cfg = self.dic['model']
    has_backbone = 'backbone' in model_cfg
    found = set()

    # If the model has a backbone, in_channels is a parameter of the backbone.
    # Otherwise, in_channels is a parameter of the model itself.
    if has_backbone:
        declared = model_cfg['backbone'].get('in_channels', None)
    else:
        declared = model_cfg.get('in_channels', None)
    if declared is not None:
        found.add(declared)

    for key in dataset_keys:
        dataset_cfg = getattr(self, key + '_config')
        if dataset_cfg and 'img_channels' in dataset_cfg:
            found.add(dataset_cfg['img_channels'])

    if len(found) > 1:
        raise ValueError(
            '`img_channels` is not consistent: {}. Please set it consistently in model or train_dataset or val_dataset'
            .format(found))

    img_channels = found.pop() if found else 3

    if has_backbone:
        self.dic['model']['backbone']['in_channels'] = img_channels
    else:
        self.dic['model']['in_channels'] = img_channels

    for key in dataset_keys:
        dataset_cfg = getattr(self, key + '_config')
        # Only the generic "Dataset" type receives img_channels here.
        if dataset_cfg and dataset_cfg['type'] == "Dataset":
            self.dic[key]['img_channels'] = img_channels

@ -31,7 +31,7 @@ class ComponentManager:
Examples 1:
from paddlers.models.ppseg.cvlibs.manager import ComponentManager
from paddleseg.cvlibs.manager import ComponentManager
model_manager = ComponentManager()
@ -49,7 +49,7 @@ class ComponentManager:
Examples 2:
# Or an easier way, using it as a Python decorator, while just add it above the class declaration.
from paddlers.models.ppseg.cvlibs.manager import ComponentManager
from paddleseg.cvlibs.manager import ComponentManager
model_manager = ComponentManager()

@ -24,7 +24,7 @@ def constant_init(param, **kwargs):
Examples:
from paddlers.models.ppseg.cvlibs import param_init
from paddleseg.cvlibs import param_init
import paddle.nn as nn
linear = nn.Linear(2, 4)
@ -46,7 +46,7 @@ def normal_init(param, **kwargs):
Examples:
from paddlers.models.ppseg.cvlibs import param_init
from paddleseg.cvlibs import param_init
import paddle.nn as nn
linear = nn.Linear(2, 4)
@ -79,7 +79,7 @@ def kaiming_normal_init(param, **kwargs):
Examples:
from paddlers.models.ppseg.cvlibs import param_init
from paddleseg.cvlibs import param_init
import paddle.nn as nn
linear = nn.Linear(2, 4)
@ -109,7 +109,7 @@ def kaiming_uniform(param, **kwargs):
Examples:
from paddlers.models.ppseg.cvlibs import param_init
from paddleseg.cvlibs import param_init
import paddle.nn as nn
linear = nn.Linear(2, 4)
@ -118,3 +118,29 @@ def kaiming_uniform(param, **kwargs):
initializer = nn.initializer.KaimingUniform(**kwargs)
initializer(param, param.block)
def xavier_uniform(param, **kwargs):
    r"""
    Initialize ``param`` with the Xavier uniform initializer described in
    `Understanding the difficulty of training deep feedforward neural
    networks <http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf>`_
    by Xavier Glorot and Yoshua Bengio.

    The initializer aims to keep the scale of the gradients roughly the
    same in all layers. For the uniform distribution the range is
    [-x, x], where

    .. math::

        x = \sqrt{\frac{6.0}{fan\_in + fan\_out}}

    Args:
        param (Tensor): Tensor that needs to be initialized.
        **kwargs: Forwarded unchanged to ``nn.initializer.XavierUniform``.

    Examples:

        from paddleseg.cvlibs import param_init
        import paddle.nn as nn

        linear = nn.Linear(2, 4)
        param_init.xavier_uniform(linear.weight)
    """
    xavier = nn.initializer.XavierUniform(**kwargs)
    # The initializer is applied to the parameter within its own block.
    xavier(param, param.block)

@ -27,3 +27,4 @@ from .drive import DRIVE
from .hrf import HRF
from .chase_db1 import CHASEDB1
from .pp_humanseg14k import PPHumanSeg14K
from .pssl import PSSLDataset

@ -17,12 +17,12 @@ import os
import numpy as np
from PIL import Image
from paddlers.models.ppseg.datasets import Dataset
from paddlers.models.ppseg.utils.download import download_file_and_uncompress
from paddlers.models.ppseg.utils import seg_env
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.transforms import Compose
import paddlers.models.ppseg.transforms.functional as F
from paddleseg.datasets import Dataset
from paddleseg.utils.download import download_file_and_uncompress
from paddleseg.utils import seg_env
from paddleseg.cvlibs import manager
from paddleseg.transforms import Compose
import paddleseg.transforms.functional as F
URL = "http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip"
@ -89,23 +89,31 @@ class ADE20K(Dataset):
self.file_list.append([img_path, label_path])
def __getitem__(self, idx):
data = {}
data['trans_info'] = []
image_path, label_path = self.file_list[idx]
data['img'] = image_path
data['gt_fields'] = [
] # If key in gt_fields, the data[key] have transforms synchronous.
if self.mode == 'val':
im, _ = self.transforms(im=image_path)
data = self.transforms(data)
label = np.asarray(Image.open(label_path))
# The class 0 is ignored. And it will equal to 255 after
# subtracted 1, because the dtype of label is uint8.
label = label - 1
label = label[np.newaxis, :, :]
return im, label
data['label'] = label
return data
else:
im, label = self.transforms(im=image_path, label=label_path)
label = label - 1
data['label'] = label_path
data['gt_fields'].append('label')
data = self.transforms(data)
data['label'] = data['label'] - 1
# Recover the ignore pixels adding by transform
label[label == 254] = 255
data['label'][data['label'] == 254] = 255
if self.edge:
edge_mask = F.mask_to_binary_edge(
label, radius=2, num_classes=self.num_classes)
return im, label, edge_mask
else:
return im, label
data['edge'] = edge_mask
return data

@ -14,11 +14,11 @@
import os
from paddlers.models.ppseg.utils.download import download_file_and_uncompress
from paddlers.models.ppseg.utils import seg_env
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.transforms import Compose
from paddlers.models.ppseg.datasets import Dataset
from paddleseg.utils.download import download_file_and_uncompress
from paddleseg.utils import seg_env
from paddleseg.cvlibs import manager
from paddleseg.transforms import Compose
from paddleseg.datasets import Dataset
URL = 'https://bj.bcebos.com/paddleseg/dataset/chase_db1/chase_db1.zip'

@ -15,9 +15,9 @@
import os
import glob
from paddlers.models.ppseg.datasets import Dataset
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.transforms import Compose
from paddleseg.datasets import Dataset
from paddleseg.cvlibs import manager
from paddleseg.transforms import Compose
@manager.DATASETS.add_component

@ -15,9 +15,9 @@
import os
import glob
from paddlers.models.ppseg.datasets import Dataset
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.transforms import Compose
from paddleseg.datasets import Dataset
from paddleseg.cvlibs import manager
from paddleseg.transforms import Compose
@manager.DATASETS.add_component

@ -18,9 +18,9 @@ import paddle
import numpy as np
from PIL import Image
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.transforms import Compose
import paddlers.models.ppseg.transforms.functional as F
from paddleseg.cvlibs import manager
from paddleseg.transforms import Compose
import paddleseg.transforms.functional as F
@manager.DATASETS.add_component
@ -46,10 +46,10 @@ class Dataset(paddle.io.Dataset):
Examples:
import paddlers.models.ppseg.transforms as T
from paddlers.models.ppseg.datasets import Dataset
import paddleseg.transforms as T
from paddleseg.datasets import Dataset
transforms = [T.RandomPadCrop(crop_size=(512,512)), T.Normalize()]
transforms = [T.RandomPaddingCrop(crop_size=(512,512)), T.Normalize()]
dataset_root = 'dataset_root_path'
train_path = 'train_path'
num_classes = 2
@ -62,10 +62,11 @@ class Dataset(paddle.io.Dataset):
"""
def __init__(self,
transforms,
mode,
dataset_root,
transforms,
num_classes,
mode='train',
img_channels=3,
train_path=None,
val_path=None,
test_path=None,
@ -73,10 +74,11 @@ class Dataset(paddle.io.Dataset):
ignore_index=255,
edge=False):
self.dataset_root = dataset_root
self.transforms = Compose(transforms)
self.transforms = Compose(transforms, img_channels=img_channels)
self.file_list = list()
self.mode = mode.lower()
self.num_classes = num_classes
self.img_channels = img_channels
self.ignore_index = ignore_index
self.edge = edge
@ -84,13 +86,18 @@ class Dataset(paddle.io.Dataset):
raise ValueError(
"mode should be 'train', 'val' or 'test', but got {}.".format(
self.mode))
if self.transforms is None:
raise ValueError("`transforms` is necessary, but it is None.")
if not os.path.exists(self.dataset_root):
raise FileNotFoundError('there is not `dataset_root`: {}.'.format(
self.dataset_root))
if self.transforms is None:
raise ValueError("`transforms` is necessary, but it is None.")
if num_classes < 1:
raise ValueError(
"`num_classes` should be greater than 1, but got {}".format(
num_classes))
if img_channels not in [1, 3]:
raise ValueError("`img_channels` should in [1, 3], but got {}".
format(img_channels))
if self.mode == 'train':
if train_path is None:
@ -139,24 +146,25 @@ class Dataset(paddle.io.Dataset):
self.file_list.append([image_path, label_path])
def __getitem__(self, idx):
data = {}
data['trans_info'] = []
image_path, label_path = self.file_list[idx]
if self.mode == 'test':
im, _ = self.transforms(im=image_path)
im = im[np.newaxis, ...]
return im, image_path
elif self.mode == 'val':
im, _ = self.transforms(im=image_path)
label = np.asarray(Image.open(label_path))
label = label[np.newaxis, :, :]
return im, label
data['img'] = image_path
data['label'] = label_path
# If key in gt_fields, the data[key] have transforms synchronous.
data['gt_fields'] = []
if self.mode == 'val':
data = self.transforms(data)
data['label'] = data['label'][np.newaxis, :, :]
else:
im, label = self.transforms(im=image_path, label=label_path)
data['gt_fields'].append('label')
data = self.transforms(data)
if self.edge:
edge_mask = F.mask_to_binary_edge(
label, radius=2, num_classes=self.num_classes)
return im, label, edge_mask
else:
return im, label
data['label'], radius=2, num_classes=self.num_classes)
data['edge'] = edge_mask
return data
def __len__(self):
    """Return how many samples are listed in ``self.file_list``."""
    sample_count = len(self.file_list)
    return sample_count

@ -14,11 +14,11 @@
import os
from paddlers.models.ppseg.utils.download import download_file_and_uncompress
from paddlers.models.ppseg.utils import seg_env
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.transforms import Compose
from paddlers.models.ppseg.datasets import Dataset
from paddleseg.utils.download import download_file_and_uncompress
from paddleseg.utils import seg_env
from paddleseg.cvlibs import manager
from paddleseg.transforms import Compose
from paddleseg.datasets import Dataset
URL = 'https://bj.bcebos.com/paddleseg/dataset/drive/drive.zip'

@ -18,12 +18,12 @@ import copy
import cv2
import numpy as np
from paddlers.models.ppseg.datasets import Dataset
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.transforms import Compose
from paddlers.models.ppseg.utils.download import download_file_and_uncompress
from paddlers.models.ppseg.utils import seg_env
import paddlers.models.ppseg.transforms.functional as F
from paddleseg.datasets import Dataset
from paddleseg.cvlibs import manager
from paddleseg.transforms import Compose
from paddleseg.utils.download import download_file_and_uncompress
from paddleseg.utils import seg_env
import paddleseg.transforms.functional as F
URL = "https://paddleseg.bj.bcebos.com/dataset/EG1800.zip"

@ -14,11 +14,11 @@
import os
from paddlers.models.ppseg.utils.download import download_file_and_uncompress
from paddlers.models.ppseg.utils import seg_env
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.transforms import Compose
from paddlers.models.ppseg.datasets import Dataset
from paddleseg.utils.download import download_file_and_uncompress
from paddleseg.utils import seg_env
from paddleseg.cvlibs import manager
from paddleseg.transforms import Compose
from paddleseg.datasets import Dataset
URL = 'https://bj.bcebos.com/paddleseg/dataset/hrf/hrf.zip'

@ -15,10 +15,10 @@
import os
from .dataset import Dataset
from paddlers.models.ppseg.utils.download import download_file_and_uncompress
from paddlers.models.ppseg.utils import seg_env
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.transforms import Compose
from paddleseg.utils.download import download_file_and_uncompress
from paddleseg.utils import seg_env
from paddleseg.cvlibs import manager
from paddleseg.transforms import Compose
URL = "https://paddleseg.bj.bcebos.com/dataset/MiniDeepGlobeRoadExtraction.zip"

@ -15,10 +15,10 @@
import os
from .dataset import Dataset
from paddlers.models.ppseg.utils.download import download_file_and_uncompress
from paddlers.models.ppseg.utils import seg_env
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.transforms import Compose
from paddleseg.utils.download import download_file_and_uncompress
from paddleseg.utils import seg_env
from paddleseg.cvlibs import manager
from paddleseg.transforms import Compose
URL = "https://paddleseg.bj.bcebos.com/dataset/optic_disc_seg.zip"

@ -15,9 +15,9 @@
import os
from PIL import Image
from paddlers.models.ppseg.datasets import Dataset
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.transforms import Compose
from paddleseg.datasets import Dataset
from paddleseg.cvlibs import manager
from paddleseg.transforms import Compose
@manager.DATASETS.add_component

@ -15,8 +15,8 @@
import os
from .dataset import Dataset
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.transforms import Compose
from paddleseg.cvlibs import manager
from paddleseg.transforms import Compose
@manager.DATASETS.add_component

@ -0,0 +1,135 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
from paddleseg.datasets import Dataset
from paddleseg.cvlibs import manager
from paddleseg.transforms import Compose
@manager.DATASETS.add_component
class PSSLDataset(Dataset):
    """
    The PSSL dataset for segmentation. PSSL is short for Pseudo Semantic Segmentation Labels, where the pseudo label
    is computed by the Consensus explanation algorithm.

    The PSSL refers to "Distilling Ensemble of Explanations for Weakly-Supervised Pre-Training of Image Segmentation
    Models" (https://arxiv.org/abs/2207.03335).

    The Consensus explanation refers to "Cross-Model Consensus of Explanations and Beyond for Image Classification
    Models: An Empirical Study" (https://arxiv.org/abs/2109.00707).

    To use this dataset, we need to additionally prepare the original ImageNet dataset, which has the folder structure
    as follows:

        imagenet_root
        |
        |--train
        |  |--n01440764
        |  |  |--n01440764_10026.JPEG
        |  |  |--...
        |  |--nxxxxxxxx
        |  |--...

    where only the "train" set is needed.

    The PSSL dataset has the folder structure as follows:

        pssl_root
        |
        |--train
        |  |--n01440764
        |  |  |--n01440764_10026.JPEG_eiseg.npz
        |  |  |--...
        |  |--nxxxxxxxx
        |  |--...
        |
        |--imagenet_lsvrc_2015_synsets.txt
        |--train.txt

    where "train.txt" and "imagenet_lsvrc_2015_synsets.txt" are included in the PSSL dataset.

    Args:
        transforms (list): Transforms for image.
        imagenet_root (str): The path to the original ImageNet dataset.
        pssl_root (str): The path to the PSSL dataset.
        mode (str, optional): Which part of dataset to use. Only 'train' is accepted. Default: 'train'.
        edge (bool, optional): Whether to compute edge while training. The flag is stored on the instance but is
            not read anywhere in this class. Default: False.

    Raises:
        ValueError: If `mode` is not 'train', `transforms` is None, either root is None, `pssl_root` is not a
            directory, or one of the two list files is missing.
    """
    ignore_index = 1001  # 0~999 are the target classes, 1000 is the background
    NUM_CLASSES = 1001  # the target classes plus the background

    def __init__(self,
                 transforms,
                 imagenet_root,
                 pssl_root,
                 mode='train',
                 edge=False):
        mode = mode.lower()
        if mode not in ['train']:
            raise ValueError("mode should be 'train', but got {}.".format(mode))
        if transforms is None:
            raise ValueError("`transforms` is necessary, but it is None.")

        self.transforms = Compose(transforms)
        self.mode = mode
        self.edge = edge

        self.num_classes = self.NUM_CLASSES
        self.ignore_index = self.num_classes  # 1001
        self.file_list = []
        self.class_id_dict = {}

        # Reject a missing root up front so that a None `pssl_root` produces
        # the same ValueError instead of a TypeError from os.path.isdir.
        if imagenet_root is None or pssl_root is None or \
                not os.path.isdir(pssl_root):
            raise ValueError(
                "The dataset is not Found or the folder structure is nonconfoumance."
            )

        train_list_file = os.path.join(pssl_root, "train.txt")
        if not os.path.exists(train_list_file):
            raise ValueError("Train list file isn't exists.")
        with open(train_list_file) as list_file:
            for line in list_file:
                # line: train/n04118776/n04118776_45912.JPEG_eiseg.npz
                label_path = line.strip()
                if not label_path:
                    # A blank line would otherwise yield a bogus '.JPEG' sample.
                    continue
                img_path = label_path.split('.JPEG')[0] + '.JPEG'
                label_path = os.path.join(pssl_root, label_path)
                img_path = os.path.join(imagenet_root, img_path)
                self.file_list.append([img_path, label_path])

        # Map each class name to its class id (the line index in the file).
        class_id_file = os.path.join(pssl_root,
                                     "imagenet_lsvrc_2015_synsets.txt")
        if not os.path.exists(class_id_file):
            raise ValueError("Class id file isn't exists.")
        with open(class_id_file) as synset_file:
            for idx, line in enumerate(synset_file):
                class_name = line.strip()
                if not class_name:
                    # Blank lines still consume an index, so the ids of the
                    # real entries stay tied to their line numbers.
                    continue
                self.class_id_dict[class_name] = idx

    def __getitem__(self, idx):
        """Return the transformed image and its pseudo label for sample `idx`.

        NOTE(review): this uses the keyword-style ``self.transforms(im=..., label=...)``
        call and returns a tuple, while other ``__getitem__`` methods in this
        changeset pass and return a ``data`` dict. Confirm which call style
        ``Compose`` expects.
        """
        image_path, label_path = self.file_list[idx]

        # The class name is the file-name prefix before the first underscore.
        # os.path.basename copes with whatever separator os.path.join used.
        class_name = os.path.basename(image_path).split('_')[0]
        class_id = self.class_id_dict[class_name]

        # Close the .npz archive as soon as the array has been read.
        with np.load(label_path) as npz_file:
            pssl_seg = npz_file['arr_0']

        # Every pixel starts as 1000 (background); pixels marked 1 in the
        # pseudo label receive the ImageNet class id in [0, 999].
        gt_semantic_seg = np.zeros_like(pssl_seg, dtype=np.int64) + 1000
        gt_semantic_seg[pssl_seg == 1] = class_id

        im, label = self.transforms(im=image_path, label=gt_semantic_seg)

        return im, label

@ -14,11 +14,11 @@
import os
from paddlers.models.ppseg.utils.download import download_file_and_uncompress
from paddlers.models.ppseg.utils import seg_env
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.transforms import Compose
from paddlers.models.ppseg.datasets import Dataset
from paddleseg.utils.download import download_file_and_uncompress
from paddleseg.utils import seg_env
from paddleseg.cvlibs import manager
from paddleseg.transforms import Compose
from paddleseg.datasets import Dataset
URL = 'https://bj.bcebos.com/paddleseg/dataset/stare/stare.zip'

@ -18,12 +18,12 @@ import copy
import cv2
import numpy as np
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.transforms import Compose
from paddlers.models.ppseg.datasets import Dataset
from paddlers.models.ppseg.utils.download import download_file_and_uncompress
from paddlers.models.ppseg.utils import seg_env
import paddlers.models.ppseg.transforms.functional as F
from paddleseg.cvlibs import manager
from paddleseg.transforms import Compose
from paddleseg.datasets import Dataset
from paddleseg.utils.download import download_file_and_uncompress
from paddleseg.utils import seg_env
import paddleseg.transforms.functional as F
URL = "https://paddleseg.bj.bcebos.com/dataset/Supervisely_face.zip"

@ -14,11 +14,11 @@
import os
from paddlers.models.ppseg.datasets import Dataset
from paddlers.models.ppseg.utils.download import download_file_and_uncompress
from paddlers.models.ppseg.utils import seg_env
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.transforms import Compose
from paddleseg.datasets import Dataset
from paddleseg.utils.download import download_file_and_uncompress
from paddleseg.utils import seg_env
from paddleseg.cvlibs import manager
from paddleseg.transforms import Compose
URL = "http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar"

@ -49,9 +49,18 @@ from .segnet import SegNet
from .encnet import ENCNet
from .hrnet_contrast import HRNetW48Contrast
from .espnet import ESPNetV2
from .pp_liteseg import PPLiteSeg
from .dmnet import DMNet
from .espnetv1 import ESPNetV1
from .enet import ENet
from .bisenetv1 import BiseNetV1
from .fastfcn import FastFCN
from .pfpnnet import PFPNNet
from .glore import GloRe
from .ddrnet import DDRNet_23
from .ccnet import CCNet
from .mobileseg import MobileSeg
from .upernet import UPerNet
from .sinet import SINet
from .lraspp import LRASPP
from .topformer import TopFormer

@ -16,9 +16,9 @@ import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.models import layers
from paddlers.models.ppseg.utils import utils
from paddleseg.cvlibs import manager
from paddleseg.models import layers
from paddleseg.utils import utils
@manager.MODELS.add_component

@ -14,9 +14,9 @@
import paddle
import paddle.nn as nn
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.models import layers
from paddlers.models.ppseg import utils
from paddleseg.cvlibs import manager
from paddleseg.models import layers
from paddleseg import utils
import numpy as np
@ -35,13 +35,13 @@ class AttentionUNet(nn.Layer):
Args:
num_classes (int): The unique number of target classes.
in_channels (int, optional): The channels of input image. Default: 3.
pretrained (str, optional): The path or url of pretrained model. Default: None.
"""
def __init__(self, num_classes, pretrained=None):
def __init__(self, num_classes, in_channels=3, pretrained=None):
super().__init__()
n_channels = 3
self.encoder = Encoder(n_channels, [64, 128, 256, 512])
self.encoder = Encoder(in_channels, [64, 128, 256, 512])
filters = np.array([64, 128, 256, 512, 1024])
self.up5 = UpConv(ch_in=filters[4], ch_out=filters[3])
self.att5 = AttentionBlock(

@ -21,3 +21,7 @@ from .swin_transformer import *
from .mobilenetv2 import *
from .mix_transformer import *
from .stdcnet import *
from .lite_hrnet import *
from .shufflenetv2 import *
from .ghostnet import *
from .top_transformer import *

@ -0,0 +1,318 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Code was based on https://github.com/huawei-noah/CV-Backbones/tree/master/ghostnet_pytorch
import math
import paddle
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn import Conv2D, BatchNorm, AdaptiveAvgPool2D, Linear
from paddle.regularizer import L2Decay
from paddle.nn.initializer import Uniform, KaimingNormal
from paddleseg.cvlibs import manager
from paddleseg.utils import utils, logger
__all__ = ["GhostNet_x0_5", "GhostNet_x1_0", "GhostNet_x1_3"]
class ConvBNLayer(nn.Layer):
    """A bias-free ``Conv2D`` followed by ``BatchNorm`` with an optional activation.

    Args:
        in_channels (int): Number of input channels of the convolution.
        out_channels (int): Number of output channels of the convolution.
        kernel_size (int): Kernel size; the padding is ``(kernel_size - 1) // 2``.
        stride (int, optional): Stride of the convolution. Default: 1.
        groups (int, optional): Number of convolution groups. Default: 1.
        act (str, optional): Activation passed to ``BatchNorm``; None disables it. Default: "relu".
        name (str, optional): Prefix for the explicit parameter names
            (``<name>_weights``, ``<name>_bn_scale``, ...). When None, no explicit
            names are set and the framework defaults apply. Default: None.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 groups=1,
                 act="relu",
                 name=None):
        super(ConvBNLayer, self).__init__()

        def scoped(suffix):
            # With the default name=None, string concatenation would raise
            # TypeError; pass None through so no explicit name is requested.
            return None if name is None else name + suffix

        self._conv = Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=(kernel_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(
                initializer=KaimingNormal(), name=scoped("_weights")),
            bias_attr=False)

        # Weight decay is switched off for the batch-norm scale and offset.
        self._batch_norm = BatchNorm(
            num_channels=out_channels,
            act=act,
            param_attr=ParamAttr(
                name=scoped("_bn_scale"), regularizer=L2Decay(0.0)),
            bias_attr=ParamAttr(
                name=scoped("_bn_offset"), regularizer=L2Decay(0.0)),
            moving_mean_name=scoped("_bn_mean"),
            moving_variance_name=scoped("_bn_variance"))

    def forward(self, inputs):
        """Apply the convolution, then batch normalization (and activation, if any)."""
        y = self._conv(inputs)
        y = self._batch_norm(y)
        return y
class SEBlock(nn.Layer):
    """Squeeze-and-excitation gate that rescales the channels of its input.

    Args:
        num_channels (int): Number of channels of the input feature map.
        reduction_ratio (int, optional): Divisor giving the width of the
            intermediate linear layer (``num_channels // reduction_ratio``). Default: 4.
        name (str, optional): Prefix for the explicit parameter names
            (``<name>_1_weights``, ``<name>_2_offset``, ...). When None, no
            explicit names are set and the framework defaults apply. Default: None.
    """

    def __init__(self, num_channels, reduction_ratio=4, name=None):
        super(SEBlock, self).__init__()

        def scoped(suffix):
            # With the default name=None, string concatenation would raise
            # TypeError; pass None through so no explicit name is requested.
            return None if name is None else name + suffix

        self.pool2d_gap = AdaptiveAvgPool2D(1)
        self._num_channels = num_channels

        # Each linear layer is initialized uniformly in +-1/sqrt(its input width).
        stdv = 1.0 / math.sqrt(num_channels * 1.0)
        med_ch = num_channels // reduction_ratio
        self.squeeze = Linear(
            num_channels,
            med_ch,
            weight_attr=ParamAttr(
                initializer=Uniform(-stdv, stdv), name=scoped("_1_weights")),
            bias_attr=ParamAttr(name=scoped("_1_offset")))

        stdv = 1.0 / math.sqrt(med_ch * 1.0)
        self.excitation = Linear(
            med_ch,
            num_channels,
            weight_attr=ParamAttr(
                initializer=Uniform(-stdv, stdv), name=scoped("_2_weights")),
            bias_attr=ParamAttr(name=scoped("_2_offset")))

    def forward(self, inputs):
        """Return ``inputs`` multiplied by per-channel gates clipped to [0, 1]."""
        pool = self.pool2d_gap(inputs)
        # Drop the two pooled axes so the linear layers see one vector per sample.
        pool = paddle.squeeze(pool, axis=[2, 3])
        squeeze = self.squeeze(pool)
        squeeze = F.relu(squeeze)
        excitation = self.excitation(squeeze)
        excitation = paddle.clip(x=excitation, min=0, max=1)
        # Restore the two trailing axes so the gates broadcast over the input.
        excitation = paddle.unsqueeze(excitation, axis=[2, 3])
        out = paddle.multiply(inputs, excitation)
        return out
class GhostModule(nn.Layer):
    """Ghost module: a primary convolution plus a cheap grouped convolution
    whose outputs are concatenated along the channel axis.

    Args:
        in_channels (int): Number of input channels.
        output_channels (int): Number of channels the module should produce.
        kernel_size (int, optional): Kernel size of the primary convolution. Default: 1.
        ratio (int, optional): The primary convolution produces
            ``ceil(output_channels / ratio)`` channels; the cheap operation
            produces ``ratio - 1`` times as many. Default: 2.
        dw_size (int, optional): Kernel size of the cheap operation. Default: 3.
        stride (int, optional): Stride of the primary convolution. Default: 1.
        relu (bool, optional): Whether both convolutions use a ReLU activation. Default: True.
        name (str): Prefix for the names of the two sub-layers. It is
            concatenated with a suffix, so a string must be supplied even
            though the signature defaults to None.
    """

    def __init__(self,
                 in_channels,
                 output_channels,
                 kernel_size=1,
                 ratio=2,
                 dw_size=3,
                 stride=1,
                 relu=True,
                 name=None):
        super(GhostModule, self).__init__()
        init_channels = int(math.ceil(output_channels / ratio))
        new_channels = int(init_channels * (ratio - 1))

        # The ceil above can make the concatenation wider than requested when
        # `output_channels` is not a multiple of `ratio`; remember whether the
        # surplus has to be cut off in forward().
        self._output_channels = output_channels
        self._trim_output = (init_channels + new_channels) > output_channels

        self.primary_conv = ConvBNLayer(
            in_channels=in_channels,
            out_channels=init_channels,
            kernel_size=kernel_size,
            stride=stride,
            groups=1,
            act="relu" if relu else None,
            name=name + "_primary_conv")
        # One group per primary channel.
        self.cheap_operation = ConvBNLayer(
            in_channels=init_channels,
            out_channels=new_channels,
            kernel_size=dw_size,
            stride=1,
            groups=init_channels,
            act="relu" if relu else None,
            name=name + "_cheap_operation")

    def forward(self, inputs):
        """Return the primary and cheap features concatenated on axis 1,
        limited to ``output_channels`` channels."""
        x = self.primary_conv(inputs)
        y = self.cheap_operation(x)
        out = paddle.concat([x, y], axis=1)
        if self._trim_output:
            # Only slice when needed, so the usual even-width case is unchanged.
            out = out[:, :self._output_channels, :, :]
        return out
class GhostBottleneck(nn.Layer):
    """Residual bottleneck built from two ``GhostModule`` layers.

    Main path: ghost module (expand to ``hidden_dim``), a grouped stride-2
    convolution when ``stride == 2``, an ``SEBlock`` when ``use_se`` is
    truthy, then a second ghost module without activation.
    Shortcut: the input itself when ``stride == 1`` and the channel counts
    match, otherwise a grouped convolution followed by a 1x1 convolution.

    Args:
        in_channels (int): Number of input channels.
        hidden_dim (int): Number of channels after the first ghost module.
        output_channels (int): Number of output channels.
        kernel_size (int): Kernel size of the grouped convolutions.
        stride (int): Stride of the block.
        use_se: Truthy to insert the squeeze-and-excitation block.
        name (str): Prefix for the names of all sub-layers. It is
            concatenated with suffixes, so a string must be supplied even
            though the signature defaults to None.
    """

    def __init__(self,
                 in_channels,
                 hidden_dim,
                 output_channels,
                 kernel_size,
                 stride,
                 use_se,
                 name=None):
        super(GhostBottleneck, self).__init__()
        self._stride = stride
        self._use_se = use_se
        self._num_channels = in_channels
        self._output_channels = output_channels

        identity_shortcut = stride == 1 and in_channels == output_channels

        self.ghost_module_1 = GhostModule(
            in_channels=in_channels,
            output_channels=hidden_dim,
            kernel_size=1,
            stride=1,
            relu=True,
            name=name + "_ghost_module_1")

        if stride == 2:
            # The name looks strange due to an old typo, will be fixed later.
            downsample_name = name + "_depthwise_depthwise"
            self.depthwise_conv = ConvBNLayer(
                in_channels=hidden_dim,
                out_channels=hidden_dim,
                kernel_size=kernel_size,
                stride=stride,
                groups=hidden_dim,
                act=None,
                name=downsample_name)

        if use_se:
            self.se_block = SEBlock(num_channels=hidden_dim, name=name + "_se")

        self.ghost_module_2 = GhostModule(
            in_channels=hidden_dim,
            output_channels=output_channels,
            kernel_size=1,
            relu=False,
            name=name + "_ghost_module_2")

        if not identity_shortcut:
            # The name looks strange due to an old typo, will be fixed later.
            shortcut_dw_name = name + "_shortcut_depthwise_depthwise"
            self.shortcut_depthwise = ConvBNLayer(
                in_channels=in_channels,
                out_channels=in_channels,
                kernel_size=kernel_size,
                stride=stride,
                groups=in_channels,
                act=None,
                name=shortcut_dw_name)
            self.shortcut_conv = ConvBNLayer(
                in_channels=in_channels,
                out_channels=output_channels,
                kernel_size=1,
                stride=1,
                groups=1,
                act=None,
                name=name + "_shortcut_conv")

    def forward(self, inputs):
        """Return the sum of the main path and the shortcut path."""
        out = self.ghost_module_1(inputs)
        if self._stride == 2:
            out = self.depthwise_conv(out)
        if self._use_se:
            out = self.se_block(out)
        out = self.ghost_module_2(out)

        if self._stride == 1 and self._num_channels == self._output_channels:
            residual = inputs
        else:
            residual = self.shortcut_conv(self.shortcut_depthwise(inputs))
        return paddle.add(x=out, y=residual)
class GhostNet(nn.Layer):
    """GhostNet backbone returning intermediate feature maps.

    A stride-2 stem convolution is followed by the ``GhostBottleneck``
    blocks listed in ``self.cfgs``. ``forward`` returns the outputs of the
    blocks whose index is in ``self.out_index``; their channel counts are
    recorded in ``self.feat_channels``.

    Args:
        scale (float): Width multiplier applied to every channel count
            before it is rounded to a multiple of 4.
        in_channels (int, optional): Number of channels of the input image. Default: 3.
        pretrained (str, optional): Passed to ``utils.load_entire_model``
            when not None. Default: None.
    """

    def __init__(self, scale, in_channels=3, pretrained=None):
        super(GhostNet, self).__init__()
        self.cfgs = [
            # k, t, c, SE, s
            [3, 16, 16, 0, 1],
            [3, 48, 24, 0, 2],
            [3, 72, 24, 0, 1],  # x4
            [5, 72, 40, 1, 2],
            [5, 120, 40, 1, 1],  # x8
            [3, 240, 80, 0, 2],
            [3, 200, 80, 0, 1],
            [3, 184, 80, 0, 1],
            [3, 184, 80, 0, 1],
            [3, 480, 112, 1, 1],
            [3, 672, 112, 1, 1],  # x16
            [5, 672, 160, 1, 2],
            [5, 960, 160, 0, 1],
            [5, 960, 160, 1, 1],
            [5, 960, 160, 0, 1],
            [5, 960, 160, 1, 1]  # x32
        ]
        self.scale = scale
        self.pretrained = pretrained

        stem_channels = int(self._make_divisible(16 * self.scale, 4))
        self.conv1 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=stem_channels,
            kernel_size=3,
            stride=2,
            groups=1,
            act="relu",
            name="conv1")

        # Build the bottleneck blocks; each block consumes the channel
        # count produced by the previous one.
        self.out_index = [2, 4, 10, 15]
        self.feat_channels = []
        self.ghost_bottleneck_list = []
        prev_channels = stem_channels
        for idx, (k, exp_size, c, use_se, s) in enumerate(self.cfgs):
            block_name = "_ghostbottleneck_" + str(idx)
            block_out = int(self._make_divisible(c * self.scale, 4))
            block_hidden = int(self._make_divisible(exp_size * self.scale, 4))
            block = GhostBottleneck(
                in_channels=prev_channels,
                hidden_dim=block_hidden,
                output_channels=block_out,
                kernel_size=k,
                stride=s,
                use_se=use_se,
                name=block_name)
            # add_sublayer registers the block on this layer; the plain list
            # only keeps the iteration order used by forward().
            self.ghost_bottleneck_list.append(
                self.add_sublayer(name=block_name, sublayer=block))
            if idx in self.out_index:
                self.feat_channels.append(block_out)
            prev_channels = block_out

        self.init_weight()

    def init_weight(self):
        """Load the pretrained weights when ``self.pretrained`` is set."""
        if self.pretrained is None:
            return
        utils.load_entire_model(self, self.pretrained)

    def forward(self, inputs):
        """Return the list of feature maps produced at ``self.out_index``."""
        selected = []
        feature = self.conv1(inputs)
        for idx, block in enumerate(self.ghost_bottleneck_list):
            feature = block(feature)
            if idx in self.out_index:
                selected.append(feature)
        return selected

    def _make_divisible(self, v, divisor, min_value=None):
        """
        Round ``v`` to the nearest multiple of ``divisor``.

        The result is never below ``min_value`` (which defaults to
        ``divisor``), and one extra ``divisor`` is added whenever rounding
        would otherwise drop the value below 90% of ``v``.
        This function is taken from the original tf repo:
        https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
        """
        lower_bound = divisor if min_value is None else min_value
        rounded = int(v + divisor / 2) // divisor * divisor
        new_v = max(lower_bound, rounded)
        # Make sure that rounding down does not go down by more than 10%.
        if new_v < 0.9 * v:
            new_v += divisor
        return new_v
@manager.BACKBONES.add_component
def GhostNet_x0_5(**kwargs):
    """Build a ``GhostNet`` with ``scale=0.5``; other keyword arguments are forwarded."""
    return GhostNet(scale=0.5, **kwargs)
@manager.BACKBONES.add_component
def GhostNet_x1_0(**kwargs):
    """Build a ``GhostNet`` with ``scale=1.0``; other keyword arguments are forwarded."""
    return GhostNet(scale=1.0, **kwargs)
@manager.BACKBONES.add_component
def GhostNet_x1_3(**kwargs):
    """Build a ``GhostNet`` with ``scale=1.3``; other keyword arguments are forwarded."""
    return GhostNet(scale=1.3, **kwargs)

@ -18,9 +18,9 @@ import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg.cvlibs import manager, param_init
from paddlers.models.ppseg.models import layers
from paddlers.models.ppseg.utils import utils
from paddleseg.cvlibs import manager, param_init
from paddleseg.models import layers
from paddleseg.utils import utils
__all__ = [
"HRNet_W18_Small_V1", "HRNet_W18_Small_V2", "HRNet_W18", "HRNet_W30",
@ -37,6 +37,7 @@ class HRNet(nn.Layer):
(https://arxiv.org/pdf/1908.07919.pdf).
Args:
in_channels (int, optional): The channels of input image. Default: 3.
pretrained (str, optional): The path of pretrained model.
stage1_num_modules (int, optional): Number of modules for stage1. Default 1.
stage1_num_blocks (list, optional): Number of blocks per module for stage1. Default (4).
@ -56,6 +57,7 @@ class HRNet(nn.Layer):
"""
def __init__(self,
in_channels=3,
pretrained=None,
stage1_num_modules=1,
stage1_num_blocks=(4, ),
@ -91,7 +93,7 @@ class HRNet(nn.Layer):
self.feat_channels = [sum(stage4_num_channels)]
self.conv_layer1_1 = layers.ConvBNReLU(
in_channels=3,
in_channels=in_channels,
out_channels=64,
kernel_size=3,
stride=2,

@ -0,0 +1,974 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on
https://github.com/HRNet/Lite-HRNet/blob/hrnet/models/backbones/litehrnet.py
"""
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from numbers import Integral
from paddle import ParamAttr
from paddle.regularizer import L2Decay
from paddle.nn.initializer import Normal, Constant
from paddleseg.cvlibs import manager
from paddleseg import utils
__all__ = [
"Lite_HRNet_18", "Lite_HRNet_30", "Lite_HRNet_naive",
"Lite_HRNet_wider_naive", "LiteHRNet"
]
def Conv2d(in_channels,
           out_channels,
           kernel_size,
           stride=1,
           padding=0,
           dilation=1,
           groups=1,
           bias=True,
           weight_init=Normal(std=0.001),
           bias_init=Constant(0.)):
    """
    Create an ``nn.Conv2D`` with explicit weight/bias initializers.

    Args:
        in_channels (int): Number of input channels.
        out_channels (int): Number of output channels.
        kernel_size (int|tuple): Size of the convolution kernel.
        stride (int|tuple, optional): Convolution stride. Default: 1.
        padding (int|tuple, optional): Convolution padding. Default: 0.
        dilation (int|tuple, optional): Convolution dilation. Default: 1.
        groups (int, optional): Number of convolution groups. Default: 1.
        bias (bool, optional): Whether the layer has a bias term. Default: True.
        weight_init (Initializer, optional): Initializer of the kernel.
            Default: Normal(std=0.001).
        bias_init (Initializer, optional): Initializer of the bias; only used
            when ``bias`` is truthy. Default: Constant(0.).

    Returns:
        nn.Conv2D: The configured convolution layer.
    """
    kernel_attr = paddle.framework.ParamAttr(initializer=weight_init)
    # ``bias_attr=False`` is how the convolution is told to have no bias at all.
    offset_attr = paddle.framework.ParamAttr(
        initializer=bias_init) if bias else False
    return nn.Conv2D(
        in_channels,
        out_channels,
        kernel_size,
        stride,
        padding,
        dilation,
        groups,
        weight_attr=kernel_attr,
        bias_attr=offset_attr)
def channel_shuffle(x, groups):
    """
    Interleave the channels of ``x`` across ``groups`` groups.

    The channel axis (axis 1) is reshaped to ``[groups, channels // groups]``,
    the two resulting axes are swapped, and the tensor is flattened back to
    its original 4-D layout.

    Args:
        x (Tensor): A 4-D tensor; axis 1 is treated as the channel axis.
        groups (int): Number of groups the channels are split into.

    Returns:
        Tensor: A tensor with the same shape as ``x`` and shuffled channels.
    """
    # Batch/height/width are read from the runtime shape, while the channel
    # count comes from the static shape so it stays a plain Python int.
    runtime_shape = paddle.shape(x)
    n, h, w = runtime_shape[0], runtime_shape[2], runtime_shape[3]
    c = x.shape[1]

    grouped = paddle.reshape(x=x, shape=[n, groups, c // groups, h, w])
    swapped = paddle.transpose(x=grouped, perm=[0, 2, 1, 3, 4])
    return paddle.reshape(x=swapped, shape=[n, c, h, w])
class ConvNormLayer(nn.Layer):
    """
    Convolution followed by an optional normalization and an optional activation.

    Args:
        ch_in (int): Number of input channels.
        ch_out (int): Number of output channels.
        filter_size (int): Kernel size; padding is ``(filter_size - 1) // 2``.
        stride (int, optional): Convolution stride. Default: 1.
        groups (int, optional): Number of convolution groups. Default: 1.
        norm_type (str, optional): One of 'bn', 'sync_bn', 'gn', or None for
            no normalization. 'bn' and 'sync_bn' both create ``nn.BatchNorm2D``.
            Default: None.
        norm_groups (int, optional): Number of groups for 'gn'. Default: 32.
        norm_decay (float, optional): L2 decay applied to the normalization
            parameters. Default: 0.
        freeze_norm (bool, optional): If True the normalization parameters get
            learning rate 0 and ``stop_gradient=True``, and batch norm uses
            global statistics. Default: False.
        act (str, optional): 'relu' or 'sigmoid'; any other value means no
            activation. Default: None.
    """

    def __init__(self,
                 ch_in,
                 ch_out,
                 filter_size,
                 stride=1,
                 groups=1,
                 norm_type=None,
                 norm_groups=32,
                 norm_decay=0.,
                 freeze_norm=False,
                 act=None):
        super().__init__()
        self.act = act

        if norm_type is None:
            # Without a normalization layer the convolution keeps its bias.
            use_conv_bias = True
            self.norm = None
        else:
            assert norm_type in ['bn', 'sync_bn', 'gn'], \
                "norm_type should be one of ['bn', 'sync_bn', 'gn'], but got {}".format(norm_type)
            lr_scale = 0. if freeze_norm else 1.
            scale_attr = ParamAttr(
                initializer=Constant(1.0),
                learning_rate=lr_scale,
                regularizer=L2Decay(norm_decay))
            shift_attr = ParamAttr(
                learning_rate=lr_scale, regularizer=L2Decay(norm_decay))

            if norm_type == 'gn':
                self.norm = nn.GroupNorm(
                    num_groups=norm_groups,
                    num_channels=ch_out,
                    weight_attr=scale_attr,
                    bias_attr=shift_attr)
            else:
                self.norm = nn.BatchNorm2D(
                    ch_out,
                    weight_attr=scale_attr,
                    bias_attr=shift_attr,
                    use_global_stats=True if freeze_norm else None)

            if freeze_norm:
                for frozen in self.norm.parameters():
                    frozen.stop_gradient = True
            # The normalization layer has its own shift, so the conv bias is dropped.
            use_conv_bias = False

        self.conv = nn.Conv2D(
            in_channels=ch_in,
            out_channels=ch_out,
            kernel_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(initializer=Normal(
                mean=0., std=0.001)),
            bias_attr=use_conv_bias)

    def forward(self, inputs):
        """Apply convolution, then normalization and activation when configured."""
        feat = self.conv(inputs)
        if self.norm is not None:
            feat = self.norm(feat)

        if self.act == 'relu':
            return F.relu(feat)
        if self.act == 'sigmoid':
            return F.sigmoid(feat)
        return feat
class DepthWiseSeparableConvNormLayer(nn.Layer):
    """
    Depthwise ``ConvNormLayer`` followed by a 1x1 pointwise ``ConvNormLayer``.

    Args:
        ch_in (int): Number of input channels (also the depthwise group count).
        ch_out (int): Number of output channels of the pointwise convolution.
        filter_size (int): Kernel size of the depthwise convolution.
        stride (int, optional): Stride of the depthwise convolution. Default: 1.
        dw_norm_type (str, optional): Normalization of the depthwise part. Default: None.
        pw_norm_type (str, optional): Normalization of the pointwise part. Default: None.
        norm_decay (float, optional): L2 decay of normalization parameters. Default: 0.
        freeze_norm (bool, optional): Whether to freeze normalization layers. Default: False.
        dw_act (str, optional): Activation after the depthwise part. Default: None.
        pw_act (str, optional): Activation after the pointwise part. Default: None.
    """

    def __init__(self,
                 ch_in,
                 ch_out,
                 filter_size,
                 stride=1,
                 dw_norm_type=None,
                 pw_norm_type=None,
                 norm_decay=0.,
                 freeze_norm=False,
                 dw_act=None,
                 pw_act=None):
        super().__init__()
        # Settings shared by both convolutions.
        common = dict(norm_decay=norm_decay, freeze_norm=freeze_norm)

        self.depthwise_conv = ConvNormLayer(
            ch_in=ch_in,
            ch_out=ch_in,
            filter_size=filter_size,
            stride=stride,
            groups=ch_in,
            norm_type=dw_norm_type,
            act=dw_act,
            **common)
        self.pointwise_conv = ConvNormLayer(
            ch_in=ch_in,
            ch_out=ch_out,
            filter_size=1,
            stride=1,
            norm_type=pw_norm_type,
            act=pw_act,
            **common)

    def forward(self, x):
        """Run the depthwise convolution and then the pointwise convolution."""
        return self.pointwise_conv(self.depthwise_conv(x))
class CrossResolutionWeightingModule(nn.Layer):
    """
    Compute per-branch weight maps from all branches jointly and apply them.

    Args:
        channels (list[int]): Channel count of every input branch.
        ratio (int, optional): Channel reduction ratio of the bottleneck. Default: 16.
        norm_type (str, optional): Normalization type of both convolutions. Default: 'bn'.
        freeze_norm (bool, optional): Whether to freeze normalization layers. Default: False.
        norm_decay (float, optional): L2 decay of normalization parameters. Default: 0.
    """

    def __init__(self,
                 channels,
                 ratio=16,
                 norm_type='bn',
                 freeze_norm=False,
                 norm_decay=0.):
        super().__init__()
        self.channels = channels
        joint = sum(channels)
        squeezed = joint // ratio
        norm_cfg = dict(
            norm_type=norm_type,
            freeze_norm=freeze_norm,
            norm_decay=norm_decay)

        self.conv1 = ConvNormLayer(
            ch_in=joint,
            ch_out=squeezed,
            filter_size=1,
            stride=1,
            act='relu',
            **norm_cfg)
        self.conv2 = ConvNormLayer(
            ch_in=squeezed,
            ch_out=joint,
            filter_size=1,
            stride=1,
            act='sigmoid',
            **norm_cfg)

    def forward(self, x):
        """
        Args:
            x (list[Tensor]): One feature map per branch.

        Returns:
            list[Tensor]: Every input multiplied by its weight map, which is
                resized to the input's spatial size with nearest interpolation.
        """
        num_inputs = len(x)
        # Average-pool every branch but the last by 2 ** (distance to the last
        # branch); presumably this brings all maps to the last branch's size.
        pooled = []
        for idx, feat in enumerate(x[:-1]):
            window = 2**(num_inputs - idx - 1)
            pooled.append(F.avg_pool2d(feat, kernel_size=window, stride=window))
        pooled.append(x[-1])

        weights = self.conv2(self.conv1(paddle.concat(pooled, 1)))
        per_branch = paddle.split(weights, self.channels, 1)
        return [
            feat * F.interpolate(
                wt, paddle.shape(feat)[-2:], mode='nearest')
            for feat, wt in zip(x, per_branch)
        ]
class SpatialWeightingModule(nn.Layer):
    """
    Scale the input by channel weights computed from its global average.

    Args:
        in_channel (int): Number of input (and output) channels.
        ratio (int, optional): Channel reduction ratio of the bottleneck. Default: 16.
        freeze_norm (bool, optional): Forwarded to the ``ConvNormLayer``s. Default: False.
        norm_decay (float, optional): Forwarded to the ``ConvNormLayer``s. Default: 0.
    """

    def __init__(self, in_channel, ratio=16, freeze_norm=False, norm_decay=0.):
        super().__init__()
        hidden = in_channel // ratio
        self.global_avgpooling = nn.AdaptiveAvgPool2D(1)
        # No norm_type is passed, so both convolutions run without normalization.
        self.conv1 = ConvNormLayer(
            ch_in=in_channel,
            ch_out=hidden,
            filter_size=1,
            stride=1,
            act='relu',
            freeze_norm=freeze_norm,
            norm_decay=norm_decay)
        self.conv2 = ConvNormLayer(
            ch_in=hidden,
            ch_out=in_channel,
            filter_size=1,
            stride=1,
            act='sigmoid',
            freeze_norm=freeze_norm,
            norm_decay=norm_decay)

    def forward(self, x):
        """Return ``x`` multiplied by the sigmoid weights of its pooled descriptor."""
        gate = self.conv2(self.conv1(self.global_avgpooling(x)))
        return x * gate
class ConditionalChannelWeightingBlock(nn.Layer):
    """
    Conditional channel weighting applied to one half of every branch.

    Each branch is split in two along the channel axis. The second half goes
    through cross-resolution weighting, a depthwise 3x3 convolution and spatial
    weighting; it is then concatenated with the untouched first half and the
    channels are shuffled.

    Args:
        in_channels (list[int]): Channel count of every branch.
        stride (int): Stride of the depthwise convolutions; must be 1 or 2.
        reduce_ratio (int): Reduction ratio of the cross-resolution weighting.
        norm_type (str, optional): Normalization type. Default: 'bn'.
        freeze_norm (bool, optional): Whether to freeze normalization layers. Default: False.
        norm_decay (float, optional): L2 decay of normalization parameters. Default: 0.
    """

    def __init__(self,
                 in_channels,
                 stride,
                 reduce_ratio,
                 norm_type='bn',
                 freeze_norm=False,
                 norm_decay=0.):
        super().__init__()
        assert stride in [1, 2]
        half_channels = [full // 2 for full in in_channels]

        self.cross_resolution_weighting = CrossResolutionWeightingModule(
            half_channels,
            ratio=reduce_ratio,
            norm_type=norm_type,
            freeze_norm=freeze_norm,
            norm_decay=norm_decay)

        depthwise = []
        for width in half_channels:
            depthwise.append(
                ConvNormLayer(
                    width,
                    width,
                    filter_size=3,
                    stride=stride,
                    groups=width,
                    norm_type=norm_type,
                    freeze_norm=freeze_norm,
                    norm_decay=norm_decay))
        self.depthwise_convs = nn.LayerList(depthwise)

        spatial = []
        for width in half_channels:
            spatial.append(
                SpatialWeightingModule(
                    width,
                    ratio=4,
                    freeze_norm=freeze_norm,
                    norm_decay=norm_decay))
        self.spatial_weighting = nn.LayerList(spatial)

    def forward(self, x):
        """
        Args:
            x (list[Tensor]): One feature map per branch.

        Returns:
            list[Tensor]: One feature map per branch with the input's channel count.
        """
        halves = [feat.chunk(2, axis=1) for feat in x]
        untouched = [pair[0] for pair in halves]
        weighted = self.cross_resolution_weighting([pair[1] for pair in halves])
        weighted = [
            conv(feat) for feat, conv in zip(weighted, self.depthwise_convs)
        ]
        weighted = [
            gate(feat) for feat, gate in zip(weighted, self.spatial_weighting)
        ]
        merged = [
            paddle.concat([keep, new], axis=1)
            for keep, new in zip(untouched, weighted)
        ]
        return [channel_shuffle(feat, groups=2) for feat in merged]
class ShuffleUnit(nn.Layer):
    """
    Two-branch shuffle unit built from ``ConvNormLayer``s.

    With ``stride > 1`` both branches process the whole input. Otherwise the
    input is split in two along the channel axis and only the second half is
    transformed. The branch outputs are concatenated and channel-shuffled.

    Args:
        in_channel (int): Number of input channels.
        out_channel (int): Number of output channels; each branch yields half.
        stride (int): Stride of the depthwise convolutions.
        norm_type (str, optional): Normalization type. Default: 'bn'.
        freeze_norm (bool, optional): Whether to freeze normalization layers. Default: False.
        norm_decay (float, optional): L2 decay of normalization parameters. Default: 0.
    """

    def __init__(self,
                 in_channel,
                 out_channel,
                 stride,
                 norm_type='bn',
                 freeze_norm=False,
                 norm_decay=0.):
        super().__init__()
        half = out_channel // 2
        self.stride = stride
        if self.stride == 1:
            assert in_channel == half * 2, \
                "when stride=1, in_channel {} should equal to branch_channel*2 {}".format(in_channel, half * 2)

        norm_cfg = dict(
            norm_type=norm_type,
            freeze_norm=freeze_norm,
            norm_decay=norm_decay)

        if stride > 1:
            # Strided shortcut branch: depthwise 3x3 followed by pointwise 1x1.
            self.branch1 = nn.Sequential(
                ConvNormLayer(
                    ch_in=in_channel,
                    ch_out=in_channel,
                    filter_size=3,
                    stride=self.stride,
                    groups=in_channel,
                    **norm_cfg),
                ConvNormLayer(
                    ch_in=in_channel,
                    ch_out=half,
                    filter_size=1,
                    stride=1,
                    act='relu',
                    **norm_cfg))

        # With stride 1 this branch only sees the second half of the input.
        branch2_in = half if stride == 1 else in_channel
        self.branch2 = nn.Sequential(
            ConvNormLayer(
                ch_in=branch2_in,
                ch_out=half,
                filter_size=1,
                stride=1,
                act='relu',
                **norm_cfg),
            ConvNormLayer(
                ch_in=half,
                ch_out=half,
                filter_size=3,
                stride=self.stride,
                groups=half,
                **norm_cfg),
            ConvNormLayer(
                ch_in=half,
                ch_out=half,
                filter_size=1,
                stride=1,
                act='relu',
                **norm_cfg))

    def forward(self, x):
        """Run both branches, concatenate along channels and shuffle with 2 groups."""
        if self.stride > 1:
            left = self.branch1(x)
            right = self.branch2(x)
        else:
            left, right = x.chunk(2, axis=1)
            right = self.branch2(right)
        return channel_shuffle(paddle.concat([left, right], axis=1), groups=2)
class IterativeHead(nn.Layer):
    """
    Head that walks the branches from last to first, accumulating features.

    Every branch is projected by a depthwise-separable convolution. Except for
    the final projection, the output width equals the channel count of the
    next branch in the walk, so the result can be added to it.

    Args:
        in_channels (list[int]): Channel count of every branch, in input order.
        norm_type (str, optional): Normalization type of the projections. Default: 'bn'.
        freeze_norm (bool, optional): Whether to freeze normalization layers. Default: False.
        norm_decay (float, optional): L2 decay of normalization parameters. Default: 0.
    """

    def __init__(self,
                 in_channels,
                 norm_type='bn',
                 freeze_norm=False,
                 norm_decay=0.):
        super().__init__()
        branch_count = len(in_channels)
        # Stored reversed: the projections are applied from the last branch on.
        self.in_channels = in_channels[::-1]

        stacked = []
        for pos in range(branch_count):
            is_final = pos == branch_count - 1
            # The final projection keeps its width; the others map to the
            # width of the branch visited next.
            target = self.in_channels[pos] if is_final else self.in_channels[
                pos + 1]
            stacked.append(
                DepthWiseSeparableConvNormLayer(
                    ch_in=self.in_channels[pos],
                    ch_out=target,
                    filter_size=3,
                    stride=1,
                    dw_act=None,
                    pw_act='relu',
                    dw_norm_type=norm_type,
                    pw_norm_type=norm_type,
                    freeze_norm=freeze_norm,
                    norm_decay=norm_decay))
        self.projects = nn.LayerList(stacked)

    def forward(self, x):
        """
        Args:
            x (list[Tensor]): One feature map per branch, in input order.

        Returns:
            list[Tensor]: The projected feature maps, in input order.
        """
        outputs = []
        carried = None
        for pos, feat in enumerate(x[::-1]):
            if carried is not None:
                # Resize the previous result to the current map and add it.
                carried = F.interpolate(
                    carried,
                    size=paddle.shape(feat)[-2:],
                    mode='bilinear',
                    align_corners=True)
                feat = feat + carried
            feat = self.projects[pos](feat)
            outputs.append(feat)
            carried = feat
        return outputs[::-1]
class Stem(nn.Layer):
    """
    Network stem: a strided 3x3 convolution followed by a two-branch unit.

    After ``conv1`` the features are split in two along the channel axis. The
    first half goes through ``branch1``; the second half goes through
    ``expand_conv``, ``depthwise_conv`` and ``linear_conv``. Both results are
    concatenated and channel-shuffled.

    Args:
        in_channel (int): Number of input channels.
        stem_channel (int): Number of channels produced by ``conv1``.
        out_channel (int): Number of channels of the stem output.
        expand_ratio (float): ``round(stem_channel * expand_ratio)`` is the
            width of the expansion in the second branch.
        norm_type (str, optional): Normalization type. Default: 'bn'.
        freeze_norm (bool, optional): Whether to freeze normalization layers. Default: False.
        norm_decay (float, optional): L2 decay of normalization parameters. Default: 0.
    """

    def __init__(self,
                 in_channel,
                 stem_channel,
                 out_channel,
                 expand_ratio,
                 norm_type='bn',
                 freeze_norm=False,
                 norm_decay=0.):
        super().__init__()
        norm_cfg = dict(
            norm_type=norm_type,
            freeze_norm=freeze_norm,
            norm_decay=norm_decay)

        self.conv1 = ConvNormLayer(
            in_channel,
            stem_channel,
            filter_size=3,
            stride=2,
            act='relu',
            **norm_cfg)

        expanded = int(round(stem_channel * expand_ratio))
        half = stem_channel // 2
        # The second branch outputs ``half`` channels when the stem keeps its
        # width and ``stem_channel`` otherwise; branch1 provides the rest.
        second_out = half if stem_channel == out_channel else stem_channel
        first_out = out_channel - second_out

        self.branch1 = nn.Sequential(
            ConvNormLayer(
                ch_in=half,
                ch_out=half,
                filter_size=3,
                stride=2,
                groups=half,
                **norm_cfg),
            ConvNormLayer(
                ch_in=half,
                ch_out=first_out,
                filter_size=1,
                stride=1,
                act='relu',
                **norm_cfg))
        self.expand_conv = ConvNormLayer(
            ch_in=half,
            ch_out=expanded,
            filter_size=1,
            stride=1,
            act='relu',
            **norm_cfg)
        self.depthwise_conv = ConvNormLayer(
            ch_in=expanded,
            ch_out=expanded,
            filter_size=3,
            stride=2,
            groups=expanded,
            **norm_cfg)
        self.linear_conv = ConvNormLayer(
            ch_in=expanded,
            ch_out=second_out,
            filter_size=1,
            stride=1,
            act='relu',
            **norm_cfg)

    def forward(self, x):
        """Apply the stem convolution and the two-branch unit to ``x``."""
        first, second = self.conv1(x).chunk(2, axis=1)
        first = self.branch1(first)
        second = self.linear_conv(self.depthwise_conv(self.expand_conv(second)))
        return channel_shuffle(paddle.concat([first, second], axis=1), groups=2)
class LiteHRNetModule(nn.Layer):
    """
    One multi-branch module: per-branch blocks plus optional cross-branch fusion.

    Args:
        num_branches (int): Number of parallel branches; must equal
            ``len(in_channels)``.
        num_blocks (int): Number of blocks stacked in the module.
        in_channels (list[int]): Channel count of every branch.
        reduce_ratio (int): Reduction ratio passed to the conditional channel
            weighting blocks (only used when ``module_type`` is 'LITE').
        module_type (str): 'LITE' builds ``ConditionalChannelWeightingBlock``s
            shared by all branches; 'NAIVE' builds a stack of ``ShuffleUnit``s
            per branch.
        multiscale_output (bool, optional): If True, fusion yields one output
            per branch; otherwise only one output is produced. Default: False.
        with_fuse (bool, optional): Whether to fuse the branches. Default: True.
        norm_type (str, optional): Normalization type handed to the blocks.
            The fuse layers always use ``nn.BatchNorm2D``. Default: 'bn'.
        freeze_norm (bool, optional): Whether to freeze normalization layers
            inside the blocks. Default: False.
        norm_decay (float, optional): L2 decay of normalization parameters
            inside the blocks. Default: 0.
    """

    def __init__(self,
                 num_branches,
                 num_blocks,
                 in_channels,
                 reduce_ratio,
                 module_type,
                 multiscale_output=False,
                 with_fuse=True,
                 norm_type='bn',
                 freeze_norm=False,
                 norm_decay=0.):
        super(LiteHRNetModule, self).__init__()
        assert num_branches == len(in_channels),\
            "num_branches {} should equal to num_in_channels {}".format(num_branches, len(in_channels))
        assert module_type in [
            'LITE', 'NAIVE'
        ], "module_type should be one of ['LITE', 'NAIVE']"
        self.num_branches = num_branches
        self.in_channels = in_channels
        self.multiscale_output = multiscale_output
        self.with_fuse = with_fuse
        # Honor the constructor argument (it used to be overwritten with 'bn').
        self.norm_type = norm_type
        self.module_type = module_type

        if self.module_type == 'LITE':
            self.layers = self._make_weighting_blocks(
                num_blocks,
                reduce_ratio,
                freeze_norm=freeze_norm,
                norm_decay=norm_decay)
        elif self.module_type == 'NAIVE':
            self.layers = self._make_naive_branches(
                num_branches,
                num_blocks,
                freeze_norm=freeze_norm,
                norm_decay=norm_decay)

        if self.with_fuse:
            self.fuse_layers = self._make_fuse_layers(
                freeze_norm=freeze_norm, norm_decay=norm_decay)
            self.relu = nn.ReLU()

    def _make_weighting_blocks(self,
                               num_blocks,
                               reduce_ratio,
                               stride=1,
                               freeze_norm=False,
                               norm_decay=0.):
        """Stack ``num_blocks`` conditional channel weighting blocks (all branches at once)."""
        layers = [
            ConditionalChannelWeightingBlock(
                self.in_channels,
                stride=stride,
                reduce_ratio=reduce_ratio,
                norm_type=self.norm_type,
                freeze_norm=freeze_norm,
                norm_decay=norm_decay) for _ in range(num_blocks)
        ]
        return nn.Sequential(*layers)

    def _make_naive_branches(self,
                             num_branches,
                             num_blocks,
                             freeze_norm=False,
                             norm_decay=0.):
        """Build, for every branch, a stack of ``num_blocks`` stride-1 shuffle units."""
        branches = []
        for branch_idx in range(num_branches):
            width = self.in_channels[branch_idx]
            layers = [
                ShuffleUnit(
                    width,
                    width,
                    stride=1,
                    norm_type=self.norm_type,
                    freeze_norm=freeze_norm,
                    norm_decay=norm_decay) for _ in range(num_blocks)
            ]
            branches.append(nn.Sequential(*layers))
        return nn.LayerList(branches)

    def _make_fuse_layers(self, freeze_norm=False, norm_decay=0.):
        """
        Build the layers that map branch ``j`` onto output branch ``i``.

        Returns None for a single branch. Otherwise entry ``[i][j]`` is
        a 1x1 conv + BN + nearest upsampling by ``2 ** (j - i)`` when ``j > i``,
        None when ``j == i``, and a chain of ``i - j`` stride-2 depthwise /
        pointwise stages when ``j < i``. ``freeze_norm`` and ``norm_decay``
        are accepted but not used here.
        """
        if self.num_branches == 1:
            return None
        fuse_layers = []
        num_out_branches = self.num_branches if self.multiscale_output else 1
        for i in range(num_out_branches):
            fuse_layer = []
            for j in range(self.num_branches):
                if j > i:
                    fuse_layer.append(
                        nn.Sequential(
                            Conv2d(
                                self.in_channels[j],
                                self.in_channels[i],
                                kernel_size=1,
                                stride=1,
                                padding=0,
                                bias=False),
                            nn.BatchNorm2D(self.in_channels[i]),
                            nn.Upsample(
                                scale_factor=2**(j - i), mode='nearest')))
                elif j == i:
                    fuse_layer.append(None)
                else:
                    conv_downsamples = []
                    for k in range(i - j):
                        if k == i - j - 1:
                            # Last stage: switch to the target width, no ReLU.
                            conv_downsamples.append(
                                nn.Sequential(
                                    Conv2d(
                                        self.in_channels[j],
                                        self.in_channels[j],
                                        kernel_size=3,
                                        stride=2,
                                        padding=1,
                                        groups=self.in_channels[j],
                                        bias=False),
                                    nn.BatchNorm2D(self.in_channels[j]),
                                    Conv2d(
                                        self.in_channels[j],
                                        self.in_channels[i],
                                        kernel_size=1,
                                        stride=1,
                                        padding=0,
                                        bias=False),
                                    nn.BatchNorm2D(self.in_channels[i])))
                        else:
                            # Intermediate stage: keep the source width, end with ReLU.
                            conv_downsamples.append(
                                nn.Sequential(
                                    Conv2d(
                                        self.in_channels[j],
                                        self.in_channels[j],
                                        kernel_size=3,
                                        stride=2,
                                        padding=1,
                                        groups=self.in_channels[j],
                                        bias=False),
                                    nn.BatchNorm2D(self.in_channels[j]),
                                    Conv2d(
                                        self.in_channels[j],
                                        self.in_channels[j],
                                        kernel_size=1,
                                        stride=1,
                                        padding=0,
                                        bias=False),
                                    nn.BatchNorm2D(self.in_channels[j]),
                                    nn.ReLU()))
                    fuse_layer.append(nn.Sequential(*conv_downsamples))
            fuse_layers.append(nn.LayerList(fuse_layer))
        return nn.LayerList(fuse_layers)

    def forward(self, x):
        """
        Args:
            x (list[Tensor]): One feature map per branch. The list itself is
                not modified.

        Returns:
            list[Tensor]: The module outputs (a single-element list when only
                one output branch is produced).
        """
        if self.num_branches == 1:
            return [self.layers[0](x[0])]

        if self.module_type == 'LITE':
            out = self.layers(x)
        elif self.module_type == 'NAIVE':
            # Work on a copy so the caller's list is left untouched.
            out = list(x)
            for i in range(self.num_branches):
                out[i] = self.layers[i](out[i])

        if self.with_fuse:
            out_fuse = []
            for i in range(len(self.fuse_layers)):
                y = out[0] if i == 0 else self.fuse_layers[i][0](out[0])
                for j in range(self.num_branches):
                    # NOTE(review): the j == 0 term doubles the running value
                    # rather than adding out[0] once; kept unchanged because
                    # altering it would change the numerical behaviour
                    # (presumably matched by released weights - confirm).
                    if j == 0:
                        y += y
                    elif i == j:
                        y += out[j]
                    else:
                        y += self.fuse_layers[i][j](out[j])
                    if i == 0:
                        # The fused first branch replaces out[0], so later
                        # output branches are derived from the fused value.
                        out[i] = y
                out_fuse.append(self.relu(y))
            out = out_fuse
        elif not self.multiscale_output:
            out = [out[0]]
        return out
class LiteHRNet(nn.Layer):
    """
    @inproceedings{Yulitehrnet21,
    title={Lite-HRNet: A Lightweight High-Resolution Network},
        author={Yu, Changqian and Xiao, Bin and Gao, Changxin and Yuan, Lu and Zhang, Lei and Sang, Nong and Wang, Jingdong},
        booktitle={CVPR},year={2021}
    }

    Args:
        network_type (str): the network_type should be one of ["lite_18", "lite_30", "naive", "wider_naive"],
            "naive": Simply combining the shuffle block in ShuffleNet and the highresolution design pattern in HRNet.
            "wider_naive": Naive network with wider channels in each block.
            "lite_18": Lite-HRNet-18, which replaces the pointwise convolution in a shuffle block by conditional channel weighting.
            "lite_30": Lite-HRNet-30, with more blocks compared with Lite-HRNet-18.
        in_channels (int, optional): The channels of input image. Default: 3.
        freeze_at (int, optional): Index of the output feature map whose
            ``stop_gradient`` is set to True in ``forward``. Default: 0.
        freeze_norm (bool, optional): Forwarded to the stages and the head as
            their ``freeze_norm`` setting. Default: True.
        norm_decay (float, optional): Weight decay for normalization layer weights. Default: 0.
        return_idx (int|list[int], optional): Indices of the output feature
            maps to return. Default: [0, 1, 2, 3].
        use_head (bool, optional): Whether to append an ``IterativeHead`` after
            the last stage. Default: False.
        pretrained (str, optional): When not None it is passed to
            ``utils.load_entire_model`` to load weights. Default: None.
    """

    def __init__(self,
                 network_type,
                 in_channels=3,
                 freeze_at=0,
                 freeze_norm=True,
                 norm_decay=0.,
                 return_idx=[0, 1, 2, 3],
                 use_head=False,
                 pretrained=None):
        super(LiteHRNet, self).__init__()
        if isinstance(return_idx, Integral):
            return_idx = [return_idx]
        assert network_type in ["lite_18", "lite_30", "naive", "wider_naive"], \
            "the network_type should be one of [lite_18, lite_30, naive, wider_naive]"
        assert len(return_idx) > 0, "need one or more return index"
        self.freeze_at = freeze_at
        self.freeze_norm = freeze_norm
        self.norm_decay = norm_decay
        # Copy so that neither the shared default list nor a caller-owned
        # list is aliased by this instance.
        self.return_idx = list(return_idx)
        self.norm_type = 'bn'
        self.use_head = use_head
        self.pretrained = pretrained

        self.module_configs = {
            "lite_18": {
                "num_modules": [2, 4, 2],
                "num_branches": [2, 3, 4],
                "num_blocks": [2, 2, 2],
                "module_type": ["LITE", "LITE", "LITE"],
                "reduce_ratios": [8, 8, 8],
                "num_channels": [[40, 80], [40, 80, 160], [40, 80, 160, 320]],
            },
            "lite_30": {
                "num_modules": [3, 8, 3],
                "num_branches": [2, 3, 4],
                "num_blocks": [2, 2, 2],
                "module_type": ["LITE", "LITE", "LITE"],
                "reduce_ratios": [8, 8, 8],
                "num_channels": [[40, 80], [40, 80, 160], [40, 80, 160, 320]],
            },
            "naive": {
                "num_modules": [2, 4, 2],
                "num_branches": [2, 3, 4],
                "num_blocks": [2, 2, 2],
                "module_type": ["NAIVE", "NAIVE", "NAIVE"],
                "reduce_ratios": [1, 1, 1],
                "num_channels": [[30, 60], [30, 60, 120], [30, 60, 120, 240]],
            },
            "wider_naive": {
                "num_modules": [2, 4, 2],
                "num_branches": [2, 3, 4],
                "num_blocks": [2, 2, 2],
                "module_type": ["NAIVE", "NAIVE", "NAIVE"],
                "reduce_ratios": [1, 1, 1],
                "num_channels": [[40, 80], [40, 80, 160], [40, 80, 160, 320]],
            },
        }

        self.stages_config = self.module_configs[network_type]

        self.stem = Stem(in_channels, 32, 32, 1)
        num_channels_pre_layer = [32]
        # The sublayers are registered as transition0..2 / stage0..2; forward
        # looks them up by the same names.
        for stage_idx in range(3):
            num_channels = self.stages_config["num_channels"][stage_idx]
            setattr(self, 'transition{}'.format(stage_idx),
                    self._make_transition_layer(num_channels_pre_layer,
                                                num_channels, self.freeze_norm,
                                                self.norm_decay))
            stage, num_channels_pre_layer = self._make_stage(
                self.stages_config, stage_idx, num_channels, True,
                self.freeze_norm, self.norm_decay)
            setattr(self, 'stage{}'.format(stage_idx), stage)

        num_channels = self.stages_config["num_channels"][-1]
        self.feat_channels = num_channels

        if self.use_head:
            self.head_layer = IterativeHead(num_channels_pre_layer, 'bn',
                                            self.freeze_norm, self.norm_decay)
            # With the head, every output but the first has half the channels
            # of the corresponding last-stage branch.
            self.feat_channels = [num_channels[0]]
            for i in range(1, len(num_channels)):
                self.feat_channels.append(num_channels[i] // 2)

        self.init_weight()

    def init_weight(self):
        """Load the pretrained weights when ``pretrained`` was given."""
        if self.pretrained is not None:
            utils.load_entire_model(self, self.pretrained)

    def _make_transition_layer(self,
                               num_channels_pre_layer,
                               num_channels_cur_layer,
                               freeze_norm=False,
                               norm_decay=0.):
        """
        Build the layers that adapt the previous stage's branches to this stage.

        For an existing branch the entry is None when the channel count is
        unchanged, otherwise a depthwise 3x3 + pointwise 1x1 block. For every
        new branch a chain of stride-2 blocks is built from the last previous
        branch. ``freeze_norm`` and ``norm_decay`` are accepted but not used.
        """
        num_branches_pre = len(num_channels_pre_layer)
        num_branches_cur = len(num_channels_cur_layer)
        transition_layers = []
        for i in range(num_branches_cur):
            if i < num_branches_pre:
                if num_channels_cur_layer[i] != num_channels_pre_layer[i]:
                    transition_layers.append(
                        nn.Sequential(
                            Conv2d(
                                num_channels_pre_layer[i],
                                num_channels_pre_layer[i],
                                kernel_size=3,
                                stride=1,
                                padding=1,
                                groups=num_channels_pre_layer[i],
                                bias=False),
                            nn.BatchNorm2D(num_channels_pre_layer[i]),
                            Conv2d(
                                num_channels_pre_layer[i],
                                num_channels_cur_layer[i],
                                kernel_size=1,
                                stride=1,
                                padding=0,
                                bias=False),
                            nn.BatchNorm2D(num_channels_cur_layer[i]),
                            nn.ReLU()))
                else:
                    transition_layers.append(None)
            else:
                conv_downsamples = []
                for j in range(i + 1 - num_branches_pre):
                    # Only the last block of the chain switches to the new width.
                    out_width = num_channels_cur_layer[
                        i] if j == i - num_branches_pre else num_channels_pre_layer[
                            -1]
                    conv_downsamples.append(
                        nn.Sequential(
                            Conv2d(
                                num_channels_pre_layer[-1],
                                num_channels_pre_layer[-1],
                                groups=num_channels_pre_layer[-1],
                                kernel_size=3,
                                stride=2,
                                padding=1,
                                bias=False),
                            nn.BatchNorm2D(num_channels_pre_layer[-1]),
                            Conv2d(
                                num_channels_pre_layer[-1],
                                out_width,
                                kernel_size=1,
                                stride=1,
                                padding=0,
                                bias=False),
                            nn.BatchNorm2D(out_width),
                            nn.ReLU()))
                transition_layers.append(nn.Sequential(*conv_downsamples))
        return nn.LayerList(transition_layers)

    def _make_stage(self,
                    stages_config,
                    stage_idx,
                    in_channels,
                    multiscale_output,
                    freeze_norm=False,
                    norm_decay=0.):
        """
        Build stage ``stage_idx`` as a sequence of ``LiteHRNetModule``s.

        Returns:
            tuple: ``(nn.Sequential of modules, in_channels of the last module)``.
        """
        num_modules = stages_config["num_modules"][stage_idx]
        num_branches = stages_config["num_branches"][stage_idx]
        num_blocks = stages_config["num_blocks"][stage_idx]
        reduce_ratio = stages_config['reduce_ratios'][stage_idx]
        module_type = stages_config['module_type'][stage_idx]

        modules = []
        for i in range(num_modules):
            # Only the last module may drop to a single-scale output.
            if not multiscale_output and i == num_modules - 1:
                reset_multiscale_output = False
            else:
                reset_multiscale_output = True
            modules.append(
                LiteHRNetModule(
                    num_branches,
                    num_blocks,
                    in_channels,
                    reduce_ratio,
                    module_type,
                    multiscale_output=reset_multiscale_output,
                    with_fuse=True,
                    freeze_norm=freeze_norm,
                    norm_decay=norm_decay))
            in_channels = modules[-1].in_channels
        return nn.Sequential(*modules), in_channels

    def forward(self, x):
        """
        Args:
            x (Tensor): The input image batch.

        Returns:
            list[Tensor]: The output feature maps whose index is in ``return_idx``.
        """
        x = self.stem(x)
        y_list = [x]
        for stage_idx in range(3):
            x_list = []
            transition = getattr(self, 'transition{}'.format(stage_idx))
            for j in range(self.stages_config["num_branches"][stage_idx]):
                if transition[j] is not None:
                    if j >= len(y_list):
                        # A new branch is derived from the last existing one.
                        x_list.append(transition[j](y_list[-1]))
                    else:
                        x_list.append(transition[j](y_list[j]))
                else:
                    x_list.append(y_list[j])
            y_list = getattr(self, 'stage{}'.format(stage_idx))(x_list)

        if self.use_head:
            y_list = self.head_layer(y_list)

        res = []
        for i, layer in enumerate(y_list):
            # NOTE(review): with the default freeze_at=0 the first output has
            # its gradient stopped - confirm this is intended for training.
            if i == self.freeze_at:
                layer.stop_gradient = True
            if i in self.return_idx:
                res.append(layer)
        return res
@manager.BACKBONES.add_component
def Lite_HRNet_18(**kwargs):
    """Build a ``LiteHRNet`` with ``network_type="lite_18"``; kwargs are forwarded."""
    return LiteHRNet(network_type="lite_18", **kwargs)
@manager.BACKBONES.add_component
def Lite_HRNet_30(**kwargs):
    """Build a ``LiteHRNet`` with ``network_type="lite_30"``; kwargs are forwarded."""
    return LiteHRNet(network_type="lite_30", **kwargs)
@manager.BACKBONES.add_component
def Lite_HRNet_naive(**kwargs):
    """Build a ``LiteHRNet`` with ``network_type="naive"``; kwargs are forwarded."""
    return LiteHRNet(network_type="naive", **kwargs)
@manager.BACKBONES.add_component
def Lite_HRNet_wider_naive(**kwargs):
    """Build a ``LiteHRNet`` with ``network_type="wider_naive"``; kwargs are forwarded."""
    return LiteHRNet(network_type="wider_naive", **kwargs)

@ -1,4 +1,4 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -20,9 +20,9 @@ import paddle.nn as nn
import paddle.nn.functional as F
import paddle.nn.initializer as paddle_init
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.utils import utils
from paddlers.models.ppseg.models.backbones.transformer_utils import *
from paddleseg.cvlibs import manager
from paddleseg.utils import utils
from paddleseg.models.backbones.transformer_utils import *
class Mlp(nn.Layer):
@ -260,7 +260,7 @@ class MixVisionTransformer(nn.Layer):
def __init__(self,
img_size=224,
patch_size=16,
in_chans=3,
in_channels=3,
num_classes=1000,
embed_dims=[64, 128, 256, 512],
num_heads=[1, 2, 4, 8],
@ -284,7 +284,7 @@ class MixVisionTransformer(nn.Layer):
img_size=img_size,
patch_size=7,
stride=4,
in_chans=in_chans,
in_chans=in_channels,
embed_dim=embed_dims[0])
self.patch_embed2 = OverlapPatchEmbed(
img_size=img_size // 4,

@ -1,4 +1,4 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -12,13 +12,26 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg import utils
from paddleseg.cvlibs import manager
from paddleseg import utils
__all__ = [
"MobileNetV2_x0_25",
"MobileNetV2_x0_5",
"MobileNetV2_x0_75",
"MobileNetV2_x1_0",
"MobileNetV2_x1_5",
"MobileNetV2_x2_0",
]
@manager.BACKBONES.add_component
class MobileNetV2(nn.Layer):
"""
The MobileNetV2 implementation based on PaddlePaddle.
@ -29,69 +42,70 @@ class MobileNetV2(nn.Layer):
(https://arxiv.org/abs/1801.04381).
Args:
channel_ratio (float, optional): The ratio of channel. Default: 1.0
min_channel (int, optional): The minimum of channel. Default: 16
scale (float, optional): The scale of channel. Default: 1.0
in_channels (int, optional): The channels of input image. Default: 3.
pretrained (str, optional): The path or url of pretrained model. Default: None
"""
def __init__(self, channel_ratio=1.0, min_channel=16, pretrained=None):
super(MobileNetV2, self).__init__()
self.channel_ratio = channel_ratio
self.min_channel = min_channel
def __init__(self, scale=1.0, in_channels=3, pretrained=None):
super().__init__()
self.scale = scale
self.pretrained = pretrained
prefix_name = ""
self.stage0 = conv_bn(3, self.depth(32), 3, 2)
self.stage1 = InvertedResidual(self.depth(32), self.depth(16), 1, 1)
self.stage2 = nn.Sequential(
InvertedResidual(self.depth(16), self.depth(24), 2, 6),
InvertedResidual(self.depth(24), self.depth(24), 1, 6), )
self.stage3 = nn.Sequential(
InvertedResidual(self.depth(24), self.depth(32), 2, 6),
InvertedResidual(self.depth(32), self.depth(32), 1, 6),
InvertedResidual(self.depth(32), self.depth(32), 1, 6), )
bottleneck_params_list = [
(1, 16, 1, 1),
(6, 24, 2, 2), # x4
(6, 32, 3, 2), # x8
(6, 64, 4, 2),
(6, 96, 3, 1), # x16
(6, 160, 3, 2),
(6, 320, 1, 1), # x32
]
self.out_index = [1, 2, 4, 6]
self.stage4 = nn.Sequential(
InvertedResidual(self.depth(32), self.depth(64), 2, 6),
InvertedResidual(self.depth(64), self.depth(64), 1, 6),
InvertedResidual(self.depth(64), self.depth(64), 1, 6),
InvertedResidual(self.depth(64), self.depth(64), 1, 6), )
self.conv1 = ConvBNLayer(
num_channels=in_channels,
num_filters=int(32 * scale),
filter_size=3,
stride=2,
padding=1,
name=prefix_name + "conv1_1")
self.stage5 = nn.Sequential(
InvertedResidual(self.depth(64), self.depth(96), 1, 6),
InvertedResidual(self.depth(96), self.depth(96), 1, 6),
InvertedResidual(self.depth(96), self.depth(96), 1, 6), )
self.block_list = []
i = 1
in_c = int(32 * scale)
for layer_setting in bottleneck_params_list:
t, c, n, s = layer_setting
i += 1
block = self.add_sublayer(
prefix_name + "conv" + str(i),
sublayer=InvresiBlocks(
in_c=in_c,
t=t,
c=int(c * scale),
n=n,
s=s,
name=prefix_name + "conv" + str(i)))
self.block_list.append(block)
in_c = int(c * scale)
self.stage6 = nn.Sequential(
InvertedResidual(self.depth(96), self.depth(160), 2, 6),
InvertedResidual(self.depth(160), self.depth(160), 1, 6),
InvertedResidual(self.depth(160), self.depth(160), 1, 6), )
self.stage7 = InvertedResidual(self.depth(160), self.depth(320), 1, 6)
out_channels = [
bottleneck_params_list[idx][1] for idx in self.out_index
]
self.feat_channels = [int(c * scale) for c in out_channels]
self.init_weight()
def depth(self, channels):
min_channel = min(channels, self.min_channel)
return max(min_channel, int(channels * self.channel_ratio))
def forward(self, x):
def forward(self, inputs):
feat_list = []
feature_1_2 = self.stage0(x)
feature_1_2 = self.stage1(feature_1_2)
feature_1_4 = self.stage2(feature_1_2)
feature_1_8 = self.stage3(feature_1_4)
feature_1_16 = self.stage4(feature_1_8)
feature_1_16 = self.stage5(feature_1_16)
feature_1_32 = self.stage6(feature_1_16)
feature_1_32 = self.stage7(feature_1_32)
feat_list.append(feature_1_4)
feat_list.append(feature_1_8)
feat_list.append(feature_1_16)
feat_list.append(feature_1_32)
y = self.conv1(inputs, if_act=True)
for idx, block in enumerate(self.block_list):
y = block(y)
if idx in self.out_index:
feat_list.append(y)
return feat_list
def init_weight(self):
@ -99,66 +113,153 @@ class MobileNetV2(nn.Layer):
utils.load_entire_model(self, self.pretrained)
def conv_bn(inp, oup, kernel, stride):
return nn.Sequential(
nn.Conv2D(
in_channels=inp,
out_channels=oup,
kernel_size=kernel,
class ConvBNLayer(nn.Layer):
    """
    Bias-free ``Conv2D`` followed by ``BatchNorm`` and an optional ReLU6.

    Args:
        num_channels (int): Number of input channels.
        filter_size (int): Kernel size of the convolution.
        num_filters (int): Number of output channels.
        stride (int): Convolution stride.
        padding (int): Convolution padding.
        channels (optional): Not used by this layer. Default: None.
        num_groups (int, optional): Number of convolution groups. Default: 1.
        name (str, optional): Prefix of the explicit parameter names
            (``<name>_weights``, ``<name>_bn_scale``, ...). When None, no
            explicit names are set. Default: None.
        use_cudnn (bool, optional): Not used by this layer. Default: True.
    """

    def __init__(self,
                 num_channels,
                 filter_size,
                 num_filters,
                 stride,
                 padding,
                 channels=None,
                 num_groups=1,
                 name=None,
                 use_cudnn=True):
        super(ConvBNLayer, self).__init__()

        def _param_name(suffix):
            # ``name`` defaults to None; ``None + str`` would raise TypeError,
            # so fall back to framework-generated names in that case.
            return None if name is None else name + suffix

        self._conv = Conv2D(
            in_channels=num_channels,
            out_channels=num_filters,
            kernel_size=filter_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            weight_attr=ParamAttr(name=_param_name("_weights")),
            bias_attr=False)

        self._batch_norm = BatchNorm(
            num_filters,
            param_attr=ParamAttr(name=_param_name("_bn_scale")),
            bias_attr=ParamAttr(name=_param_name("_bn_offset")),
            moving_mean_name=_param_name("_bn_mean"),
            moving_variance_name=_param_name("_bn_variance"))

    def forward(self, inputs, if_act=True):
        """Apply conv and batch norm; additionally apply ReLU6 when ``if_act`` is truthy."""
        y = self._conv(inputs)
        y = self._batch_norm(y)
        if if_act:
            y = F.relu6(y)
        return y
class InvertedResidualUnit(nn.Layer):
def __init__(self, num_channels, num_in_filter, num_filters, stride,
filter_size, padding, expansion_factor, name):
super(InvertedResidualUnit, self).__init__()
num_expfilter = int(round(num_in_filter * expansion_factor))
self._expand_conv = ConvBNLayer(
num_channels=num_channels,
num_filters=num_expfilter,
filter_size=1,
stride=1,
padding=0,
num_groups=1,
name=name + "_expand")
self._bottleneck_conv = ConvBNLayer(
num_channels=num_expfilter,
num_filters=num_expfilter,
filter_size=filter_size,
stride=stride,
padding=(kernel - 1) // 2,
bias_attr=False),
nn.BatchNorm2D(
num_features=oup, epsilon=1e-05, momentum=0.1),
nn.ReLU())
class InvertedResidual(nn.Layer):
def __init__(self, inp, oup, stride, expand_ratio, dilation=1):
super(InvertedResidual, self).__init__()
self.stride = stride
assert stride in [1, 2]
self.use_res_connect = self.stride == 1 and inp == oup
self.conv = nn.Sequential(
nn.Conv2D(
inp,
inp * expand_ratio,
kernel_size=1,
stride=1,
padding=0,
dilation=1,
groups=1,
bias_attr=False),
nn.BatchNorm2D(
num_features=inp * expand_ratio, epsilon=1e-05, momentum=0.1),
nn.ReLU(),
nn.Conv2D(
inp * expand_ratio,
inp * expand_ratio,
kernel_size=3,
stride=stride,
padding=dilation,
dilation=dilation,
groups=inp * expand_ratio,
bias_attr=False),
nn.BatchNorm2D(
num_features=inp * expand_ratio, epsilon=1e-05, momentum=0.1),
nn.ReLU(),
nn.Conv2D(
inp * expand_ratio,
oup,
kernel_size=1,
stride=1,
padding=0,
dilation=1,
groups=1,
bias_attr=False),
nn.BatchNorm2D(
num_features=oup, epsilon=1e-05, momentum=0.1), )
def forward(self, x):
if self.use_res_connect:
return x + self.conv(x)
else:
return self.conv(x)
padding=padding,
num_groups=num_expfilter,
use_cudnn=False,
name=name + "_dwise")
self._linear_conv = ConvBNLayer(
num_channels=num_expfilter,
num_filters=num_filters,
filter_size=1,
stride=1,
padding=0,
num_groups=1,
name=name + "_linear")
def forward(self, inputs, ifshortcut):
y = self._expand_conv(inputs, if_act=True)
y = self._bottleneck_conv(y, if_act=True)
y = self._linear_conv(y, if_act=False)
if ifshortcut:
y = paddle.add(inputs, y)
return y
class InvresiBlocks(nn.Layer):
    """
    A group of ``n`` inverted residual units.

    The first unit maps ``in_c`` to ``c`` channels with stride ``s`` and no
    shortcut; the remaining ``n - 1`` units keep ``c`` channels, use stride 1
    and are called with the shortcut enabled.

    Args:
        in_c (int): Number of input channels.
        t (int|float): Expansion factor of every unit.
        c (int): Number of output channels.
        n (int): Number of units in the group.
        s (int): Stride of the first unit.
        name (str): Prefix used to name the units (``<name>_1``, ``<name>_2``, ...).
    """

    def __init__(self, in_c, t, c, n, s, name):
        super().__init__()

        self._first_block = InvertedResidualUnit(
            num_channels=in_c,
            num_in_filter=in_c,
            num_filters=c,
            stride=s,
            filter_size=3,
            padding=1,
            expansion_factor=t,
            name=name + "_1")

        # The units are registered through add_sublayer; the plain list only
        # keeps them in call order for forward.
        self._block_list = []
        for unit_no in range(2, n + 1):
            unit_name = name + "_" + str(unit_no)
            unit = self.add_sublayer(
                unit_name,
                sublayer=InvertedResidualUnit(
                    num_channels=c,
                    num_in_filter=c,
                    num_filters=c,
                    stride=1,
                    filter_size=3,
                    padding=1,
                    expansion_factor=t,
                    name=unit_name))
            self._block_list.append(unit)

    def forward(self, inputs):
        """Run the first unit without shortcut, then the remaining units with shortcut."""
        feat = self._first_block(inputs, ifshortcut=False)
        for unit in self._block_list:
            feat = unit(feat, ifshortcut=True)
        return feat
@manager.BACKBONES.add_component
def MobileNetV2_x0_25(**kwargs):
    """Return ``MobileNetV2(scale=0.25)``; ``kwargs`` are forwarded as-is."""
    return MobileNetV2(scale=0.25, **kwargs)
@manager.BACKBONES.add_component
def MobileNetV2_x0_5(**kwargs):
    """Return ``MobileNetV2(scale=0.5)``; ``kwargs`` are forwarded as-is."""
    return MobileNetV2(scale=0.5, **kwargs)
@manager.BACKBONES.add_component
def MobileNetV2_x0_75(**kwargs):
    """Return ``MobileNetV2(scale=0.75)``; ``kwargs`` are forwarded as-is."""
    return MobileNetV2(scale=0.75, **kwargs)
@manager.BACKBONES.add_component
def MobileNetV2_x1_0(**kwargs):
    """Return ``MobileNetV2(scale=1.0)``; ``kwargs`` are forwarded as-is."""
    return MobileNetV2(scale=1.0, **kwargs)
@manager.BACKBONES.add_component
def MobileNetV2_x1_5(**kwargs):
    """Return ``MobileNetV2(scale=1.5)``; ``kwargs`` are forwarded as-is."""
    return MobileNetV2(scale=1.5, **kwargs)
@manager.BACKBONES.add_component
def MobileNetV2_x2_0(**kwargs):
    """Return ``MobileNetV2(scale=2.0)``; ``kwargs`` are forwarded as-is."""
    return MobileNetV2(scale=2.0, **kwargs)

@ -1,4 +1,4 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -14,11 +14,13 @@
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
from paddle.regularizer import L2Decay
from paddle.nn import AdaptiveAvgPool2D, BatchNorm, Conv2D, Dropout, Linear
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.utils import utils
from paddlers.models.ppseg.models import layers
from paddleseg.cvlibs import manager
from paddleseg.utils import utils, logger
from paddleseg.models import layers
__all__ = [
"MobileNetV3_small_x0_35", "MobileNetV3_small_x0_5",
@ -28,8 +30,92 @@ __all__ = [
"MobileNetV3_large_x1_0", "MobileNetV3_large_x1_25"
]
def make_divisible(v, divisor=8, min_value=None):
# Per-variant lists of stage pattern strings. The factory functions in this
# file pass one of these lists to MobileNetV3 as its `stages_pattern` argument.
MODEL_STAGES_PATTERN = {
"MobileNetV3_small": ["blocks[0]", "blocks[2]", "blocks[7]", "blocks[10]"],
"MobileNetV3_large":
["blocks[0]", "blocks[2]", "blocks[5]", "blocks[11]", "blocks[14]"]
}
# "large", "small" is just for MobinetV3_large, MobileNetV3_small respectively.
# The type of "large" or "small" config is a list. Each element(list) represents a depthwise block, which is composed of k, exp, se, act, s.
# k: kernel_size
# exp: middle channel number in depthwise block
# c: output channel number in depthwise block
# se: whether to use SE block
# act: which activation to use
# s: stride in depthwise block
# d: dilation rate in depthwise block
NET_CONFIG = {
"large": [
# k, exp, c, se, act, s
[3, 16, 16, False, "relu", 1],
[3, 64, 24, False, "relu", 2],
[3, 72, 24, False, "relu", 1], # x4
[5, 72, 40, True, "relu", 2],
[5, 120, 40, True, "relu", 1],
[5, 120, 40, True, "relu", 1], # x8
[3, 240, 80, False, "hardswish", 2],
[3, 200, 80, False, "hardswish", 1],
[3, 184, 80, False, "hardswish", 1],
[3, 184, 80, False, "hardswish", 1],
[3, 480, 112, True, "hardswish", 1],
[3, 672, 112, True, "hardswish", 1], # x16
[5, 672, 160, True, "hardswish", 2],
[5, 960, 160, True, "hardswish", 1],
[5, 960, 160, True, "hardswish", 1], # x32
],
"small": [
# k, exp, c, se, act, s
[3, 16, 16, True, "relu", 2],
[3, 72, 24, False, "relu", 2],
[3, 88, 24, False, "relu", 1],
[5, 96, 40, True, "hardswish", 2],
[5, 240, 40, True, "hardswish", 1],
[5, 240, 40, True, "hardswish", 1],
[5, 120, 48, True, "hardswish", 1],
[5, 144, 48, True, "hardswish", 1],
[5, 288, 96, True, "hardswish", 2],
[5, 576, 96, True, "hardswish", 1],
[5, 576, 96, True, "hardswish", 1],
],
"large_os8": [
# k, exp, c, se, act, s, {d}
[3, 16, 16, False, "relu", 1],
[3, 64, 24, False, "relu", 2],
[3, 72, 24, False, "relu", 1], # x4
[5, 72, 40, True, "relu", 2],
[5, 120, 40, True, "relu", 1],
[5, 120, 40, True, "relu", 1], # x8
[3, 240, 80, False, "hardswish", 1],
[3, 200, 80, False, "hardswish", 1, 2],
[3, 184, 80, False, "hardswish", 1, 2],
[3, 184, 80, False, "hardswish", 1, 2],
[3, 480, 112, True, "hardswish", 1, 2],
[3, 672, 112, True, "hardswish", 1, 2],
[5, 672, 160, True, "hardswish", 1, 2],
[5, 960, 160, True, "hardswish", 1, 4],
[5, 960, 160, True, "hardswish", 1, 4],
],
"small_os8": [
# k, exp, c, se, act, s, {d}
[3, 16, 16, True, "relu", 2],
[3, 72, 24, False, "relu", 2],
[3, 88, 24, False, "relu", 1],
[5, 96, 40, True, "hardswish", 1],
[5, 240, 40, True, "hardswish", 1, 2],
[5, 240, 40, True, "hardswish", 1, 2],
[5, 120, 48, True, "hardswish", 1, 2],
[5, 144, 48, True, "hardswish", 1, 2],
[5, 288, 96, True, "hardswish", 1, 2],
[5, 576, 96, True, "hardswish", 1, 4],
[5, 576, 96, True, "hardswish", 1, 4],
]
}
# Indices of the blocks whose outputs MobileNetV3.forward collects into the
# returned feature list; passed to MobileNetV3 as `out_index`.
OUT_INDEX = {"large": [2, 5, 11, 14], "small": [0, 2, 7, 10]}
def _make_divisible(v, divisor=8, min_value=None):
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
@ -38,156 +124,113 @@ def make_divisible(v, divisor=8, min_value=None):
return new_v
class MobileNetV3(nn.Layer):
"""
The MobileNetV3 implementation based on PaddlePaddle.
def _create_act(act):
    """Map an activation name to a freshly created activation layer.

    Args:
        act: ``"hardswish"``, ``"relu"`` or ``None``.

    Returns:
        ``nn.Hardswish()``, ``nn.ReLU()`` or ``None`` respectively.

    Raises:
        RuntimeError: If ``act`` is any other value.
    """
    if act is None:
        return None
    if act == "relu":
        return nn.ReLU()
    if act == "hardswish":
        return nn.Hardswish()
    raise RuntimeError(
        "The activation function is not supported: {}".format(act))
The original article refers to Jingdong
Andrew Howard, et, al. "Searching for MobileNetV3"
(https://arxiv.org/pdf/1905.02244.pdf).
class MobileNetV3(nn.Layer):
"""
MobileNetV3
Args:
pretrained (str, optional): The path of pretrained model.
scale (float, optional): The scale of channels . Default: 1.0.
model_name (str, optional): Model name. It determines the type of MobileNetV3. The value is 'small' or 'large'. Defualt: 'small'.
output_stride (int, optional): The stride of output features compared to input images. The value should be one of (2, 4, 8, 16, 32). Default: None.
config: list. MobileNetV3 depthwise blocks config.
in_channels (int, optional): The channels of input image. Default: 3.
scale: float=1.0. The coefficient that controls the size of network parameters.
Returns:
model: nn.Layer. Specific MobileNetV3 model depends on args.
"""
def __init__(self,
pretrained=None,
config,
stages_pattern,
out_index,
in_channels=3,
scale=1.0,
model_name="small",
output_stride=None):
super(MobileNetV3, self).__init__()
pretrained=None):
super().__init__()
self.cfg = config
self.out_index = out_index
self.scale = scale
self.pretrained = pretrained
inplanes = 16
if model_name == "large":
self.cfg = [
# k, exp, c, se, nl, s,
[3, 16, 16, False, "relu", 1],
[3, 64, 24, False, "relu", 2],
[3, 72, 24, False, "relu", 1], # output 1 -> out_index=2
[5, 72, 40, True, "relu", 2],
[5, 120, 40, True, "relu", 1],
[5, 120, 40, True, "relu", 1], # output 2 -> out_index=5
[3, 240, 80, False, "hard_swish", 2],
[3, 200, 80, False, "hard_swish", 1],
[3, 184, 80, False, "hard_swish", 1],
[3, 184, 80, False, "hard_swish", 1],
[3, 480, 112, True, "hard_swish", 1],
[3, 672, 112, True, "hard_swish",
1], # output 3 -> out_index=11
[5, 672, 160, True, "hard_swish", 2],
[5, 960, 160, True, "hard_swish", 1],
[5, 960, 160, True, "hard_swish",
1], # output 3 -> out_index=14
]
self.out_indices = [2, 5, 11, 14]
self.feat_channels = [
make_divisible(i * scale) for i in [24, 40, 112, 160]
]
self.cls_ch_squeeze = 960
self.cls_ch_expand = 1280
elif model_name == "small":
self.cfg = [
# k, exp, c, se, nl, s,
[3, 16, 16, True, "relu", 2], # output 1 -> out_index=0
[3, 72, 24, False, "relu", 2],
[3, 88, 24, False, "relu", 1], # output 2 -> out_index=3
[5, 96, 40, True, "hard_swish", 2],
[5, 240, 40, True, "hard_swish", 1],
[5, 240, 40, True, "hard_swish", 1],
[5, 120, 48, True, "hard_swish", 1],
[5, 144, 48, True, "hard_swish", 1], # output 3 -> out_index=7
[5, 288, 96, True, "hard_swish", 2],
[5, 576, 96, True, "hard_swish", 1],
[5, 576, 96, True, "hard_swish", 1], # output 4 -> out_index=10
]
self.out_indices = [0, 3, 7, 10]
self.feat_channels = [
make_divisible(i * scale) for i in [16, 24, 48, 96]
]
self.cls_ch_squeeze = 576
self.cls_ch_expand = 1280
else:
raise NotImplementedError(
"mode[{}_model] is not implemented!".format(model_name))
###################################################
# modify stride and dilation based on output_stride
self.dilation_cfg = [1] * len(self.cfg)
self.modify_bottle_params(output_stride=output_stride)
###################################################
self.conv1 = ConvBNLayer(
in_c=3,
out_c=make_divisible(inplanes * scale),
self.conv = ConvBNLayer(
in_c=in_channels,
out_c=_make_divisible(inplanes * self.scale),
filter_size=3,
stride=2,
padding=1,
num_groups=1,
if_act=True,
act="hard_swish")
self.block_list = []
inplanes = make_divisible(inplanes * scale)
for i, (k, exp, c, se, nl, s) in enumerate(self.cfg):
######################################
# add dilation rate
dilation_rate = self.dilation_cfg[i]
######################################
self.block_list.append(
ResidualUnit(
in_c=inplanes,
mid_c=make_divisible(scale * exp),
out_c=make_divisible(scale * c),
filter_size=k,
stride=s,
dilation=dilation_rate,
use_se=se,
act=nl,
name="conv" + str(i + 2)))
self.add_sublayer(
sublayer=self.block_list[-1], name="conv" + str(i + 2))
inplanes = make_divisible(scale * c)
self.pretrained = pretrained
act="hardswish")
self.blocks = nn.Sequential(*[
ResidualUnit(
in_c=_make_divisible(inplanes * self.scale if i == 0 else
self.cfg[i - 1][2] * self.scale),
mid_c=_make_divisible(self.scale * exp),
out_c=_make_divisible(self.scale * c),
filter_size=k,
stride=s,
use_se=se,
act=act,
dilation=td[0] if td else 1)
for i, (k, exp, c, se, act, s, *td) in enumerate(self.cfg)
])
out_channels = [config[idx][2] for idx in self.out_index]
self.feat_channels = [
_make_divisible(self.scale * c) for c in out_channels
]
self.init_res(stages_pattern)
self.init_weight()
def modify_bottle_params(self, output_stride=None):
if output_stride is not None and output_stride % 2 != 0:
raise ValueError("output stride must to be even number")
if output_stride is not None:
stride = 2
rate = 1
for i, _cfg in enumerate(self.cfg):
stride = stride * _cfg[-1]
if stride > output_stride:
rate = rate * _cfg[-1]
self.cfg[i][-1] = 1
def init_weight(self):
if self.pretrained is not None:
utils.load_entire_model(self, self.pretrained)
def init_res(self, stages_pattern, return_patterns=None,
return_stages=None):
if return_patterns and return_stages:
msg = f"The 'return_patterns' would be ignored when 'return_stages' is set."
logger.warning(msg)
return_stages = None
if return_stages is True:
return_patterns = stages_pattern
# return_stages is int or bool
if type(return_stages) is int:
return_stages = [return_stages]
if isinstance(return_stages, list):
if max(return_stages) > len(stages_pattern) or min(
return_stages) < 0:
msg = f"The 'return_stages' set error. Illegal value(s) have been ignored. The stages' pattern list is {stages_pattern}."
logger.warning(msg)
return_stages = [
val for val in return_stages
if val >= 0 and val < len(stages_pattern)
]
return_patterns = [stages_pattern[i] for i in return_stages]
self.dilation_cfg[i] = rate
def forward(self, x):
x = self.conv(x)
def forward(self, inputs, label=None):
x = self.conv1(inputs)
# A feature list saves each downsampling feature.
feat_list = []
for i, block in enumerate(self.block_list):
for idx, block in enumerate(self.blocks):
x = block(x)
if i in self.out_indices:
if idx in self.out_index:
feat_list.append(x)
return feat_list
def init_weight(self):
if self.pretrained is not None:
utils.load_pretrained_model(self, self.pretrained)
class ConvBNLayer(nn.Layer):
def __init__(self,
@ -196,36 +239,34 @@ class ConvBNLayer(nn.Layer):
filter_size,
stride,
padding,
dilation=1,
num_groups=1,
if_act=True,
act=None):
super(ConvBNLayer, self).__init__()
self.if_act = if_act
self.act = act
act=None,
dilation=1):
super().__init__()
self.conv = nn.Conv2D(
self.conv = Conv2D(
in_channels=in_c,
out_channels=out_c,
kernel_size=filter_size,
stride=stride,
padding=padding,
dilation=dilation,
groups=num_groups,
bias_attr=False)
self.bn = layers.SyncBatchNorm(
num_features=out_c,
weight_attr=paddle.ParamAttr(
regularizer=paddle.regularizer.L2Decay(0.0)),
bias_attr=paddle.ParamAttr(
regularizer=paddle.regularizer.L2Decay(0.0)))
self._act_op = layers.Activation(act='hardswish')
bias_attr=False,
dilation=dilation)
self.bn = BatchNorm(
num_channels=out_c,
act=None,
param_attr=ParamAttr(regularizer=L2Decay(0.0)),
bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
self.if_act = if_act
self.act = _create_act(act)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
if self.if_act:
x = self._act_op(x)
x = self.act(x)
return x
@ -237,10 +278,9 @@ class ResidualUnit(nn.Layer):
filter_size,
stride,
use_se,
dilation=1,
act=None,
name=''):
super(ResidualUnit, self).__init__()
dilation=1):
super().__init__()
self.if_shortcut = stride == 1 and in_c == out_c
self.if_se = use_se
@ -252,19 +292,18 @@ class ResidualUnit(nn.Layer):
padding=0,
if_act=True,
act=act)
self.bottleneck_conv = ConvBNLayer(
in_c=mid_c,
out_c=mid_c,
filter_size=filter_size,
stride=stride,
padding='same',
dilation=dilation,
padding=int((filter_size - 1) // 2) * dilation,
num_groups=mid_c,
if_act=True,
act=act)
act=act,
dilation=dilation)
if self.if_se:
self.mid_se = SEModule(mid_c, name=name + "_se")
self.mid_se = SEModule(mid_c)
self.linear_conv = ConvBNLayer(
in_c=mid_c,
out_c=out_c,
@ -273,92 +312,187 @@ class ResidualUnit(nn.Layer):
padding=0,
if_act=False,
act=None)
self.dilation = dilation
def forward(self, inputs):
x = self.expand_conv(inputs)
def forward(self, x):
identity = x
x = self.expand_conv(x)
x = self.bottleneck_conv(x)
if self.if_se:
x = self.mid_se(x)
x = self.linear_conv(x)
if self.if_shortcut:
x = inputs + x
x = paddle.add(identity, x)
return x
# nn.Hardsigmoid can't transfer "slope" and "offset" in nn.functional.hardsigmoid
class Hardsigmoid(nn.Layer):
    """Layer wrapper around ``nn.functional.hardsigmoid``.

    Stores ``slope`` and ``offset`` at construction time and forwards them
    to the functional call on every invocation.

    Args:
        slope: Value passed as ``slope``. Default: 0.2.
        offset: Value passed as ``offset``. Default: 0.5.
    """

    def __init__(self, slope=0.2, offset=0.5):
        super().__init__()
        self.slope, self.offset = slope, offset

    def forward(self, x):
        """Apply hard-sigmoid to ``x`` with the stored slope and offset."""
        params = {"slope": self.slope, "offset": self.offset}
        return nn.functional.hardsigmoid(x, **params)
class SEModule(nn.Layer):
def __init__(self, channel, reduction=4, name=""):
super(SEModule, self).__init__()
self.avg_pool = nn.AdaptiveAvgPool2D(1)
self.conv1 = nn.Conv2D(
def __init__(self, channel, reduction=4):
super().__init__()
self.avg_pool = AdaptiveAvgPool2D(1)
self.conv1 = Conv2D(
in_channels=channel,
out_channels=channel // reduction,
kernel_size=1,
stride=1,
padding=0)
self.conv2 = nn.Conv2D(
self.relu = nn.ReLU()
self.conv2 = Conv2D(
in_channels=channel // reduction,
out_channels=channel,
kernel_size=1,
stride=1,
padding=0)
self.hardsigmoid = Hardsigmoid(slope=0.2, offset=0.5)
def forward(self, inputs):
outputs = self.avg_pool(inputs)
outputs = self.conv1(outputs)
outputs = F.relu(outputs)
outputs = self.conv2(outputs)
outputs = F.hardsigmoid(outputs)
return paddle.multiply(x=inputs, y=outputs)
def forward(self, x):
identity = x
x = self.avg_pool(x)
x = self.conv1(x)
x = self.relu(x)
x = self.conv2(x)
x = self.hardsigmoid(x)
return paddle.multiply(x=identity, y=x)
@manager.BACKBONES.add_component
def MobileNetV3_small_x0_35(**kwargs):
    """Build a MobileNetV3 backbone from the "small" config at ``scale=0.35``.

    Extra keyword arguments are forwarded to ``MobileNetV3`` unchanged.
    """
    # The leftover ``MobileNetV3(model_name="small", ...)`` call was removed:
    # its result was immediately overwritten by the config-based call below.
    model = MobileNetV3(
        config=NET_CONFIG["small"],
        scale=0.35,
        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
        out_index=OUT_INDEX["small"],
        **kwargs)
    return model
@manager.BACKBONES.add_component
def MobileNetV3_small_x0_5(**kwargs):
    """Build a MobileNetV3 backbone from the "small" config at ``scale=0.5``.

    Extra keyword arguments are forwarded to ``MobileNetV3`` unchanged.
    """
    # The leftover ``MobileNetV3(model_name="small", ...)`` call was removed:
    # its result was immediately overwritten by the config-based call below.
    model = MobileNetV3(
        config=NET_CONFIG["small"],
        scale=0.5,
        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
        out_index=OUT_INDEX["small"],
        **kwargs)
    return model
@manager.BACKBONES.add_component
def MobileNetV3_small_x0_75(**kwargs):
    """Build a MobileNetV3 backbone from the "small" config at ``scale=0.75``.

    Extra keyword arguments are forwarded to ``MobileNetV3`` unchanged.
    """
    # The leftover ``MobileNetV3(model_name="small", ...)`` call was removed:
    # its result was immediately overwritten by the config-based call below.
    model = MobileNetV3(
        config=NET_CONFIG["small"],
        scale=0.75,
        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
        out_index=OUT_INDEX["small"],
        **kwargs)
    return model
@manager.BACKBONES.add_component
def MobileNetV3_small_x1_0(**kwargs):
    """Build a MobileNetV3 backbone from the "small" config at ``scale=1.0``.

    Extra keyword arguments are forwarded to ``MobileNetV3`` unchanged.
    """
    # The leftover ``MobileNetV3(model_name="small", ...)`` call was removed:
    # its result was immediately overwritten by the config-based call below.
    model = MobileNetV3(
        config=NET_CONFIG["small"],
        scale=1.0,
        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
        out_index=OUT_INDEX["small"],
        **kwargs)
    return model
@manager.BACKBONES.add_component
def MobileNetV3_small_x1_25(**kwargs):
    """Build a MobileNetV3 backbone from the "small" config at ``scale=1.25``.

    Extra keyword arguments are forwarded to ``MobileNetV3`` unchanged.
    """
    # The leftover ``MobileNetV3(model_name="small", ...)`` call was removed:
    # its result was immediately overwritten by the config-based call below.
    model = MobileNetV3(
        config=NET_CONFIG["small"],
        scale=1.25,
        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
        out_index=OUT_INDEX["small"],
        **kwargs)
    return model
@manager.BACKBONES.add_component
def MobileNetV3_large_x0_35(**kwargs):
    """Build a MobileNetV3 backbone from the "large" config at ``scale=0.35``.

    Extra keyword arguments are forwarded to ``MobileNetV3`` unchanged.
    """
    # Fix: use the "MobileNetV3_large" stage patterns, matching the "large"
    # config and out_index used here and by every other large factory. The
    # previous code passed the "MobileNetV3_small" patterns (copy-paste slip).
    # The leftover ``MobileNetV3(model_name="large", ...)`` call, whose result
    # was immediately overwritten, was removed as well.
    model = MobileNetV3(
        config=NET_CONFIG["large"],
        scale=0.35,
        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
        out_index=OUT_INDEX["large"],
        **kwargs)
    return model
@manager.BACKBONES.add_component
def MobileNetV3_large_x0_5(**kwargs):
    """Build a MobileNetV3 backbone from the "large" config at ``scale=0.5``.

    Extra keyword arguments are forwarded to ``MobileNetV3`` unchanged.
    """
    # The leftover ``MobileNetV3(model_name="large", ...)`` call was removed:
    # its result was immediately overwritten by the config-based call below.
    model = MobileNetV3(
        config=NET_CONFIG["large"],
        scale=0.5,
        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
        out_index=OUT_INDEX["large"],
        **kwargs)
    return model
@manager.BACKBONES.add_component
def MobileNetV3_large_x0_75(**kwargs):
    """Build a MobileNetV3 backbone from the "large" config at ``scale=0.75``.

    Extra keyword arguments are forwarded to ``MobileNetV3`` unchanged.
    """
    # The leftover ``MobileNetV3(model_name="large", ...)`` call was removed:
    # its result was immediately overwritten by the config-based call below.
    model = MobileNetV3(
        config=NET_CONFIG["large"],
        scale=0.75,
        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
        out_index=OUT_INDEX["large"],
        **kwargs)
    return model
@manager.BACKBONES.add_component
def MobileNetV3_large_x1_0(**kwargs):
    """Build a MobileNetV3 backbone from the "large" config at ``scale=1.0``.

    Extra keyword arguments are forwarded to ``MobileNetV3`` unchanged.
    """
    # The leftover ``MobileNetV3(model_name="large", ...)`` call was removed:
    # its result was immediately overwritten by the config-based call below.
    model = MobileNetV3(
        config=NET_CONFIG["large"],
        scale=1.0,
        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
        out_index=OUT_INDEX["large"],
        **kwargs)
    return model
@manager.BACKBONES.add_component
def MobileNetV3_large_x1_25(**kwargs):
    """Build a MobileNetV3 backbone from the "large" config at ``scale=1.25``.

    Extra keyword arguments are forwarded to ``MobileNetV3`` unchanged.
    """
    # The leftover ``MobileNetV3(model_name="large", ...)`` call was removed:
    # its result was immediately overwritten by the config-based call below.
    model = MobileNetV3(
        config=NET_CONFIG["large"],
        scale=1.25,
        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
        out_index=OUT_INDEX["large"],
        **kwargs)
    return model
@manager.BACKBONES.add_component
def MobileNetV3_large_x1_0_os8(**kwargs):
    """Build a MobileNetV3 backbone from the "large_os8" config at scale 1.0.

    Extra keyword arguments are forwarded to ``MobileNetV3`` unchanged.
    """
    return MobileNetV3(
        config=NET_CONFIG["large_os8"],
        scale=1.0,
        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
        out_index=OUT_INDEX["large"],
        **kwargs)
@manager.BACKBONES.add_component
def MobileNetV3_small_x1_0_os8(**kwargs):
    """Build a MobileNetV3 backbone from the "small_os8" config at scale 1.0.

    Extra keyword arguments are forwarded to ``MobileNetV3`` unchanged.
    """
    return MobileNetV3(
        config=NET_CONFIG["small_os8"],
        scale=1.0,
        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
        out_index=OUT_INDEX["small"],
        **kwargs)

@ -1,4 +1,4 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -16,9 +16,9 @@ import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.models import layers
from paddlers.models.ppseg.utils import utils
from paddleseg.cvlibs import manager
from paddleseg.models import layers
from paddleseg.utils import utils
__all__ = [
"ResNet18_vd", "ResNet34_vd", "ResNet50_vd", "ResNet101_vd", "ResNet152_vd"
@ -206,15 +206,16 @@ class ResNet_vd(nn.Layer):
layers (int, optional): The layers of ResNet_vd. The supported layers are (18, 34, 50, 101, 152, 200). Default: 50.
output_stride (int, optional): The stride of output features compared to input images. It is 8 or 16. Default: 8.
multi_grid (tuple|list, optional): The grid of stage4. Default: (1, 1, 1).
in_channels (int, optional): The channels of input image. Default: 3.
pretrained (str, optional): The path of pretrained model.
"""
def __init__(self,
input_channel=3,
layers=50,
output_stride=8,
multi_grid=(1, 1, 1),
in_channels=3,
pretrained=None,
data_format='NCHW'):
super(ResNet_vd, self).__init__()
@ -252,7 +253,7 @@ class ResNet_vd(nn.Layer):
dilation_dict = {3: 2}
self.conv1_1 = ConvBNLayer(
in_channels=input_channel,
in_channels=in_channels,
out_channels=32,
kernel_size=3,
stride=2,

@ -0,0 +1,315 @@
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
from paddle import ParamAttr, reshape, transpose, concat, split
from paddle.nn import Layer, Conv2D, MaxPool2D, AdaptiveAvgPool2D, BatchNorm, Linear
from paddle.nn.initializer import KaimingNormal
from paddle.nn.functional import swish
from paddleseg.cvlibs import manager
from paddleseg.utils import utils, logger
__all__ = [
'ShuffleNetV2_x0_25', 'ShuffleNetV2_x0_33', 'ShuffleNetV2_x0_5',
'ShuffleNetV2_x1_0', 'ShuffleNetV2_x1_5', 'ShuffleNetV2_x2_0',
'ShuffleNetV2_swish'
]
def channel_shuffle(x, groups):
    """Interleave the channels of ``x`` across ``groups`` groups.

    The tensor is reshaped so that axis 1 is split into
    ``(groups, channels // groups)``, those two axes are swapped, and the
    result is reshaped back to four dimensions.

    Args:
        x: Input tensor; axes 0, 2 and 3 are read from ``paddle.shape(x)``
            and the channel count from ``x.shape[1]``.
        groups: Number of channel groups.

    Returns:
        The shuffled tensor with the same 4-D shape as ``x``.
    """
    runtime_shape = paddle.shape(x)
    n, h, w = runtime_shape[0], runtime_shape[2], runtime_shape[3]
    c = x.shape[1]

    grouped = reshape(x=x, shape=[n, groups, c // groups, h, w])
    swapped = transpose(x=grouped, perm=[0, 2, 1, 3, 4])
    return reshape(x=swapped, shape=[n, c, h, w])
class ConvBNLayer(Layer):
    """Bias-free ``Conv2D`` followed by ``BatchNorm``.

    Args:
        in_channels: Input channels of the convolution.
        out_channels: Output channels of the convolution and batch norm.
        kernel_size: Convolution kernel size.
        stride: Convolution stride.
        padding: Convolution padding.
        groups: Convolution groups. Default: 1.
        act: Activation passed to ``BatchNorm`` via its ``act`` argument.
        name: Optional prefix for parameter names (``<name>_weights``,
            ``<name>_bn_scale``, ...). With the default ``None`` no explicit
            names are set.
    """

    def __init__(
            self,
            in_channels,
            out_channels,
            kernel_size,
            stride,
            padding,
            groups=1,
            act=None,
            name=None, ):
        super(ConvBNLayer, self).__init__()

        def param_name(suffix):
            # ``name`` defaults to None; concatenating it with a string
            # raised TypeError. Passing None through leaves the name unset.
            return None if name is None else name + suffix

        self._conv = Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            weight_attr=ParamAttr(
                initializer=KaimingNormal(), name=param_name("_weights")),
            bias_attr=False)

        self._batch_norm = BatchNorm(
            out_channels,
            param_attr=ParamAttr(name=param_name("_bn_scale")),
            bias_attr=ParamAttr(name=param_name("_bn_offset")),
            act=act,
            moving_mean_name=param_name("_bn_mean"),
            moving_variance_name=param_name("_bn_variance"))

    def forward(self, inputs):
        """Apply the convolution and then the batch norm to ``inputs``."""
        y = self._conv(inputs)
        y = self._batch_norm(y)
        return y
class InvertedResidual(Layer):
    """Stride-preserving ShuffleNet unit.

    The input is split in half along the channel axis. One half passes
    through untouched, the other goes through 1x1 -> 3x3 depthwise -> 1x1
    ``ConvBNLayer`` stages. The halves are concatenated and channel-shuffled
    with 2 groups.

    Args:
        in_channels: Channels of the input tensor (half feeds the branch).
        out_channels: Channels of the output tensor.
        stride: Stride of the depthwise convolution.
        act: Activation used by the two 1x1 convolutions. Default: "relu".
        name: Name fragment; sublayers are named ``stage_<name>_convN``.
    """

    def __init__(self, in_channels, out_channels, stride, act="relu",
                 name=None):
        super(InvertedResidual, self).__init__()
        branch_channels = out_channels // 2
        prefix = 'stage_' + name

        self._conv_pw = ConvBNLayer(
            in_channels=in_channels // 2,
            out_channels=branch_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            groups=1,
            act=act,
            name=prefix + '_conv1')
        self._conv_dw = ConvBNLayer(
            in_channels=branch_channels,
            out_channels=branch_channels,
            kernel_size=3,
            stride=stride,
            padding=1,
            groups=branch_channels,
            act=None,
            name=prefix + '_conv2')
        self._conv_linear = ConvBNLayer(
            in_channels=branch_channels,
            out_channels=branch_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            groups=1,
            act=act,
            name=prefix + '_conv3')

    def forward(self, inputs):
        """Split, transform the second half, concatenate and shuffle."""
        half = inputs.shape[1] // 2
        untouched, branch = split(
            inputs, num_or_sections=[half, half], axis=1)
        for stage in (self._conv_pw, self._conv_dw, self._conv_linear):
            branch = stage(branch)
        merged = concat([untouched, branch], axis=1)
        return channel_shuffle(merged, 2)
class InvertedResidualDS(Layer):
    """Two-branch ShuffleNet unit in which both branches see the full input.

    Branch 1 is 3x3 depthwise -> 1x1; branch 2 is 1x1 -> 3x3 depthwise -> 1x1.
    Each branch produces ``out_channels // 2`` channels. The outputs are
    concatenated and channel-shuffled with 2 groups.

    Args:
        in_channels: Channels of the input tensor.
        out_channels: Channels of the output tensor.
        stride: Stride of both depthwise convolutions.
        act: Activation used by the 1x1 convolutions. Default: "relu".
        name: Name fragment; sublayers are named ``stage_<name>_convN``.
    """

    def __init__(self, in_channels, out_channels, stride, act="relu",
                 name=None):
        super(InvertedResidualDS, self).__init__()
        half_out = out_channels // 2
        prefix = 'stage_' + name

        # branch1
        self._conv_dw_1 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=in_channels,
            kernel_size=3,
            stride=stride,
            padding=1,
            groups=in_channels,
            act=None,
            name=prefix + '_conv4')
        self._conv_linear_1 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=half_out,
            kernel_size=1,
            stride=1,
            padding=0,
            groups=1,
            act=act,
            name=prefix + '_conv5')

        # branch2
        self._conv_pw_2 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=half_out,
            kernel_size=1,
            stride=1,
            padding=0,
            groups=1,
            act=act,
            name=prefix + '_conv1')
        self._conv_dw_2 = ConvBNLayer(
            in_channels=half_out,
            out_channels=half_out,
            kernel_size=3,
            stride=stride,
            padding=1,
            groups=half_out,
            act=None,
            name=prefix + '_conv2')
        self._conv_linear_2 = ConvBNLayer(
            in_channels=half_out,
            out_channels=half_out,
            kernel_size=1,
            stride=1,
            padding=0,
            groups=1,
            act=act,
            name=prefix + '_conv3')

    def forward(self, inputs):
        """Run both branches on ``inputs``, concatenate and shuffle."""
        first = self._conv_linear_1(self._conv_dw_1(inputs))
        second = self._conv_pw_2(inputs)
        second = self._conv_dw_2(second)
        second = self._conv_linear_2(second)
        return channel_shuffle(concat([first, second], axis=1), 2)
class ShuffleNet(Layer):
    """ShuffleNet backbone that returns intermediate feature maps.

    Args:
        scale: Width setting; one of 0.25, 0.33, 0.5, 1.0, 1.5, 2.0.
        act: Activation name handed to the conv layers. Default: "relu".
        in_channels: Channels of the input image. Default: 3.
        pretrained: Optional path of a pretrained model to load.

    Raises:
        NotImplementedError: If ``scale`` is not one of the listed values.
    """

    def __init__(self, scale=1.0, act="relu", in_channels=3, pretrained=None):
        super(ShuffleNet, self).__init__()
        self.scale = scale
        self.pretrained = pretrained
        stage_repeats = [4, 8, 4]

        # (scale, per-stage output channels); matched with ``==`` below.
        channel_table = (
            (0.25, [-1, 24, 24, 48, 96, 512]),
            (0.33, [-1, 24, 32, 64, 128, 512]),
            (0.5, [-1, 24, 48, 96, 192, 1024]),
            (1.0, [-1, 24, 116, 232, 464, 1024]),
            (1.5, [-1, 24, 176, 352, 704, 1024]),
            (2.0, [-1, 24, 224, 488, 976, 2048]), )
        stage_out_channels = next(
            (chans for key, chans in channel_table if scale == key), None)
        if stage_out_channels is None:
            raise NotImplementedError("This scale size:[" + str(scale) +
                                      "] is not implemented!")

        # Indices into ``_block_list`` whose outputs are returned; they are
        # the last blocks of the three stages (repeats 4, 8, 4).
        self.out_index = [3, 11, 15]
        self.feat_channels = stage_out_channels[1:5]

        # 1. conv1
        self._conv1 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=stage_out_channels[1],
            kernel_size=3,
            stride=2,
            padding=1,
            act=act,
            name='stage1_conv')
        self._max_pool = MaxPool2D(kernel_size=3, stride=2, padding=1)

        # 2. bottleneck sequences
        self._block_list = []
        for stage_id, num_repeat in enumerate(stage_repeats):
            stage_in = stage_out_channels[stage_id + 1]
            stage_out = stage_out_channels[stage_id + 2]
            for i in range(num_repeat):
                block_name = str(stage_id + 2) + '_' + str(i + 1)
                if i == 0:
                    # The first block of a stage changes channels, stride 2.
                    unit = InvertedResidualDS(
                        in_channels=stage_in,
                        out_channels=stage_out,
                        stride=2,
                        act=act,
                        name=block_name)
                else:
                    unit = InvertedResidual(
                        in_channels=stage_out,
                        out_channels=stage_out,
                        stride=1,
                        act=act,
                        name=block_name)
                self._block_list.append(
                    self.add_sublayer(
                        name=block_name, sublayer=unit))

        self.init_weight()

    def init_weight(self):
        """Load ``self.pretrained`` into the model when a path was given."""
        if self.pretrained is None:
            return
        utils.load_entire_model(self, self.pretrained)

    def forward(self, inputs):
        """Return the post-max-pool feature plus each ``out_index`` output."""
        y = self._max_pool(self._conv1(inputs))
        feat_list = [y]
        for idx, inv in enumerate(self._block_list):
            y = inv(y)
            if idx in self.out_index:
                feat_list.append(y)
        return feat_list
@manager.BACKBONES.add_component
def ShuffleNetV2_x0_25(**kwargs):
    """Return ``ShuffleNet(scale=0.25)``; ``kwargs`` are forwarded as-is."""
    return ShuffleNet(scale=0.25, **kwargs)
@manager.BACKBONES.add_component
def ShuffleNetV2_x0_33(**kwargs):
    """Return ``ShuffleNet(scale=0.33)``; ``kwargs`` are forwarded as-is."""
    return ShuffleNet(scale=0.33, **kwargs)
@manager.BACKBONES.add_component
def ShuffleNetV2_x0_5(**kwargs):
    """Return ``ShuffleNet(scale=0.5)``; ``kwargs`` are forwarded as-is."""
    return ShuffleNet(scale=0.5, **kwargs)
@manager.BACKBONES.add_component
def ShuffleNetV2_x1_0(**kwargs):
    """Return ``ShuffleNet(scale=1.0)``; ``kwargs`` are forwarded as-is."""
    return ShuffleNet(scale=1.0, **kwargs)
@manager.BACKBONES.add_component
def ShuffleNetV2_x1_5(**kwargs):
    """Return ``ShuffleNet(scale=1.5)``; ``kwargs`` are forwarded as-is."""
    return ShuffleNet(scale=1.5, **kwargs)
@manager.BACKBONES.add_component
def ShuffleNetV2_x2_0(**kwargs):
    """Return ``ShuffleNet(scale=2.0)``; ``kwargs`` are forwarded as-is."""
    return ShuffleNet(scale=2.0, **kwargs)
@manager.BACKBONES.add_component
def ShuffleNetV2_swish(**kwargs):
    """Return ``ShuffleNet(scale=1.0, act="swish")``; ``kwargs`` are forwarded."""
    return ShuffleNet(scale=1.0, act="swish", **kwargs)

@ -17,9 +17,9 @@ import math
import paddle
import paddle.nn as nn
from paddlers.models.ppseg.utils import utils
from paddlers.models.ppseg.cvlibs import manager, param_init
from paddlers.models.ppseg.models.layers.layer_libs import SyncBatchNorm
from paddleseg.utils import utils
from paddleseg.cvlibs import manager, param_init
from paddleseg.models.layers.layer_libs import SyncBatchNorm
__all__ = ["STDC1", "STDC2"]
@ -37,9 +37,9 @@ class STDCNet(nn.Layer):
layers(list, optional): layers numbers list. It determines STDC block numbers of STDCNet's stage3\4\5. Default: [4, 5, 3].
block_num(int,optional): block_num of features block. Default: 4.
type(str,optional): feature fusion method "cat"/"add". Default: "cat".
num_classes(int, optional): class number for image classification. Default: 1000.
dropout(float,optional): dropout ratio. if >0,use dropout ratio. Default: 0.20.
use_conv_last(bool,optional): whether to use the last ConvBNReLU layer . Default: False.
relative_lr(float,optional): parameters here receive a different learning rate when updating. The effective
learning rate is the product of relative_lr and the global learning rate. Default: 1.0.
in_channels (int, optional): The channels of input image. Default: 3.
pretrained(str, optional): the path of pretrained model.
"""
@ -48,34 +48,18 @@ class STDCNet(nn.Layer):
layers=[4, 5, 3],
block_num=4,
type="cat",
num_classes=1000,
dropout=0.20,
use_conv_last=False,
relative_lr=1.0,
in_channels=3,
pretrained=None):
super(STDCNet, self).__init__()
if type == "cat":
block = CatBottleneck
elif type == "add":
block = AddBottleneck
self.use_conv_last = use_conv_last
self.features = self._make_layers(base, layers, block_num, block)
self.conv_last = ConvBNRelu(base * 16, max(1024, base * 16), 1, 1)
if (layers == [4, 5, 3]): #stdc1446
self.x2 = nn.Sequential(self.features[:1])
self.x4 = nn.Sequential(self.features[1:2])
self.x8 = nn.Sequential(self.features[2:6])
self.x16 = nn.Sequential(self.features[6:11])
self.x32 = nn.Sequential(self.features[11:])
elif (layers == [2, 2, 2]): #stdc813
self.x2 = nn.Sequential(self.features[:1])
self.x4 = nn.Sequential(self.features[1:2])
self.x8 = nn.Sequential(self.features[2:4])
self.x16 = nn.Sequential(self.features[4:6])
self.x32 = nn.Sequential(self.features[6:])
else:
raise NotImplementedError(
"model with layers:{} is not implemented!".format(layers))
self.layers = layers
self.feat_channels = [base // 2, base, base * 4, base * 8, base * 16]
self.features = self._make_layers(in_channels, base, layers, block_num,
block, relative_lr)
self.pretrained = pretrained
self.init_weight()
@ -84,32 +68,42 @@ class STDCNet(nn.Layer):
"""
forward function for feature extract.
"""
feat2 = self.x2(x)
feat4 = self.x4(feat2)
feat8 = self.x8(feat4)
feat16 = self.x16(feat8)
feat32 = self.x32(feat16)
if self.use_conv_last:
feat32 = self.conv_last(feat32)
return feat2, feat4, feat8, feat16, feat32
def _make_layers(self, base, layers, block_num, block):
out_feats = []
x = self.features[0](x)
out_feats.append(x)
x = self.features[1](x)
out_feats.append(x)
idx = [[2, 2 + self.layers[0]],
[2 + self.layers[0], 2 + sum(self.layers[0:2])],
[2 + sum(self.layers[0:2]), 2 + sum(self.layers)]]
for start_idx, end_idx in idx:
for i in range(start_idx, end_idx):
x = self.features[i](x)
out_feats.append(x)
return out_feats
def _make_layers(self, in_channels, base, layers, block_num, block,
relative_lr):
features = []
features += [ConvBNRelu(3, base // 2, 3, 2)]
features += [ConvBNRelu(base // 2, base, 3, 2)]
features += [ConvBNRelu(in_channels, base // 2, 3, 2, relative_lr)]
features += [ConvBNRelu(base // 2, base, 3, 2, relative_lr)]
for i, layer in enumerate(layers):
for j in range(layer):
if i == 0 and j == 0:
features.append(block(base, base * 4, block_num, 2))
features.append(
block(base, base * 4, block_num, 2, relative_lr))
elif j == 0:
features.append(
block(base * int(math.pow(2, i + 1)), base * int(
math.pow(2, i + 2)), block_num, 2))
math.pow(2, i + 2)), block_num, 2, relative_lr))
else:
features.append(
block(base * int(math.pow(2, i + 2)), base * int(
math.pow(2, i + 2)), block_num, 1))
math.pow(2, i + 2)), block_num, 1, relative_lr))
return nn.Sequential(*features)
@ -125,16 +119,24 @@ class STDCNet(nn.Layer):
class ConvBNRelu(nn.Layer):
def __init__(self, in_planes, out_planes, kernel=3, stride=1):
def __init__(self,
in_planes,
out_planes,
kernel=3,
stride=1,
relative_lr=1.0):
super(ConvBNRelu, self).__init__()
param_attr = paddle.ParamAttr(learning_rate=relative_lr)
self.conv = nn.Conv2D(
in_planes,
out_planes,
kernel_size=kernel,
stride=stride,
padding=kernel // 2,
weight_attr=param_attr,
bias_attr=False)
self.bn = SyncBatchNorm(out_planes, data_format='NCHW')
self.bn = nn.BatchNorm2D(
out_planes, weight_attr=param_attr, bias_attr=param_attr)
self.relu = nn.ReLU()
def forward(self, x):
@ -143,11 +145,17 @@ class ConvBNRelu(nn.Layer):
class AddBottleneck(nn.Layer):
def __init__(self, in_planes, out_planes, block_num=3, stride=1):
def __init__(self,
in_planes,
out_planes,
block_num=3,
stride=1,
relative_lr=1.0):
super(AddBottleneck, self).__init__()
assert block_num > 1, "block number should be larger than 1."
self.conv_list = nn.LayerList()
self.stride = stride
param_attr = paddle.ParamAttr(learning_rate=relative_lr)
if stride == 2:
self.avd_layer = nn.Sequential(
nn.Conv2D(
@ -157,8 +165,12 @@ class AddBottleneck(nn.Layer):
stride=2,
padding=1,
groups=out_planes // 2,
weight_attr=param_attr,
bias_attr=False),
nn.BatchNorm2D(out_planes // 2), )
nn.BatchNorm2D(
out_planes // 2,
weight_attr=param_attr,
bias_attr=param_attr), )
self.skip = nn.Sequential(
nn.Conv2D(
in_planes,
@ -167,34 +179,53 @@ class AddBottleneck(nn.Layer):
stride=2,
padding=1,
groups=in_planes,
weight_attr=param_attr,
bias_attr=False),
nn.BatchNorm2D(in_planes),
nn.BatchNorm2D(
in_planes, weight_attr=param_attr, bias_attr=param_attr),
nn.Conv2D(
in_planes, out_planes, kernel_size=1, bias_attr=False),
nn.BatchNorm2D(out_planes), )
in_planes,
out_planes,
kernel_size=1,
bias_attr=False,
weight_attr=param_attr),
nn.BatchNorm2D(
out_planes, weight_attr=param_attr, bias_attr=param_attr), )
stride = 1
for idx in range(block_num):
if idx == 0:
self.conv_list.append(
ConvBNRelu(
in_planes, out_planes // 2, kernel=1))
in_planes,
out_planes // 2,
kernel=1,
relative_lr=relative_lr))
elif idx == 1 and block_num == 2:
self.conv_list.append(
ConvBNRelu(
out_planes // 2, out_planes // 2, stride=stride))
out_planes // 2,
out_planes // 2,
stride=stride,
relative_lr=relative_lr))
elif idx == 1 and block_num > 2:
self.conv_list.append(
ConvBNRelu(
out_planes // 2, out_planes // 4, stride=stride))
out_planes // 2,
out_planes // 4,
stride=stride,
relative_lr=relative_lr))
elif idx < block_num - 1:
self.conv_list.append(
ConvBNRelu(out_planes // int(math.pow(2, idx)), out_planes
// int(math.pow(2, idx + 1))))
ConvBNRelu(
out_planes // int(math.pow(2, idx)),
out_planes // int(math.pow(2, idx + 1)),
relative_lr=relative_lr))
else:
self.conv_list.append(
ConvBNRelu(out_planes // int(math.pow(2, idx)), out_planes
// int(math.pow(2, idx))))
ConvBNRelu(out_planes // int(math.pow(2, idx)),
out_planes // int(math.pow(2, idx))),
relative_lr=relative_lr)
def forward(self, x):
out_list = []
@ -211,11 +242,17 @@ class AddBottleneck(nn.Layer):
class CatBottleneck(nn.Layer):
def __init__(self, in_planes, out_planes, block_num=3, stride=1):
def __init__(self,
in_planes,
out_planes,
block_num=3,
stride=1,
relative_lr=1.0):
super(CatBottleneck, self).__init__()
assert block_num > 1, "block number should be larger than 1."
self.conv_list = nn.LayerList()
self.stride = stride
param_attr = paddle.ParamAttr(learning_rate=relative_lr)
if stride == 2:
self.avd_layer = nn.Sequential(
nn.Conv2D(
@ -225,8 +262,12 @@ class CatBottleneck(nn.Layer):
stride=2,
padding=1,
groups=out_planes // 2,
weight_attr=param_attr,
bias_attr=False),
nn.BatchNorm2D(out_planes // 2), )
nn.BatchNorm2D(
out_planes // 2,
weight_attr=param_attr,
bias_attr=param_attr), )
self.skip = nn.AvgPool2D(kernel_size=3, stride=2, padding=1)
stride = 1
@ -234,23 +275,36 @@ class CatBottleneck(nn.Layer):
if idx == 0:
self.conv_list.append(
ConvBNRelu(
in_planes, out_planes // 2, kernel=1))
in_planes,
out_planes // 2,
kernel=1,
relative_lr=relative_lr))
elif idx == 1 and block_num == 2:
self.conv_list.append(
ConvBNRelu(
out_planes // 2, out_planes // 2, stride=stride))
out_planes // 2,
out_planes // 2,
stride=stride,
relative_lr=relative_lr))
elif idx == 1 and block_num > 2:
self.conv_list.append(
ConvBNRelu(
out_planes // 2, out_planes // 4, stride=stride))
out_planes // 2,
out_planes // 4,
stride=stride,
relative_lr=relative_lr))
elif idx < block_num - 1:
self.conv_list.append(
ConvBNRelu(out_planes // int(math.pow(2, idx)), out_planes
// int(math.pow(2, idx + 1))))
ConvBNRelu(
out_planes // int(math.pow(2, idx)),
out_planes // int(math.pow(2, idx + 1)),
relative_lr=relative_lr))
else:
self.conv_list.append(
ConvBNRelu(out_planes // int(math.pow(2, idx)), out_planes
// int(math.pow(2, idx))))
ConvBNRelu(
out_planes // int(math.pow(2, idx)),
out_planes // int(math.pow(2, idx)),
relative_lr=relative_lr))
def forward(self, x):
out_list = []

@ -1,4 +1,4 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -17,9 +17,9 @@ import paddle.nn as nn
import paddle.nn.functional as F
import numpy as np
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.utils import utils
from paddlers.models.ppseg.models.backbones.transformer_utils import *
from paddleseg.cvlibs import manager
from paddleseg.utils import utils
from paddleseg.models.backbones.transformer_utils import *
class Mlp(nn.Layer):
@ -531,7 +531,7 @@ class SwinTransformer(nn.Layer):
Args:
pretrain_img_size (int): Input image size for training the pretrained model, used in absolute postion embedding. Default: 224.
patch_size (int | tuple(int)): Patch size. Default: 4.
in_chans (int): Number of input image channels. Default: 3.
in_channels (int): Number of input image channels. Default: 3.
embed_dim (int): Number of linear projection output channels. Default: 96.
depths (tuple[int]): Depths of each Swin Transformer stage.
num_heads (tuple[int]): Number of attention head of each stage.
@ -553,7 +553,7 @@ class SwinTransformer(nn.Layer):
def __init__(self,
pretrain_img_size=224,
patch_size=4,
in_chans=3,
in_channels=3,
embed_dim=96,
depths=[2, 2, 6, 2],
num_heads=[3, 6, 12, 24],
@ -583,7 +583,7 @@ class SwinTransformer(nn.Layer):
# split image into non-overlapping patches
self.patch_embed = PatchEmbed(
patch_size=patch_size,
in_chans=in_chans,
in_chans=in_channels,
embed_dim=embed_dim,
norm_layer=norm_layer if self.patch_norm else None)

@ -0,0 +1,716 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This file refers to https://github.com/hustvl/TopFormer and https://github.com/BR-IDL/PaddleViT
"""
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddleseg.cvlibs import manager
from paddleseg import utils
from paddleseg.models.backbones.transformer_utils import Identity, DropPath
__all__ = ["TopTransformer_Base", "TopTransformer_Small", "TopTransformer_Tiny"]
def make_divisible(val, divisor, min_value=None):
    """
    Round a channel count to a nearby multiple of `divisor`.

    Adapted from the TensorFlow MobileNet reference implementation:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py

    Args:
        val (int|float): The requested channel count.
        divisor (int): The granularity used for rounding.
        min_value (int, optional): Lower bound of the result. Default: None,
            in which case `divisor` is used as the lower bound.

    Returns:
        int: `val` rounded to the nearest multiple of `divisor`, clamped from
            below by `min_value`, and bumped by one `divisor` if the rounding
            lost more than 10% of `val`.
    """
    lower_bound = divisor if min_value is None else min_value
    rounded = int(val + divisor / 2) // divisor * divisor
    result = max(lower_bound, rounded)
    # Make sure that rounding down does not go down by more than 10%.
    if result < 0.9 * val:
        result += divisor
    return result
class HSigmoid(nn.Layer):
    """
    Hard sigmoid activation, computed as ReLU6(x + 3) / 6.

    Args:
        inplace (bool, optional): Accepted for signature compatibility; it is
            not used by this implementation. Default: True.
    """

    def __init__(self, inplace=True):
        super(HSigmoid, self).__init__()
        self.relu = nn.ReLU6()

    def forward(self, x):
        shifted = x + 3
        return self.relu(shifted) / 6
class Conv2DBN(nn.Layer):
    """
    A bias-free 2-D convolution followed by batch normalization.

    Args:
        in_channels (int): The number of input channels.
        out_channels (int): The number of output channels.
        ks (int, optional): The kernel size of the convolution. Default: 1.
        stride (int, optional): The stride of the convolution. Default: 1.
        pad (int, optional): The padding of the convolution. Default: 0.
        dilation (int, optional): The dilation of the convolution. Default: 1.
        groups (int, optional): The number of convolution groups. Default: 1.
        bn_weight_init (float, optional): The constant used to initialize the
            batch norm weight. Default: 1.
        lr_mult (float, optional): The learning rate multiplier applied to the
            convolution weight and both batch norm parameters. Default: 1.0.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 ks=1,
                 stride=1,
                 pad=0,
                 dilation=1,
                 groups=1,
                 bn_weight_init=1,
                 lr_mult=1.0):
        super(Conv2DBN, self).__init__()

        self.c = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=ks,
            stride=stride,
            padding=pad,
            dilation=dilation,
            groups=groups,
            weight_attr=paddle.ParamAttr(learning_rate=lr_mult),
            bias_attr=False)

        # The batch norm weight starts at `bn_weight_init`, the bias at zero.
        scale_attr = paddle.ParamAttr(
            initializer=nn.initializer.Constant(bn_weight_init),
            learning_rate=lr_mult)
        shift_attr = paddle.ParamAttr(
            initializer=nn.initializer.Constant(0), learning_rate=lr_mult)
        self.bn = nn.BatchNorm2D(
            out_channels, weight_attr=scale_attr, bias_attr=shift_attr)

    def forward(self, inputs):
        return self.bn(self.c(inputs))
class ConvBNAct(nn.Layer):
    """
    Convolution followed by an optional normalization and an optional activation.

    Args:
        in_channels (int): The number of input channels.
        out_channels (int): The number of output channels.
        kernel_size (int, optional): The kernel size of the convolution. Default: 1.
        stride (int, optional): The stride of the convolution. Default: 1.
        padding (int, optional): The padding of the convolution. Default: 0.
        groups (int, optional): The number of convolution groups. Default: 1.
        norm (callable, optional): The normalization layer class. If None, an
            Identity layer is used instead. Default: nn.BatchNorm2D.
        act (callable, optional): The activation layer class. If None, an
            Identity layer is used instead. Default: None.
        bias_attr (bool, optional): Whether the convolution has a bias. Default: False.
        lr_mult (float, optional): The learning rate multiplier applied to all
            parameters of this layer. Default: 1.0.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size=1,
                 stride=1,
                 padding=0,
                 groups=1,
                 norm=nn.BatchNorm2D,
                 act=None,
                 bias_attr=False,
                 lr_mult=1.0):
        super().__init__()
        # One attribute object is shared by every parameter created here.
        shared_attr = paddle.ParamAttr(learning_rate=lr_mult)
        conv_bias_attr = shared_attr if bias_attr else False

        self.conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            weight_attr=shared_attr,
            bias_attr=conv_bias_attr)

        if act is None:
            self.act = Identity()
        else:
            self.act = act()

        if norm is None:
            self.bn = Identity()
        else:
            self.bn = norm(
                out_channels, weight_attr=shared_attr, bias_attr=shared_attr)

    def forward(self, x):
        return self.act(self.bn(self.conv(x)))
class MLP(nn.Layer):
    """
    Feed-forward block: 1x1 Conv2DBN, 3x3 depthwise convolution, activation,
    dropout, 1x1 Conv2DBN and a final dropout.

    Args:
        in_features (int): The number of input channels.
        hidden_features (int, optional): The number of hidden channels.
            Falls back to `in_features` when not given. Default: None.
        out_features (int, optional): The number of output channels.
            Falls back to `in_features` when not given. Default: None.
        act_layer (callable, optional): The activation layer class. Default: nn.ReLU.
        drop (float, optional): The dropout probability. Default: 0.
        lr_mult (float, optional): The learning rate multiplier. Default: 1.0.
    """

    def __init__(self,
                 in_features,
                 hidden_features=None,
                 out_features=None,
                 act_layer=nn.ReLU,
                 drop=0.,
                 lr_mult=1.0):
        super(MLP, self).__init__()
        out_features = out_features or in_features
        hidden_features = hidden_features or in_features

        self.fc1 = Conv2DBN(in_features, hidden_features, lr_mult=lr_mult)

        # Depthwise 3x3 convolution (stride 1, padding 1) with a bias.
        dw_attr = paddle.ParamAttr(learning_rate=lr_mult)
        self.dwconv = nn.Conv2D(
            hidden_features,
            hidden_features,
            3,
            1,
            1,
            groups=hidden_features,
            weight_attr=dw_attr,
            bias_attr=dw_attr)

        self.act = act_layer()
        self.fc2 = Conv2DBN(hidden_features, out_features, lr_mult=lr_mult)
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        hidden = self.dwconv(self.fc1(x))
        hidden = self.drop(self.act(hidden))
        return self.drop(self.fc2(hidden))
class InvertedResidual(nn.Layer):
    """
    Inverted residual block: optional 1x1 expansion, depthwise convolution and
    1x1 projection, with a skip connection when the shapes allow it.

    Args:
        in_channels (int): The number of input channels.
        out_channels (int): The number of output channels.
        kernel_size (int): The kernel size of the depthwise convolution.
        stride (int): The stride of the depthwise convolution. It should be 1 or 2.
        expand_ratio (int|float): The ratio of hidden channels to input channels.
            The expansion convolution is skipped when it equals 1.
        activations (callable, optional): The activation layer class.
            Default: None, in which case nn.ReLU is used.
        lr_mult (float, optional): The learning rate multiplier. Default: 1.0.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 expand_ratio,
                 activations=None,
                 lr_mult=1.0):
        super().__init__()
        assert stride in [1, 2], "The stride should be 1 or 2."

        act_cls = nn.ReLU if activations is None else activations
        hidden_dim = int(round(in_channels * expand_ratio))
        # The skip connection needs matching spatial size and channel count.
        self.use_res_connect = stride == 1 and in_channels == out_channels

        ops = []
        if expand_ratio != 1:
            # Pointwise expansion.
            ops.append(
                Conv2DBN(
                    in_channels, hidden_dim, ks=1, lr_mult=lr_mult))
            ops.append(act_cls())
        # Depthwise convolution.
        ops.append(
            Conv2DBN(
                hidden_dim,
                hidden_dim,
                ks=kernel_size,
                stride=stride,
                pad=kernel_size // 2,
                groups=hidden_dim,
                lr_mult=lr_mult))
        ops.append(act_cls())
        # Pointwise projection, without activation.
        ops.append(
            Conv2DBN(
                hidden_dim, out_channels, ks=1, lr_mult=lr_mult))

        self.conv = nn.Sequential(*ops)
        self.out_channels = out_channels

    def forward(self, x):
        if not self.use_res_connect:
            return self.conv(x)
        return x + self.conv(x)
class TokenPyramidModule(nn.Layer):
    """
    Stem convolution followed by a stack of InvertedResidual blocks; the
    outputs of selected blocks are returned as a feature pyramid.

    Args:
        cfgs (list): One `[k, t, c, s]` entry per block: kernel size,
            expand ratio, output channels and stride.
        out_indices (list|tuple): Indices (into `cfgs`) of the blocks whose
            outputs are returned by `forward`.
        in_channels (int, optional): The channels of input image. Default: 3.
        inp_channel (int, optional): The output channels of the stem. Default: 16.
        activation (callable, optional): The activation layer class. Default: nn.ReLU.
        width_mult (float, optional): The multiplier applied to each block's
            output channels before rounding to a multiple of 8. Default: 1.
        lr_mult (float, optional): The learning rate multiplier. Default: 1.
    """

    def __init__(self,
                 cfgs,
                 out_indices,
                 in_channels=3,
                 inp_channel=16,
                 activation=nn.ReLU,
                 width_mult=1.,
                 lr_mult=1.):
        super().__init__()
        self.out_indices = out_indices

        # 3x3 convolution with stride 2 and padding 1.
        self.stem = nn.Sequential(
            Conv2DBN(
                in_channels, inp_channel, 3, 2, 1, lr_mult=lr_mult),
            activation())

        # Blocks are registered as sublayers named 'layer1', 'layer2', ...;
        # `self.layers` keeps those names in execution order.
        self.layers = []
        for i, (k, t, c, s) in enumerate(cfgs):
            output_channel = make_divisible(c * width_mult, 8)
            layer_name = 'layer{}'.format(i + 1)
            layer = InvertedResidual(
                inp_channel,
                output_channel,
                kernel_size=k,
                stride=s,
                expand_ratio=t,
                activations=activation,
                lr_mult=lr_mult)
            self.add_sublayer(layer_name, layer)
            self.layers.append(layer_name)
            inp_channel = output_channel

    def forward(self, x):
        outs = []
        x = self.stem(x)
        for i, layer_name in enumerate(self.layers):
            x = getattr(self, layer_name)(x)
            if i in self.out_indices:
                outs.append(x)
        return outs
class Attention(nn.Layer):
    """
    Multi-head attention over the spatial positions of a feature map, with
    query/key/value projections implemented as 1x1 Conv2DBN layers.

    Args:
        dim (int): The number of input and output channels.
        key_dim (int): The channels of query and key for each head.
        num_heads (int): The number of attention heads.
        attn_ratio (int|float, optional): The ratio of per-head value channels
            to `key_dim`. Default: 4.
        activation (callable): The activation layer class applied before the
            output projection. NOTE(review): the default is None, but it is
            called unconditionally, so a class must be supplied.
        lr_mult (float, optional): The learning rate multiplier. Default: 1.0.
    """

    def __init__(self,
                 dim,
                 key_dim,
                 num_heads,
                 attn_ratio=4,
                 activation=None,
                 lr_mult=1.0):
        super(Attention, self).__init__()
        self.num_heads = num_heads
        # NOTE(review): `scale` is stored but not applied in `forward`.
        self.scale = key_dim**-0.5
        self.key_dim = key_dim
        nh_kd = key_dim * num_heads
        self.nh_kd = nh_kd
        self.d = int(attn_ratio * key_dim)
        self.dh = self.d * num_heads
        self.attn_ratio = attn_ratio

        self.to_q = Conv2DBN(dim, nh_kd, 1, lr_mult=lr_mult)
        self.to_k = Conv2DBN(dim, nh_kd, 1, lr_mult=lr_mult)
        self.to_v = Conv2DBN(dim, self.dh, 1, lr_mult=lr_mult)

        # The batch norm weight of the output projection starts at zero.
        self.proj = nn.Sequential(
            activation(),
            Conv2DBN(
                self.dh, dim, bn_weight_init=0, lr_mult=lr_mult))

    def forward(self, x):
        x_shape = paddle.shape(x)
        H, W = x_shape[2], x_shape[3]

        # Flatten the spatial dims; query and value are transposed so that
        # positions come before channels.
        query = self.to_q(x).reshape([0, self.num_heads, self.key_dim, -1])
        query = query.transpose([0, 1, 3, 2])
        key = self.to_k(x).reshape([0, self.num_heads, self.key_dim, -1])
        value = self.to_v(x).reshape([0, self.num_heads, self.d, -1])
        value = value.transpose([0, 1, 3, 2])

        weights = F.softmax(paddle.matmul(query, key), axis=-1)

        out = paddle.matmul(weights, value)
        out = out.transpose([0, 1, 3, 2]).reshape([0, self.dh, H, W])
        return self.proj(out)
class Block(nn.Layer):
    """
    Transformer block: an Attention branch and an MLP branch, each wrapped by
    drop path and added back to its input.

    Args:
        dim (int): The number of input and output channels.
        key_dim (int): The channels of query and key for each head.
        num_heads (int): The number of attention heads.
        mlp_ratios (float, optional): The ratio of MLP hidden channels to `dim`. Default: 4.
        attn_ratio (float, optional): The `attn_ratio` passed to Attention. Default: 2.
        drop (float, optional): The dropout probability of the MLP. Default: 0.
        drop_path (float, optional): The drop path probability. Default: 0.
        act_layer (callable, optional): The activation layer class. Default: nn.ReLU.
        lr_mult (float, optional): The learning rate multiplier. Default: 1.0.
    """

    def __init__(self,
                 dim,
                 key_dim,
                 num_heads,
                 mlp_ratios=4.,
                 attn_ratio=2.,
                 drop=0.,
                 drop_path=0.,
                 act_layer=nn.ReLU,
                 lr_mult=1.0):
        super(Block, self).__init__()
        self.dim = dim
        self.num_heads = num_heads
        self.mlp_ratios = mlp_ratios

        self.attn = Attention(
            dim,
            key_dim=key_dim,
            num_heads=num_heads,
            attn_ratio=attn_ratio,
            activation=act_layer,
            lr_mult=lr_mult)

        # Stochastic depth; an Identity layer is used when the rate is zero.
        if drop_path > 0.:
            self.drop_path = DropPath(drop_path)
        else:
            self.drop_path = Identity()

        self.mlp = MLP(
            in_features=dim,
            hidden_features=int(dim * mlp_ratios),
            act_layer=act_layer,
            drop=drop,
            lr_mult=lr_mult)

    def forward(self, x):
        attn_out = self.drop_path(self.attn(x))
        x = x + attn_out

        mlp_out = self.drop_path(self.mlp(x))
        return mlp_out + x
class BasicLayer(nn.Layer):
    """
    A sequence of `block_num` transformer Blocks applied one after another.

    Args:
        block_num (int): The number of blocks.
        embedding_dim (int): The number of channels of each block.
        key_dim (int): The channels of query and key for each head.
        num_heads (int): The number of attention heads.
        mlp_ratios (float, optional): The `mlp_ratios` of each block. Default: 4.
        attn_ratio (float, optional): The `attn_ratio` of each block. Default: 2.
        drop (float, optional): The dropout probability of each block. Default: 0.
        attn_drop (float, optional): Accepted but not used here. Default: 0.
        drop_path (float|list, optional): The drop path probability; a list
            gives one value per block. Default: 0.
        act_layer (callable): The activation layer class passed to each block.
            NOTE(review): the default is None, which Block forwards as-is.
        lr_mult (float, optional): The learning rate multiplier. Default: 1.0.
    """

    def __init__(self,
                 block_num,
                 embedding_dim,
                 key_dim,
                 num_heads,
                 mlp_ratios=4.,
                 attn_ratio=2.,
                 drop=0.,
                 attn_drop=0.,
                 drop_path=0.,
                 act_layer=None,
                 lr_mult=1.0):
        super(BasicLayer, self).__init__()
        self.block_num = block_num

        self.transformer_blocks = nn.LayerList()
        for idx in range(self.block_num):
            if isinstance(drop_path, list):
                block_drop_path = drop_path[idx]
            else:
                block_drop_path = drop_path
            block = Block(
                embedding_dim,
                key_dim=key_dim,
                num_heads=num_heads,
                mlp_ratios=mlp_ratios,
                attn_ratio=attn_ratio,
                drop=drop,
                drop_path=block_drop_path,
                act_layer=act_layer,
                lr_mult=lr_mult)
            self.transformer_blocks.append(block)

    def forward(self, x):
        # token * N
        for block in self.transformer_blocks:
            x = block(x)
        return x
class PyramidPoolAgg(nn.Layer):
    """
    Average-pool a list of feature maps and concatenate them along channels.

    Args:
        stride (int): The base of the pooling stride; the first input is
            pooled with stride `stride ** len(inputs)` and the stride is
            halved for each following input.
    """

    def __init__(self, stride):
        super(PyramidPoolAgg, self).__init__()
        self.stride = stride
        self.tmp = Identity()  # avoid the error of paddle.flops

    def forward(self, inputs):
        # F.adaptive_avg_pool2d does not support (H, W) being Tensors, so
        # exporting the inference model with the variant below raises an error:
        #
        #   _, _, H, W = inputs[-1].shape
        #   H = (H - 1) // self.stride + 1
        #   W = (W - 1) // self.stride + 1
        #   return paddle.concat(
        #       [F.adaptive_avg_pool2d(inp, (H, W)) for inp in inputs], axis=1)
        #
        # Fixed-size average pooling is used instead.
        num_inputs = len(inputs)
        ks = 2**num_inputs
        stride = self.stride**num_inputs

        pooled = []
        for feat in inputs:
            pooled.append(F.avg_pool2d(feat, int(ks), int(stride)))
            # Kernel size and stride are halved for the next input.
            ks /= 2
            stride /= 2
        return paddle.concat(pooled, axis=1)
class InjectionMultiSum(nn.Layer):
    """
    Fuse a local feature map with a global one as
    `local * hsigmoid(gate(global)) + embed(global)`, where both global terms
    are bilinearly resized to the local feature's spatial size.

    Args:
        in_channels (int): The number of channels of both inputs.
        out_channels (int): The number of output channels.
        activations (callable, optional): Accepted but not used here. Default: None.
        lr_mult (float, optional): The learning rate multiplier. Default: 1.0.
    """

    def __init__(self, in_channels, out_channels, activations=None,
                 lr_mult=1.0):
        super().__init__()
        self.local_embedding = ConvBNAct(
            in_channels, out_channels, kernel_size=1, lr_mult=lr_mult)
        self.global_embedding = ConvBNAct(
            in_channels, out_channels, kernel_size=1, lr_mult=lr_mult)
        self.global_act = ConvBNAct(
            in_channels, out_channels, kernel_size=1, lr_mult=lr_mult)
        self.act = HSigmoid()

    def forward(self, x_low, x_global):
        target_hw = paddle.shape(x_low)[2:]

        local_feat = self.local_embedding(x_low)

        # Gate computed from the global feature.
        gate = self.act(self.global_act(x_global))
        gate = F.interpolate(
            gate, target_hw, mode='bilinear', align_corners=False)

        # Additive term computed from the global feature.
        global_feat = F.interpolate(
            self.global_embedding(x_global),
            target_hw,
            mode='bilinear',
            align_corners=False)

        return local_feat * gate + global_feat
class InjectionMultiSumCBR(nn.Layer):
    """
    Fuse a local feature map with a global one as
    `local * hsigmoid(gate(global)) + embed(global)`, where both global terms
    are bilinearly resized to the local feature's spatial size.

    Sublayers:
        local_embedding: conv-bn
        global_embedding: conv-bn
        global_act: conv

    NOTE(review): the original comment described the embeddings as
    conv-bn-relu, but ConvBNAct is built with its default `act=None`, so no
    activation is applied. Confirm which behavior is intended.

    Args:
        in_channels (int): The number of channels of both inputs.
        out_channels (int): The number of output channels.
        activations (callable, optional): Accepted but not used here. Default: None.
        lr_mult (float, optional): The learning rate multiplier. TopTransformer
            passes it to every injection module. Default: 1.0.
    """

    def __init__(self, in_channels, out_channels, activations=None,
                 lr_mult=1.0):
        super(InjectionMultiSumCBR, self).__init__()

        self.local_embedding = ConvBNAct(
            in_channels, out_channels, kernel_size=1, lr_mult=lr_mult)
        self.global_embedding = ConvBNAct(
            in_channels, out_channels, kernel_size=1, lr_mult=lr_mult)
        self.global_act = ConvBNAct(
            in_channels,
            out_channels,
            kernel_size=1,
            norm=None,
            act=None,
            lr_mult=lr_mult)
        self.act = HSigmoid()

    def forward(self, x_low, x_global):
        # The target size comes from the local feature (was the undefined `x`).
        xl_hw = paddle.shape(x_low)[2:]
        local_feat = self.local_embedding(x_low)
        # kernel
        global_act = self.global_act(x_global)
        global_act = F.interpolate(
            self.act(global_act), xl_hw, mode='bilinear', align_corners=False)
        # feat_h
        global_feat = self.global_embedding(x_global)
        global_feat = F.interpolate(
            global_feat, xl_hw, mode='bilinear', align_corners=False)
        out = local_feat * global_act + global_feat
        return out
class FuseBlockSum(nn.Layer):
    """
    Fuse two feature maps by projecting each with a 1x1 conv-bn, bilinearly
    resizing the second one to the first one's spatial size and summing them.

    Args:
        in_channels (int): The number of channels of both inputs.
        out_channels (int): The number of output channels.
        activations (callable, optional): Accepted but not used here. Default: None.
        lr_mult (float, optional): The learning rate multiplier. TopTransformer
            passes it to every injection module. Default: 1.0.
    """

    def __init__(self, in_channels, out_channels, activations=None,
                 lr_mult=1.0):
        super(FuseBlockSum, self).__init__()

        self.fuse1 = ConvBNAct(
            in_channels, out_channels, kernel_size=1, act=None, lr_mult=lr_mult)
        self.fuse2 = ConvBNAct(
            in_channels, out_channels, kernel_size=1, act=None, lr_mult=lr_mult)

    def forward(self, x_low, x_high):
        # The target size comes from the first input (was the undefined `x`).
        xl_hw = paddle.shape(x_low)[2:]
        inp = self.fuse1(x_low)
        kernel = self.fuse2(x_high)
        feat_h = F.interpolate(
            kernel, xl_hw, mode='bilinear', align_corners=False)
        out = inp + feat_h
        return out
class FuseBlockMulti(nn.Layer):
    """
    Fuse two feature maps by projecting each with a 1x1 conv-bn and multiplying
    the first one by the hard-sigmoid of the second one, which is bilinearly
    resized to the first one's spatial size.

    Args:
        in_channels (int): The number of channels of both inputs.
        out_channels (int): The number of output channels.
        stride (int, optional): It should be 1 or 2; it is only validated and
            not otherwise used here. Default: 1.
        activations (callable, optional): Accepted but not used here. Default: None.
        lr_mult (float, optional): The learning rate multiplier. TopTransformer
            passes it to every injection module. Default: 1.0.
    """

    def __init__(
            self,
            in_channels,
            out_channels,
            stride=1,
            activations=None,
            lr_mult=1.0, ):
        super(FuseBlockMulti, self).__init__()
        assert stride in [1, 2], "The stride should be 1 or 2."

        self.fuse1 = ConvBNAct(
            in_channels, out_channels, kernel_size=1, act=None, lr_mult=lr_mult)
        self.fuse2 = ConvBNAct(
            in_channels, out_channels, kernel_size=1, act=None, lr_mult=lr_mult)
        self.act = HSigmoid()

    def forward(self, x_low, x_high):
        # The target size comes from the first input (was the undefined `x`).
        xl_hw = paddle.shape(x_low)[2:]
        inp = self.fuse1(x_low)
        sig_act = self.fuse2(x_high)
        sig_act = F.interpolate(
            self.act(sig_act), xl_hw, mode='bilinear', align_corners=False)
        out = inp * sig_act
        return out
# Maps the `injection_type` string accepted by TopTransformer to the class
# used to fuse a local feature with its slice of the global feature.
SIM_BLOCK = {
    "fuse_sum": FuseBlockSum,
    "fuse_multi": FuseBlockMulti,
    "multi_sum": InjectionMultiSum,
    "multi_sum_cbr": InjectionMultiSumCBR,
}
class TopTransformer(nn.Layer):
    """
    The TopTransformer backbone: a token pyramid (TokenPyramidModule), pooled
    and concatenated by PyramidPoolAgg, processed by transformer blocks
    (BasicLayer) and optionally injected back into the pyramid features.

    Args:
        cfgs (list): One `[k, t, c, s]` entry per pyramid block: kernel size,
            expand ratio, output channels and stride.
        injection_out_channels (list): The output channels of the injection
            module for each pyramid feature; entries whose index is not in
            `trans_out_indices` are not read.
        encoder_out_indices (list): Indices (into `cfgs`) of the blocks whose
            outputs form the pyramid.
        trans_out_indices (list, optional): Indices of the pyramid features
            that are fused with global semantics and returned. Default: [1, 2, 3].
        depths (int, optional): The number of transformer blocks. Default: 4.
        key_dim (int, optional): The channels of query and key for each head. Default: 16.
        num_heads (int, optional): The number of attention heads. Default: 8.
        attn_ratios (int|float, optional): The `attn_ratio` of each block. Default: 2.
        mlp_ratios (int|float, optional): The `mlp_ratios` of each block. Default: 2.
        c2t_stride (int, optional): The stride passed to PyramidPoolAgg. Default: 2.
        drop_path_rate (float, optional): The final drop path rate; the rate
            of each block is linearly spaced from 0 to this value. Default: 0.
        act_layer (callable, optional): The activation layer class. Default: nn.ReLU6.
        injection_type (str, optional): A key of SIM_BLOCK selecting the
            injection module. Default: "multi_sum".
        injection (bool, optional): Whether to fuse global semantics into the
            pyramid features. Default: True.
        lr_mult (float, optional): The learning rate multiplier. Default: 1.0.
        in_channels (int, optional): The channels of input image. Default: 3.
        pretrained (str, optional): The path or url of pretrained model. Default: None.
    """

    def __init__(self,
                 cfgs,
                 injection_out_channels,
                 encoder_out_indices,
                 trans_out_indices=[1, 2, 3],
                 depths=4,
                 key_dim=16,
                 num_heads=8,
                 attn_ratios=2,
                 mlp_ratios=2,
                 c2t_stride=2,
                 drop_path_rate=0.,
                 act_layer=nn.ReLU6,
                 injection_type="multi_sum",
                 injection=True,
                 lr_mult=1.0,
                 in_channels=3,
                 pretrained=None):
        super().__init__()
        # Output channels (third cfg entry) of the selected pyramid blocks.
        self.feat_channels = [
            c[2] for i, c in enumerate(cfgs) if i in encoder_out_indices
        ]
        self.injection_out_channels = injection_out_channels
        self.injection = injection
        # The transformer runs on the channel-wise concatenation of the pyramid.
        self.embed_dim = sum(self.feat_channels)
        self.trans_out_indices = trans_out_indices

        self.tpm = TokenPyramidModule(
            cfgs=cfgs,
            out_indices=encoder_out_indices,
            in_channels=in_channels,
            lr_mult=lr_mult)
        self.ppa = PyramidPoolAgg(stride=c2t_stride)

        # One drop path rate per block, linearly spaced in [0, drop_path_rate].
        dpr = [x.item() for x in \
               paddle.linspace(0, drop_path_rate, depths)]
        self.trans = BasicLayer(
            block_num=depths,
            embedding_dim=self.embed_dim,
            key_dim=key_dim,
            num_heads=num_heads,
            mlp_ratios=mlp_ratios,
            attn_ratio=attn_ratios,
            drop=0,
            attn_drop=0,
            drop_path=dpr,
            act_layer=act_layer,
            lr_mult=lr_mult)

        self.SIM = nn.LayerList()
        inj_module = SIM_BLOCK[injection_type]
        if self.injection:
            for i in range(len(self.feat_channels)):
                if i in trans_out_indices:
                    self.SIM.append(
                        inj_module(
                            self.feat_channels[i],
                            injection_out_channels[i],
                            activations=act_layer,
                            lr_mult=lr_mult))
                else:
                    # Placeholder so that SIM[i] lines up with pyramid level i.
                    self.SIM.append(Identity())

        self.pretrained = pretrained
        self.init_weight()

    def init_weight(self):
        """Load the pretrained weights if `pretrained` was given."""
        if self.pretrained is not None:
            utils.load_entire_model(self, self.pretrained)

    def forward(self, x):
        outputs = self.tpm(x)
        out = self.ppa(outputs)
        out = self.trans(out)

        if self.injection:
            # Split the global feature back into one chunk per pyramid level.
            xx = out.split(self.feat_channels, axis=1)
            results = []
            for i in range(len(self.feat_channels)):
                if i in self.trans_out_indices:
                    local_tokens = outputs[i]
                    global_semantics = xx[i]
                    out_ = self.SIM[i](local_tokens, global_semantics)
                    results.append(out_)
            return results
        else:
            outputs.append(out)
            return outputs
@manager.BACKBONES.add_component
def TopTransformer_Base(**kwargs):
    """
    Build the Base TopTransformer: 10 pyramid blocks up to 160 channels,
    8 attention heads and 256 injection output channels.

    Args:
        **kwargs: Extra keyword arguments forwarded to TopTransformer.

    Returns:
        TopTransformer: The constructed backbone.
    """
    # Each row is [kernel size, expand ratio, output channels, stride].
    stage_cfgs = [
        [3, 1, 16, 1],  # 1/2
        [3, 4, 32, 2],  # 1/4 1
        [3, 3, 32, 1],
        [5, 3, 64, 2],  # 1/8 3
        [5, 3, 64, 1],
        [3, 3, 128, 2],  # 1/16 5
        [3, 3, 128, 1],
        [5, 6, 160, 2],  # 1/32 7
        [5, 6, 160, 1],
        [3, 6, 160, 1],
    ]

    return TopTransformer(
        cfgs=stage_cfgs,
        injection_out_channels=[None, 256, 256, 256],
        encoder_out_indices=[2, 4, 6, 9],
        trans_out_indices=[1, 2, 3],
        depths=4,
        key_dim=16,
        num_heads=8,
        attn_ratios=2,
        mlp_ratios=2,
        c2t_stride=2,
        drop_path_rate=0.,
        act_layer=nn.ReLU6,
        injection_type="multi_sum",
        injection=True,
        **kwargs)
@manager.BACKBONES.add_component
def TopTransformer_Small(**kwargs):
    """
    Build the Small TopTransformer: 10 pyramid blocks up to 128 channels,
    6 attention heads and 192 injection output channels.

    Args:
        **kwargs: Extra keyword arguments forwarded to TopTransformer.

    Returns:
        TopTransformer: The constructed backbone.
    """
    # Each row is [kernel size, expand ratio, output channels, stride].
    stage_cfgs = [
        [3, 1, 16, 1],  # 1/2
        [3, 4, 24, 2],  # 1/4 1
        [3, 3, 24, 1],
        [5, 3, 48, 2],  # 1/8 3
        [5, 3, 48, 1],
        [3, 3, 96, 2],  # 1/16 5
        [3, 3, 96, 1],
        [5, 6, 128, 2],  # 1/32 7
        [5, 6, 128, 1],
        [3, 6, 128, 1],
    ]

    return TopTransformer(
        cfgs=stage_cfgs,
        injection_out_channels=[None, 192, 192, 192],
        encoder_out_indices=[2, 4, 6, 9],
        trans_out_indices=[1, 2, 3],
        depths=4,
        key_dim=16,
        num_heads=6,
        attn_ratios=2,
        mlp_ratios=2,
        c2t_stride=2,
        drop_path_rate=0.,
        act_layer=nn.ReLU6,
        injection_type="multi_sum",
        injection=True,
        **kwargs)
@manager.BACKBONES.add_component
def TopTransformer_Tiny(**kwargs):
    """
    Build the Tiny TopTransformer: 9 pyramid blocks up to 96 channels,
    4 attention heads and 128 injection output channels.

    Args:
        **kwargs: Extra keyword arguments forwarded to TopTransformer.

    Returns:
        TopTransformer: The constructed backbone.
    """
    # Each row is [kernel size, expand ratio, output channels, stride].
    stage_cfgs = [
        [3, 1, 16, 1],  # 1/2
        [3, 4, 16, 2],  # 1/4 1
        [3, 3, 16, 1],
        [5, 3, 32, 2],  # 1/8 3
        [5, 3, 32, 1],
        [3, 3, 64, 2],  # 1/16 5
        [3, 3, 64, 1],
        [5, 6, 96, 2],  # 1/32 7
        [5, 6, 96, 1],
    ]

    return TopTransformer(
        cfgs=stage_cfgs,
        injection_out_channels=[None, 128, 128, 128],
        encoder_out_indices=[2, 4, 6, 8],
        trans_out_indices=[1, 2, 3],
        depths=4,
        key_dim=16,
        num_heads=4,
        attn_ratios=2,
        mlp_ratios=2,
        c2t_stride=2,
        drop_path_rate=0.,
        act_layer=nn.ReLU6,
        injection_type="multi_sum",
        injection=True,
        **kwargs)

@ -1,4 +1,4 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -35,7 +35,7 @@ def drop_path(x, drop_prob=0., training=False):
return x
keep_prob = paddle.to_tensor(1 - drop_prob)
shape = (paddle.shape(x)[0], ) + (1, ) * (x.ndim - 1)
random_tensor = keep_prob + paddle.rand(shape, dtype=x.dtype)
random_tensor = keep_prob + paddle.rand(shape).astype(x.dtype)
random_tensor = paddle.floor(random_tensor) # binarize
output = x.divide(keep_prob) * random_tensor
return output

@ -1,4 +1,4 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -20,9 +20,9 @@ import paddle.nn as nn
import paddle.nn.functional as F
import numpy as np
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.utils import utils, logger
from paddlers.models.ppseg.models.backbones.transformer_utils import to_2tuple, DropPath, Identity
from paddleseg.cvlibs import manager
from paddleseg.utils import utils, logger
from paddleseg.models.backbones.transformer_utils import to_2tuple, DropPath, Identity
class Mlp(nn.Layer):
@ -154,7 +154,7 @@ class VisionTransformer(nn.Layer):
def __init__(self,
img_size=224,
patch_size=16,
in_chans=3,
in_channels=3,
embed_dim=768,
depth=12,
num_heads=12,
@ -176,7 +176,7 @@ class VisionTransformer(nn.Layer):
self.patch_embed = PatchEmbed(
img_size=img_size,
patch_size=patch_size,
in_chans=in_chans,
in_chans=in_channels,
embed_dim=embed_dim)
self.pos_w = self.patch_embed.num_patches_in_w
self.pos_h = self.patch_embed.num_patches_in_h

@ -1,4 +1,4 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -15,9 +15,9 @@
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.utils import utils
from paddlers.models.ppseg.models import layers
from paddleseg.cvlibs import manager
from paddleseg.utils import utils
from paddleseg.models import layers
__all__ = ["Xception41_deeplab", "Xception65_deeplab", "Xception71_deeplab"]
@ -255,12 +255,17 @@ class XceptionDeeplab(nn.Layer):
Args:
backbone (str): Which type of Xception_DeepLab to select. It should be one of ('xception_41', 'xception_65', 'xception_71').
in_channels (int, optional): The channels of input image. Default: 3.
pretrained (str, optional): The path of pretrained model.
output_stride (int, optional): The stride of output features compared to input images. It is 8 or 16. Default: 16.
"""
def __init__(self, backbone, pretrained=None, output_stride=16):
def __init__(self,
backbone,
in_channels=3,
pretrained=None,
output_stride=16):
super(XceptionDeeplab, self).__init__()
@ -269,7 +274,7 @@ class XceptionDeeplab(nn.Layer):
self.feat_channels = [128, 2048]
self._conv1 = ConvBNLayer(
3,
in_channels,
32,
3,
stride=2,

@ -18,9 +18,9 @@ import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg import utils
from paddlers.models.ppseg.cvlibs import manager, param_init
from paddlers.models.ppseg.models import layers
from paddleseg import utils
from paddleseg.cvlibs import manager, param_init
from paddleseg.models import layers
@manager.MODELS.add_component
@ -35,6 +35,7 @@ class BiSeNetV2(nn.Layer):
Args:
num_classes (int): The unique number of target classes.
lambd (float, optional): A factor for controlling the size of semantic branch channels. Default: 0.25.
in_channels (int, optional): The channels of input image. Default: 3.
pretrained (str, optional): The path or url of pretrained model. Default: None.
"""
@ -42,6 +43,7 @@ class BiSeNetV2(nn.Layer):
num_classes,
lambd=0.25,
align_corners=False,
in_channels=3,
pretrained=None):
super().__init__()
@ -51,8 +53,8 @@ class BiSeNetV2(nn.Layer):
sb_channels = (C1, C3, C4, C5)
mid_channels = 128
self.db = DetailBranch(db_channels)
self.sb = SemanticBranch(sb_channels)
self.db = DetailBranch(in_channels, db_channels)
self.sb = SemanticBranch(in_channels, sb_channels)
self.bga = BGA(mid_channels, align_corners)
self.aux_head1 = SegHead(C1, C1, num_classes)
@ -189,15 +191,15 @@ class GatherAndExpansionLayer2(nn.Layer):
class DetailBranch(nn.Layer):
"""The detail branch of BiSeNet, which has wide channels but shallow layers."""
def __init__(self, in_channels):
def __init__(self, in_channels, feature_channels):
super().__init__()
C1, C2, C3 = in_channels
C1, C2, C3 = feature_channels
self.convs = nn.Sequential(
# stage 1
layers.ConvBNReLU(
3, C1, 3, stride=2),
in_channels, C1, 3, stride=2),
layers.ConvBNReLU(C1, C1, 3),
# stage 2
layers.ConvBNReLU(
@ -217,11 +219,11 @@ class DetailBranch(nn.Layer):
class SemanticBranch(nn.Layer):
"""The semantic branch of BiSeNet, which has narrow channels but deep layers."""
def __init__(self, in_channels):
def __init__(self, in_channels, feature_channels):
super().__init__()
C1, C3, C4, C5 = in_channels
C1, C3, C4, C5 = feature_channels
self.stem = StemBlock(3, C1)
self.stem = StemBlock(in_channels, C1)
self.stage3 = nn.Sequential(
GatherAndExpansionLayer2(C1, C3, 6),

@ -16,9 +16,9 @@ import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.models import layers
from paddlers.models.ppseg.utils import utils
from paddleseg.cvlibs import manager
from paddleseg.models import layers
from paddleseg.utils import utils
@manager.MODELS.add_component

@ -0,0 +1,174 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddleseg.cvlibs import manager
from paddleseg.models import layers
from paddleseg.utils import utils
@manager.MODELS.add_component
class CCNet(nn.Layer):
    """
    The CCNet implementation based on PaddlePaddle.

    The original article refers to
    Zilong Huang, et al. "CCNet: Criss-Cross Attention for Semantic Segmentation"
    (https://arxiv.org/abs/1811.11721)

    Args:
        num_classes (int): The unique number of target classes.
        backbone (paddle.nn.Layer): Backbone network, currently support Resnet18_vd/Resnet34_vd/Resnet50_vd/Resnet101_vd.
        backbone_indices (tuple, list, optional): Two values in the tuple indicate the indices of output of backbone. Default: (2, 3).
        enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss. Default: True.
        dropout_prob (float, optional): The probability of dropout. Default: 0.0.
        recurrence (int, optional): The number of recurrent operations. Default: 1.
        align_corners (bool, optional): An argument of F.interpolate. It should be set to False when the feature size is even,
            e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False.
        pretrained (str, optional): The path or url of pretrained model. Default: None.
    """

    def __init__(self,
                 num_classes,
                 backbone,
                 backbone_indices=(2, 3),
                 enable_auxiliary_loss=True,
                 dropout_prob=0.0,
                 recurrence=1,
                 align_corners=False,
                 pretrained=None):
        super().__init__()
        self.enable_auxiliary_loss = enable_auxiliary_loss
        self.recurrence = recurrence
        self.align_corners = align_corners
        self.backbone = backbone
        self.backbone_indices = backbone_indices
        backbone_channels = [
            backbone.feat_channels[i] for i in backbone_indices
        ]

        # The auxiliary head reads the first selected backbone feature.
        if enable_auxiliary_loss:
            self.aux_head = layers.AuxLayer(
                backbone_channels[0],
                512,
                num_classes,
                dropout_prob=dropout_prob)
        # The main head reads the second selected backbone feature.
        self.head = RCCAModule(
            backbone_channels[1],
            512,
            num_classes,
            dropout_prob=dropout_prob,
            recurrence=recurrence)
        self.pretrained = pretrained
        # Without this call the `pretrained` weights would never be loaded.
        self.init_weight()

    def init_weight(self):
        """Load the pretrained weights if `pretrained` was given."""
        if self.pretrained is not None:
            utils.load_entire_model(self, self.pretrained)

    def forward(self, x):
        feat_list = self.backbone(x)
        logit_list = []
        output = self.head(feat_list[self.backbone_indices[-1]])
        logit_list.append(output)
        # The auxiliary logit is only produced in training mode.
        if self.training and self.enable_auxiliary_loss:
            aux_out = self.aux_head(feat_list[self.backbone_indices[-2]])
            logit_list.append(aux_out)
        # Resize every logit to the spatial size of the input.
        return [
            F.interpolate(
                logit,
                paddle.shape(x)[2:],
                mode='bilinear',
                align_corners=self.align_corners) for logit in logit_list
        ]
class RCCAModule(nn.Layer):
    """
    Segmentation head that applies criss-cross attention `recurrence` times
    between two 3x3 convolutions, then classifies the concatenation of the
    input and the attended feature.

    Args:
        in_channels (int): The number of input channels.
        out_channels (int): The number of intermediate channels of the output layer.
        num_classes (int): The unique number of target classes.
        dropout_prob (float, optional): The probability of dropout. Default: 0.1.
        recurrence (int, optional): The number of recurrent operations. Default: 1.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 num_classes,
                 dropout_prob=0.1,
                 recurrence=1):
        super(RCCAModule, self).__init__()
        # The attention runs on a quarter of the input channels.
        inter_channels = in_channels // 4
        self.recurrence = recurrence

        self.conva = layers.ConvBNLeakyReLU(
            in_channels, inter_channels, 3, padding=1, bias_attr=False)
        self.cca = CrissCrossAttention(inter_channels)
        self.convb = layers.ConvBNLeakyReLU(
            inter_channels, inter_channels, 3, padding=1, bias_attr=False)
        self.out = layers.AuxLayer(
            in_channels + inter_channels,
            out_channels,
            num_classes,
            dropout_prob=dropout_prob)

    def forward(self, x):
        feat = self.conva(x)
        # The same attention layer is reused for every recurrence step.
        for _ in range(self.recurrence):
            feat = self.cca(feat)
        feat = self.convb(feat)
        fused = paddle.concat([x, feat], axis=1)
        return self.out(fused)
class CrissCrossAttention(nn.Layer):
    """Attention over the row and the column that cross at each position.

    For every spatial position the layer attends to the other positions in
    the same column and to all positions in the same row, then adds the
    attended values back onto the input, scaled by a learnable ``gamma``.

    Args:
        in_channels (int): Channels of the input feature map. Queries and
            keys use ``in_channels // 8`` channels; values keep
            ``in_channels``.
    """

    def __init__(self, in_channels):
        super().__init__()
        self.q_conv = nn.Conv2D(in_channels, in_channels // 8, kernel_size=1)
        self.k_conv = nn.Conv2D(in_channels, in_channels // 8, kernel_size=1)
        self.v_conv = nn.Conv2D(in_channels, in_channels, kernel_size=1)
        # Softmax runs over the concatenated (column + row) scores.
        self.softmax = nn.Softmax(axis=3)
        # Initialised to zero, so the attention term contributes nothing at
        # the start of training.
        self.gamma = self.create_parameter(
            shape=(1, ), default_initializer=nn.initializer.Constant(0))
        # One-element +inf tensor used by `Inf` to build the diagonal mask.
        self.inf_tensor = paddle.full(shape=(1, ), fill_value=float('inf'))

    def forward(self, x):
        """Return ``gamma * (column_attention + row_attention) + x``."""
        b, c, h, w = paddle.shape(x)
        proj_q = self.q_conv(x)
        # Column view: (b * w, h, c_q). Row view: (b * h, w, c_q).
        proj_q_h = proj_q.transpose([0, 3, 1, 2]).reshape(
            [b * w, -1, h]).transpose([0, 2, 1])
        proj_q_w = proj_q.transpose([0, 2, 1, 3]).reshape(
            [b * h, -1, w]).transpose([0, 2, 1])

        proj_k = self.k_conv(x)
        # Keys stay channel-first: (b * w, c_q, h) and (b * h, c_q, w).
        proj_k_h = proj_k.transpose([0, 3, 1, 2]).reshape([b * w, -1, h])
        proj_k_w = proj_k.transpose([0, 2, 1, 3]).reshape([b * h, -1, w])

        proj_v = self.v_conv(x)
        proj_v_h = proj_v.transpose([0, 3, 1, 2]).reshape([b * w, -1, h])
        proj_v_w = proj_v.transpose([0, 2, 1, 3]).reshape([b * h, -1, w])

        # Column scores get -inf on the diagonal, so after the softmax a
        # position gives zero weight to itself there. The row scores are not
        # masked, so each position is still counted once via its row.
        energy_h = (paddle.bmm(proj_q_h, proj_k_h) + self.Inf(b, h, w)).reshape(
            [b, w, h, h]).transpose([0, 2, 1, 3])
        energy_w = paddle.bmm(proj_q_w, proj_k_w).reshape([b, h, w, w])
        # Joint normalisation over the h column scores and w row scores.
        concate = self.softmax(paddle.concat([energy_h, energy_w], axis=3))

        # Split the joint weights back into their column and row parts.
        attn_h = concate[:, :, :, 0:h].transpose([0, 2, 1, 3]).reshape(
            [b * w, h, h])
        attn_w = concate[:, :, :, h:h + w].reshape([b * h, w, w])

        # Weighted sums of the values, reshaped back to (b, c, h, w).
        out_h = paddle.bmm(proj_v_h, attn_h.transpose([0, 2, 1])).reshape(
            [b, w, -1, h]).transpose([0, 2, 3, 1])
        out_w = paddle.bmm(proj_v_w, attn_w.transpose([0, 2, 1])).reshape(
            [b, h, -1, w]).transpose([0, 2, 1, 3])
        return self.gamma * (out_h + out_w) + x

    def Inf(self, B, H, W):
        """Return a (B * W, H, H) mask holding -inf on each diagonal.

        Off-diagonal entries are the negation of zero, so adding the mask
        leaves them unchanged.
        """
        return -paddle.tile(
            paddle.diag(paddle.tile(self.inf_tensor, [H]), 0).unsqueeze(0),
            [B * W, 1, 1])

@ -16,9 +16,9 @@ import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.models import layers
from paddlers.models.ppseg.utils import utils
from paddleseg.cvlibs import manager
from paddleseg.models import layers
from paddleseg.utils import utils
@manager.MODELS.add_component

@ -0,0 +1,403 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddleseg.cvlibs import manager, param_init
from paddleseg.models import layers
from paddleseg.utils import utils
class DualResNet(nn.Layer):
    """
    The DDRNet implementation based on PaddlePaddle.

    The original article refers to
    Yuanduo Hong, Huihui Pan, Weichao Sun, et al. "Deep Dual-resolution Networks for Real-time and Accurate Semantic Segmentation of Road Scenes"
    (https://arxiv.org/abs/2101.06085)

    Args:
        num_classes (int): The unique number of target classes.
        in_channels (int, optional): Number of input channels. Default: 3.
        block_layers (list|tuple, optional): The numbers of layers in different blocks. Default: (2, 2, 2, 2).
        planes (int, optional): Base channels in network. Default: 64.
        spp_planes (int, optional): Branch channels for DAPPM. Default: 128.
        head_planes (int, optional): Mid channels of segmentation head. Default: 128.
        enable_auxiliary_loss (bool, optional): Whether use auxiliary head for stage3. Default: False.
        pretrained (str, optional): The path or url of pretrained model. Default: None.
    """

    def __init__(self,
                 num_classes,
                 in_channels=3,
                 block_layers=(2, 2, 2, 2),
                 planes=64,
                 spp_planes=128,
                 head_planes=128,
                 enable_auxiliary_loss=False,
                 pretrained=None):
        # `block_layers` defaults to an immutable tuple; it is only indexed,
        # so callers may still pass a list.
        super().__init__()
        highres_planes = planes * 2
        self.enable_auxiliary_loss = enable_auxiliary_loss

        # Stem: two stride-2 convolutions, i.e. 1/4 of the input resolution.
        self.conv1 = nn.Sequential(
            layers.ConvBNReLU(
                in_channels, planes, kernel_size=3, stride=2, padding=1),
            layers.ConvBNReLU(
                planes, planes, kernel_size=3, stride=2, padding=1), )
        self.relu = nn.ReLU()

        # Low-resolution branch.
        self.layer1 = self._make_layers(BasicBlock, planes, planes,
                                        block_layers[0])
        self.layer2 = self._make_layers(
            BasicBlock, planes, planes * 2, block_layers[1], stride=2)
        self.layer3 = self._make_layers(
            BasicBlock, planes * 2, planes * 4, block_layers[2], stride=2)
        self.layer4 = self._make_layers(
            BasicBlock, planes * 4, planes * 8, block_layers[3], stride=2)

        # Low -> high resolution fusion (channel compression).
        self.compression3 = layers.ConvBN(
            planes * 4, highres_planes, kernel_size=1, bias_attr=False)
        self.compression4 = layers.ConvBN(
            planes * 8, highres_planes, kernel_size=1, bias_attr=False)

        # High -> low resolution fusion (strided convolutions).
        self.down3 = layers.ConvBN(
            highres_planes,
            planes * 4,
            kernel_size=3,
            stride=2,
            bias_attr=False)
        self.down4 = nn.Sequential(
            layers.ConvBNReLU(
                highres_planes,
                planes * 4,
                kernel_size=3,
                stride=2,
                padding=1,
                bias_attr=False),
            layers.ConvBN(
                planes * 4,
                planes * 8,
                kernel_size=3,
                stride=2,
                padding=1,
                bias_attr=False))

        # High-resolution branch (stride 1 throughout).
        self.layer3_ = self._make_layers(BasicBlock, planes * 2, highres_planes,
                                         2)
        self.layer4_ = self._make_layers(BasicBlock, highres_planes,
                                         highres_planes, 2)
        self.layer5_ = self._make_layers(Bottleneck, highres_planes,
                                         highres_planes, 1)

        self.layer5 = self._make_layers(
            Bottleneck, planes * 8, planes * 8, 1, stride=2)
        # Bottleneck.expansion == 2, hence planes * 16 input channels here.
        self.spp = DAPPM(planes * 16, spp_planes, planes * 4)

        if self.enable_auxiliary_loss:
            self.aux_head = DDRNetHead(highres_planes, head_planes, num_classes)
        self.head = DDRNetHead(planes * 4, head_planes, num_classes)

        self.pretrained = pretrained
        self.init_weight()

    def init_weight(self):
        """Load the checkpoint if given, otherwise initialise conv and BN weights."""
        if self.pretrained is not None:
            utils.load_entire_model(self, self.pretrained)
        else:
            for m in self.sublayers():
                if isinstance(m, nn.Conv2D):
                    param_init.kaiming_normal_init(m.weight)
                elif isinstance(m, nn.BatchNorm2D):
                    param_init.constant_init(m.weight, value=1)
                    param_init.constant_init(m.bias, value=0)

    def _make_layers(self, block, inplanes, planes, blocks, stride=1):
        """Stack ``blocks`` residual blocks of type ``block``.

        Only the first block may change stride or width. When it does, a
        1x1 conv + BN projection is used on its shortcut. The last block of
        a multi-block stack is built with ``no_relu=True``; a single-block
        stack keeps the block class' own ``no_relu`` default.
        """
        downsample = None
        if stride != 1 or inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2D(
                    inplanes,
                    planes * block.expansion,
                    kernel_size=1,
                    stride=stride,
                    bias_attr=False),
                nn.BatchNorm2D(planes * block.expansion), )

        # Named `stack` so the imported `layers` module is not shadowed.
        stack = [block(inplanes, planes, stride, downsample)]
        inplanes = planes * block.expansion
        for i in range(1, blocks):
            stack.append(
                block(
                    inplanes, planes, stride=1, no_relu=(i == blocks - 1)))

        return nn.Sequential(*stack)

    def forward(self, x):
        """Return a list of logit maps resized to the input's height and width.

        The first element comes from the main head. When
        ``enable_auxiliary_loss`` is set, the auxiliary head output (computed
        from the stage-3 high-resolution features) is appended.
        """
        _, _, h, w = paddle.shape(x)
        # The high-resolution branch runs at 1/8 of the input size.
        width_output = w // 8
        height_output = h // 8

        x = self.conv1(x)
        stage1_out = self.layer1(x)
        stage2_out = self.layer2(self.relu(stage1_out))

        # Stage 3: both branches, then bilateral fusion.
        stage3_out = self.layer3(self.relu(stage2_out))
        stage3_out_dual = self.layer3_(self.relu(stage2_out))
        x = stage3_out + self.down3(self.relu(stage3_out_dual))
        stage3_merge = stage3_out_dual + F.interpolate(
            self.compression3(self.relu(stage3_out)),
            size=[height_output, width_output],
            mode='bilinear')

        # Stage 4: same pattern one level deeper.
        stage4_out = self.layer4(self.relu(x))
        stage4_out_dual = self.layer4_(self.relu(stage3_merge))
        x = stage4_out + self.down4(self.relu(stage4_out_dual))
        stage4_merge = stage4_out_dual + F.interpolate(
            self.compression4(self.relu(stage4_out)),
            size=[height_output, width_output],
            mode='bilinear')

        # Stage 5: context from DAPPM is upsampled and added to the
        # high-resolution features before the main head.
        stage5_out_dual = self.layer5_(self.relu(stage4_merge))
        x = F.interpolate(
            self.spp(self.layer5(self.relu(x))),
            size=[height_output, width_output],
            mode='bilinear')

        logit_list = [self.head(x + stage5_out_dual)]

        if self.enable_auxiliary_loss:
            logit_list.append(self.aux_head(stage3_merge))

        return [
            F.interpolate(
                logit, [h, w], mode='bilinear') for logit in logit_list
        ]
class BasicBlock(nn.Layer):
    """Two 3x3 convolutions with a residual shortcut.

    Args:
        inplanes (int): Input channels.
        planes (int): Output channels (``expansion`` is 1).
        stride (int, optional): Stride of the first convolution. Default: 1.
        downsample (nn.Layer, optional): Layer applied to the shortcut
            instead of the identity. Default: None.
        no_relu (bool, optional): If True, the sum is returned without the
            final ReLU. Default: False.
    """

    expansion = 1

    def __init__(self,
                 inplanes,
                 planes,
                 stride=1,
                 downsample=None,
                 no_relu=False):
        super().__init__()
        self.conv_bn_relu = layers.ConvBNReLU(
            inplanes,
            planes,
            kernel_size=3,
            stride=stride,
            padding=1,
            bias_attr=False)
        self.relu = nn.ReLU()
        self.conv_bn = layers.ConvBN(
            planes, planes, kernel_size=3, stride=1, padding=1, bias_attr=False)
        self.downsample = downsample
        self.stride = stride
        self.no_relu = no_relu

    def forward(self, x):
        out = self.conv_bn(self.conv_bn_relu(x))

        shortcut = x if self.downsample is None else self.downsample(x)
        out += shortcut

        return out if self.no_relu else self.relu(out)
class Bottleneck(nn.Layer):
    """1x1 -> 3x3 -> 1x1 residual block whose output has ``planes * 2`` channels.

    Args:
        inplanes (int): Input channels.
        planes (int): Channels of the two inner convolutions.
        stride (int, optional): Stride of the 3x3 convolution. Default: 1.
        downsample (nn.Layer, optional): Layer applied to the shortcut
            instead of the identity. Default: None.
        no_relu (bool, optional): If True, the sum is returned without the
            final ReLU. Default: True.
    """

    expansion = 2

    def __init__(self,
                 inplanes,
                 planes,
                 stride=1,
                 downsample=None,
                 no_relu=True):
        super().__init__()
        self.conv_bn_relu1 = layers.ConvBNReLU(
            inplanes, planes, kernel_size=1, bias_attr=False)
        self.conv_bn_relu2 = layers.ConvBNReLU(
            planes,
            planes,
            kernel_size=3,
            stride=stride,
            padding=1,
            bias_attr=False)
        self.conv_bn = layers.ConvBN(
            planes, planes * self.expansion, kernel_size=1, bias_attr=False)
        self.relu = nn.ReLU()
        self.downsample = downsample
        self.stride = stride
        self.no_relu = no_relu

    def forward(self, x):
        out = self.conv_bn(self.conv_bn_relu2(self.conv_bn_relu1(x)))

        shortcut = x if self.downsample is None else self.downsample(x)
        out += shortcut

        return out if self.no_relu else self.relu(out)
class DAPPM(nn.Layer):
    """Multi-scale pooling module whose branches are fused one after another.

    Five branches look at the input at different pooling scales (none,
    5/2, 9/4, 17/8 kernel/stride average pooling, and global pooling). Each
    pooled branch is upsampled to the input size, added to the previous
    branch's result and refined by a 3x3 convolution. All five results are
    concatenated, compressed to ``outplanes`` channels and added to a 1x1
    shortcut of the input.

    Args:
        inplanes (int): Input channels.
        branch_planes (int): Channels of every branch.
        outplanes (int): Output channels.
    """

    def __init__(self, inplanes, branch_planes, outplanes):
        super().__init__()

        def bn_relu_conv(in_ch, out_ch, kernel_size, **conv_kwargs):
            # Pre-activation stack used by every branch: BN -> ReLU -> Conv.
            return [
                layers.SyncBatchNorm(in_ch),
                nn.ReLU(),
                nn.Conv2D(
                    in_ch,
                    out_ch,
                    kernel_size=kernel_size,
                    bias_attr=False,
                    **conv_kwargs),
            ]

        # Pooled branches; the pooling layer sits at index 0 of each stack.
        self.scale1 = nn.Sequential(
            nn.AvgPool2D(
                kernel_size=5, stride=2, padding=2),
            *bn_relu_conv(inplanes, branch_planes, 1))
        self.scale2 = nn.Sequential(
            nn.AvgPool2D(
                kernel_size=9, stride=4, padding=4),
            *bn_relu_conv(inplanes, branch_planes, 1))
        self.scale3 = nn.Sequential(
            nn.AvgPool2D(
                kernel_size=17, stride=8, padding=8),
            *bn_relu_conv(inplanes, branch_planes, 1))
        self.scale4 = nn.Sequential(
            nn.AdaptiveAvgPool2D((1, 1)),
            *bn_relu_conv(inplanes, branch_planes, 1))
        # Branch without pooling.
        self.scale0 = nn.Sequential(*bn_relu_conv(inplanes, branch_planes, 1))

        # 3x3 refinement applied after each fusion step.
        self.process1 = nn.Sequential(
            *bn_relu_conv(
                branch_planes, branch_planes, 3, padding=1))
        self.process2 = nn.Sequential(
            *bn_relu_conv(
                branch_planes, branch_planes, 3, padding=1))
        self.process3 = nn.Sequential(
            *bn_relu_conv(
                branch_planes, branch_planes, 3, padding=1))
        self.process4 = nn.Sequential(
            *bn_relu_conv(
                branch_planes, branch_planes, 3, padding=1))

        self.compression = nn.Sequential(
            *bn_relu_conv(branch_planes * 5, outplanes, 1))
        self.shortcut = nn.Sequential(*bn_relu_conv(inplanes, outplanes, 1))

    def forward(self, x):
        n, c, h, w = paddle.shape(x)

        branch_outputs = [self.scale0(x)]
        stages = (
            (self.scale1, self.process1),
            (self.scale2, self.process2),
            (self.scale3, self.process3),
            (self.scale4, self.process4), )
        for scale, process in stages:
            # Upsample the pooled branch and fuse it with the previous result.
            upsampled = F.interpolate(scale(x), size=[h, w], mode='bilinear')
            branch_outputs.append(process(upsampled + branch_outputs[-1]))

        fused = self.compression(paddle.concat(branch_outputs, 1))
        return fused + self.shortcut(x)
class DDRNetHead(nn.Layer):
    """Segmentation head: BN -> ReLU -> 3x3 ConvBNReLU -> 1x1 conv.

    Args:
        inplanes (int): Input channels.
        interplanes (int): Channels after the 3x3 convolution.
        outplanes (int): Output channels of the final 1x1 convolution.
        scale_factor (optional): When not None, the output is bilinearly
            resized by this factor. Default: None.
    """

    def __init__(self, inplanes, interplanes, outplanes, scale_factor=None):
        super().__init__()
        self.bn1 = nn.BatchNorm2D(inplanes)
        self.relu = nn.ReLU()
        self.conv_bn_relu = layers.ConvBNReLU(
            inplanes, interplanes, kernel_size=3, padding=1, bias_attr=False)
        self.conv = nn.Conv2D(
            interplanes, outplanes, kernel_size=1, padding=0, bias_attr=True)
        self.scale_factor = scale_factor

    def forward(self, x):
        hidden = self.conv_bn_relu(self.relu(self.bn1(x)))
        out = self.conv(hidden)

        if self.scale_factor is None:
            return out
        return F.interpolate(
            out, scale_factor=self.scale_factor, mode='bilinear')
@manager.MODELS.add_component
def DDRNet_23(**kwargs):
    """Build a ``DualResNet`` with the fixed DDRNet-23 layout.

    ``block_layers``, ``planes``, ``spp_planes`` and ``head_planes`` are set
    here. Every other ``DualResNet`` argument (e.g. ``num_classes``) is taken
    from ``kwargs``.
    """
    model = DualResNet(
        block_layers=[2, 2, 2, 2],
        planes=64,
        spp_planes=128,
        head_planes=128,
        **kwargs)
    return model

@ -18,11 +18,11 @@ import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.models import layers
from paddlers.models.ppseg.models.backbones import resnet_vd
from paddlers.models.ppseg.models import deeplab
from paddlers.models.ppseg.utils import utils
from paddleseg.cvlibs import manager
from paddleseg.models import layers
from paddleseg.models.backbones import resnet_vd
from paddleseg.models import deeplab
from paddleseg.utils import utils
@manager.MODELS.add_component

@ -16,9 +16,9 @@ import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.models import layers
from paddlers.models.ppseg.utils import utils
from paddleseg.cvlibs import manager
from paddleseg.models import layers
from paddleseg.utils import utils
__all__ = ['DeepLabV3P', 'DeepLabV3']

@ -16,9 +16,9 @@ import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.models import layers
from paddlers.models.ppseg.utils import utils
from paddleseg.cvlibs import manager
from paddleseg.models import layers
from paddleseg.utils import utils
@manager.MODELS.add_component

@ -16,9 +16,9 @@ import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg.models import layers
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.utils import utils
from paddleseg.models import layers
from paddleseg.cvlibs import manager
from paddleseg.utils import utils
@manager.MODELS.add_component

@ -16,9 +16,9 @@ import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg.models import layers
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.utils import utils
from paddleseg.models import layers
from paddleseg.cvlibs import manager
from paddleseg.utils import utils
@manager.MODELS.add_component
@ -209,7 +209,9 @@ class EMAU(nn.Layer):
mu = F.normalize(mu, axis=1, p=2)
mu = self.mu * (1 - self.momentum) + mu * self.momentum
if paddle.distributed.get_world_size() > 1:
mu = paddle.distributed.all_reduce(mu)
out = paddle.distributed.all_reduce(mu)
if out is not None:
mu = out
mu /= paddle.distributed.get_world_size()
self.mu = mu

@ -16,9 +16,9 @@ import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.models import layers
from paddlers.models.ppseg.utils import utils
from paddleseg.cvlibs import manager
from paddleseg.models import layers
from paddleseg.utils import utils
@manager.MODELS.add_component

@ -16,9 +16,9 @@ import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg import utils
from paddlers.models.ppseg.models import layers
from paddlers.models.ppseg.cvlibs import manager, param_init
from paddleseg import utils
from paddleseg.models import layers
from paddleseg.cvlibs import manager, param_init
__all__ = ['ENet']
@ -34,6 +34,7 @@ class ENet(nn.Layer):
Args:
num_classes (int): The unique number of target classes.
in_channels (int, optional): The channels of input image. Default: 3.
pretrained (str, optional): The path or url of pretrained model. Default: None.
encoder_relu (bool, optional): When ``True`` ReLU is used as the activation
function; otherwise, PReLU is used. Default: False.
@ -43,13 +44,14 @@ class ENet(nn.Layer):
def __init__(self,
num_classes,
in_channels=3,
pretrained=None,
encoder_relu=False,
decoder_relu=True):
super(ENet, self).__init__()
self.numclasses = num_classes
self.initial_block = InitialBlock(3, 16, relu=encoder_relu)
self.initial_block = InitialBlock(in_channels, 16, relu=encoder_relu)
self.downsample1_0 = DownsamplingBottleneck(
16, 64, return_indices=True, dropout_prob=0.01, relu=encoder_relu)

@ -18,9 +18,9 @@ import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg import utils
from paddlers.models.ppseg.cvlibs import manager, param_init
from paddlers.models.ppseg.models import layers
from paddleseg import utils
from paddleseg.cvlibs import manager, param_init
from paddleseg.models import layers
@manager.MODELS.add_component

@ -16,9 +16,9 @@ import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg.models import layers
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.utils import utils
from paddleseg.models import layers
from paddleseg.cvlibs import manager
from paddleseg.utils import utils
@manager.MODELS.add_component

@ -16,9 +16,9 @@ import paddle.nn as nn
import paddle.nn.functional as F
import paddle
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.models import layers
from paddlers.models.ppseg.utils import utils
from paddleseg.cvlibs import manager
from paddleseg.models import layers
from paddleseg.utils import utils
__all__ = ['FastSCNN']
@ -34,6 +34,7 @@ class FastSCNN(nn.Layer):
(https://arxiv.org/pdf/1902.04502.pdf).
Args:
num_classes (int): The unique number of target classes.
in_channels (int, optional): The channels of input image. Default: 3.
enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss.
If true, auxiliary loss will be added after LearningToDownsample module. Default: False.
align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature
@ -43,13 +44,15 @@ class FastSCNN(nn.Layer):
def __init__(self,
num_classes,
in_channels=3,
enable_auxiliary_loss=True,
align_corners=False,
pretrained=None):
super().__init__()
self.learning_to_downsample = LearningToDownsample(32, 48, 64)
self.learning_to_downsample = LearningToDownsample(in_channels, 32, 48,
64)
self.global_feature_extractor = GlobalFeatureExtractor(
in_channels=64,
block_channels=[64, 96, 128],
@ -108,11 +111,18 @@ class LearningToDownsample(nn.Layer):
out_channels (int, optional): The output channels of LearningToDownsample module. Default: 64.
"""
def __init__(self, dw_channels1=32, dw_channels2=48, out_channels=64):
def __init__(self,
in_channels=3,
dw_channels1=32,
dw_channels2=48,
out_channels=64):
super(LearningToDownsample, self).__init__()
self.conv_bn_relu = layers.ConvBNReLU(
in_channels=3, out_channels=dw_channels1, kernel_size=3, stride=2)
in_channels=in_channels,
out_channels=dw_channels1,
kernel_size=3,
stride=2)
self.dsconv_bn_relu1 = layers.SeparableConvBNReLU(
in_channels=dw_channels1,
out_channels=dw_channels2,

@ -16,9 +16,9 @@ import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.models import layers
from paddlers.models.ppseg.utils import utils
from paddleseg.cvlibs import manager
from paddleseg.models import layers
from paddleseg.utils import utils
@manager.MODELS.add_component

@ -16,9 +16,9 @@ import paddle.nn as nn
import paddle.nn.functional as F
import paddle
from paddlers.models.ppseg import utils
from paddlers.models.ppseg.cvlibs import manager, param_init
from paddlers.models.ppseg.models import layers
from paddleseg import utils
from paddleseg.cvlibs import manager, param_init
from paddleseg.models import layers
@manager.MODELS.add_component

@ -16,9 +16,9 @@ import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.models import layers
from paddlers.models.ppseg.utils import utils
from paddleseg.cvlibs import manager
from paddleseg.models import layers
from paddleseg.utils import utils
@manager.MODELS.add_component

@ -16,9 +16,9 @@ import paddle
import paddle.nn as nn
from paddle.nn import functional as F
from paddlers.models.ppseg.utils import utils
from paddlers.models.ppseg.models import layers
from paddlers.models.ppseg.cvlibs import manager
from paddleseg.utils import utils
from paddleseg.models import layers
from paddleseg.cvlibs import manager
@manager.MODELS.add_component
@ -92,7 +92,7 @@ class GINet(nn.Layer):
return [
F.interpolate(
logit, (h, w),
logit, [h, w],
mode='bilinear',
align_corners=self.align_corners) for logit in logit_list
]

@ -0,0 +1,198 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddleseg.cvlibs import manager
from paddleseg.models import layers
from paddleseg.utils import utils
@manager.MODELS.add_component
class GloRe(nn.Layer):
    """
    The GloRe implementation based on PaddlePaddle.

    The original article refers to:
    Chen, Yunpeng, et al. "Graph-Based Global Reasoning Networks"
    (https://arxiv.org/pdf/1811.12814.pdf)

    Args:
        num_classes (int): The unique number of target classes.
        backbone (Paddle.nn.Layer): Backbone network, currently support Resnet50/101.
        backbone_indices (tuple, optional): Two indices into the backbone outputs; the first
            feeds the auxiliary layer and the second feeds the main head. Default: (2, 3).
        gru_channels (int, optional): The number of input channels in GloRe Unit. Default: 512.
        gru_num_state (int, optional): The number of states in GloRe Unit. Default: 128.
        gru_num_node (int, optional): The number of nodes in GloRe Unit. Default: 64.
        enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss. Default: True.
        align_corners (bool, optional): An argument of F.interpolate. It should be set to False when the feature size is even,
            e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False.
        pretrained (str, optional): The path or url of pretrained model. Default: None.
    """

    def __init__(self,
                 num_classes,
                 backbone,
                 backbone_indices=(2, 3),
                 gru_channels=512,
                 gru_num_state=128,
                 gru_num_node=64,
                 enable_auxiliary_loss=True,
                 align_corners=False,
                 pretrained=None):
        super().__init__()
        self.backbone = backbone

        # Channel counts of the selected backbone outputs.
        backbone_channels = []
        for index in backbone_indices:
            backbone_channels.append(backbone.feat_channels[index])

        self.head = GloReHead(num_classes, backbone_indices, backbone_channels,
                              gru_channels, gru_num_state, gru_num_node,
                              enable_auxiliary_loss)
        self.align_corners = align_corners
        self.pretrained = pretrained
        self.init_weight()

    def forward(self, x):
        """Return the head's logit maps resized to the input's spatial size."""
        logits = self.head(self.backbone(x))

        resized = []
        for logit in logits:
            resized.append(
                F.interpolate(
                    logit,
                    paddle.shape(x)[2:],
                    mode='bilinear',
                    align_corners=self.align_corners))
        return resized

    def init_weight(self):
        """Load the whole-model checkpoint when ``pretrained`` is set."""
        if self.pretrained is None:
            return
        utils.load_entire_model(self, self.pretrained)
class GloReHead(nn.Layer):
    """Head of GloRe: 1x1 reduction, global reasoning unit and classifier.

    Args:
        num_classes (int): Number of output channels of the classifier and
            of the auxiliary layer.
        backbone_indices (tuple): Two indices into the backbone outputs; the
            first selects the auxiliary input, the second the main input.
        backbone_channels (list): Channel counts matching ``backbone_indices``.
        gru_channels (int, optional): Channels fed to (and returned by) the
            reasoning unit. Default: 512.
        gru_num_state (int, optional): Number of states in the unit. Default: 128.
        gru_num_node (int, optional): Number of nodes in the unit. Default: 64.
        enable_auxiliary_loss (bool, optional): Whether ``forward`` also
            returns the auxiliary logits. Default: True.
    """

    def __init__(self,
                 num_classes,
                 backbone_indices,
                 backbone_channels,
                 gru_channels=512,
                 gru_num_state=128,
                 gru_num_node=64,
                 enable_auxiliary_loss=True):
        super().__init__()

        in_channels = backbone_channels[1]
        self.conv_bn_relu = layers.ConvBNReLU(
            in_channels, gru_channels, 1, bias_attr=False)
        self.gru_module = GruModule(
            num_input=gru_channels,
            num_state=gru_num_state,
            num_node=gru_num_node)

        self.dropout = nn.Dropout(0.1)
        # The reasoning unit returns `gru_channels` channels (it adds its
        # result onto its input), so the classifier must accept that width.
        # The previous hard-coded 512 only worked for the default value.
        self.classifier = nn.Conv2D(gru_channels, num_classes, kernel_size=1)
        # Created unconditionally so the set of parameters does not depend
        # on `enable_auxiliary_loss`.
        self.auxlayer = layers.AuxLayer(
            in_channels=backbone_channels[0],
            inter_channels=backbone_channels[0] // 4,
            out_channels=num_classes)

        self.backbone_indices = backbone_indices
        self.enable_auxiliary_loss = enable_auxiliary_loss

    def forward(self, feat_list):
        """Return ``[main_logit]`` or ``[main_logit, auxiliary_logit]``."""
        logit_list = []
        x = feat_list[self.backbone_indices[1]]

        feature = self.conv_bn_relu(x)
        gru_output = self.gru_module(feature)
        output = self.dropout(gru_output)
        logit = self.classifier(output)
        logit_list.append(logit)

        if self.enable_auxiliary_loss:
            low_level_feat = feat_list[self.backbone_indices[0]]
            auxiliary_logit = self.auxlayer(low_level_feat)
            logit_list.append(auxiliary_logit)

        return logit_list
class GCN(nn.Layer):
    """Graph convolution over a (batch, num_state, num_node) tensor.

    A 1x1 ``Conv1D`` mixes information along the node axis (with a residual
    connection), then a second 1x1 ``Conv1D`` followed by ReLU mixes along
    the state axis.

    Args:
        num_state (int): Size of the state axis.
        num_node (int): Size of the node axis.
        bias (bool, optional): ``bias_attr`` of the state convolution.
            Default: False.
    """

    def __init__(self, num_state, num_node, bias=False):
        super().__init__()
        self.conv1 = nn.Conv1D(num_node, num_node, kernel_size=1)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv1D(
            num_state, num_state, kernel_size=1, bias_attr=bias)

    def forward(self, x):
        # Move nodes to the channel axis, convolve, and move them back.
        node_mixed = self.conv1(paddle.transpose(x, perm=(0, 2, 1)))
        node_mixed = paddle.transpose(node_mixed, perm=(0, 2, 1))

        residual_sum = node_mixed + x
        return self.relu(self.conv2(residual_sum))
class GruModule(nn.Layer):
    """Global reasoning unit: project pixels to graph nodes, reason, project back.

    Args:
        num_input (int, optional): Channels of the input (and of the output).
            Default: 512.
        num_state (int, optional): Channels of each node feature. Default: 128.
        num_node (int, optional): Number of graph nodes. Default: 64.
        normalize (bool, optional): If True, node features are divided by the
            number of pixels. Default: False.
    """

    def __init__(self,
                 num_input=512,
                 num_state=128,
                 num_node=64,
                 normalize=False):
        super().__init__()
        self.normalize = normalize
        self.num_state = num_state
        self.num_node = num_node
        self.reduction_dim = nn.Conv2D(num_input, num_state, kernel_size=1)
        self.projection_mat = nn.Conv2D(num_input, num_node, kernel_size=1)
        self.gcn = GCN(num_state=self.num_state, num_node=self.num_node)
        self.extend_dim = nn.Conv2D(
            self.num_state, num_input, kernel_size=1, bias_attr=False)
        self.extend_bn = layers.SyncBatchNorm(num_input, epsilon=1e-4)

    def forward(self, input):
        n, _, h, w = input.shape

        # Per-pixel state features: (n, num_state, h, w).
        states = self.reduction_dim(input)
        # Per-pixel node assignment scores: (n, num_node, h, w).
        assignment = self.projection_mat(input)

        # Flatten the spatial axes of both maps.
        flat_states = paddle.reshape(
            states, shape=[n, self.num_state, h * w])
        flat_assignment = paddle.reshape(
            assignment, shape=[n, self.num_node, h * w])

        # Pixel space -> node space: (n, num_state, num_node).
        node_features = paddle.matmul(
            flat_states, paddle.transpose(
                flat_assignment, perm=[0, 2, 1]))
        if self.normalize:
            node_features = node_features * (1. / flat_states.shape[2])

        # Reason over the graph; the shape is unchanged.
        reasoned = self.gcn(node_features)

        # Node space -> pixel space, reusing the same assignment scores.
        projected = paddle.matmul(reasoned, flat_assignment)
        projected = paddle.reshape(
            projected, shape=[n, self.num_state, h, w])

        # Restore the input width and add the result onto the input.
        extended = self.extend_bn(self.extend_dim(projected))
        return input + extended

@ -18,11 +18,11 @@ import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.models import layers
from paddlers.models.ppseg.models.backbones import resnet_vd
from paddlers.models.ppseg.models import deeplab
from paddlers.models.ppseg.utils import utils
from paddleseg.cvlibs import manager
from paddleseg.models import layers
from paddleseg.models.backbones import resnet_vd
from paddleseg.models import deeplab
from paddleseg.utils import utils
@manager.MODELS.add_component

@ -16,9 +16,9 @@ import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.models import layers
from paddlers.models.ppseg.utils import utils
from paddleseg.cvlibs import manager
from paddleseg.models import layers
from paddleseg.utils import utils
@manager.MODELS.add_component
@ -31,6 +31,7 @@ class HarDNet(nn.Layer):
Args:
num_classes (int): The unique number of target classes.
in_channels (int, optional): The channels of input image. Default: 3.
stem_channels (tuple|list, optional): The number of channels before the encoder. Default: (16, 24, 32, 48).
ch_list (tuple|list, optional): The number of channels at each block in the encoder. Default: (64, 96, 160, 224, 320).
grmul (float, optional): The channel multiplying factor in HarDBlock, which is m in the paper. Default: 1.7.
@ -43,6 +44,7 @@ class HarDNet(nn.Layer):
def __init__(self,
num_classes,
in_channels=3,
stem_channels=(16, 24, 32, 48),
ch_list=(64, 96, 160, 224, 320),
grmul=1.7,
@ -60,7 +62,7 @@ class HarDNet(nn.Layer):
self.stem = nn.Sequential(
layers.ConvBNReLU(
3, stem_channels[0], kernel_size=3, bias_attr=False),
in_channels, stem_channels[0], kernel_size=3, bias_attr=False),
layers.ConvBNReLU(
stem_channels[0],
stem_channels[1],

@ -16,9 +16,9 @@ import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.models import layers
from paddlers.models.ppseg.utils import utils
from paddleseg.cvlibs import manager
from paddleseg.models import layers
from paddleseg.utils import utils
@manager.MODELS.add_component

@ -16,9 +16,9 @@ import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg.models import layers
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.utils import utils
from paddleseg.models import layers
from paddleseg.cvlibs import manager
from paddleseg.utils import utils
@manager.MODELS.add_component

@ -12,9 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from .layer_libs import ConvBNReLU, ConvBN, SeparableConvBNReLU, DepthwiseConvBN, AuxLayer, SyncBatchNorm, JPU, ConvBNPReLU
from .layer_libs import ConvBNReLU, ConvBN, SeparableConvBNReLU, DepthwiseConvBN, AuxLayer, SyncBatchNorm, JPU, ConvBNPReLU, ConvBNAct, ConvBNLeakyReLU
from .activation import Activation
from .pyramid_pool import ASPPModule, PPModule
from .attention import AttentionBlock
from .nonlocal2d import NonLocal2D
from .wrap_functions import *
from .tensor_fusion import UAFM_SpAtten, UAFM_SpAtten_S, UAFM_ChAtten, UAFM_ChAtten_S, UAFM, UAFMMobile, UAFMMobile_SpAtten

@ -33,7 +33,7 @@ class Activation(nn.Layer):
Examples:
from paddlers.models.ppseg.models.common.activation import Activation
from paddleseg.models.common.activation import Activation
relu = Activation("relu")
print(relu)

@ -16,7 +16,7 @@ import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg.models import layers
from paddleseg.models import layers
class AttentionBlock(nn.Layer):
@ -144,3 +144,129 @@ class AttentionBlock(nn.Layer):
if self.out_project is not None:
context = self.out_project(context)
return context
class DualAttentionModule(nn.Layer):
    """
    Dual attention module.

    Runs a channel-attention branch and a position-attention branch on
    ``in_channels // 4`` channels each, sums them and applies a final
    convolution.

    Args:
        in_channels (int): The number of input channels.
        out_channels (int): The number of output channels.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        branch_channels = in_channels // 4

        self.channel_conv = layers.ConvBNReLU(in_channels, branch_channels, 1)
        self.position_conv = layers.ConvBNReLU(in_channels, branch_channels, 1)
        self.pam = PAM(branch_channels)
        self.cam = CAM(branch_channels)
        self.conv1 = layers.ConvBNReLU(branch_channels, branch_channels, 3)
        self.conv2 = layers.ConvBNReLU(branch_channels, branch_channels, 3)
        self.conv3 = layers.ConvBNReLU(branch_channels, out_channels, 3)

    def forward(self, feats):
        # Channel-attention branch.
        channel_branch = self.conv1(self.cam(self.channel_conv(feats)))
        # Position-attention branch.
        position_branch = self.conv2(self.pam(self.position_conv(feats)))

        return self.conv3(position_branch + channel_branch)
class PAM(nn.Layer):
    """
    Position attention module.

    Every spatial position attends to all positions. The attended values
    are scaled by a learnable ``gamma`` (initialised to zero) and added to
    the input.

    Args:
        in_channels (int): The number of input channels.
    """

    def __init__(self, in_channels):
        super().__init__()
        self.in_channels = in_channels
        self.mid_channels = in_channels // 8

        self.query_conv = nn.Conv2D(in_channels, self.mid_channels, 1, 1)
        self.key_conv = nn.Conv2D(in_channels, self.mid_channels, 1, 1)
        self.value_conv = nn.Conv2D(in_channels, in_channels, 1, 1)

        self.gamma = self.create_parameter(
            shape=[1],
            dtype='float32',
            default_initializer=nn.initializer.Constant(0))

    def forward(self, x):
        x_shape = paddle.shape(x)
        qk_shape = (0, self.mid_channels, -1)

        # query: (n, h * w, c1)
        query = paddle.transpose(
            paddle.reshape(self.query_conv(x), qk_shape), (0, 2, 1))
        # key: (n, c1, h * w)
        key = paddle.reshape(self.key_conv(x), qk_shape)

        # attention: (n, h * w, h * w), normalised over the last axis
        attention = F.softmax(paddle.bmm(query, key), axis=-1)

        # value: (n, c2, h * w)
        value = paddle.reshape(self.value_conv(x), (0, self.in_channels, -1))
        attention = paddle.transpose(attention, (0, 2, 1))

        # attended: (n, c2, h * w) -> (n, c2, h, w)
        attended = paddle.bmm(value, attention)
        attended = paddle.reshape(
            attended, (0, self.in_channels, x_shape[2], x_shape[3]))

        return self.gamma * attended + x
class CAM(nn.Layer):
    """
    Channel attention module.

    Computes a channel-to-channel affinity matrix from the input itself (no
    learned projections), uses it to re-weight the channels, and adds the
    result back to the input scaled by a learnable scalar `gamma`
    (initialised to 0, so the module starts as an identity mapping).

    Args:
        channels (int): The number of input channels.
    """

    def __init__(self, channels):
        super().__init__()

        self.channels = channels
        self.gamma = self.create_parameter(
            shape=[1],
            dtype='float32',
            default_initializer=nn.initializer.Constant(0))

    def forward(self, x):
        x_shape = paddle.shape(x)

        # Flatten the spatial dims once; the same (n, c, h * w) view serves
        # as both query and value.
        flat = paddle.reshape(x, (0, self.channels, -1))
        # key: n, h * w, c
        key = paddle.transpose(flat, (0, 2, 1))

        # sim: n, c, c
        sim = paddle.bmm(flat, key)
        # The danet author claims that this can avoid gradient divergence
        sim = paddle.max(sim, axis=-1, keepdim=True).tile(
            [1, 1, self.channels]) - sim
        sim = F.softmax(sim, axis=-1)

        # feat: from (n, c, h * w) to (n, c, h, w)
        feat = paddle.bmm(sim, flat)
        feat = paddle.reshape(feat, (0, self.channels, x_shape[2], x_shape[3]))

        out = self.gamma * feat + x
        return out

@ -17,7 +17,7 @@ import os
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg.models import layers
from paddleseg.models import layers
def SyncBatchNorm(*args, **kwargs):
@ -56,6 +56,37 @@ class ConvBNReLU(nn.Layer):
return x
class ConvBNAct(nn.Layer):
    """
    Conv2D followed by SyncBatchNorm and an optional activation.

    Args:
        in_channels (int): The number of input channels.
        out_channels (int): The number of output channels.
        kernel_size (int|tuple): The kernel size of the convolution.
        padding (str|int, optional): The padding passed to Conv2D.
            Default: 'same'.
        act_type (str, optional): The activation name passed to
            `layers.Activation`. If None, no activation is applied.
            Default: None.
        **kwargs: Extra keyword arguments forwarded to Conv2D. If
            `data_format` is given it is also used for the batch norm.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 padding='same',
                 act_type=None,
                 **kwargs):
        super().__init__()

        self._conv = nn.Conv2D(
            in_channels, out_channels, kernel_size, padding=padding, **kwargs)

        # The batch norm must use the same layout as the convolution.
        bn_format = kwargs.get('data_format', 'NCHW')
        self._batch_norm = SyncBatchNorm(out_channels, data_format=bn_format)

        self._act_type = act_type
        if act_type is not None:
            self._act = layers.Activation(act_type)

    def forward(self, x):
        x = self._batch_norm(self._conv(x))
        if self._act_type is None:
            return x
        return self._act(x)
class ConvBN(nn.Layer):
def __init__(self,
in_channels,
@ -293,3 +324,29 @@ class ConvBNPReLU(nn.Layer):
x = self._batch_norm(x)
x = self._prelu(x)
return x
class ConvBNLeakyReLU(nn.Layer):
    """
    Conv2D followed by SyncBatchNorm and a "leakyrelu" activation.

    Args:
        in_channels (int): The number of input channels.
        out_channels (int): The number of output channels.
        kernel_size (int|tuple): The kernel size of the convolution.
        padding (str|int, optional): The padding passed to Conv2D.
            Default: 'same'.
        **kwargs: Extra keyword arguments forwarded to Conv2D. If
            `data_format` is given it is also used for the batch norm.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 padding='same',
                 **kwargs):
        super().__init__()

        self._conv = nn.Conv2D(
            in_channels, out_channels, kernel_size, padding=padding, **kwargs)

        # The batch norm must use the same layout as the convolution.
        bn_format = kwargs.get('data_format', 'NCHW')
        self._batch_norm = SyncBatchNorm(out_channels, data_format=bn_format)
        self._relu = layers.Activation("leakyrelu")

    def forward(self, x):
        return self._relu(self._batch_norm(self._conv(x)))

@ -16,7 +16,7 @@ import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg.models import layers
from paddleseg.models import layers
class NonLocal2D(nn.Layer):

@ -16,7 +16,7 @@ import paddle
import paddle.nn.functional as F
from paddle import nn
from paddlers.models.ppseg.models import layers
from paddleseg.models import layers
class ASPPModule(nn.Layer):

@ -0,0 +1,285 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
from paddle.nn.initializer import Constant
from paddleseg.models import layers
from paddleseg.models.layers import tensor_fusion_helper as helper
class UAFM(nn.Layer):
    """
    The base of Unified Attention Fusion Module.

    The low level feature is convolved to `y_ch` channels, the high level
    feature is resized to the low level feature's spatial size, and the two
    are fused. This base class fuses by plain addition; subclasses override
    `fuse` to add attention.

    Args:
        x_ch (int): The channel of x tensor, which is the low level feature.
        y_ch (int): The channel of y tensor, which is the high level feature.
        out_ch (int): The channel of output tensor.
        ksize (int, optional): The kernel size of the conv for x tensor. Default: 3.
        resize_mode (str, optional): The interpolation mode used when upsampling
            the y tensor. Default: bilinear.
    """

    def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
        super().__init__()

        self.conv_x = layers.ConvBNReLU(
            x_ch, y_ch, kernel_size=ksize, padding=ksize // 2, bias_attr=False)
        self.conv_out = layers.ConvBNReLU(
            y_ch, out_ch, kernel_size=3, padding=1, bias_attr=False)
        self.resize_mode = resize_mode

    def check(self, x, y):
        """Assert both inputs are 4-D and x is spatially at least as large as y."""
        assert x.ndim == 4 and y.ndim == 4
        (x_h, x_w), (y_h, y_w) = x.shape[2:], y.shape[2:]
        assert x_h >= y_h and x_w >= y_w

    def prepare(self, x, y):
        """Return the converted low level feature and the resized high level one."""
        # prepare_y must see the already converted x, so order matters here.
        low = self.prepare_x(x, y)
        high = self.prepare_y(low, y)
        return low, high

    def prepare_x(self, x, y):
        """Convolve the low level feature; `y` is unused here."""
        return self.conv_x(x)

    def prepare_y(self, x, y):
        """Resize the high level feature to the spatial size of `x`."""
        target_hw = paddle.shape(x)[2:]
        return F.interpolate(y, target_hw, mode=self.resize_mode)

    def fuse(self, x, y):
        """Fuse the prepared features by addition followed by the output conv."""
        return self.conv_out(x + y)

    def forward(self, x, y):
        """
        Args:
            x (Tensor): The low level feature.
            y (Tensor): The high level feature.
        """
        self.check(x, y)
        low, high = self.prepare(x, y)
        return self.fuse(low, high)
class UAFM_ChAtten(UAFM):
    """
    The UAFM with channel attention, which uses mean and max values.

    Args:
        x_ch (int): The channel of x tensor, which is the low level feature.
        y_ch (int): The channel of y tensor, which is the high level feature.
        out_ch (int): The channel of output tensor.
        ksize (int, optional): The kernel size of the conv for x tensor. Default: 3.
        resize_mode (str, optional): The interpolation mode used when upsampling
            the y tensor. Default: bilinear.
    """

    def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
        super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode)

        # Input is 4 * y_ch: avg and max descriptors of two y_ch-channel
        # tensors (the converted x and the resized y).
        squeeze = layers.ConvBNAct(
            4 * y_ch,
            y_ch // 2,
            kernel_size=1,
            bias_attr=False,
            act_type="leakyrelu")
        excite = layers.ConvBN(
            y_ch // 2, y_ch, kernel_size=1, bias_attr=False)
        self.conv_xy_atten = nn.Sequential(squeeze, excite)

    def fuse(self, x, y):
        """
        Args:
            x (Tensor): The low level feature.
            y (Tensor): The high level feature.
        """
        descriptors = helper.avg_max_reduce_hw([x, y], self.training)
        weight = F.sigmoid(self.conv_xy_atten(descriptors))
        # Per-channel convex combination of the two features.
        blended = x * weight + y * (1 - weight)
        return self.conv_out(blended)
class UAFM_ChAtten_S(UAFM):
    """
    The UAFM with channel attention, which uses mean values.

    Args:
        x_ch (int): The channel of x tensor, which is the low level feature.
        y_ch (int): The channel of y tensor, which is the high level feature.
        out_ch (int): The channel of output tensor.
        ksize (int, optional): The kernel size of the conv for x tensor. Default: 3.
        resize_mode (str, optional): The interpolation mode used when upsampling
            the y tensor. Default: bilinear.
    """

    def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
        super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode)

        # Input is 2 * y_ch: avg descriptors of two y_ch-channel tensors
        # (the converted x and the resized y).
        squeeze = layers.ConvBNAct(
            2 * y_ch,
            y_ch // 2,
            kernel_size=1,
            bias_attr=False,
            act_type="leakyrelu")
        excite = layers.ConvBN(
            y_ch // 2, y_ch, kernel_size=1, bias_attr=False)
        self.conv_xy_atten = nn.Sequential(squeeze, excite)

    def fuse(self, x, y):
        """
        Args:
            x (Tensor): The low level feature.
            y (Tensor): The high level feature.
        """
        descriptors = helper.avg_reduce_hw([x, y])
        weight = F.sigmoid(self.conv_xy_atten(descriptors))
        # Per-channel convex combination of the two features.
        blended = x * weight + y * (1 - weight)
        return self.conv_out(blended)
class UAFM_SpAtten(UAFM):
    """
    The UAFM with spatial attention, which uses mean and max values.

    Args:
        x_ch (int): The channel of x tensor, which is the low level feature.
        y_ch (int): The channel of y tensor, which is the high level feature.
        out_ch (int): The channel of output tensor.
        ksize (int, optional): The kernel size of the conv for x tensor. Default: 3.
        resize_mode (str, optional): The interpolation mode used when upsampling
            the y tensor. Default: bilinear.
    """

    def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
        super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode)

        # Input has 4 channels: channel-wise avg and max maps of x and of y.
        self.conv_xy_atten = nn.Sequential(
            layers.ConvBNReLU(
                4, 2, kernel_size=3, padding=1, bias_attr=False),
            layers.ConvBN(
                2, 1, kernel_size=3, padding=1, bias_attr=False))

        # A constant 1 stored as a parameter; it is excluded from gradient
        # computation, so it plays the role of the literal `1` in the blend.
        self._scale = self.create_parameter(
            shape=[1],
            attr=ParamAttr(initializer=Constant(value=1.)),
            dtype="float32")
        self._scale.stop_gradient = True

    def fuse(self, x, y):
        """
        Args:
            x (Tensor): The low level feature.
            y (Tensor): The high level feature.
        """
        maps = helper.avg_max_reduce_channel([x, y])
        weight = F.sigmoid(self.conv_xy_atten(maps))
        # Per-pixel blend of the two features.
        blended = x * weight + y * (self._scale - weight)
        return self.conv_out(blended)
class UAFM_SpAtten_S(UAFM):
    """
    The UAFM with spatial attention, which uses mean values.

    Args:
        x_ch (int): The channel of x tensor, which is the low level feature.
        y_ch (int): The channel of y tensor, which is the high level feature.
        out_ch (int): The channel of output tensor.
        ksize (int, optional): The kernel size of the conv for x tensor. Default: 3.
        resize_mode (str, optional): The interpolation mode used when upsampling
            the y tensor. Default: bilinear.
    """

    def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
        super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode)

        # Input has 2 channels: the channel-wise avg map of x and of y.
        self.conv_xy_atten = nn.Sequential(
            layers.ConvBNReLU(
                2, 2, kernel_size=3, padding=1, bias_attr=False),
            layers.ConvBN(
                2, 1, kernel_size=3, padding=1, bias_attr=False))

    def fuse(self, x, y):
        """
        Args:
            x (Tensor): The low level feature.
            y (Tensor): The high level feature.
        """
        maps = helper.avg_reduce_channel([x, y])
        weight = F.sigmoid(self.conv_xy_atten(maps))
        # Per-pixel convex combination of the two features.
        blended = x * weight + y * (1 - weight)
        return self.conv_out(blended)
class UAFMMobile(UAFM):
    """
    Unified Attention Fusion Module for mobile.

    Same fusion as the base UAFM, but both convolutions are replaced by
    separable convolutions.

    Args:
        x_ch (int): The channel of x tensor, which is the low level feature.
        y_ch (int): The channel of y tensor, which is the high level feature.
        out_ch (int): The channel of output tensor.
        ksize (int, optional): The kernel size of the conv for x tensor. Default: 3.
        resize_mode (str, optional): The interpolation mode used when upsampling
            the y tensor. Default: bilinear.
    """

    def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
        super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode)

        # Overwrite the regular convs created by the base constructor.
        pad_x = ksize // 2
        self.conv_x = layers.SeparableConvBNReLU(
            x_ch, y_ch, kernel_size=ksize, padding=pad_x, bias_attr=False)
        self.conv_out = layers.SeparableConvBNReLU(
            y_ch, out_ch, kernel_size=3, padding=1, bias_attr=False)
class UAFMMobile_SpAtten(UAFM):
    """
    Unified Attention Fusion Module with spatial attention for mobile.

    Args:
        x_ch (int): The channel of x tensor, which is the low level feature.
        y_ch (int): The channel of y tensor, which is the high level feature.
        out_ch (int): The channel of output tensor.
        ksize (int, optional): The kernel size of the conv for x tensor. Default: 3.
        resize_mode (str, optional): The interpolation mode used when upsampling
            the y tensor. Default: bilinear.
    """

    def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
        super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode)

        # Overwrite the regular convs created by the base constructor with
        # separable ones.
        pad_x = ksize // 2
        self.conv_x = layers.SeparableConvBNReLU(
            x_ch, y_ch, kernel_size=ksize, padding=pad_x, bias_attr=False)
        self.conv_out = layers.SeparableConvBNReLU(
            y_ch, out_ch, kernel_size=3, padding=1, bias_attr=False)

        # Input has 4 channels: channel-wise avg and max maps of x and of y.
        self.conv_xy_atten = nn.Sequential(
            layers.ConvBNReLU(
                4, 2, kernel_size=3, padding=1, bias_attr=False),
            layers.ConvBN(
                2, 1, kernel_size=3, padding=1, bias_attr=False))

    def fuse(self, x, y):
        """
        Args:
            x (Tensor): The low level feature.
            y (Tensor): The high level feature.
        """
        maps = helper.avg_max_reduce_channel([x, y])
        weight = F.sigmoid(self.conv_xy_atten(maps))
        # Per-pixel convex combination of the two features.
        blended = x * weight + y * (1 - weight)
        return self.conv_out(blended)

@ -0,0 +1,133 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
def avg_reduce_hw(x):
    """
    Reduce the spatial dims (h, w) to 1x1 by average pooling.

    `x` is either a single tensor or a list/tuple of tensors. For several
    tensors the pooled results are concatenated along axis 1:
    cat([avg_pool_0, avg_pool_1, ...]).
    """
    if not isinstance(x, (list, tuple)):
        return F.adaptive_avg_pool2d(x, 1)
    if len(x) == 1:
        return F.adaptive_avg_pool2d(x[0], 1)

    pooled = [F.adaptive_avg_pool2d(item, 1) for item in x]
    return paddle.concat(pooled, axis=1)
def avg_max_reduce_hw_helper(x, is_training, use_concat=True):
    """
    Reduce the spatial dims (h, w) of a single tensor by both avg and max.

    Returns the two results concatenated along axis 1 when `use_concat` is
    True, otherwise the list [avg_pool, max_pool].
    """
    assert not isinstance(x, (list, tuple))

    avg_pool = F.adaptive_avg_pool2d(x, 1)
    # TODO(pjc): when axis=[2, 3], the paddle.max api has bug for training.
    max_pool = (F.adaptive_max_pool2d(x, 1) if is_training else
                paddle.max(x, axis=[2, 3], keepdim=True))

    pooled = [avg_pool, max_pool]
    return paddle.concat(pooled, axis=1) if use_concat else pooled
def avg_max_reduce_hw(x, is_training):
    """
    Reduce the spatial dims (h, w) by both avg and max.

    `x` is either a single tensor or a list/tuple of tensors. For several
    tensors all avg results come first, then all max results:
    cat([avg_pool_0, avg_pool_1, ..., max_pool_0, max_pool_1, ...]).
    """
    if not isinstance(x, (list, tuple)):
        return avg_max_reduce_hw_helper(x, is_training)
    if len(x) == 1:
        return avg_max_reduce_hw_helper(x[0], is_training)

    avg_parts, max_parts = [], []
    for item in x:
        avg_i, max_i = avg_max_reduce_hw_helper(item, is_training, False)
        avg_parts.append(avg_i)
        max_parts.append(max_i)
    return paddle.concat(avg_parts + max_parts, axis=1)
def avg_reduce_channel(x):
    """
    Reduce the channel dim (axis 1) to a single channel by mean.

    `x` is either a single tensor or a list/tuple of tensors. For several
    tensors the results are concatenated along axis 1:
    cat([avg_ch_0, avg_ch_1, ...]).
    """
    if not isinstance(x, (list, tuple)):
        return paddle.mean(x, axis=1, keepdim=True)
    if len(x) == 1:
        return paddle.mean(x[0], axis=1, keepdim=True)

    means = [paddle.mean(item, axis=1, keepdim=True) for item in x]
    return paddle.concat(means, axis=1)
def max_reduce_channel(x):
    """
    Reduce the channel dim (axis 1) to a single channel by max.

    `x` is either a single tensor or a list/tuple of tensors. For several
    tensors the results are concatenated along axis 1:
    cat([max_ch_0, max_ch_1, ...]).
    """
    if not isinstance(x, (list, tuple)):
        return paddle.max(x, axis=1, keepdim=True)
    if len(x) == 1:
        return paddle.max(x[0], axis=1, keepdim=True)

    maxima = [paddle.max(item, axis=1, keepdim=True) for item in x]
    return paddle.concat(maxima, axis=1)
def avg_max_reduce_channel_helper(x, use_concat=True):
    """
    Reduce the channel dim (axis 1) of a single tensor by both mean and max.

    Returns the two results concatenated along axis 1 when `use_concat` is
    True, otherwise the list [mean_value, max_value].
    """
    assert not isinstance(x, (list, tuple))

    reduced = [
        paddle.mean(x, axis=1, keepdim=True),
        paddle.max(x, axis=1, keepdim=True),
    ]
    return paddle.concat(reduced, axis=1) if use_concat else reduced
def avg_max_reduce_channel(x):
    """
    Reduce the channel dim (axis 1) by both mean and max.

    `x` is either a single tensor or a list/tuple of tensors. For several
    tensors the results are interleaved per input:
    cat([avg_ch_0, max_ch_0, avg_ch_1, max_ch_1, ...]).
    """
    if not isinstance(x, (list, tuple)):
        return avg_max_reduce_channel_helper(x)
    if len(x) == 1:
        return avg_max_reduce_channel_helper(x[0])

    parts = [
        part for item in x
        for part in avg_max_reduce_channel_helper(item, False)
    ]
    return paddle.concat(parts, axis=1)
def cat_avg_max_reduce_channel(x):
    """
    Concatenate several tensors along axis 1, then reduce the channel dim
    of the result by mean and by max.

    `x` must be a list/tuple with more than one tensor. Returns
    cat([mean, max]) along axis 1.
    """
    assert isinstance(x, (list, tuple)) and len(x) > 1

    stacked = paddle.concat(x, axis=1)
    reduced = [
        paddle.mean(stacked, axis=1, keepdim=True),
        paddle.max(stacked, axis=1, keepdim=True),
    ]
    return paddle.concat(reduced, axis=1)

@ -16,7 +16,7 @@ import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg.cvlibs import manager
from paddleseg.cvlibs import manager
@manager.LOSSES.add_component
@ -99,7 +99,7 @@ class BCELoss(nn.Layer):
raise ValueError(
"if type of `weight` is str, it should equal to 'dynamic', but it is {}"
.format(self.weight))
elif isinstance(self.weight, paddle.VarBase):
elif not isinstance(self.weight, paddle.Tensor):
raise TypeError(
'The type of `weight` is wrong, it should be Tensor or str, but it is {}'
.format(type(self.weight)))

@ -16,7 +16,7 @@ import paddle
from paddle import nn
import paddle.nn.functional as F
from paddlers.models.ppseg.cvlibs import manager
from paddleseg.cvlibs import manager
@manager.LOSSES.add_component

@ -16,7 +16,7 @@ import paddle
from paddle import nn
import paddle.nn.functional as F
from paddlers.models.ppseg.cvlibs import manager
from paddleseg.cvlibs import manager
@manager.LOSSES.add_component
@ -78,8 +78,6 @@ class CrossEntropyLoss(nn.Layer):
logit = paddle.transpose(logit, [0, 2, 3, 1])
label = label.astype('int64')
# In F.cross_entropy, the ignore_index is invalid, which needs to be fixed.
# When there is 255 in the label and paddle version <= 2.1.3, the cross_entropy OP will report an error, which is fixed in paddle develop version.
loss = F.cross_entropy(
logit,
label,
@ -121,7 +119,7 @@ class CrossEntropyLoss(nn.Layer):
loss = loss * semantic_weights
if self.weight is not None:
_one_hot = F.one_hot(label, logit.shape[-1])
_one_hot = F.one_hot(label * mask, logit.shape[-1])
coef = paddle.sum(_one_hot * self.weight, axis=-1)
else:
coef = paddle.ones_like(label)

@ -16,9 +16,9 @@ import numpy as np
import paddle
from paddle import nn
import paddle.nn.functional as F
from scipy.ndimage.interpolation import shift
from scipy.ndimage import shift
from paddlers.models.ppseg.cvlibs import manager
from paddleseg.cvlibs import manager
@manager.LOSSES.add_component

@ -1,4 +1,4 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -16,7 +16,7 @@ import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg.cvlibs import manager
from paddleseg.cvlibs import manager
@manager.LOSSES.add_component

@ -13,44 +13,65 @@ import paddle
from paddle import nn
import paddle.nn.functional as F
from paddlers.models.ppseg.cvlibs import manager
from paddleseg.cvlibs import manager
@manager.LOSSES.add_component
class DiceLoss(nn.Layer):
"""
Implements the dice loss function.
The implements of the dice loss.
Args:
ignore_index (int64): Specifies a target value that is ignored
and does not contribute to the input gradient. Default ``255``.
smooth (float32): laplace smoothing,
to smooth dice loss and accelerate convergence. following:
https://github.com/pytorch/pytorch/issues/1249#issuecomment-337999895
weight (list[float], optional): The weight for each class. Default: None.
ignore_index (int64): ignore_index (int64, optional): Specifies a target value that
is ignored and does not contribute to the input gradient. Default ``255``.
smooth (float32): Laplace smoothing to smooth dice loss and accelerate convergence.
Default: 1.0
"""
def __init__(self, ignore_index=255, smooth=0.):
super(DiceLoss, self).__init__()
def __init__(self, weight=None, ignore_index=255, smooth=1.0):
super().__init__()
self.weight = weight
self.ignore_index = ignore_index
self.eps = 1e-5
self.smooth = smooth
self.eps = 1e-8
def forward(self, logits, labels):
labels = paddle.cast(labels, dtype='int32')
labels_one_hot = F.one_hot(labels, num_classes=logits.shape[1])
labels_one_hot = paddle.transpose(labels_one_hot, [0, 3, 1, 2])
labels_one_hot = paddle.cast(labels_one_hot, dtype='float32')
num_class = logits.shape[1]
if self.weight is not None:
assert num_class == len(self.weight), \
"The lenght of weight should be euqal to the num class"
mask = labels != self.ignore_index
mask = paddle.cast(paddle.unsqueeze(mask, 1), 'float32')
labels[labels == self.ignore_index] = 0
labels_one_hot = F.one_hot(labels, num_class)
labels_one_hot = paddle.transpose(labels_one_hot, [0, 3, 1, 2])
logits = F.softmax(logits, axis=1)
mask = (paddle.unsqueeze(labels, 1) != self.ignore_index)
logits = logits * mask
labels_one_hot = labels_one_hot * mask
dice_loss = 0.0
for i in range(num_class):
dice_loss_i = dice_loss_helper(logits[:, i], labels_one_hot[:, i],
mask, self.smooth, self.eps)
if self.weight is not None:
dice_loss_i *= self.weight[i]
dice_loss += dice_loss_i
dice_loss = dice_loss / num_class
return dice_loss
dims = (0, ) + tuple(range(2, labels.ndimension() + 1))
intersection = paddle.sum(logits * labels_one_hot, dims)
cardinality = paddle.sum(logits + labels_one_hot, dims)
dice_loss = ((2. * intersection + self.smooth) /
(cardinality + self.eps + self.smooth)).mean()
return 1 - dice_loss
def dice_loss_helper(logit, label, mask, smooth, eps):
    """
    Compute the dice loss of one class, averaged over the batch.

    Args:
        logit (Tensor): The predicted probability of the class.
        label (Tensor): The target of the class, same shape as `logit`.
        mask (Tensor): Multiplied into both `logit` and `label` before the
            sums are taken.
        smooth (float): Added to both numerator and denominator.
        eps (float): Added to the denominator only.

    Returns:
        (Tensor): The mean over the batch of 1 - dice coefficient.
    """
    assert logit.shape == label.shape, \
        "The shape of logit and label should be the same"

    # Flatten everything but the batch dim.
    flat_shape = [0, -1]
    logit = paddle.reshape(logit, flat_shape)
    label = paddle.reshape(label, flat_shape)
    mask = paddle.reshape(mask, flat_shape)

    logit *= mask
    label *= mask

    # Per-sample overlap and total mass.
    overlap = paddle.sum(logit * label, axis=1)
    total = paddle.sum(logit + label, axis=1)

    per_sample = 1 - (2 * overlap + smooth) / (total + smooth + eps)
    return per_sample.mean()

@ -16,8 +16,8 @@ import paddle
from paddle import nn
import paddle.nn.functional as F
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.models import losses
from paddleseg.cvlibs import manager
from paddleseg.models import losses
@manager.LOSSES.add_component

@ -17,44 +17,116 @@ import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg.cvlibs import manager
from paddleseg.cvlibs import manager
@manager.LOSSES.add_component
class FocalLoss(nn.Layer):
"""
Focal Loss.
The implement of focal loss.
Code referenced from:
https://github.com/clcarwin/focal_loss_pytorch/blob/master/focalloss.py
The focal loss requires the label is 0 or 1 for now.
Args:
gamma (float): the coefficient of Focal Loss.
ignore_index (int64): Specifies a target value that is ignored
alpha (float, list, optional): The alpha of focal loss. alpha is the weight
of class 1, 1-alpha is the weight of class 0. Default: 0.25
gamma (float, optional): The gamma of Focal Loss. Default: 2.0
ignore_index (int64, optional): Specifies a target value that is ignored
and does not contribute to the input gradient. Default ``255``.
"""
def __init__(self, gamma=2.0, ignore_index=255, edge_label=False):
super(FocalLoss, self).__init__()
def __init__(self, alpha=0.25, gamma=2.0, ignore_index=255):
super().__init__()
self.alpha = alpha
self.gamma = gamma
self.ignore_index = ignore_index
self.edge_label = edge_label
self.EPS = 1e-10
def forward(self, logit, label):
logit = paddle.reshape(
logit, [logit.shape[0], logit.shape[1], -1]) # N,C,H,W => N,C,H*W
logit = paddle.transpose(logit, [0, 2, 1]) # N,C,H*W => N,H*W,C
logit = paddle.reshape(logit,
[-1, logit.shape[2]]) # N,H*W,C => N*H*W,C
label = paddle.reshape(label, [-1, 1])
range_ = paddle.arange(0, label.shape[0])
range_ = paddle.unsqueeze(range_, axis=-1)
label = paddle.cast(label, dtype='int64')
label = paddle.concat([range_, label], axis=-1)
logpt = F.log_softmax(logit)
logpt = paddle.gather_nd(logpt, label)
pt = paddle.exp(logpt.detach())
loss = -1 * (1 - pt)**self.gamma * logpt
loss = paddle.mean(loss)
return loss
"""
Forward computation.
Args:
logit (Tensor): Logit tensor, the data type is float32, float64. Shape is
(N, C, H, W), where C is number of classes.
label (Tensor): Label tensor, the data type is int64. Shape is (N, W, W),
where each value is 0 <= label[i] <= C-1.
Returns:
(Tensor): The average loss.
"""
assert logit.ndim == 4, "The ndim of logit should be 4."
assert logit.shape[1] == 2, "The channel of logit should be 2."
assert label.ndim == 3, "The ndim of label should be 3."
class_num = logit.shape[1] # class num is 2
logit = paddle.transpose(logit, [0, 2, 3, 1]) # N,C,H,W => N,H,W,C
mask = label != self.ignore_index # N,H,W
mask = paddle.unsqueeze(mask, 3)
mask = paddle.cast(mask, 'float32')
mask.stop_gradient = True
label = F.one_hot(label, class_num) # N,H,W,C
label = paddle.cast(label, logit.dtype)
label.stop_gradient = True
loss = F.sigmoid_focal_loss(
logit=logit,
label=label,
alpha=self.alpha,
gamma=self.gamma,
reduction='none')
loss = loss * mask
avg_loss = paddle.sum(loss) / (
paddle.sum(paddle.cast(mask != 0., 'int32')) * class_num + self.EPS)
return avg_loss
@manager.LOSSES.add_component
class MultiClassFocalLoss(nn.Layer):
    """
    The implement of focal loss for multi class.

    The per-pixel cross entropy `ce` is re-weighted as
    `alpha * (1 - exp(-ce)) ** gamma * ce`, and the result is averaged over
    the pixels whose label is not `ignore_index`.

    Args:
        num_class (int): The number of classes. It is stored on the instance;
            `forward` does not read it.
        alpha (float, optional): The factor that multiplies the focal loss.
            Default: 1.0
        gamma (float, optional): The gamma of Focal Loss. Default: 2.0
        ignore_index (int64, optional): Specifies a target value that is ignored
            and does not contribute to the input gradient. Default ``255``.
    """

    def __init__(self, num_class, alpha=1.0, gamma=2.0, ignore_index=255):
        super().__init__()
        self.num_class = num_class
        self.alpha = alpha
        self.gamma = gamma
        self.ignore_index = ignore_index
        self.EPS = 1e-10

    def forward(self, logit, label):
        """
        Forward computation.

        Args:
            logit (Tensor): Logit tensor, the data type is float32, float64. Shape is
                (N, C, H, W), where C is number of classes.
            label (Tensor): Label tensor, the data type is int64. Shape is (N, H, W),
                where each value is 0 <= label[i] <= C-1.
        Returns:
            (Tensor): The average loss.
        """
        assert logit.ndim == 4, "The ndim of logit should be 4."
        assert label.ndim == 3, "The ndim of label should be 3."

        # N,C,H,W => N,H,W,C so that the class axis is last for cross_entropy.
        logit = paddle.transpose(logit, [0, 2, 3, 1])
        label = label.astype('int64')

        ce_loss = F.cross_entropy(
            logit, label, ignore_index=self.ignore_index, reduction='none')

        # exp(-ce) is the probability assigned to the target class.
        pt = paddle.exp(-ce_loss)
        focal_loss = self.alpha * ((1 - pt)**self.gamma) * ce_loss

        mask = paddle.cast(label != self.ignore_index, 'float32')
        focal_loss *= mask
        # Normalise by the fraction of valid pixels; EPS guards against an
        # all-ignored batch.
        avg_loss = paddle.mean(focal_loss) / (paddle.mean(mask) + self.EPS)
        return avg_loss

@ -13,7 +13,7 @@ import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg.cvlibs import manager
from paddleseg.cvlibs import manager
@manager.LOSSES.add_component

@ -16,7 +16,7 @@ import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg.cvlibs import manager
from paddleseg.cvlibs import manager
@manager.LOSSES.add_component

@ -16,7 +16,7 @@ import paddle
from paddle import nn
import paddle.nn.functional as F
from paddlers.models.ppseg.cvlibs import manager
from paddleseg.cvlibs import manager
@manager.LOSSES.add_component
@ -74,3 +74,25 @@ class L1Loss(nn.L1Loss):
def __init__(self, reduction='mean', ignore_index=255):
super().__init__(reduction=reduction)
self.ignore_index = ignore_index
self.EPS = 1e-10
def forward(self, input, label):
    """
    Compute the L1 loss with elements equal to `ignore_index` masked out.

    Args:
        input (Tensor): The prediction.
        label (Tensor): The target. Elements equal to `self.ignore_index`
            contribute zero to the loss.

    Returns:
        (Tensor): The element-wise masked loss for reduction "none", its sum
            for "sum", or for "mean" its mean divided by the mean of the mask.

    Raises:
        ValueError: If `self.reduction` is not 'sum', 'mean' or 'none'.
    """
    mask = paddle.cast(label != self.ignore_index, "float32")
    label.stop_gradient = True
    mask.stop_gradient = True

    elementwise = paddle.nn.functional.l1_loss(
        input, label, "none", name=self.name)
    output = elementwise * mask

    if self.reduction == "none":
        return output
    if self.reduction == "sum":
        return paddle.sum(output)
    if self.reduction == "mean":
        # EPS keeps the division finite when every element is ignored.
        return paddle.mean(output) / (paddle.mean(mask) + self.EPS)

    raise ValueError(
        "The value of 'reduction' in L1Loss should be 'sum', 'mean' or 'none', but "
        "received %s, which is not allowed." % self.reduction)

@ -22,7 +22,7 @@ import paddle
from paddle import nn
import paddle.nn.functional as F
from paddlers.models.ppseg.cvlibs import manager
from paddleseg.cvlibs import manager
@manager.LOSSES.add_component
@ -124,8 +124,12 @@ def lovasz_hinge_flat(logits, labels):
signs = 2. * labels - 1.
signs.stop_gradient = True
errors = 1. - logits * signs
errors_sorted, perm = paddle.fluid.core.ops.argsort(errors, 'axis', 0,
'descending', True)
if hasattr(paddle, "_legacy_C_ops"):
errors_sorted, perm = paddle._legacy_C_ops.argsort(errors, 'axis', 0,
'descending', True)
else:
errors_sorted, perm = paddle._C_ops.argsort(errors, 'axis', 0,
'descending', True)
errors_sorted.stop_gradient = False
gt_sorted = paddle.gather(labels, perm)
grad = lovasz_grad(gt_sorted)
@ -181,8 +185,12 @@ def lovasz_softmax_flat(probas, labels, classes='present'):
else:
class_pred = probas[:, c]
errors = paddle.abs(fg - class_pred)
errors_sorted, perm = paddle.fluid.core.ops.argsort(errors, 'axis', 0,
'descending', True)
if hasattr(paddle, "_legacy_C_ops"):
errors_sorted, perm = paddle._legacy_C_ops.argsort(
errors, 'axis', 0, 'descending', True)
else:
errors_sorted, perm = paddle._C_ops.argsort(errors, 'axis', 0,
'descending', True)
errors_sorted.stop_gradient = False
fg_sorted = paddle.gather(fg, perm)

@ -16,7 +16,7 @@ import paddle
from paddle import nn
import paddle.nn.functional as F
from paddlers.models.ppseg.cvlibs import manager
from paddleseg.cvlibs import manager
@manager.LOSSES.add_component

@ -16,7 +16,7 @@ import paddle
from paddle import nn
import paddle.nn.functional as F
from paddlers.models.ppseg.cvlibs import manager
from paddleseg.cvlibs import manager
@manager.LOSSES.add_component

@ -16,7 +16,7 @@ import paddle
from paddle import nn
import paddle.nn.functional as F
from paddlers.models.ppseg.cvlibs import manager
from paddleseg.cvlibs import manager
@manager.LOSSES.add_component
@ -55,7 +55,7 @@ class OhemCrossEntropyLoss(nn.Layer):
# get the label after ohem
n, c, h, w = logit.shape
label = label.reshape((-1, ))
label = label.reshape((-1, )).astype('int64')
valid_mask = (label != self.ignore_index).astype('int64')
num_valid = valid_mask.sum()
label = label * valid_mask

@ -16,8 +16,8 @@ import paddle
from paddle import nn
import paddle.nn.functional as F
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.models import losses
from paddleseg.cvlibs import manager
from paddleseg.models import losses
@manager.LOSSES.add_component

@ -16,7 +16,7 @@ import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg.cvlibs import manager
from paddleseg.cvlibs import manager
@manager.LOSSES.add_component
@ -101,9 +101,12 @@ class PixelContrastCrossEntropyLoss(nn.Layer):
elif num_hard >= n_view / 2:
num_easy_keep = num_easy
num_hard_keep = n_view - num_easy_keep
else:
elif num_easy >= n_view / 2:
num_hard_keep = num_hard
num_easy_keep = n_view - num_hard_keep
else:
num_hard_keep = num_hard
num_easy_keep = num_easy
indices = None
if num_hard > 0:

@ -16,7 +16,7 @@ import paddle
from paddle import nn
import paddle.nn.functional as F
from paddlers.models.ppseg.cvlibs import manager
from paddleseg.cvlibs import manager
@manager.LOSSES.add_component

@ -17,7 +17,7 @@ import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg.cvlibs import manager
from paddleseg.cvlibs import manager
_euler_num = 2.718281828
_pi = 3.14159265

@ -18,7 +18,7 @@ import paddle
from paddle import nn
import paddle.nn.functional as F
from paddlers.models.ppseg.cvlibs import manager
from paddleseg.cvlibs import manager
@manager.LOSSES.add_component
@ -92,6 +92,7 @@ class SemanticConnectivityLoss(nn.Layer):
label_num_conn, label_conn = cv2.connectedComponents(
labels_np_class.astype(np.uint8))
origin_pred_num_conn = pred_num_conn
if pred_num_conn > 2 * label_num_conn:
pred_num_conn = min(pred_num_conn, self.max_pred_num_conn)
real_pred_num = pred_num_conn - 1
@ -100,8 +101,9 @@ class SemanticConnectivityLoss(nn.Layer):
# Connected Components Matching and SC Loss Calculation
if real_label_num > 0 and real_pred_num > 0:
img_connectivity = compute_class_connectiveity(
pred_conn, label_conn, pred_num_conn, label_num_conn,
pred_i, real_label_num, real_pred_num, zero)
pred_conn, label_conn, pred_num_conn,
origin_pred_num_conn, label_num_conn, pred_i,
real_label_num, real_pred_num, zero)
sc_loss += 1 - img_connectivity
elif real_label_num == 0 and real_pred_num == 0:
# if no connected component, SC Loss = 0, so pass
@ -122,12 +124,12 @@ class SemanticConnectivityLoss(nn.Layer):
def compute_class_connectiveity(pred_conn, label_conn, pred_num_conn,
label_num_conn, pred, real_label_num,
real_pred_num, zero):
origin_pred_num_conn, label_num_conn, pred,
real_label_num, real_pred_num, zero):
pred_conn = paddle.to_tensor(pred_conn)
label_conn = paddle.to_tensor(label_conn)
pred_conn = F.one_hot(pred_conn, pred_num_conn)
pred_conn = F.one_hot(pred_conn, origin_pred_num_conn)
label_conn = F.one_hot(label_conn, label_num_conn)
ious = paddle.zeros((real_label_num, real_pred_num))

@ -16,7 +16,7 @@ import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg.cvlibs import manager
from paddleseg.cvlibs import manager
@manager.LOSSES.add_component

@ -0,0 +1,162 @@
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from functools import partial
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddleseg import utils
from paddleseg.models import layers
from paddleseg.cvlibs import manager
@manager.MODELS.add_component
class LRASPP(nn.Layer):
    """
    Semantic segmentation model with a light R-ASPP head.

    The original article refers to
    Howard, Andrew, et al. "Searching for mobilenetv3."
    (https://arxiv.org/pdf/1905.02244.pdf)

    Args:
        num_classes (int): The number of target classes.
        backbone (nn.Layer): Backbone network. It must expose a `feat_channels`
            attribute that can be indexed by every value in `backbone_indices`.
        backbone_indices (List(int), optional): The values indicate the indices of backbone output
            used as the input of the LR-ASPP head. Its length must be exactly one more than
            the length of `lraspp_head_inter_chs`.
            Default: [0, 1, 3].
        lraspp_head_inter_chs (List(int), optional): The intermediate channels of LR-ASPP head.
            Default: [32, 64].
        lraspp_head_out_ch (int, optional): The output channels of each ASPP branch in the LR-ASPP head.
            Default: 128
        resize_mode (str, optional): The resize mode for the upsampling operation in the LR-ASPP head.
            Default: bilinear.
        use_gap (bool, optional): If true, use global average pooling in the LR-ASPP head; otherwise, use
            a 49x49 kernel for average pooling.
            Default: True.
        pretrained (str, optional): The path or url of pretrained model. Default: None.
    """

    def __init__(self,
                 num_classes,
                 backbone,
                 backbone_indices=[0, 1, 3],
                 lraspp_head_inter_chs=[32, 64],
                 lraspp_head_out_ch=128,
                 resize_mode='bilinear',
                 use_gap=True,
                 pretrained=None):
        super().__init__()

        # backbone
        assert hasattr(backbone, 'feat_channels'), \
            "The backbone should has feat_channels."
        # Checked before max() is called below, so that an empty list fails
        # with this message instead of a bare ValueError from max().
        assert len(backbone_indices) >= 1, "The lenght of backbone_indices " \
            "should not be lesser than 1"
        assert len(backbone.feat_channels) >= len(backbone_indices), \
            f"The length of input backbone_indices ({len(backbone_indices)}) should not be " \
            f"greater than the length of feat_channels ({len(backbone.feat_channels)})."
        assert len(backbone.feat_channels) > max(backbone_indices), \
            f"The max value ({max(backbone_indices)}) of backbone_indices should be " \
            f"less than the length of feat_channels ({len(backbone.feat_channels)})."
        self.backbone = backbone

        # head
        assert len(backbone_indices) == len(
            lraspp_head_inter_chs
        ) + 1, "The length of backbone_indices should be 1 greater than lraspp_head_inter_chs."
        self.backbone_indices = backbone_indices
        self.lraspp_head = LRASPPHead(backbone_indices, backbone.feat_channels,
                                      lraspp_head_inter_chs, lraspp_head_out_ch,
                                      num_classes, resize_mode, use_gap)

        # pretrained
        self.pretrained = pretrained
        self.init_weight()

    def forward(self, x):
        """
        Run the backbone and the LR-ASPP head, then resize the logits to the
        spatial size of `x`.

        Returns:
            list: A single-element list holding the logit tensor.
        """
        # Spatial size of the input, taken from axes 2 and onwards.
        x_hw = paddle.shape(x)[2:]

        feats_backbone = self.backbone(x)
        assert len(feats_backbone) >= len(self.backbone_indices), \
            f"The nums of backbone feats ({len(feats_backbone)}) should be greater or " \
            f"equal than the nums of backbone_indices ({len(self.backbone_indices)})"

        y = self.lraspp_head(feats_backbone)
        # The final upsampling is always bilinear, independent of `resize_mode`.
        y = F.interpolate(y, x_hw, mode='bilinear', align_corners=False)
        logit_list = [y]

        return logit_list

    def init_weight(self):
        """Load the whole model from `self.pretrained` when it is set."""
        if self.pretrained is not None:
            utils.load_entire_model(self, self.pretrained)
class LRASPPHead(nn.Layer):
    """
    The LR-ASPP head.

    The feature selected by the last entry of `indices` is gated by a pooled,
    sigmoid-activated branch. The result is then fused step by step with the
    features selected by the remaining entries, walking `indices` backwards.

    Args:
        indices (List(int)): Indices into the backbone feature list. The last
            entry selects the feature fed to the gated branch.
        in_chs (List(int)): Channels of the backbone features, indexed by `indices`.
        mid_chs (List(int)): Output channels of the 1x1 convolutions applied to
            the features selected by `indices[:-1]`.
        out_ch (int): Channels produced by every fusion step.
        n_classes (int): Channels of the final output.
        resize_mode (str): The `mode` passed to `F.interpolate` inside the head.
        use_gap (bool): If true, pool with `nn.AdaptiveAvgPool2D(1)`; otherwise
            use `nn.AvgPool2D` with a 49x49 kernel and a (16, 20) stride.
        align_corners (bool, optional): The `align_corners` argument passed to
            `F.interpolate`. Default: False.
    """

    def __init__(self,
                 indices,
                 in_chs,
                 mid_chs,
                 out_ch,
                 n_classes,
                 resize_mode,
                 use_gap,
                 align_corners=False):
        super().__init__()

        # Index of the feature that conv_v / conv_w are sized for.
        self.last_index = indices[-1]
        # Remaining indices, walked backwards from the second-to-last entry.
        self.indices = indices[-2::-1]
        self.in_chs = [in_chs[i] for i in indices[::-1]]
        self.mid_chs = mid_chs[::-1]

        self.convs = nn.LayerList()
        self.conv_ups = nn.LayerList()
        for in_ch, mid_ch in zip(self.in_chs[1:], self.mid_chs):
            self.convs.append(
                nn.Conv2D(
                    in_ch, mid_ch, kernel_size=1, bias_attr=False))
            self.conv_ups.append(layers.ConvBNReLU(out_ch + mid_ch, out_ch, 1))

        if use_gap:
            pool = nn.AdaptiveAvgPool2D(1)
        else:
            pool = nn.AvgPool2D(kernel_size=(49, 49), stride=(16, 20))
        self.conv_w = nn.Sequential(
            pool,
            nn.Conv2D(
                self.in_chs[0], out_ch, 1, bias_attr=False),
            nn.Sigmoid())
        self.conv_v = layers.ConvBNReLU(self.in_chs[0], out_ch, 1)
        self.conv_t = nn.Conv2D(out_ch, out_ch, kernel_size=1, bias_attr=False)
        self.conv_out = nn.Conv2D(
            out_ch, n_classes, kernel_size=1, bias_attr=False)

        self.interp = partial(
            F.interpolate, mode=resize_mode, align_corners=align_corners)

    def forward(self, in_feat_list):
        """
        Args:
            in_feat_list (List(Tensor)): The backbone features; they are
                indexed with the `indices` given to the constructor.

        Returns:
            Tensor: The output of the final 1x1 convolution, which has
                `n_classes` output channels.
        """
        # Read the feature that conv_v / conv_w were built for, rather than
        # unconditionally taking the last backbone output; the two coincide
        # only when indices[-1] addresses the last feature.
        x = in_feat_list[self.last_index]

        # Gate the projected feature with the pooled sigmoid branch, resized
        # back to the spatial size of x.
        x = self.conv_v(x) * self.interp(self.conv_w(x), paddle.shape(x)[2:])
        y = self.conv_t(x)

        for idx, conv, conv_up in zip(self.indices, self.convs, self.conv_ups):
            feat = in_feat_list[idx]
            y = self.interp(y, paddle.shape(feat)[2:])
            y = paddle.concat([y, conv(feat)], axis=1)
            y = conv_up(y)

        y = self.conv_out(y)
        return y

@ -1,4 +1,4 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -16,9 +16,9 @@ import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg.models import layers
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.utils import utils
from paddleseg.models import layers
from paddleseg.cvlibs import manager
from paddleseg.utils import utils
class MLAHeads(nn.Layer):

@ -0,0 +1,289 @@
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddleseg import utils
from paddleseg.models import layers
from paddleseg.cvlibs import manager
@manager.MODELS.add_component
class MobileSeg(nn.Layer):
    """
    The semantic segmentation models for mobile devices.

    Args:
        num_classes (int): The number of target classes.
        backbone (nn.Layer): Backbone network. It must expose a `feat_channels`
            attribute that can be indexed by every value in `backbone_indices`.
        backbone_indices (List(int), optional): The values indicate the indices of output of backbone.
            Default: [1, 2, 3].
        cm_bin_sizes (List(int), optional): The bin size of context module. Default: [1, 2].
        cm_out_ch (int, optional): The output channel of the last context module. Default: 64.
        arm_type (str, optional): The type of attention refinement module; it is looked up
            by name in `paddleseg.models.layers`. Default: UAFMMobile.
        arm_out_chs (List(int), optional): The out channels of each arm module. A single-element
            list is repeated for every entry of `backbone_indices`. Default: [32, 48, 64].
        seg_head_inter_chs (List(int), optional): The intermediate channels of segmentation head.
            A single-element list is repeated for every entry of `backbone_indices`.
            Default: [32, 32, 32].
        resize_mode (str, optional): The resize mode for the upsampling operation in decoder.
            Default: bilinear.
        use_last_fuse (bool, optional): Whether use fusion in the last. Default: False.
        pretrained (str, optional): The path or url of pretrained model. Default: None.
    """

    def __init__(self,
                 num_classes,
                 backbone,
                 backbone_indices=[1, 2, 3],
                 cm_bin_sizes=[1, 2],
                 cm_out_ch=64,
                 arm_type='UAFMMobile',
                 arm_out_chs=[32, 48, 64],
                 seg_head_inter_chs=[32, 32, 32],
                 resize_mode='bilinear',
                 use_last_fuse=False,
                 pretrained=None):
        super().__init__()

        # backbone
        assert hasattr(backbone, 'feat_channels'), \
            "The backbone should has feat_channels."
        # Checked before max() is called below, so that an empty list fails
        # with this message instead of a bare ValueError from max().
        assert len(backbone_indices) >= 1, "The lenght of backbone_indices " \
            "should not be lesser than 1"
        assert len(backbone.feat_channels) >= len(backbone_indices), \
            f"The length of input backbone_indices ({len(backbone_indices)}) should not be " \
            f"greater than the length of feat_channels ({len(backbone.feat_channels)})."
        assert len(backbone.feat_channels) > max(backbone_indices), \
            f"The max value ({max(backbone_indices)}) of backbone_indices should be " \
            f"less than the length of feat_channels ({len(backbone.feat_channels)})."
        self.backbone = backbone

        self.backbone_indices = backbone_indices  # [..., x16_id, x32_id]
        backbone_out_chs = [backbone.feat_channels[i] for i in backbone_indices]

        # head
        # A single value is broadcast to every selected backbone level. This
        # builds a new list, so the caller's (or the default) list is untouched.
        if len(arm_out_chs) == 1:
            arm_out_chs = arm_out_chs * len(backbone_indices)
        assert len(arm_out_chs) == len(backbone_indices), "The length of " \
            "arm_out_chs and backbone_indices should be equal"

        self.ppseg_head = MobileSegHead(backbone_out_chs, arm_out_chs,
                                        cm_bin_sizes, cm_out_ch, arm_type,
                                        resize_mode, use_last_fuse)

        if len(seg_head_inter_chs) == 1:
            seg_head_inter_chs = seg_head_inter_chs * len(backbone_indices)
        assert len(seg_head_inter_chs) == len(backbone_indices), "The length of " \
            "seg_head_inter_chs and backbone_indices should be equal"
        self.seg_heads = nn.LayerList()  # [..., head_16, head32]
        for in_ch, mid_ch in zip(arm_out_chs, seg_head_inter_chs):
            self.seg_heads.append(SegHead(in_ch, mid_ch, num_classes))

        # pretrained
        self.pretrained = pretrained
        self.init_weight()

    def forward(self, x):
        """
        Compute the logits, resized to the spatial size of `x`.

        Returns:
            list: In training mode, one logit tensor per segmentation head;
                otherwise a single-element list with the output of the first head.
        """
        x_hw = paddle.shape(x)[2:]

        feats_backbone = self.backbone(x)  # [x4, x8, x16, x32]
        assert len(feats_backbone) >= len(self.backbone_indices), \
            f"The nums of backbone feats ({len(feats_backbone)}) should be greater or " \
            f"equal than the nums of backbone_indices ({len(self.backbone_indices)})"

        feats_selected = [feats_backbone[i] for i in self.backbone_indices]

        feats_head = self.ppseg_head(feats_selected)  # [..., x8, x16, x32]

        if self.training:
            # Every head contributes a logit map during training.
            logit_list = [
                F.interpolate(
                    seg_head(feat),
                    x_hw,
                    mode='bilinear',
                    align_corners=False)
                for feat, seg_head in zip(feats_head, self.seg_heads)
            ]
        else:
            # Only the first head is evaluated outside of training.
            logit = self.seg_heads[0](feats_head[0])
            logit = F.interpolate(
                logit, x_hw, mode='bilinear', align_corners=False)
            logit_list = [logit]

        return logit_list

    def init_weight(self):
        """Load the whole model from `self.pretrained` when it is set."""
        if self.pretrained is not None:
            utils.load_entire_model(self, self.pretrained)
class MobileSegHead(nn.Layer):
    """
    The head of MobileSeg.

    Args:
        backbone_out_chs (List(int)): The channels of output tensors in the backbone.
        arm_out_chs (List(int)): The out channels of each arm module.
        cm_bin_sizes (List(int)): The bin size of context module.
        cm_out_ch (int): The output channel of the last context module.
        arm_type (str): The type of attention refinement module; it must name an
            attribute of `paddleseg.models.layers`.
        resize_mode (str): The resize mode for the upsampling operation in decoder.
        use_last_fuse (bool): Whether to fuse all output features into the first one.
    """

    def __init__(self, backbone_out_chs, arm_out_chs, cm_bin_sizes, cm_out_ch,
                 arm_type, resize_mode, use_last_fuse):
        super().__init__()

        self.cm = MobileContextModule(backbone_out_chs[-1], cm_out_ch,
                                      cm_out_ch, cm_bin_sizes)

        assert hasattr(layers, arm_type), \
            "Not support arm_type ({})".format(arm_type)
        # Plain attribute lookup; no need to evaluate a constructed expression.
        arm_class = getattr(layers, arm_type)

        self.arm_list = nn.LayerList()  # [..., arm8, arm16, arm32]
        last = len(backbone_out_chs) - 1
        for i, low_chs in enumerate(backbone_out_chs):
            # The last arm receives the context module output; every other
            # arm receives the output of the arm that follows it in the list.
            high_ch = cm_out_ch if i == last else arm_out_chs[i + 1]
            out_ch = arm_out_chs[i]
            arm = arm_class(
                low_chs, high_ch, out_ch, ksize=3, resize_mode=resize_mode)
            self.arm_list.append(arm)

        self.use_last_fuse = use_last_fuse
        if self.use_last_fuse:
            self.fuse_convs = nn.LayerList()
            for i in range(1, len(arm_out_chs)):
                conv = layers.SeparableConvBNReLU(
                    arm_out_chs[i],
                    arm_out_chs[0],
                    kernel_size=3,
                    bias_attr=False)
                self.fuse_convs.append(conv)
            self.last_conv = layers.SeparableConvBNReLU(
                len(arm_out_chs) * arm_out_chs[0],
                arm_out_chs[0],
                kernel_size=3,
                bias_attr=False)

    def forward(self, in_feat_list):
        """
        Args:
            in_feat_list (List(Tensor)): Such as [x2, x4, x8, x16, x32].
                x2, x4 and x8 are optional.
        Returns:
            out_feat_list (List(Tensor)): Such as [x2, x4, x8, x16, x32].
                x2, x4 and x8 are optional.
                The length of in_feat_list and out_feat_list are the same.
        """

        high_feat = self.cm(in_feat_list[-1])
        out_feat_list = []

        # Walk from the last feature to the first one, feeding each arm with
        # the output of the previous step.
        for i in reversed(range(len(in_feat_list))):
            high_feat = self.arm_list[i](in_feat_list[i], high_feat)
            out_feat_list.append(high_feat)
        # Results were collected last-to-first; restore the input ordering.
        out_feat_list.reverse()

        if self.use_last_fuse:
            x_list = [out_feat_list[0]]
            size = paddle.shape(out_feat_list[0])[2:]
            for x, conv in zip(out_feat_list[1:], self.fuse_convs):
                x = conv(x)
                x = F.interpolate(
                    x, size=size, mode='bilinear', align_corners=False)
                x_list.append(x)
            x = paddle.concat(x_list, axis=1)
            x = self.last_conv(x)
            out_feat_list[0] = x

        return out_feat_list
class MobileContextModule(nn.Layer):
    """
    Context Module for Mobile Model.

    Each bin size gets its own stage (adaptive average pooling followed by a
    1x1 `layers.ConvBNReLU`). The stage outputs are resized to the input's
    spatial size, summed, and passed through a 3x3 `layers.SeparableConvBNReLU`.

    Args:
        in_channels (int): The number of input channels to pyramid pooling module.
        inter_channels (int): The number of inter channels to pyramid pooling module.
        out_channels (int): The number of output channels after pyramid pooling module.
        bin_sizes (List(int)|tuple): The out size of pooled feature maps, one stage per entry.
        align_corners (bool, optional): An argument of F.interpolate. Default: False.
    """

    def __init__(self,
                 in_channels,
                 inter_channels,
                 out_channels,
                 bin_sizes,
                 align_corners=False):
        super().__init__()

        branches = [
            self._make_stage(in_channels, inter_channels, bin_size)
            for bin_size in bin_sizes
        ]
        self.stages = nn.LayerList(branches)

        self.conv_out = layers.SeparableConvBNReLU(
            in_channels=inter_channels,
            out_channels=out_channels,
            kernel_size=3,
            bias_attr=False)

        self.align_corners = align_corners

    def _make_stage(self, in_channels, out_channels, size):
        """Build one stage: adaptive average pooling to `size`, then a 1x1 ConvBNReLU."""
        return nn.Sequential(
            nn.AdaptiveAvgPool2D(output_size=size),
            layers.ConvBNReLU(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=1))

    def forward(self, input):
        """Sum the resized outputs of all stages and apply the output convolution."""
        target_hw = paddle.shape(input)[2:]
        fused = None

        for branch in self.stages:
            resized = F.interpolate(
                branch(input),
                target_hw,
                mode='bilinear',
                align_corners=self.align_corners)
            if fused is not None:
                fused += resized
            else:
                fused = resized

        return self.conv_out(fused)
class SegHead(nn.Layer):
    """
    Segmentation head: a 3x3 `layers.SeparableConvBNReLU` followed by a 1x1
    bias-free convolution.

    Args:
        in_chan (int): The number of input channels.
        mid_chan (int): The number of channels between the two layers.
        n_classes (int): The number of output channels.
    """

    def __init__(self, in_chan, mid_chan, n_classes):
        super().__init__()
        self.conv = layers.SeparableConvBNReLU(
            in_chan,
            mid_chan,
            kernel_size=3,
            bias_attr=False)
        self.conv_out = nn.Conv2D(
            mid_chan,
            n_classes,
            kernel_size=1,
            bias_attr=False)

    def forward(self, x):
        """Apply both layers in sequence and return the result."""
        return self.conv_out(self.conv(x))

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save