Merge pull request #40 from Bobholamovic/update_ppseg
[Feat] Update ppseg and Add CondenseNet V2
commit ebceda8419
143 changed files with 8360 additions and 1965 deletions
@@ -0,0 +1 @@
ppseg f6c73b478cdf00f40ae69edd35bf6bce2a1687ef
@@ -0,0 +1,135 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||||
|
# |
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||||
|
# you may not use this file except in compliance with the License. |
||||||
|
# You may obtain a copy of the License at |
||||||
|
# |
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
# |
||||||
|
# Unless required by applicable law or agreed to in writing, software |
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
# See the License for the specific language governing permissions and |
||||||
|
# limitations under the License. |
||||||
|
|
||||||
|
import os |
||||||
|
import numpy as np |
||||||
|
|
||||||
|
from paddlers.models.ppseg.datasets import Dataset |
||||||
|
from paddlers.models.ppseg.cvlibs import manager |
||||||
|
from paddlers.models.ppseg.transforms import Compose |
||||||
|
|
||||||
|
|
||||||
|
@manager.DATASETS.add_component |
||||||
|
class PSSLDataset(Dataset): |
||||||
|
""" |
||||||
|
The PSSL dataset for segmentation. PSSL is short for Pseudo Semantic Segmentation Labels, where the pseudo label |
||||||
|
is computed by the Consensus explanation algorithm. |
||||||
|
|
||||||
|
The PSSL refers to "Distilling Ensemble of Explanations for Weakly-Supervised Pre-Training of Image Segmentation |
||||||
|
Models" (https://arxiv.org/abs/2207.03335). |
||||||
|
|
||||||
|
The Consensus explanation refers to "Cross-Model Consensus of Explanations and Beyond for Image Classification |
||||||
|
Models: An Empirical Study" (https://arxiv.org/abs/2109.00707). |
||||||
|
|
||||||
|
To use this dataset, we need to additionally prepare the original ImageNet dataset, which has the folder structure
||||||
|
as follows: |
||||||
|
|
||||||
|
imagenet_root |
||||||
|
| |
||||||
|
|--train |
||||||
|
| |--n01440764 |
||||||
|
| | |--n01440764_10026.JPEG |
||||||
|
| | |--... |
||||||
|
| |--nxxxxxxxx |
||||||
|
| |--... |
||||||
|
|
||||||
|
where only the "train" set is needed. |
||||||
|
|
||||||
|
The PSSL dataset has the folder structure as follows: |
||||||
|
|
||||||
|
pssl_root |
||||||
|
| |
||||||
|
|--train |
||||||
|
| |--n01440764 |
||||||
|
| | |--n01440764_10026.JPEG_eiseg.npz |
||||||
|
| | |--... |
||||||
|
| |--nxxxxxxxx |
||||||
|
| |--... |
||||||
|
| |
||||||
|
|--imagenet_lsvrc_2015_synsets.txt |
||||||
|
|--train.txt |
||||||
|
|
||||||
|
where "train.txt" and "imagenet_lsvrc_2015_synsets.txt" are included in the PSSL dataset. |
||||||
|
|
||||||
|
Args: |
||||||
|
transforms (list): Transforms for image. |
||||||
|
imagenet_root (str): The path to the original ImageNet dataset. |
||||||
|
pssl_root (str): The path to the PSSL dataset. |
||||||
|
mode (str, optional): Which part of dataset to use. It is one of ('train', 'val', 'test'). Default: 'train'.
||||||
|
edge (bool, optional): Whether to compute edge while training. Default: False. |
||||||
|
""" |
||||||
|
ignore_index = 1001 # 0~999 is target class, 1000 is bg |
||||||
|
NUM_CLASSES = 1001 # consider target class and bg |
||||||
|
|
||||||
|
def __init__(self, |
||||||
|
transforms, |
||||||
|
imagenet_root, |
||||||
|
pssl_root, |
||||||
|
mode='train', |
||||||
|
edge=False): |
||||||
|
mode = mode.lower() |
||||||
|
if mode not in ['train']: |
||||||
|
raise ValueError("mode should be 'train', but got {}.".format(mode)) |
||||||
|
if transforms is None: |
||||||
|
raise ValueError("`transforms` is necessary, but it is None.") |
||||||
|
|
||||||
|
self.transforms = Compose(transforms) |
||||||
|
self.mode = mode |
||||||
|
self.edge = edge |
||||||
|
|
||||||
|
self.num_classes = self.NUM_CLASSES |
||||||
|
self.ignore_index = self.num_classes # 1001 |
||||||
|
self.file_list = [] |
||||||
|
self.class_id_dict = {} |
||||||
|
|
||||||
|
if imagenet_root is None or not os.path.isdir(pssl_root): |
||||||
|
raise ValueError( |
||||||
|
"The dataset is not Found or the folder structure is nonconfoumance." |
||||||
|
) |
||||||
|
|
||||||
|
train_list_file = os.path.join(pssl_root, "train.txt") |
||||||
|
if not os.path.exists(train_list_file): |
||||||
|
raise ValueError("Train list file isn't exists.") |
||||||
|
for idx, line in enumerate(open(train_list_file)): |
||||||
|
# line: train/n04118776/n04118776_45912.JPEG_eiseg.npz |
||||||
|
label_path = line.strip() |
||||||
|
img_path = label_path.split('.JPEG')[0] + '.JPEG' |
||||||
|
label_path = os.path.join(pssl_root, label_path) |
||||||
|
img_path = os.path.join(imagenet_root, img_path) |
||||||
|
self.file_list.append([img_path, label_path]) |
||||||
|
|
||||||
|
# mapping class name to class id. |
||||||
|
class_id_file = os.path.join(pssl_root, |
||||||
|
"imagenet_lsvrc_2015_synsets.txt") |
||||||
|
if not os.path.exists(class_id_file): |
||||||
|
raise ValueError("Class id file isn't exists.") |
||||||
|
for idx, line in enumerate(open(class_id_file)): |
||||||
|
class_name = line.strip() |
||||||
|
self.class_id_dict[class_name] = idx |
||||||
|
|
||||||
|
def __getitem__(self, idx): |
||||||
|
image_path, label_path = self.file_list[idx] |
||||||
|
|
||||||
|
# transform label |
||||||
|
class_name = (image_path.split('/')[-1]).split('_')[0] |
||||||
|
class_id = self.class_id_dict[class_name] |
||||||
|
|
||||||
|
pssl_seg = np.load(label_path)['arr_0'] |
||||||
|
gt_semantic_seg = np.zeros_like(pssl_seg, dtype=np.int64) + 1000 |
||||||
|
# [0, 999] for imagenet classes, 1000 for background, others(-1) will be ignored during training. |
||||||
|
gt_semantic_seg[pssl_seg == 1] = class_id |
||||||
|
|
||||||
|
im, label = self.transforms(im=image_path, label=gt_semantic_seg) |
||||||
|
|
||||||
|
return im, label |
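A minimal usage sketch for the PSSLDataset added above. The import path, transform list, and directory names are illustrative assumptions (they are not part of this diff); both roots must follow the folder structures documented in the class docstring.

import paddlers.models.ppseg.transforms as T
from paddlers.models.ppseg.datasets import PSSLDataset  # assuming the class is exported by the datasets package

# Hypothetical local paths for the ImageNet train set and the PSSL pseudo labels.
dataset = PSSLDataset(
    transforms=[T.RandomHorizontalFlip(), T.Normalize()],
    imagenet_root='data/imagenet_root',
    pssl_root='data/pssl_root',
    mode='train')
im, label = dataset[0]  # transformed image and a 1001-way pseudo label map (class id or 1000 for background)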
@@ -0,0 +1,318 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
||||||
|
# |
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||||
|
# you may not use this file except in compliance with the License. |
||||||
|
# You may obtain a copy of the License at |
||||||
|
# |
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
# |
||||||
|
# Unless required by applicable law or agreed to in writing, software |
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
# See the License for the specific language governing permissions and |
||||||
|
# limitations under the License. |
||||||
|
|
||||||
|
# Code was based on https://github.com/huawei-noah/CV-Backbones/tree/master/ghostnet_pytorch |
||||||
|
|
||||||
|
import math |
||||||
|
import paddle |
||||||
|
from paddle import ParamAttr |
||||||
|
import paddle.nn as nn |
||||||
|
import paddle.nn.functional as F |
||||||
|
from paddle.nn import Conv2D, BatchNorm, AdaptiveAvgPool2D, Linear |
||||||
|
from paddle.regularizer import L2Decay |
||||||
|
from paddle.nn.initializer import Uniform, KaimingNormal |
||||||
|
|
||||||
|
from paddlers.models.ppseg.cvlibs import manager |
||||||
|
from paddlers.models.ppseg.utils import utils, logger |
||||||
|
|
||||||
|
__all__ = ["GhostNet_x0_5", "GhostNet_x1_0", "GhostNet_x1_3"] |
||||||
|
|
||||||
|
|
||||||
|
class ConvBNLayer(nn.Layer): |
||||||
|
def __init__(self, |
||||||
|
in_channels, |
||||||
|
out_channels, |
||||||
|
kernel_size, |
||||||
|
stride=1, |
||||||
|
groups=1, |
||||||
|
act="relu", |
||||||
|
name=None): |
||||||
|
super(ConvBNLayer, self).__init__() |
||||||
|
self._conv = Conv2D( |
||||||
|
in_channels=in_channels, |
||||||
|
out_channels=out_channels, |
||||||
|
kernel_size=kernel_size, |
||||||
|
stride=stride, |
||||||
|
padding=(kernel_size - 1) // 2, |
||||||
|
groups=groups, |
||||||
|
weight_attr=ParamAttr( |
||||||
|
initializer=KaimingNormal(), name=name + "_weights"), |
||||||
|
bias_attr=False) |
||||||
|
bn_name = name + "_bn" |
||||||
|
|
||||||
|
self._batch_norm = BatchNorm( |
||||||
|
num_channels=out_channels, |
||||||
|
act=act, |
||||||
|
param_attr=ParamAttr( |
||||||
|
name=bn_name + "_scale", regularizer=L2Decay(0.0)), |
||||||
|
bias_attr=ParamAttr( |
||||||
|
name=bn_name + "_offset", regularizer=L2Decay(0.0)), |
||||||
|
moving_mean_name=bn_name + "_mean", |
||||||
|
moving_variance_name=bn_name + "_variance") |
||||||
|
|
||||||
|
def forward(self, inputs): |
||||||
|
y = self._conv(inputs) |
||||||
|
y = self._batch_norm(y) |
||||||
|
return y |
||||||
|
|
||||||
|
|
||||||
|
class SEBlock(nn.Layer): |
||||||
|
def __init__(self, num_channels, reduction_ratio=4, name=None): |
||||||
|
super(SEBlock, self).__init__() |
||||||
|
self.pool2d_gap = AdaptiveAvgPool2D(1) |
||||||
|
self._num_channels = num_channels |
||||||
|
stdv = 1.0 / math.sqrt(num_channels * 1.0) |
||||||
|
med_ch = num_channels // reduction_ratio |
||||||
|
self.squeeze = Linear( |
||||||
|
num_channels, |
||||||
|
med_ch, |
||||||
|
weight_attr=ParamAttr( |
||||||
|
initializer=Uniform(-stdv, stdv), name=name + "_1_weights"), |
||||||
|
bias_attr=ParamAttr(name=name + "_1_offset")) |
||||||
|
stdv = 1.0 / math.sqrt(med_ch * 1.0) |
||||||
|
self.excitation = Linear( |
||||||
|
med_ch, |
||||||
|
num_channels, |
||||||
|
weight_attr=ParamAttr( |
||||||
|
initializer=Uniform(-stdv, stdv), name=name + "_2_weights"), |
||||||
|
bias_attr=ParamAttr(name=name + "_2_offset")) |
||||||
|
|
||||||
|
def forward(self, inputs): |
||||||
|
pool = self.pool2d_gap(inputs) |
||||||
|
pool = paddle.squeeze(pool, axis=[2, 3]) |
||||||
|
squeeze = self.squeeze(pool) |
||||||
|
squeeze = F.relu(squeeze) |
||||||
|
excitation = self.excitation(squeeze) |
||||||
|
excitation = paddle.clip(x=excitation, min=0, max=1) |
||||||
|
excitation = paddle.unsqueeze(excitation, axis=[2, 3]) |
||||||
|
out = paddle.multiply(inputs, excitation) |
||||||
|
return out |
||||||
|
|
||||||
|
|
||||||
|
class GhostModule(nn.Layer): |
||||||
|
def __init__(self, |
||||||
|
in_channels, |
||||||
|
output_channels, |
||||||
|
kernel_size=1, |
||||||
|
ratio=2, |
||||||
|
dw_size=3, |
||||||
|
stride=1, |
||||||
|
relu=True, |
||||||
|
name=None): |
||||||
|
super(GhostModule, self).__init__() |
||||||
|
init_channels = int(math.ceil(output_channels / ratio)) |
||||||
|
new_channels = int(init_channels * (ratio - 1)) |
||||||
|
self.primary_conv = ConvBNLayer( |
||||||
|
in_channels=in_channels, |
||||||
|
out_channels=init_channels, |
||||||
|
kernel_size=kernel_size, |
||||||
|
stride=stride, |
||||||
|
groups=1, |
||||||
|
act="relu" if relu else None, |
||||||
|
name=name + "_primary_conv") |
||||||
|
self.cheap_operation = ConvBNLayer( |
||||||
|
in_channels=init_channels, |
||||||
|
out_channels=new_channels, |
||||||
|
kernel_size=dw_size, |
||||||
|
stride=1, |
||||||
|
groups=init_channels, |
||||||
|
act="relu" if relu else None, |
||||||
|
name=name + "_cheap_operation") |
||||||
|
|
||||||
|
def forward(self, inputs): |
||||||
|
x = self.primary_conv(inputs) |
||||||
|
y = self.cheap_operation(x) |
||||||
|
out = paddle.concat([x, y], axis=1) |
||||||
|
return out |
||||||
|
|
||||||
|
|
||||||
|
class GhostBottleneck(nn.Layer): |
||||||
|
def __init__(self, |
||||||
|
in_channels, |
||||||
|
hidden_dim, |
||||||
|
output_channels, |
||||||
|
kernel_size, |
||||||
|
stride, |
||||||
|
use_se, |
||||||
|
name=None): |
||||||
|
super(GhostBottleneck, self).__init__() |
||||||
|
self._stride = stride |
||||||
|
self._use_se = use_se |
||||||
|
self._num_channels = in_channels |
||||||
|
self._output_channels = output_channels |
||||||
|
self.ghost_module_1 = GhostModule( |
||||||
|
in_channels=in_channels, |
||||||
|
output_channels=hidden_dim, |
||||||
|
kernel_size=1, |
||||||
|
stride=1, |
||||||
|
relu=True, |
||||||
|
name=name + "_ghost_module_1") |
||||||
|
if stride == 2: |
||||||
|
self.depthwise_conv = ConvBNLayer( |
||||||
|
in_channels=hidden_dim, |
||||||
|
out_channels=hidden_dim, |
||||||
|
kernel_size=kernel_size, |
||||||
|
stride=stride, |
||||||
|
groups=hidden_dim, |
||||||
|
act=None, |
||||||
|
name=name + |
||||||
|
"_depthwise_depthwise" # looks strange due to an old typo, will be fixed later. |
||||||
|
) |
||||||
|
if use_se: |
||||||
|
self.se_block = SEBlock(num_channels=hidden_dim, name=name + "_se") |
||||||
|
self.ghost_module_2 = GhostModule( |
||||||
|
in_channels=hidden_dim, |
||||||
|
output_channels=output_channels, |
||||||
|
kernel_size=1, |
||||||
|
relu=False, |
||||||
|
name=name + "_ghost_module_2") |
||||||
|
if stride != 1 or in_channels != output_channels: |
||||||
|
self.shortcut_depthwise = ConvBNLayer( |
||||||
|
in_channels=in_channels, |
||||||
|
out_channels=in_channels, |
||||||
|
kernel_size=kernel_size, |
||||||
|
stride=stride, |
||||||
|
groups=in_channels, |
||||||
|
act=None, |
||||||
|
name=name + |
||||||
|
"_shortcut_depthwise_depthwise" # looks strange due to an old typo, will be fixed later. |
||||||
|
) |
||||||
|
self.shortcut_conv = ConvBNLayer( |
||||||
|
in_channels=in_channels, |
||||||
|
out_channels=output_channels, |
||||||
|
kernel_size=1, |
||||||
|
stride=1, |
||||||
|
groups=1, |
||||||
|
act=None, |
||||||
|
name=name + "_shortcut_conv") |
||||||
|
|
||||||
|
def forward(self, inputs): |
||||||
|
x = self.ghost_module_1(inputs) |
||||||
|
if self._stride == 2: |
||||||
|
x = self.depthwise_conv(x) |
||||||
|
if self._use_se: |
||||||
|
x = self.se_block(x) |
||||||
|
x = self.ghost_module_2(x) |
||||||
|
if self._stride == 1 and self._num_channels == self._output_channels: |
||||||
|
shortcut = inputs |
||||||
|
else: |
||||||
|
shortcut = self.shortcut_depthwise(inputs) |
||||||
|
shortcut = self.shortcut_conv(shortcut) |
||||||
|
return paddle.add(x=x, y=shortcut) |
||||||
|
|
||||||
|
|
||||||
|
class GhostNet(nn.Layer): |
||||||
|
def __init__(self, scale, in_channels=3, pretrained=None): |
||||||
|
super(GhostNet, self).__init__() |
||||||
|
self.cfgs = [ |
||||||
|
# k, t, c, SE, s |
||||||
|
[3, 16, 16, 0, 1], |
||||||
|
[3, 48, 24, 0, 2], |
||||||
|
[3, 72, 24, 0, 1], # x4 |
||||||
|
[5, 72, 40, 1, 2], |
||||||
|
[5, 120, 40, 1, 1], # x8 |
||||||
|
[3, 240, 80, 0, 2], |
||||||
|
[3, 200, 80, 0, 1], |
||||||
|
[3, 184, 80, 0, 1], |
||||||
|
[3, 184, 80, 0, 1], |
||||||
|
[3, 480, 112, 1, 1], |
||||||
|
[3, 672, 112, 1, 1], # x16 |
||||||
|
[5, 672, 160, 1, 2], |
||||||
|
[5, 960, 160, 0, 1], |
||||||
|
[5, 960, 160, 1, 1], |
||||||
|
[5, 960, 160, 0, 1], |
||||||
|
[5, 960, 160, 1, 1] # x32 |
||||||
|
] |
||||||
|
self.scale = scale |
||||||
|
self.pretrained = pretrained |
||||||
|
|
||||||
|
output_channels = int(self._make_divisible(16 * self.scale, 4)) |
||||||
|
self.conv1 = ConvBNLayer( |
||||||
|
in_channels=in_channels, |
||||||
|
out_channels=output_channels, |
||||||
|
kernel_size=3, |
||||||
|
stride=2, |
||||||
|
groups=1, |
||||||
|
act="relu", |
||||||
|
name="conv1") |
||||||
|
|
||||||
|
# build inverted residual blocks |
||||||
|
self.out_index = [2, 4, 10, 15] |
||||||
|
self.feat_channels = [] |
||||||
|
self.ghost_bottleneck_list = [] |
||||||
|
for idx, (k, exp_size, c, use_se, s) in enumerate(self.cfgs): |
||||||
|
in_channels = output_channels |
||||||
|
output_channels = int(self._make_divisible(c * self.scale, 4)) |
||||||
|
hidden_dim = int(self._make_divisible(exp_size * self.scale, 4)) |
||||||
|
ghost_bottleneck = self.add_sublayer( |
||||||
|
name="_ghostbottleneck_" + str(idx), |
||||||
|
sublayer=GhostBottleneck( |
||||||
|
in_channels=in_channels, |
||||||
|
hidden_dim=hidden_dim, |
||||||
|
output_channels=output_channels, |
||||||
|
kernel_size=k, |
||||||
|
stride=s, |
||||||
|
use_se=use_se, |
||||||
|
name="_ghostbottleneck_" + str(idx))) |
||||||
|
self.ghost_bottleneck_list.append(ghost_bottleneck) |
||||||
|
if idx in self.out_index: |
||||||
|
self.feat_channels.append(output_channels) |
||||||
|
|
||||||
|
self.init_weight() |
||||||
|
|
||||||
|
def init_weight(self): |
||||||
|
if self.pretrained is not None: |
||||||
|
utils.load_entire_model(self, self.pretrained) |
||||||
|
|
||||||
|
def forward(self, inputs): |
||||||
|
feat_list = [] |
||||||
|
x = self.conv1(inputs) |
||||||
|
for idx, ghost_bottleneck in enumerate(self.ghost_bottleneck_list): |
||||||
|
x = ghost_bottleneck(x) |
||||||
|
if idx in self.out_index: |
||||||
|
feat_list.append(x) |
||||||
|
return feat_list |
||||||
|
|
||||||
|
def _make_divisible(self, v, divisor, min_value=None): |
||||||
|
""" |
||||||
|
This function is taken from the original tf repo. |
||||||
|
It ensures that all layers have a channel number that is divisible by 8 |
||||||
|
It can be seen here: |
||||||
|
https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py |
||||||
|
""" |
||||||
|
if min_value is None: |
||||||
|
min_value = divisor |
||||||
|
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) |
||||||
|
# Make sure that round down does not go down by more than 10%. |
||||||
|
if new_v < 0.9 * v: |
||||||
|
new_v += divisor |
||||||
|
return new_v |
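A quick hand-worked check of the rounding above (plain arithmetic, not part of the diff), shown for divisor=4 as used when building GhostNet channels:

# scale 1.3, base width 16: 16 * 1.3 = 20.8
#   int(20.8 + 4 / 2) // 4 * 4 = 22 // 4 * 4 = 20, and 20 >= 0.9 * 20.8, so 20 channels
# scale 1.3, expansion size 112: 112 * 1.3 = 145.6
#   int(145.6 + 2) // 4 * 4 = 147 // 4 * 4 = 144, and 144 >= 0.9 * 145.6, so 144 channels
# only if the rounded value fell below 90% of v would the divisor be added back once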
||||||
|
|
||||||
|
|
||||||
|
@manager.BACKBONES.add_component |
||||||
|
def GhostNet_x0_5(**kwargs): |
||||||
|
model = GhostNet(scale=0.5, **kwargs) |
||||||
|
return model |
||||||
|
|
||||||
|
|
||||||
|
@manager.BACKBONES.add_component |
||||||
|
def GhostNet_x1_0(**kwargs): |
||||||
|
model = GhostNet(scale=1.0, **kwargs) |
||||||
|
return model |
||||||
|
|
||||||
|
|
||||||
|
@manager.BACKBONES.add_component |
||||||
|
def GhostNet_x1_3(**kwargs): |
||||||
|
model = GhostNet(scale=1.3, **kwargs) |
||||||
|
return model |
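A short sketch of using the GhostNet backbone registered above (the dummy input is illustrative; the feature pyramid behaviour follows from out_index and the stride comments in self.cfgs):

import paddle
backbone = GhostNet_x1_0()                        # scale 1.0, in_channels=3, no pretrained weights
feats = backbone(paddle.randn([1, 3, 512, 512]))  # features taken after blocks 2, 4, 10 and 15
print([f.shape for f in feats])                   # four levels at strides 4/8/16/32
print(backbone.feat_channels)                     # channel counts matching those levels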
@@ -0,0 +1,974 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. |
||||||
|
# |
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||||
|
# you may not use this file except in compliance with the License. |
||||||
|
# You may obtain a copy of the License at |
||||||
|
# |
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
# |
||||||
|
# Unless required by applicable law or agreed to in writing, software |
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
# See the License for the specific language governing permissions and |
||||||
|
# limitations under the License. |
||||||
|
""" |
||||||
|
This code is based on |
||||||
|
https://github.com/HRNet/Lite-HRNet/blob/hrnet/models/backbones/litehrnet.py |
||||||
|
""" |
||||||
|
|
||||||
|
import paddle |
||||||
|
import paddle.nn as nn |
||||||
|
import paddle.nn.functional as F |
||||||
|
from numbers import Integral |
||||||
|
from paddle import ParamAttr |
||||||
|
from paddle.regularizer import L2Decay |
||||||
|
from paddle.nn.initializer import Normal, Constant |
||||||
|
|
||||||
|
from paddlers.models.ppseg.cvlibs import manager |
||||||
|
from paddlers.models.ppseg import utils |
||||||
|
|
||||||
|
__all__ = [ |
||||||
|
"Lite_HRNet_18", "Lite_HRNet_30", "Lite_HRNet_naive", |
||||||
|
"Lite_HRNet_wider_naive", "LiteHRNet" |
||||||
|
] |
||||||
|
|
||||||
|
|
||||||
|
def Conv2d(in_channels, |
||||||
|
out_channels, |
||||||
|
kernel_size, |
||||||
|
stride=1, |
||||||
|
padding=0, |
||||||
|
dilation=1, |
||||||
|
groups=1, |
||||||
|
bias=True, |
||||||
|
weight_init=Normal(std=0.001), |
||||||
|
bias_init=Constant(0.)): |
||||||
|
weight_attr = paddle.framework.ParamAttr(initializer=weight_init) |
||||||
|
if bias: |
||||||
|
bias_attr = paddle.framework.ParamAttr(initializer=bias_init) |
||||||
|
else: |
||||||
|
bias_attr = False |
||||||
|
conv = nn.Conv2D( |
||||||
|
in_channels, |
||||||
|
out_channels, |
||||||
|
kernel_size, |
||||||
|
stride, |
||||||
|
padding, |
||||||
|
dilation, |
||||||
|
groups, |
||||||
|
weight_attr=weight_attr, |
||||||
|
bias_attr=bias_attr) |
||||||
|
return conv |
||||||
|
|
||||||
|
|
||||||
|
def channel_shuffle(x, groups): |
||||||
|
x_shape = paddle.shape(x) |
||||||
|
batch_size, height, width = x_shape[0], x_shape[2], x_shape[3] |
||||||
|
num_channels = x.shape[1] |
||||||
|
channels_per_group = num_channels // groups |
||||||
|
|
||||||
|
x = paddle.reshape( |
||||||
|
x=x, shape=[batch_size, groups, channels_per_group, height, width]) |
||||||
|
x = paddle.transpose(x=x, perm=[0, 2, 1, 3, 4]) |
||||||
|
x = paddle.reshape(x=x, shape=[batch_size, num_channels, height, width]) |
||||||
|
|
||||||
|
return x |
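A tiny illustration of what channel_shuffle does (not part of the diff): with 8 channels and groups=2, the channels of the two halves are interleaved.

import paddle
x = paddle.arange(8, dtype='float32').reshape([1, 8, 1, 1])
y = channel_shuffle(x, groups=2)
# channel order changes from [0, 1, 2, 3, 4, 5, 6, 7] to [0, 4, 1, 5, 2, 6, 3, 7]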
||||||
|
|
||||||
|
|
||||||
|
class ConvNormLayer(nn.Layer): |
||||||
|
def __init__(self, |
||||||
|
ch_in, |
||||||
|
ch_out, |
||||||
|
filter_size, |
||||||
|
stride=1, |
||||||
|
groups=1, |
||||||
|
norm_type=None, |
||||||
|
norm_groups=32, |
||||||
|
norm_decay=0., |
||||||
|
freeze_norm=False, |
||||||
|
act=None): |
||||||
|
super(ConvNormLayer, self).__init__() |
||||||
|
self.act = act |
||||||
|
norm_lr = 0. if freeze_norm else 1. |
||||||
|
if norm_type is not None: |
||||||
|
assert norm_type in ['bn', 'sync_bn', 'gn'], \ |
||||||
|
"norm_type should be one of ['bn', 'sync_bn', 'gn'], but got {}".format(norm_type) |
||||||
|
param_attr = ParamAttr( |
||||||
|
initializer=Constant(1.0), |
||||||
|
learning_rate=norm_lr, |
||||||
|
regularizer=L2Decay(norm_decay), ) |
||||||
|
bias_attr = ParamAttr( |
||||||
|
learning_rate=norm_lr, regularizer=L2Decay(norm_decay)) |
||||||
|
global_stats = True if freeze_norm else None |
||||||
|
if norm_type in ['bn', 'sync_bn']: |
||||||
|
self.norm = nn.BatchNorm2D( |
||||||
|
ch_out, |
||||||
|
weight_attr=param_attr, |
||||||
|
bias_attr=bias_attr, |
||||||
|
use_global_stats=global_stats, ) |
||||||
|
elif norm_type == 'gn': |
||||||
|
self.norm = nn.GroupNorm( |
||||||
|
num_groups=norm_groups, |
||||||
|
num_channels=ch_out, |
||||||
|
weight_attr=param_attr, |
||||||
|
bias_attr=bias_attr) |
||||||
|
norm_params = self.norm.parameters() |
||||||
|
if freeze_norm: |
||||||
|
for param in norm_params: |
||||||
|
param.stop_gradient = True |
||||||
|
conv_bias_attr = False |
||||||
|
else: |
||||||
|
conv_bias_attr = True |
||||||
|
self.norm = None |
||||||
|
|
||||||
|
self.conv = nn.Conv2D( |
||||||
|
in_channels=ch_in, |
||||||
|
out_channels=ch_out, |
||||||
|
kernel_size=filter_size, |
||||||
|
stride=stride, |
||||||
|
padding=(filter_size - 1) // 2, |
||||||
|
groups=groups, |
||||||
|
weight_attr=ParamAttr(initializer=Normal( |
||||||
|
mean=0., std=0.001)), |
||||||
|
bias_attr=conv_bias_attr) |
||||||
|
|
||||||
|
def forward(self, inputs): |
||||||
|
out = self.conv(inputs) |
||||||
|
if self.norm is not None: |
||||||
|
out = self.norm(out) |
||||||
|
|
||||||
|
if self.act == 'relu': |
||||||
|
out = F.relu(out) |
||||||
|
elif self.act == 'sigmoid': |
||||||
|
out = F.sigmoid(out) |
||||||
|
return out |
||||||
|
|
||||||
|
|
||||||
|
class DepthWiseSeparableConvNormLayer(nn.Layer): |
||||||
|
def __init__(self, |
||||||
|
ch_in, |
||||||
|
ch_out, |
||||||
|
filter_size, |
||||||
|
stride=1, |
||||||
|
dw_norm_type=None, |
||||||
|
pw_norm_type=None, |
||||||
|
norm_decay=0., |
||||||
|
freeze_norm=False, |
||||||
|
dw_act=None, |
||||||
|
pw_act=None): |
||||||
|
super(DepthWiseSeparableConvNormLayer, self).__init__() |
||||||
|
self.depthwise_conv = ConvNormLayer( |
||||||
|
ch_in=ch_in, |
||||||
|
ch_out=ch_in, |
||||||
|
filter_size=filter_size, |
||||||
|
stride=stride, |
||||||
|
groups=ch_in, |
||||||
|
norm_type=dw_norm_type, |
||||||
|
act=dw_act, |
||||||
|
norm_decay=norm_decay, |
||||||
|
freeze_norm=freeze_norm, ) |
||||||
|
self.pointwise_conv = ConvNormLayer( |
||||||
|
ch_in=ch_in, |
||||||
|
ch_out=ch_out, |
||||||
|
filter_size=1, |
||||||
|
stride=1, |
||||||
|
norm_type=pw_norm_type, |
||||||
|
act=pw_act, |
||||||
|
norm_decay=norm_decay, |
||||||
|
freeze_norm=freeze_norm, ) |
||||||
|
|
||||||
|
def forward(self, x): |
||||||
|
x = self.depthwise_conv(x) |
||||||
|
x = self.pointwise_conv(x) |
||||||
|
return x |
||||||
|
|
||||||
|
|
||||||
|
class CrossResolutionWeightingModule(nn.Layer): |
||||||
|
def __init__(self, |
||||||
|
channels, |
||||||
|
ratio=16, |
||||||
|
norm_type='bn', |
||||||
|
freeze_norm=False, |
||||||
|
norm_decay=0.): |
||||||
|
super(CrossResolutionWeightingModule, self).__init__() |
||||||
|
self.channels = channels |
||||||
|
total_channel = sum(channels) |
||||||
|
self.conv1 = ConvNormLayer( |
||||||
|
ch_in=total_channel, |
||||||
|
ch_out=total_channel // ratio, |
||||||
|
filter_size=1, |
||||||
|
stride=1, |
||||||
|
norm_type=norm_type, |
||||||
|
act='relu', |
||||||
|
freeze_norm=freeze_norm, |
||||||
|
norm_decay=norm_decay) |
||||||
|
self.conv2 = ConvNormLayer( |
||||||
|
ch_in=total_channel // ratio, |
||||||
|
ch_out=total_channel, |
||||||
|
filter_size=1, |
||||||
|
stride=1, |
||||||
|
norm_type=norm_type, |
||||||
|
act='sigmoid', |
||||||
|
freeze_norm=freeze_norm, |
||||||
|
norm_decay=norm_decay) |
||||||
|
|
||||||
|
def forward(self, x): |
||||||
|
out = [] |
||||||
|
for idx, xi in enumerate(x[:-1]): |
||||||
|
kernel_size = stride = pow(2, len(x) - idx - 1) |
||||||
|
xi = F.avg_pool2d(xi, kernel_size=kernel_size, stride=stride) |
||||||
|
out.append(xi) |
||||||
|
out.append(x[-1]) |
||||||
|
|
||||||
|
out = paddle.concat(out, 1) |
||||||
|
out = self.conv1(out) |
||||||
|
out = self.conv2(out) |
||||||
|
out = paddle.split(out, self.channels, 1) |
||||||
|
out = [ |
||||||
|
s * F.interpolate( |
||||||
|
a, paddle.shape(s)[-2:], mode='nearest') for s, a in zip(x, out) |
||||||
|
] |
||||||
|
return out |
||||||
|
|
||||||
|
|
||||||
|
class SpatialWeightingModule(nn.Layer): |
||||||
|
def __init__(self, in_channel, ratio=16, freeze_norm=False, norm_decay=0.): |
||||||
|
super(SpatialWeightingModule, self).__init__() |
||||||
|
self.global_avgpooling = nn.AdaptiveAvgPool2D(1) |
||||||
|
self.conv1 = ConvNormLayer( |
||||||
|
ch_in=in_channel, |
||||||
|
ch_out=in_channel // ratio, |
||||||
|
filter_size=1, |
||||||
|
stride=1, |
||||||
|
act='relu', |
||||||
|
freeze_norm=freeze_norm, |
||||||
|
norm_decay=norm_decay) |
||||||
|
self.conv2 = ConvNormLayer( |
||||||
|
ch_in=in_channel // ratio, |
||||||
|
ch_out=in_channel, |
||||||
|
filter_size=1, |
||||||
|
stride=1, |
||||||
|
act='sigmoid', |
||||||
|
freeze_norm=freeze_norm, |
||||||
|
norm_decay=norm_decay) |
||||||
|
|
||||||
|
def forward(self, x): |
||||||
|
out = self.global_avgpooling(x) |
||||||
|
out = self.conv1(out) |
||||||
|
out = self.conv2(out) |
||||||
|
return x * out |
||||||
|
|
||||||
|
|
||||||
|
class ConditionalChannelWeightingBlock(nn.Layer): |
||||||
|
def __init__(self, |
||||||
|
in_channels, |
||||||
|
stride, |
||||||
|
reduce_ratio, |
||||||
|
norm_type='bn', |
||||||
|
freeze_norm=False, |
||||||
|
norm_decay=0.): |
||||||
|
super(ConditionalChannelWeightingBlock, self).__init__() |
||||||
|
assert stride in [1, 2] |
||||||
|
branch_channels = [channel // 2 for channel in in_channels] |
||||||
|
|
||||||
|
self.cross_resolution_weighting = CrossResolutionWeightingModule( |
||||||
|
branch_channels, |
||||||
|
ratio=reduce_ratio, |
||||||
|
norm_type=norm_type, |
||||||
|
freeze_norm=freeze_norm, |
||||||
|
norm_decay=norm_decay) |
||||||
|
self.depthwise_convs = nn.LayerList([ |
||||||
|
ConvNormLayer( |
||||||
|
channel, |
||||||
|
channel, |
||||||
|
filter_size=3, |
||||||
|
stride=stride, |
||||||
|
groups=channel, |
||||||
|
norm_type=norm_type, |
||||||
|
freeze_norm=freeze_norm, |
||||||
|
norm_decay=norm_decay) for channel in branch_channels |
||||||
|
]) |
||||||
|
|
||||||
|
self.spatial_weighting = nn.LayerList([ |
||||||
|
SpatialWeightingModule( |
||||||
|
channel, |
||||||
|
ratio=4, |
||||||
|
freeze_norm=freeze_norm, |
||||||
|
norm_decay=norm_decay) for channel in branch_channels |
||||||
|
]) |
||||||
|
|
||||||
|
def forward(self, x): |
||||||
|
x = [s.chunk(2, axis=1) for s in x] |
||||||
|
x1 = [s[0] for s in x] |
||||||
|
x2 = [s[1] for s in x] |
||||||
|
|
||||||
|
x2 = self.cross_resolution_weighting(x2) |
||||||
|
x2 = [dw(s) for s, dw in zip(x2, self.depthwise_convs)] |
||||||
|
x2 = [sw(s) for s, sw in zip(x2, self.spatial_weighting)] |
||||||
|
|
||||||
|
out = [paddle.concat([s1, s2], axis=1) for s1, s2 in zip(x1, x2)] |
||||||
|
out = [channel_shuffle(s, groups=2) for s in out] |
||||||
|
return out |
||||||
|
|
||||||
|
|
||||||
|
class ShuffleUnit(nn.Layer): |
||||||
|
def __init__(self, |
||||||
|
in_channel, |
||||||
|
out_channel, |
||||||
|
stride, |
||||||
|
norm_type='bn', |
||||||
|
freeze_norm=False, |
||||||
|
norm_decay=0.): |
||||||
|
super(ShuffleUnit, self).__init__() |
||||||
|
branch_channel = out_channel // 2 |
||||||
|
self.stride = stride |
||||||
|
if self.stride == 1: |
||||||
|
assert in_channel == branch_channel * 2, \ |
||||||
|
"when stride=1, in_channel {} should equal to branch_channel*2 {}".format(in_channel, branch_channel * 2) |
||||||
|
if stride > 1: |
||||||
|
self.branch1 = nn.Sequential( |
||||||
|
ConvNormLayer( |
||||||
|
ch_in=in_channel, |
||||||
|
ch_out=in_channel, |
||||||
|
filter_size=3, |
||||||
|
stride=self.stride, |
||||||
|
groups=in_channel, |
||||||
|
norm_type=norm_type, |
||||||
|
freeze_norm=freeze_norm, |
||||||
|
norm_decay=norm_decay), |
||||||
|
ConvNormLayer( |
||||||
|
ch_in=in_channel, |
||||||
|
ch_out=branch_channel, |
||||||
|
filter_size=1, |
||||||
|
stride=1, |
||||||
|
norm_type=norm_type, |
||||||
|
act='relu', |
||||||
|
freeze_norm=freeze_norm, |
||||||
|
norm_decay=norm_decay), ) |
||||||
|
self.branch2 = nn.Sequential( |
||||||
|
ConvNormLayer( |
||||||
|
ch_in=branch_channel if stride == 1 else in_channel, |
||||||
|
ch_out=branch_channel, |
||||||
|
filter_size=1, |
||||||
|
stride=1, |
||||||
|
norm_type=norm_type, |
||||||
|
act='relu', |
||||||
|
freeze_norm=freeze_norm, |
||||||
|
norm_decay=norm_decay), |
||||||
|
ConvNormLayer( |
||||||
|
ch_in=branch_channel, |
||||||
|
ch_out=branch_channel, |
||||||
|
filter_size=3, |
||||||
|
stride=self.stride, |
||||||
|
groups=branch_channel, |
||||||
|
norm_type=norm_type, |
||||||
|
freeze_norm=freeze_norm, |
||||||
|
norm_decay=norm_decay), |
||||||
|
ConvNormLayer( |
||||||
|
ch_in=branch_channel, |
||||||
|
ch_out=branch_channel, |
||||||
|
filter_size=1, |
||||||
|
stride=1, |
||||||
|
norm_type=norm_type, |
||||||
|
act='relu', |
||||||
|
freeze_norm=freeze_norm, |
||||||
|
norm_decay=norm_decay), ) |
||||||
|
|
||||||
|
def forward(self, x): |
||||||
|
if self.stride > 1: |
||||||
|
x1 = self.branch1(x) |
||||||
|
x2 = self.branch2(x) |
||||||
|
else: |
||||||
|
x1, x2 = x.chunk(2, axis=1) |
||||||
|
x2 = self.branch2(x2) |
||||||
|
out = paddle.concat([x1, x2], axis=1) |
||||||
|
out = channel_shuffle(out, groups=2) |
||||||
|
return out |
||||||
|
|
||||||
|
|
||||||
|
class IterativeHead(nn.Layer): |
||||||
|
def __init__(self, |
||||||
|
in_channels, |
||||||
|
norm_type='bn', |
||||||
|
freeze_norm=False, |
||||||
|
norm_decay=0.): |
||||||
|
super(IterativeHead, self).__init__() |
||||||
|
num_branches = len(in_channels) |
||||||
|
self.in_channels = in_channels[::-1] |
||||||
|
|
||||||
|
projects = [] |
||||||
|
for i in range(num_branches): |
||||||
|
if i != num_branches - 1: |
||||||
|
projects.append( |
||||||
|
DepthWiseSeparableConvNormLayer( |
||||||
|
ch_in=self.in_channels[i], |
||||||
|
ch_out=self.in_channels[i + 1], |
||||||
|
filter_size=3, |
||||||
|
stride=1, |
||||||
|
dw_act=None, |
||||||
|
pw_act='relu', |
||||||
|
dw_norm_type=norm_type, |
||||||
|
pw_norm_type=norm_type, |
||||||
|
freeze_norm=freeze_norm, |
||||||
|
norm_decay=norm_decay)) |
||||||
|
else: |
||||||
|
projects.append( |
||||||
|
DepthWiseSeparableConvNormLayer( |
||||||
|
ch_in=self.in_channels[i], |
||||||
|
ch_out=self.in_channels[i], |
||||||
|
filter_size=3, |
||||||
|
stride=1, |
||||||
|
dw_act=None, |
||||||
|
pw_act='relu', |
||||||
|
dw_norm_type=norm_type, |
||||||
|
pw_norm_type=norm_type, |
||||||
|
freeze_norm=freeze_norm, |
||||||
|
norm_decay=norm_decay)) |
||||||
|
self.projects = nn.LayerList(projects) |
||||||
|
|
||||||
|
def forward(self, x): |
||||||
|
x = x[::-1] |
||||||
|
y = [] |
||||||
|
last_x = None |
||||||
|
for i, s in enumerate(x): |
||||||
|
if last_x is not None: |
||||||
|
last_x = F.interpolate( |
||||||
|
last_x, |
||||||
|
size=paddle.shape(s)[-2:], |
||||||
|
mode='bilinear', |
||||||
|
align_corners=True) |
||||||
|
s = s + last_x |
||||||
|
s = self.projects[i](s) |
||||||
|
y.append(s) |
||||||
|
last_x = s |
||||||
|
|
||||||
|
return y[::-1] |
||||||
|
|
||||||
|
|
||||||
|
class Stem(nn.Layer): |
||||||
|
def __init__(self, |
||||||
|
in_channel, |
||||||
|
stem_channel, |
||||||
|
out_channel, |
||||||
|
expand_ratio, |
||||||
|
norm_type='bn', |
||||||
|
freeze_norm=False, |
||||||
|
norm_decay=0.): |
||||||
|
super(Stem, self).__init__() |
||||||
|
self.conv1 = ConvNormLayer( |
||||||
|
in_channel, |
||||||
|
stem_channel, |
||||||
|
filter_size=3, |
||||||
|
stride=2, |
||||||
|
norm_type=norm_type, |
||||||
|
act='relu', |
||||||
|
freeze_norm=freeze_norm, |
||||||
|
norm_decay=norm_decay) |
||||||
|
mid_channel = int(round(stem_channel * expand_ratio)) |
||||||
|
branch_channel = stem_channel // 2 |
||||||
|
if stem_channel == out_channel: |
||||||
|
inc_channel = out_channel - branch_channel |
||||||
|
else: |
||||||
|
inc_channel = out_channel - stem_channel |
||||||
|
self.branch1 = nn.Sequential( |
||||||
|
ConvNormLayer( |
||||||
|
ch_in=branch_channel, |
||||||
|
ch_out=branch_channel, |
||||||
|
filter_size=3, |
||||||
|
stride=2, |
||||||
|
groups=branch_channel, |
||||||
|
norm_type=norm_type, |
||||||
|
freeze_norm=freeze_norm, |
||||||
|
norm_decay=norm_decay), |
||||||
|
ConvNormLayer( |
||||||
|
ch_in=branch_channel, |
||||||
|
ch_out=inc_channel, |
||||||
|
filter_size=1, |
||||||
|
stride=1, |
||||||
|
norm_type=norm_type, |
||||||
|
act='relu', |
||||||
|
freeze_norm=freeze_norm, |
||||||
|
norm_decay=norm_decay), ) |
||||||
|
self.expand_conv = ConvNormLayer( |
||||||
|
ch_in=branch_channel, |
||||||
|
ch_out=mid_channel, |
||||||
|
filter_size=1, |
||||||
|
stride=1, |
||||||
|
norm_type=norm_type, |
||||||
|
act='relu', |
||||||
|
freeze_norm=freeze_norm, |
||||||
|
norm_decay=norm_decay) |
||||||
|
self.depthwise_conv = ConvNormLayer( |
||||||
|
ch_in=mid_channel, |
||||||
|
ch_out=mid_channel, |
||||||
|
filter_size=3, |
||||||
|
stride=2, |
||||||
|
groups=mid_channel, |
||||||
|
norm_type=norm_type, |
||||||
|
freeze_norm=freeze_norm, |
||||||
|
norm_decay=norm_decay) |
||||||
|
self.linear_conv = ConvNormLayer( |
||||||
|
ch_in=mid_channel, |
||||||
|
ch_out=branch_channel |
||||||
|
if stem_channel == out_channel else stem_channel, |
||||||
|
filter_size=1, |
||||||
|
stride=1, |
||||||
|
norm_type=norm_type, |
||||||
|
act='relu', |
||||||
|
freeze_norm=freeze_norm, |
||||||
|
norm_decay=norm_decay) |
||||||
|
|
||||||
|
def forward(self, x): |
||||||
|
x = self.conv1(x) |
||||||
|
x1, x2 = x.chunk(2, axis=1) |
||||||
|
x1 = self.branch1(x1) |
||||||
|
x2 = self.expand_conv(x2) |
||||||
|
x2 = self.depthwise_conv(x2) |
||||||
|
x2 = self.linear_conv(x2) |
||||||
|
out = paddle.concat([x1, x2], axis=1) |
||||||
|
out = channel_shuffle(out, groups=2) |
||||||
|
|
||||||
|
return out |
||||||
|
|
||||||
|
|
||||||
|
class LiteHRNetModule(nn.Layer): |
||||||
|
def __init__(self, |
||||||
|
num_branches, |
||||||
|
num_blocks, |
||||||
|
in_channels, |
||||||
|
reduce_ratio, |
||||||
|
module_type, |
||||||
|
multiscale_output=False, |
||||||
|
with_fuse=True, |
||||||
|
norm_type='bn', |
||||||
|
freeze_norm=False, |
||||||
|
norm_decay=0.): |
||||||
|
super(LiteHRNetModule, self).__init__() |
||||||
|
assert num_branches == len(in_channels),\ |
||||||
|
"num_branches {} should equal to num_in_channels {}".format(num_branches, len(in_channels)) |
||||||
|
assert module_type in [ |
||||||
|
'LITE', 'NAIVE' |
||||||
|
], "module_type should be one of ['LITE', 'NAIVE']" |
||||||
|
self.num_branches = num_branches |
||||||
|
self.in_channels = in_channels |
||||||
|
self.multiscale_output = multiscale_output |
||||||
|
self.with_fuse = with_fuse |
||||||
|
self.norm_type = 'bn' |
||||||
|
self.module_type = module_type |
||||||
|
|
||||||
|
if self.module_type == 'LITE': |
||||||
|
self.layers = self._make_weighting_blocks( |
||||||
|
num_blocks, |
||||||
|
reduce_ratio, |
||||||
|
freeze_norm=freeze_norm, |
||||||
|
norm_decay=norm_decay) |
||||||
|
elif self.module_type == 'NAIVE': |
||||||
|
self.layers = self._make_naive_branches( |
||||||
|
num_branches, |
||||||
|
num_blocks, |
||||||
|
freeze_norm=freeze_norm, |
||||||
|
norm_decay=norm_decay) |
||||||
|
|
||||||
|
if self.with_fuse: |
||||||
|
self.fuse_layers = self._make_fuse_layers( |
||||||
|
freeze_norm=freeze_norm, norm_decay=norm_decay) |
||||||
|
self.relu = nn.ReLU() |
||||||
|
|
||||||
|
def _make_weighting_blocks(self, |
||||||
|
num_blocks, |
||||||
|
reduce_ratio, |
||||||
|
stride=1, |
||||||
|
freeze_norm=False, |
||||||
|
norm_decay=0.): |
||||||
|
layers = [] |
||||||
|
for i in range(num_blocks): |
||||||
|
layers.append( |
||||||
|
ConditionalChannelWeightingBlock( |
||||||
|
self.in_channels, |
||||||
|
stride=stride, |
||||||
|
reduce_ratio=reduce_ratio, |
||||||
|
norm_type=self.norm_type, |
||||||
|
freeze_norm=freeze_norm, |
||||||
|
norm_decay=norm_decay)) |
||||||
|
return nn.Sequential(*layers) |
||||||
|
|
||||||
|
def _make_naive_branches(self, |
||||||
|
num_branches, |
||||||
|
num_blocks, |
||||||
|
freeze_norm=False, |
||||||
|
norm_decay=0.): |
||||||
|
branches = [] |
||||||
|
for branch_idx in range(num_branches): |
||||||
|
layers = [] |
||||||
|
for i in range(num_blocks): |
||||||
|
layers.append( |
||||||
|
ShuffleUnit( |
||||||
|
self.in_channels[branch_idx], |
||||||
|
self.in_channels[branch_idx], |
||||||
|
stride=1, |
||||||
|
norm_type=self.norm_type, |
||||||
|
freeze_norm=freeze_norm, |
||||||
|
norm_decay=norm_decay)) |
||||||
|
branches.append(nn.Sequential(*layers)) |
||||||
|
return nn.LayerList(branches) |
||||||
|
|
||||||
|
def _make_fuse_layers(self, freeze_norm=False, norm_decay=0.): |
||||||
|
if self.num_branches == 1: |
||||||
|
return None |
||||||
|
fuse_layers = [] |
||||||
|
num_out_branches = self.num_branches if self.multiscale_output else 1 |
||||||
|
for i in range(num_out_branches): |
||||||
|
fuse_layer = [] |
||||||
|
for j in range(self.num_branches): |
||||||
|
if j > i: |
||||||
|
fuse_layer.append( |
||||||
|
nn.Sequential( |
||||||
|
Conv2d( |
||||||
|
self.in_channels[j], |
||||||
|
self.in_channels[i], |
||||||
|
kernel_size=1, |
||||||
|
stride=1, |
||||||
|
padding=0, |
||||||
|
bias=False, ), |
||||||
|
nn.BatchNorm2D(self.in_channels[i]), |
||||||
|
nn.Upsample( |
||||||
|
scale_factor=2**(j - i), mode='nearest'))) |
||||||
|
elif j == i: |
||||||
|
fuse_layer.append(None) |
||||||
|
else: |
||||||
|
conv_downsamples = [] |
||||||
|
for k in range(i - j): |
||||||
|
if k == i - j - 1: |
||||||
|
conv_downsamples.append( |
||||||
|
nn.Sequential( |
||||||
|
Conv2d( |
||||||
|
self.in_channels[j], |
||||||
|
self.in_channels[j], |
||||||
|
kernel_size=3, |
||||||
|
stride=2, |
||||||
|
padding=1, |
||||||
|
groups=self.in_channels[j], |
||||||
|
bias=False, ), |
||||||
|
nn.BatchNorm2D(self.in_channels[j]), |
||||||
|
Conv2d( |
||||||
|
self.in_channels[j], |
||||||
|
self.in_channels[i], |
||||||
|
kernel_size=1, |
||||||
|
stride=1, |
||||||
|
padding=0, |
||||||
|
bias=False, ), |
||||||
|
nn.BatchNorm2D(self.in_channels[i]))) |
||||||
|
else: |
||||||
|
conv_downsamples.append( |
||||||
|
nn.Sequential( |
||||||
|
Conv2d( |
||||||
|
self.in_channels[j], |
||||||
|
self.in_channels[j], |
||||||
|
kernel_size=3, |
||||||
|
stride=2, |
||||||
|
padding=1, |
||||||
|
groups=self.in_channels[j], |
||||||
|
bias=False, ), |
||||||
|
nn.BatchNorm2D(self.in_channels[j]), |
||||||
|
Conv2d( |
||||||
|
self.in_channels[j], |
||||||
|
self.in_channels[j], |
||||||
|
kernel_size=1, |
||||||
|
stride=1, |
||||||
|
padding=0, |
||||||
|
bias=False, ), |
||||||
|
nn.BatchNorm2D(self.in_channels[j]), |
||||||
|
nn.ReLU())) |
||||||
|
|
||||||
|
fuse_layer.append(nn.Sequential(*conv_downsamples)) |
||||||
|
fuse_layers.append(nn.LayerList(fuse_layer)) |
||||||
|
|
||||||
|
return nn.LayerList(fuse_layers) |
||||||
|
|
||||||
|
def forward(self, x): |
||||||
|
if self.num_branches == 1: |
||||||
|
return [self.layers[0](x[0])] |
||||||
|
if self.module_type == 'LITE': |
||||||
|
out = self.layers(x) |
||||||
|
elif self.module_type == 'NAIVE': |
||||||
|
for i in range(self.num_branches): |
||||||
|
x[i] = self.layers[i](x[i]) |
||||||
|
out = x |
||||||
|
if self.with_fuse: |
||||||
|
out_fuse = [] |
||||||
|
for i in range(len(self.fuse_layers)): |
||||||
|
y = out[0] if i == 0 else self.fuse_layers[i][0](out[0]) |
||||||
|
for j in range(self.num_branches): |
||||||
|
if j == 0: |
||||||
|
y += y |
||||||
|
elif i == j: |
||||||
|
y += out[j] |
||||||
|
else: |
||||||
|
y += self.fuse_layers[i][j](out[j]) |
||||||
|
if i == 0: |
||||||
|
out[i] = y |
||||||
|
out_fuse.append(self.relu(y)) |
||||||
|
out = out_fuse |
||||||
|
elif not self.multiscale_output: |
||||||
|
out = [out[0]] |
||||||
|
return out |
||||||
|
|
||||||
|
|
||||||
|
class LiteHRNet(nn.Layer): |
||||||
|
""" |
||||||
|
@inproceedings{Yulitehrnet21, |
||||||
|
title={Lite-HRNet: A Lightweight High-Resolution Network}, |
||||||
|
author={Yu, Changqian and Xiao, Bin and Gao, Changxin and Yuan, Lu and Zhang, Lei and Sang, Nong and Wang, Jingdong}, |
||||||
|
booktitle={CVPR},year={2021} |
||||||
|
} |
||||||
|
|
||||||
|
Args: |
||||||
|
network_type (str): the network_type should be one of ["lite_18", "lite_30", "naive", "wider_naive"], |
||||||
|
"naive": Simply combining the shuffle block in ShuffleNet and the highresolution design pattern in HRNet. |
||||||
|
"wider_naive": Naive network with wider channels in each block. |
||||||
|
"lite_18": Lite-HRNet-18, which replaces the pointwise convolution in a shuffle block by conditional channel weighting. |
||||||
|
"lite_30": Lite-HRNet-30, with more blocks compared with Lite-HRNet-18. |
||||||
|
in_channels (int, optional): The channels of input image. Default: 3. |
||||||
|
freeze_at (int): the stage to freeze |
||||||
|
freeze_norm (bool): whether to freeze norm in HRNet |
||||||
|
norm_decay (float): weight decay for normalization layer weights |
||||||
|
return_idx (List): the stage to return |
||||||
|
""" |
||||||
|
|
||||||
|
def __init__(self, |
||||||
|
network_type, |
||||||
|
in_channels=3, |
||||||
|
freeze_at=0, |
||||||
|
freeze_norm=True, |
||||||
|
norm_decay=0., |
||||||
|
return_idx=[0, 1, 2, 3], |
||||||
|
use_head=False, |
||||||
|
pretrained=None): |
||||||
|
super(LiteHRNet, self).__init__() |
||||||
|
if isinstance(return_idx, Integral): |
||||||
|
return_idx = [return_idx] |
||||||
|
assert network_type in ["lite_18", "lite_30", "naive", "wider_naive"], \ |
||||||
|
"the network_type should be one of [lite_18, lite_30, naive, wider_naive]" |
||||||
|
assert len(return_idx) > 0, "need one or more return index" |
||||||
|
self.freeze_at = freeze_at |
||||||
|
self.freeze_norm = freeze_norm |
||||||
|
self.norm_decay = norm_decay |
||||||
|
self.return_idx = return_idx |
||||||
|
self.norm_type = 'bn' |
||||||
|
self.use_head = use_head |
||||||
|
self.pretrained = pretrained |
||||||
|
|
||||||
|
self.module_configs = { |
||||||
|
"lite_18": { |
||||||
|
"num_modules": [2, 4, 2], |
||||||
|
"num_branches": [2, 3, 4], |
||||||
|
"num_blocks": [2, 2, 2], |
||||||
|
"module_type": ["LITE", "LITE", "LITE"], |
||||||
|
"reduce_ratios": [8, 8, 8], |
||||||
|
"num_channels": [[40, 80], [40, 80, 160], [40, 80, 160, 320]], |
||||||
|
}, |
||||||
|
"lite_30": { |
||||||
|
"num_modules": [3, 8, 3], |
||||||
|
"num_branches": [2, 3, 4], |
||||||
|
"num_blocks": [2, 2, 2], |
||||||
|
"module_type": ["LITE", "LITE", "LITE"], |
||||||
|
"reduce_ratios": [8, 8, 8], |
||||||
|
"num_channels": [[40, 80], [40, 80, 160], [40, 80, 160, 320]], |
||||||
|
}, |
||||||
|
"naive": { |
||||||
|
"num_modules": [2, 4, 2], |
||||||
|
"num_branches": [2, 3, 4], |
||||||
|
"num_blocks": [2, 2, 2], |
||||||
|
"module_type": ["NAIVE", "NAIVE", "NAIVE"], |
||||||
|
"reduce_ratios": [1, 1, 1], |
||||||
|
"num_channels": [[30, 60], [30, 60, 120], [30, 60, 120, 240]], |
||||||
|
}, |
||||||
|
"wider_naive": { |
||||||
|
"num_modules": [2, 4, 2], |
||||||
|
"num_branches": [2, 3, 4], |
||||||
|
"num_blocks": [2, 2, 2], |
||||||
|
"module_type": ["NAIVE", "NAIVE", "NAIVE"], |
||||||
|
"reduce_ratios": [1, 1, 1], |
||||||
|
"num_channels": [[40, 80], [40, 80, 160], [40, 80, 160, 320]], |
||||||
|
}, |
||||||
|
} |
||||||
|
|
||||||
|
self.stages_config = self.module_configs[network_type] |
||||||
|
|
||||||
|
self.stem = Stem(in_channels, 32, 32, 1) |
||||||
|
num_channels_pre_layer = [32] |
||||||
|
for stage_idx in range(3): |
||||||
|
num_channels = self.stages_config["num_channels"][stage_idx] |
||||||
|
setattr(self, 'transition{}'.format(stage_idx), |
||||||
|
self._make_transition_layer(num_channels_pre_layer, |
||||||
|
num_channels, self.freeze_norm, |
||||||
|
self.norm_decay)) |
||||||
|
stage, num_channels_pre_layer = self._make_stage( |
||||||
|
self.stages_config, stage_idx, num_channels, True, |
||||||
|
self.freeze_norm, self.norm_decay) |
||||||
|
setattr(self, 'stage{}'.format(stage_idx), stage) |
||||||
|
|
||||||
|
num_channels = self.stages_config["num_channels"][-1] |
||||||
|
self.feat_channels = num_channels |
||||||
|
|
||||||
|
if self.use_head: |
||||||
|
self.head_layer = IterativeHead(num_channels_pre_layer, 'bn', |
||||||
|
self.freeze_norm, self.norm_decay) |
||||||
|
|
||||||
|
self.feat_channels = [num_channels[0]] |
||||||
|
for i in range(1, len(num_channels)): |
||||||
|
self.feat_channels.append(num_channels[i] // 2) |
||||||
|
|
||||||
|
self.init_weight() |
||||||
|
|
||||||
|
def init_weight(self): |
||||||
|
if self.pretrained is not None: |
||||||
|
utils.load_entire_model(self, self.pretrained) |
||||||
|
|
||||||
|
def _make_transition_layer(self, |
||||||
|
num_channels_pre_layer, |
||||||
|
num_channels_cur_layer, |
||||||
|
freeze_norm=False, |
||||||
|
norm_decay=0.): |
||||||
|
num_branches_pre = len(num_channels_pre_layer) |
||||||
|
num_branches_cur = len(num_channels_cur_layer) |
||||||
|
transition_layers = [] |
||||||
|
for i in range(num_branches_cur): |
||||||
|
if i < num_branches_pre: |
||||||
|
if num_channels_cur_layer[i] != num_channels_pre_layer[i]: |
||||||
|
transition_layers.append( |
||||||
|
nn.Sequential( |
||||||
|
Conv2d( |
||||||
|
num_channels_pre_layer[i], |
||||||
|
num_channels_pre_layer[i], |
||||||
|
kernel_size=3, |
||||||
|
stride=1, |
||||||
|
padding=1, |
||||||
|
groups=num_channels_pre_layer[i], |
||||||
|
bias=False), |
||||||
|
nn.BatchNorm2D(num_channels_pre_layer[i]), |
||||||
|
Conv2d( |
||||||
|
num_channels_pre_layer[i], |
||||||
|
num_channels_cur_layer[i], |
||||||
|
kernel_size=1, |
||||||
|
stride=1, |
||||||
|
padding=0, |
||||||
|
bias=False, ), |
||||||
|
nn.BatchNorm2D(num_channels_cur_layer[i]), |
||||||
|
nn.ReLU())) |
||||||
|
else: |
||||||
|
transition_layers.append(None) |
||||||
|
else: |
||||||
|
conv_downsamples = [] |
||||||
|
for j in range(i + 1 - num_branches_pre): |
||||||
|
conv_downsamples.append( |
||||||
|
nn.Sequential( |
||||||
|
Conv2d( |
||||||
|
num_channels_pre_layer[-1], |
||||||
|
num_channels_pre_layer[-1], |
||||||
|
groups=num_channels_pre_layer[-1], |
||||||
|
kernel_size=3, |
||||||
|
stride=2, |
||||||
|
padding=1, |
||||||
|
bias=False, ), |
||||||
|
nn.BatchNorm2D(num_channels_pre_layer[-1]), |
||||||
|
Conv2d( |
||||||
|
num_channels_pre_layer[-1], |
||||||
|
num_channels_cur_layer[i] |
||||||
|
if j == i - num_branches_pre else |
||||||
|
num_channels_pre_layer[-1], |
||||||
|
kernel_size=1, |
||||||
|
stride=1, |
||||||
|
padding=0, |
||||||
|
bias=False, ), |
||||||
|
nn.BatchNorm2D(num_channels_cur_layer[i] |
||||||
|
if j == i - num_branches_pre else |
||||||
|
num_channels_pre_layer[-1]), |
||||||
|
nn.ReLU())) |
||||||
|
transition_layers.append(nn.Sequential(*conv_downsamples)) |
||||||
|
return nn.LayerList(transition_layers) |
||||||
|
|
||||||
|
def _make_stage(self, |
||||||
|
stages_config, |
||||||
|
stage_idx, |
||||||
|
in_channels, |
||||||
|
multiscale_output, |
||||||
|
freeze_norm=False, |
||||||
|
norm_decay=0.): |
||||||
|
num_modules = stages_config["num_modules"][stage_idx] |
||||||
|
num_branches = stages_config["num_branches"][stage_idx] |
||||||
|
num_blocks = stages_config["num_blocks"][stage_idx] |
||||||
|
reduce_ratio = stages_config['reduce_ratios'][stage_idx] |
||||||
|
module_type = stages_config['module_type'][stage_idx] |
||||||
|
|
||||||
|
modules = [] |
||||||
|
for i in range(num_modules): |
||||||
|
if not multiscale_output and i == num_modules - 1: |
||||||
|
reset_multiscale_output = False |
||||||
|
else: |
||||||
|
reset_multiscale_output = True |
||||||
|
modules.append( |
||||||
|
LiteHRNetModule( |
||||||
|
num_branches, |
||||||
|
num_blocks, |
||||||
|
in_channels, |
||||||
|
reduce_ratio, |
||||||
|
module_type, |
||||||
|
multiscale_output=reset_multiscale_output, |
||||||
|
with_fuse=True, |
||||||
|
freeze_norm=freeze_norm, |
||||||
|
norm_decay=norm_decay)) |
||||||
|
in_channels = modules[-1].in_channels |
||||||
|
return nn.Sequential(*modules), in_channels |
||||||
|
|
||||||
|
def forward(self, x): |
||||||
|
x = self.stem(x) |
||||||
|
|
||||||
|
y_list = [x] |
||||||
|
for stage_idx in range(3): |
||||||
|
x_list = [] |
||||||
|
transition = getattr(self, 'transition{}'.format(stage_idx)) |
||||||
|
for j in range(self.stages_config["num_branches"][stage_idx]): |
||||||
|
if transition[j] is not None: |
||||||
|
if j >= len(y_list): |
||||||
|
x_list.append(transition[j](y_list[-1])) |
||||||
|
else: |
||||||
|
x_list.append(transition[j](y_list[j])) |
||||||
|
else: |
||||||
|
x_list.append(y_list[j]) |
||||||
|
y_list = getattr(self, 'stage{}'.format(stage_idx))(x_list) |
||||||
|
|
||||||
|
if self.use_head: |
||||||
|
y_list = self.head_layer(y_list) |
||||||
|
|
||||||
|
res = [] |
||||||
|
for i, layer in enumerate(y_list): |
||||||
|
if i == self.freeze_at: |
||||||
|
layer.stop_gradient = True |
||||||
|
if i in self.return_idx: |
||||||
|
res.append(layer) |
||||||
|
return res |
||||||
|
|
||||||
|
|
||||||
|
@manager.BACKBONES.add_component |
||||||
|
def Lite_HRNet_18(**kwargs): |
||||||
|
model = LiteHRNet(network_type="lite_18", **kwargs) |
||||||
|
return model |
||||||
|
|
||||||
|
|
||||||
|
@manager.BACKBONES.add_component |
||||||
|
def Lite_HRNet_30(**kwargs): |
||||||
|
model = LiteHRNet(network_type="lite_30", **kwargs) |
||||||
|
return model |
||||||
|
|
||||||
|
|
||||||
|
@manager.BACKBONES.add_component |
||||||
|
def Lite_HRNet_naive(**kwargs): |
||||||
|
model = LiteHRNet(network_type="naive", **kwargs) |
||||||
|
return model |
||||||
|
|
||||||
|
|
||||||
|
@manager.BACKBONES.add_component |
||||||
|
def Lite_HRNet_wider_naive(**kwargs): |
||||||
|
model = LiteHRNet(network_type="wider_naive", **kwargs) |
||||||
|
return model |
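A minimal instantiation sketch for the Lite-HRNet backbones registered above (dummy input and printed attributes are illustrative only):

import paddle
backbone = Lite_HRNet_18()                        # defaults: freeze_at=0, freeze_norm=True, return_idx=[0, 1, 2, 3]
feats = backbone(paddle.randn([1, 3, 512, 512]))  # one feature map per returned branch
print(len(feats), backbone.feat_channels)         # number of branches and their channel widths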
@@ -0,0 +1,315 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
||||||
|
# |
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||||
|
# you may not use this file except in compliance with the License. |
||||||
|
# You may obtain a copy of the License at |
||||||
|
# |
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
# |
||||||
|
# Unless required by applicable law or agreed to in writing, software |
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
# See the License for the specific language governing permissions and |
||||||
|
# limitations under the License. |
||||||
|
|
||||||
|
import paddle |
||||||
|
from paddle import ParamAttr, reshape, transpose, concat, split |
||||||
|
from paddle.nn import Layer, Conv2D, MaxPool2D, AdaptiveAvgPool2D, BatchNorm, Linear |
||||||
|
from paddle.nn.initializer import KaimingNormal |
||||||
|
from paddle.nn.functional import swish |
||||||
|
|
||||||
|
from paddlers.models.ppseg.cvlibs import manager |
||||||
|
from paddlers.models.ppseg.utils import utils, logger |
||||||
|
|
||||||
|
__all__ = [ |
||||||
|
'ShuffleNetV2_x0_25', 'ShuffleNetV2_x0_33', 'ShuffleNetV2_x0_5', |
||||||
|
'ShuffleNetV2_x1_0', 'ShuffleNetV2_x1_5', 'ShuffleNetV2_x2_0', |
||||||
|
'ShuffleNetV2_swish' |
||||||
|
] |
||||||
|
|
||||||
|
|
||||||
|
def channel_shuffle(x, groups): |
||||||
|
x_shape = paddle.shape(x) |
||||||
|
batch_size, height, width = x_shape[0], x_shape[2], x_shape[3] |
||||||
|
num_channels = x.shape[1] |
||||||
|
channels_per_group = num_channels // groups |
||||||
|
|
||||||
|
# reshape |
||||||
|
x = reshape( |
||||||
|
x=x, shape=[batch_size, groups, channels_per_group, height, width]) |
||||||
|
|
||||||
|
# transpose |
||||||
|
x = transpose(x=x, perm=[0, 2, 1, 3, 4]) |
||||||
|
|
||||||
|
# flatten |
||||||
|
x = reshape(x=x, shape=[batch_size, num_channels, height, width]) |
||||||
|
|
||||||
|
return x |
||||||
|
|
||||||
|
|
||||||
|
class ConvBNLayer(Layer): |
||||||
|
def __init__( |
||||||
|
self, |
||||||
|
in_channels, |
||||||
|
out_channels, |
||||||
|
kernel_size, |
||||||
|
stride, |
||||||
|
padding, |
||||||
|
groups=1, |
||||||
|
act=None, |
||||||
|
name=None, ): |
||||||
|
super(ConvBNLayer, self).__init__() |
||||||
|
self._conv = Conv2D( |
||||||
|
in_channels=in_channels, |
||||||
|
out_channels=out_channels, |
||||||
|
kernel_size=kernel_size, |
||||||
|
stride=stride, |
||||||
|
padding=padding, |
||||||
|
groups=groups, |
||||||
|
weight_attr=ParamAttr( |
||||||
|
initializer=KaimingNormal(), name=name + "_weights"), |
||||||
|
bias_attr=False) |
||||||
|
|
||||||
|
self._batch_norm = BatchNorm( |
||||||
|
out_channels, |
||||||
|
param_attr=ParamAttr(name=name + "_bn_scale"), |
||||||
|
bias_attr=ParamAttr(name=name + "_bn_offset"), |
||||||
|
act=act, |
||||||
|
moving_mean_name=name + "_bn_mean", |
||||||
|
moving_variance_name=name + "_bn_variance") |
||||||
|
|
||||||
|
def forward(self, inputs): |
||||||
|
y = self._conv(inputs) |
||||||
|
y = self._batch_norm(y) |
||||||
|
return y |
||||||
|
|
||||||
|
|
||||||
|
class InvertedResidual(Layer): |
||||||
|
def __init__(self, in_channels, out_channels, stride, act="relu", |
||||||
|
name=None): |
||||||
|
super(InvertedResidual, self).__init__() |
||||||
|
self._conv_pw = ConvBNLayer( |
||||||
|
in_channels=in_channels // 2, |
||||||
|
out_channels=out_channels // 2, |
||||||
|
kernel_size=1, |
||||||
|
stride=1, |
||||||
|
padding=0, |
||||||
|
groups=1, |
||||||
|
act=act, |
||||||
|
name='stage_' + name + '_conv1') |
||||||
|
self._conv_dw = ConvBNLayer( |
||||||
|
in_channels=out_channels // 2, |
||||||
|
out_channels=out_channels // 2, |
||||||
|
kernel_size=3, |
||||||
|
stride=stride, |
||||||
|
padding=1, |
||||||
|
groups=out_channels // 2, |
||||||
|
act=None, |
||||||
|
name='stage_' + name + '_conv2') |
||||||
|
self._conv_linear = ConvBNLayer( |
||||||
|
in_channels=out_channels // 2, |
||||||
|
out_channels=out_channels // 2, |
||||||
|
kernel_size=1, |
||||||
|
stride=1, |
||||||
|
padding=0, |
||||||
|
groups=1, |
||||||
|
act=act, |
||||||
|
name='stage_' + name + '_conv3') |
||||||
|
|
||||||
|
def forward(self, inputs): |
||||||
|
x1, x2 = split( |
||||||
|
inputs, |
||||||
|
num_or_sections=[inputs.shape[1] // 2, inputs.shape[1] // 2], |
||||||
|
axis=1) |
||||||
|
x2 = self._conv_pw(x2) |
||||||
|
x2 = self._conv_dw(x2) |
||||||
|
x2 = self._conv_linear(x2) |
||||||
|
out = concat([x1, x2], axis=1) |
||||||
|
return channel_shuffle(out, 2) |
||||||
|
|
||||||
|
|
||||||
|
class InvertedResidualDS(Layer): |
||||||
|
def __init__(self, in_channels, out_channels, stride, act="relu", |
||||||
|
name=None): |
||||||
|
super(InvertedResidualDS, self).__init__() |
||||||
|
|
||||||
|
# branch1 |
||||||
|
self._conv_dw_1 = ConvBNLayer( |
||||||
|
in_channels=in_channels, |
||||||
|
out_channels=in_channels, |
||||||
|
kernel_size=3, |
||||||
|
stride=stride, |
||||||
|
padding=1, |
||||||
|
groups=in_channels, |
||||||
|
act=None, |
||||||
|
name='stage_' + name + '_conv4') |
||||||
|
self._conv_linear_1 = ConvBNLayer( |
||||||
|
in_channels=in_channels, |
||||||
|
out_channels=out_channels // 2, |
||||||
|
kernel_size=1, |
||||||
|
stride=1, |
||||||
|
padding=0, |
||||||
|
groups=1, |
||||||
|
act=act, |
||||||
|
name='stage_' + name + '_conv5') |
||||||
|
# branch2 |
||||||
|
self._conv_pw_2 = ConvBNLayer( |
||||||
|
in_channels=in_channels, |
||||||
|
out_channels=out_channels // 2, |
||||||
|
kernel_size=1, |
||||||
|
stride=1, |
||||||
|
padding=0, |
||||||
|
groups=1, |
||||||
|
act=act, |
||||||
|
name='stage_' + name + '_conv1') |
||||||
|
self._conv_dw_2 = ConvBNLayer( |
||||||
|
in_channels=out_channels // 2, |
||||||
|
out_channels=out_channels // 2, |
||||||
|
kernel_size=3, |
||||||
|
stride=stride, |
||||||
|
padding=1, |
||||||
|
groups=out_channels // 2, |
||||||
|
act=None, |
||||||
|
name='stage_' + name + '_conv2') |
||||||
|
self._conv_linear_2 = ConvBNLayer( |
||||||
|
in_channels=out_channels // 2, |
||||||
|
out_channels=out_channels // 2, |
||||||
|
kernel_size=1, |
||||||
|
stride=1, |
||||||
|
padding=0, |
||||||
|
groups=1, |
||||||
|
act=act, |
||||||
|
name='stage_' + name + '_conv3') |
||||||
|
|
||||||
|
def forward(self, inputs): |
||||||
|
x1 = self._conv_dw_1(inputs) |
||||||
|
x1 = self._conv_linear_1(x1) |
||||||
|
x2 = self._conv_pw_2(inputs) |
||||||
|
x2 = self._conv_dw_2(x2) |
||||||
|
x2 = self._conv_linear_2(x2) |
||||||
|
out = concat([x1, x2], axis=1) |
||||||
|
|
||||||
|
return channel_shuffle(out, 2) |
||||||
|
|
||||||
|
|
||||||
|
class ShuffleNet(Layer): |
||||||
|
def __init__(self, scale=1.0, act="relu", in_channels=3, pretrained=None): |
||||||
|
super(ShuffleNet, self).__init__() |
||||||
|
self.scale = scale |
||||||
|
self.pretrained = pretrained |
||||||
|
stage_repeats = [4, 8, 4] |
||||||
|
|
||||||
|
if scale == 0.25: |
||||||
|
stage_out_channels = [-1, 24, 24, 48, 96, 512] |
||||||
|
elif scale == 0.33: |
||||||
|
stage_out_channels = [-1, 24, 32, 64, 128, 512] |
||||||
|
elif scale == 0.5: |
||||||
|
stage_out_channels = [-1, 24, 48, 96, 192, 1024] |
||||||
|
elif scale == 1.0: |
||||||
|
stage_out_channels = [-1, 24, 116, 232, 464, 1024] |
||||||
|
elif scale == 1.5: |
||||||
|
stage_out_channels = [-1, 24, 176, 352, 704, 1024] |
||||||
|
elif scale == 2.0: |
||||||
|
stage_out_channels = [-1, 24, 224, 488, 976, 2048] |
||||||
|
else: |
||||||
|
raise NotImplementedError("This scale size:[" + str(scale) + |
||||||
|
"] is not implemented!") |
||||||
|
|
||||||
|
self.out_index = [3, 11, 15] |
||||||
|
self.feat_channels = stage_out_channels[1:5] |
||||||
|
|
||||||
|
# 1. conv1 |
||||||
|
self._conv1 = ConvBNLayer( |
||||||
|
in_channels=in_channels, |
||||||
|
out_channels=stage_out_channels[1], |
||||||
|
kernel_size=3, |
||||||
|
stride=2, |
||||||
|
padding=1, |
||||||
|
act=act, |
||||||
|
name='stage1_conv') |
||||||
|
self._max_pool = MaxPool2D(kernel_size=3, stride=2, padding=1) |
||||||
|
|
||||||
|
# 2. bottleneck sequences |
||||||
|
self._block_list = [] |
||||||
|
for stage_id, num_repeat in enumerate(stage_repeats): |
||||||
|
for i in range(num_repeat): |
||||||
|
if i == 0: |
||||||
|
block = self.add_sublayer( |
||||||
|
name=str(stage_id + 2) + '_' + str(i + 1), |
||||||
|
sublayer=InvertedResidualDS( |
||||||
|
in_channels=stage_out_channels[stage_id + 1], |
||||||
|
out_channels=stage_out_channels[stage_id + 2], |
||||||
|
stride=2, |
||||||
|
act=act, |
||||||
|
name=str(stage_id + 2) + '_' + str(i + 1))) |
||||||
|
else: |
||||||
|
block = self.add_sublayer( |
||||||
|
name=str(stage_id + 2) + '_' + str(i + 1), |
||||||
|
sublayer=InvertedResidual( |
||||||
|
in_channels=stage_out_channels[stage_id + 2], |
||||||
|
out_channels=stage_out_channels[stage_id + 2], |
||||||
|
stride=1, |
||||||
|
act=act, |
||||||
|
name=str(stage_id + 2) + '_' + str(i + 1))) |
||||||
|
self._block_list.append(block) |
||||||
|
|
||||||
|
self.init_weight() |
||||||
|
|
||||||
|
def init_weight(self): |
||||||
|
if self.pretrained is not None: |
||||||
|
utils.load_entire_model(self, self.pretrained) |
||||||
|
|
||||||
|
def forward(self, inputs): |
||||||
|
feat_list = [] |
||||||
|
|
||||||
|
y = self._conv1(inputs) |
||||||
|
y = self._max_pool(y) |
||||||
|
feat_list.append(y) |
||||||
|
|
||||||
|
for idx, inv in enumerate(self._block_list): |
||||||
|
y = inv(y) |
||||||
|
if idx in self.out_index: |
||||||
|
feat_list.append(y) |
||||||
|
return feat_list |
||||||
|
|
||||||
|
|
||||||
|
@manager.BACKBONES.add_component |
||||||
|
def ShuffleNetV2_x0_25(**kwargs): |
||||||
|
model = ShuffleNet(scale=0.25, **kwargs) |
||||||
|
return model |
||||||
|
|
||||||
|
|
||||||
|
@manager.BACKBONES.add_component |
||||||
|
def ShuffleNetV2_x0_33(**kwargs): |
||||||
|
model = ShuffleNet(scale=0.33, **kwargs) |
||||||
|
return model |
||||||
|
|
||||||
|
|
||||||
|
@manager.BACKBONES.add_component |
||||||
|
def ShuffleNetV2_x0_5(**kwargs): |
||||||
|
model = ShuffleNet(scale=0.5, **kwargs) |
||||||
|
return model |
||||||
|
|
||||||
|
|
||||||
|
@manager.BACKBONES.add_component |
||||||
|
def ShuffleNetV2_x1_0(**kwargs): |
||||||
|
model = ShuffleNet(scale=1.0, **kwargs) |
||||||
|
return model |
||||||
|
|
||||||
|
|
||||||
|
@manager.BACKBONES.add_component |
||||||
|
def ShuffleNetV2_x1_5(**kwargs): |
||||||
|
model = ShuffleNet(scale=1.5, **kwargs) |
||||||
|
return model |
||||||
|
|
||||||
|
|
||||||
|
@manager.BACKBONES.add_component |
||||||
|
def ShuffleNetV2_x2_0(**kwargs): |
||||||
|
model = ShuffleNet(scale=2.0, **kwargs) |
||||||
|
return model |
||||||
|
|
||||||
|
|
||||||
|
@manager.BACKBONES.add_component |
||||||
|
def ShuffleNetV2_swish(**kwargs): |
||||||
|
model = ShuffleNet(scale=1.0, act="swish", **kwargs) |
||||||
|
return model |
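# A minimal usage sketch of the backbone factories above; the (1, 3, 224, 224)
# input is an assumption for illustration, any size divisible by 32 behaves the
# same way. The backbone returns four feature maps at strides 4, 8, 16 and 32.
if __name__ == "__main__":
    import paddle

    backbone = ShuffleNetV2_x1_0(in_channels=3)
    feats = backbone(paddle.rand([1, 3, 224, 224]))
    for feat, channels in zip(feats, backbone.feat_channels):
        # e.g. [1, 24, 56, 56], [1, 116, 28, 28], [1, 232, 14, 14], [1, 464, 7, 7]
        print(feat.shape, channels)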
@ -0,0 +1,716 @@ |
|||||||
|
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. |
||||||
|
# |
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||||
|
# you may not use this file except in compliance with the License. |
||||||
|
# You may obtain a copy of the License at |
||||||
|
# |
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
# |
||||||
|
# Unless required by applicable law or agreed to in writing, software |
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
# See the License for the specific language governing permissions and |
||||||
|
# limitations under the License. |
||||||
|
""" |
||||||
|
This file refers to https://github.com/hustvl/TopFormer and https://github.com/BR-IDL/PaddleViT |
||||||
|
""" |
||||||
|
|
||||||
|
import paddle |
||||||
|
import paddle.nn as nn |
||||||
|
import paddle.nn.functional as F |
||||||
|
|
||||||
|
from paddlers.models.ppseg.cvlibs import manager |
||||||
|
from paddlers.models.ppseg import utils |
||||||
|
from paddlers.models.ppseg.models.backbones.transformer_utils import Identity, DropPath |
||||||
|
|
||||||
|
__all__ = ["TopTransformer_Base", "TopTransformer_Small", "TopTransformer_Tiny"] |
||||||
|
|
||||||
|
|
||||||
|
def make_divisible(val, divisor, min_value=None): |
||||||
|
""" |
||||||
|
This function is taken from the original tf repo. |
||||||
|
    It ensures that all layers have a channel number that is divisible by 8.
||||||
|
It can be seen here: |
||||||
|
https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py |
||||||
|
""" |
||||||
|
if min_value is None: |
||||||
|
min_value = divisor |
||||||
|
new_v = max(min_value, int(val + divisor / 2) // divisor * divisor) |
||||||
|
# Make sure that round down does not go down by more than 10%. |
||||||
|
if new_v < 0.9 * val: |
||||||
|
new_v += divisor |
||||||
|
return new_v |
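# A few worked values of make_divisible, assuming divisor=8 (the only way it is
# called below); the numbers are for illustration only:
#   make_divisible(16, 8) -> 16   (already a multiple of 8)
#   make_divisible(28, 8) -> 32   (int(28 + 4) // 8 * 8 = 32)
#   make_divisible(10, 8) -> 16   (rounds down to 8 first, then the 10% rule bumps it up)
#   make_divisible(7, 8)  -> 8    (never below min_value, which defaults to divisor)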
||||||
|
|
||||||
|
|
||||||
|
class HSigmoid(nn.Layer): |
||||||
|
def __init__(self, inplace=True): |
||||||
|
super().__init__() |
||||||
|
self.relu = nn.ReLU6() |
||||||
|
|
||||||
|
def forward(self, x): |
||||||
|
return self.relu(x + 3) / 6 |
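    # The layer above is the piecewise-linear "hard" sigmoid
    #     h_sigmoid(x) = clip(x + 3, 0, 6) / 6,
    # so for example h_sigmoid(-3) = 0, h_sigmoid(0) = 0.5 and h_sigmoid(3) = 1;
    # the sample values follow directly from the ReLU6 used in forward().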
||||||
|
|
||||||
|
|
||||||
|
class Conv2DBN(nn.Layer): |
||||||
|
def __init__(self, |
||||||
|
in_channels, |
||||||
|
out_channels, |
||||||
|
ks=1, |
||||||
|
stride=1, |
||||||
|
pad=0, |
||||||
|
dilation=1, |
||||||
|
groups=1, |
||||||
|
bn_weight_init=1, |
||||||
|
lr_mult=1.0): |
||||||
|
super().__init__() |
||||||
|
conv_weight_attr = paddle.ParamAttr(learning_rate=lr_mult) |
||||||
|
self.c = nn.Conv2D( |
||||||
|
in_channels=in_channels, |
||||||
|
out_channels=out_channels, |
||||||
|
kernel_size=ks, |
||||||
|
stride=stride, |
||||||
|
padding=pad, |
||||||
|
dilation=dilation, |
||||||
|
groups=groups, |
||||||
|
weight_attr=conv_weight_attr, |
||||||
|
bias_attr=False) |
||||||
|
bn_weight_attr = paddle.ParamAttr( |
||||||
|
initializer=nn.initializer.Constant(bn_weight_init), |
||||||
|
learning_rate=lr_mult) |
||||||
|
bn_bias_attr = paddle.ParamAttr( |
||||||
|
initializer=nn.initializer.Constant(0), learning_rate=lr_mult) |
||||||
|
self.bn = nn.BatchNorm2D( |
||||||
|
out_channels, weight_attr=bn_weight_attr, bias_attr=bn_bias_attr) |
||||||
|
|
||||||
|
def forward(self, inputs): |
||||||
|
out = self.c(inputs) |
||||||
|
out = self.bn(out) |
||||||
|
return out |
||||||
|
|
||||||
|
|
||||||
|
class ConvBNAct(nn.Layer): |
||||||
|
def __init__(self, |
||||||
|
in_channels, |
||||||
|
out_channels, |
||||||
|
kernel_size=1, |
||||||
|
stride=1, |
||||||
|
padding=0, |
||||||
|
groups=1, |
||||||
|
norm=nn.BatchNorm2D, |
||||||
|
act=None, |
||||||
|
bias_attr=False, |
||||||
|
lr_mult=1.0): |
||||||
|
super(ConvBNAct, self).__init__() |
||||||
|
param_attr = paddle.ParamAttr(learning_rate=lr_mult) |
||||||
|
self.conv = nn.Conv2D( |
||||||
|
in_channels=in_channels, |
||||||
|
out_channels=out_channels, |
||||||
|
kernel_size=kernel_size, |
||||||
|
stride=stride, |
||||||
|
padding=padding, |
||||||
|
groups=groups, |
||||||
|
weight_attr=param_attr, |
||||||
|
bias_attr=param_attr if bias_attr else False) |
||||||
|
self.act = act() if act is not None else Identity() |
||||||
|
self.bn = norm(out_channels, weight_attr=param_attr, bias_attr=param_attr) \ |
||||||
|
if norm is not None else Identity() |
||||||
|
|
||||||
|
def forward(self, x): |
||||||
|
x = self.conv(x) |
||||||
|
x = self.bn(x) |
||||||
|
x = self.act(x) |
||||||
|
return x |
||||||
|
|
||||||
|
|
||||||
|
class MLP(nn.Layer): |
||||||
|
def __init__(self, |
||||||
|
in_features, |
||||||
|
hidden_features=None, |
||||||
|
out_features=None, |
||||||
|
act_layer=nn.ReLU, |
||||||
|
drop=0., |
||||||
|
lr_mult=1.0): |
||||||
|
super().__init__() |
||||||
|
out_features = out_features or in_features |
||||||
|
hidden_features = hidden_features or in_features |
||||||
|
self.fc1 = Conv2DBN(in_features, hidden_features, lr_mult=lr_mult) |
||||||
|
param_attr = paddle.ParamAttr(learning_rate=lr_mult) |
||||||
|
self.dwconv = nn.Conv2D( |
||||||
|
hidden_features, |
||||||
|
hidden_features, |
||||||
|
3, |
||||||
|
1, |
||||||
|
1, |
||||||
|
groups=hidden_features, |
||||||
|
weight_attr=param_attr, |
||||||
|
bias_attr=param_attr) |
||||||
|
self.act = act_layer() |
||||||
|
self.fc2 = Conv2DBN(hidden_features, out_features, lr_mult=lr_mult) |
||||||
|
self.drop = nn.Dropout(drop) |
||||||
|
|
||||||
|
def forward(self, x): |
||||||
|
x = self.fc1(x) |
||||||
|
x = self.dwconv(x) |
||||||
|
x = self.act(x) |
||||||
|
x = self.drop(x) |
||||||
|
x = self.fc2(x) |
||||||
|
x = self.drop(x) |
||||||
|
return x |
||||||
|
|
||||||
|
|
||||||
|
class InvertedResidual(nn.Layer): |
||||||
|
def __init__(self, |
||||||
|
in_channels, |
||||||
|
out_channels, |
||||||
|
kernel_size, |
||||||
|
stride, |
||||||
|
expand_ratio, |
||||||
|
activations=None, |
||||||
|
lr_mult=1.0): |
||||||
|
super(InvertedResidual, self).__init__() |
||||||
|
assert stride in [1, 2], "The stride should be 1 or 2." |
||||||
|
|
||||||
|
if activations is None: |
||||||
|
activations = nn.ReLU |
||||||
|
|
||||||
|
hidden_dim = int(round(in_channels * expand_ratio)) |
||||||
|
self.use_res_connect = stride == 1 and in_channels == out_channels |
||||||
|
|
||||||
|
layers = [] |
||||||
|
if expand_ratio != 1: |
||||||
|
layers.append( |
||||||
|
Conv2DBN( |
||||||
|
in_channels, hidden_dim, ks=1, lr_mult=lr_mult)) |
||||||
|
layers.append(activations()) |
||||||
|
layers.extend([ |
||||||
|
Conv2DBN( |
||||||
|
hidden_dim, |
||||||
|
hidden_dim, |
||||||
|
ks=kernel_size, |
||||||
|
stride=stride, |
||||||
|
pad=kernel_size // 2, |
||||||
|
groups=hidden_dim, |
||||||
|
lr_mult=lr_mult), activations(), Conv2DBN( |
||||||
|
hidden_dim, out_channels, ks=1, lr_mult=lr_mult) |
||||||
|
]) |
||||||
|
self.conv = nn.Sequential(*layers) |
||||||
|
self.out_channels = out_channels |
||||||
|
|
||||||
|
def forward(self, x): |
||||||
|
if self.use_res_connect: |
||||||
|
return x + self.conv(x) |
||||||
|
else: |
||||||
|
return self.conv(x) |
||||||
|
|
||||||
|
|
||||||
|
class TokenPyramidModule(nn.Layer): |
||||||
|
def __init__(self, |
||||||
|
cfgs, |
||||||
|
out_indices, |
||||||
|
in_channels=3, |
||||||
|
inp_channel=16, |
||||||
|
activation=nn.ReLU, |
||||||
|
width_mult=1., |
||||||
|
lr_mult=1.): |
||||||
|
super().__init__() |
||||||
|
self.out_indices = out_indices |
||||||
|
|
||||||
|
self.stem = nn.Sequential( |
||||||
|
Conv2DBN( |
||||||
|
in_channels, inp_channel, 3, 2, 1, lr_mult=lr_mult), |
||||||
|
activation()) |
||||||
|
|
||||||
|
self.layers = [] |
||||||
|
for i, (k, t, c, s) in enumerate(cfgs): |
||||||
|
output_channel = make_divisible(c * width_mult, 8) |
||||||
|
exp_size = t * inp_channel |
||||||
|
exp_size = make_divisible(exp_size * width_mult, 8) |
||||||
|
layer_name = 'layer{}'.format(i + 1) |
||||||
|
layer = InvertedResidual( |
||||||
|
inp_channel, |
||||||
|
output_channel, |
||||||
|
kernel_size=k, |
||||||
|
stride=s, |
||||||
|
expand_ratio=t, |
||||||
|
activations=activation, |
||||||
|
lr_mult=lr_mult) |
||||||
|
self.add_sublayer(layer_name, layer) |
||||||
|
self.layers.append(layer_name) |
||||||
|
inp_channel = output_channel |
||||||
|
|
||||||
|
def forward(self, x): |
||||||
|
outs = [] |
||||||
|
x = self.stem(x) |
||||||
|
for i, layer_name in enumerate(self.layers): |
||||||
|
layer = getattr(self, layer_name) |
||||||
|
x = layer(x) |
||||||
|
if i in self.out_indices: |
||||||
|
outs.append(x) |
||||||
|
return outs |
||||||
|
|
||||||
|
|
||||||
|
class Attention(nn.Layer): |
||||||
|
def __init__(self, |
||||||
|
dim, |
||||||
|
key_dim, |
||||||
|
num_heads, |
||||||
|
attn_ratio=4, |
||||||
|
activation=None, |
||||||
|
lr_mult=1.0): |
||||||
|
super().__init__() |
||||||
|
self.num_heads = num_heads |
||||||
|
self.scale = key_dim**-0.5 |
||||||
|
self.key_dim = key_dim |
||||||
|
self.nh_kd = nh_kd = key_dim * num_heads |
||||||
|
self.d = int(attn_ratio * key_dim) |
||||||
|
self.dh = int(attn_ratio * key_dim) * num_heads |
||||||
|
self.attn_ratio = attn_ratio |
||||||
|
|
||||||
|
self.to_q = Conv2DBN(dim, nh_kd, 1, lr_mult=lr_mult) |
||||||
|
self.to_k = Conv2DBN(dim, nh_kd, 1, lr_mult=lr_mult) |
||||||
|
self.to_v = Conv2DBN(dim, self.dh, 1, lr_mult=lr_mult) |
||||||
|
|
||||||
|
self.proj = nn.Sequential( |
||||||
|
activation(), |
||||||
|
Conv2DBN( |
||||||
|
self.dh, dim, bn_weight_init=0, lr_mult=lr_mult)) |
||||||
|
|
||||||
|
def forward(self, x): |
||||||
|
x_shape = paddle.shape(x) |
||||||
|
H, W = x_shape[2], x_shape[3] |
||||||
|
|
||||||
|
qq = self.to_q(x).reshape( |
||||||
|
[0, self.num_heads, self.key_dim, -1]).transpose([0, 1, 3, 2]) |
||||||
|
kk = self.to_k(x).reshape([0, self.num_heads, self.key_dim, -1]) |
||||||
|
vv = self.to_v(x).reshape([0, self.num_heads, self.d, -1]).transpose( |
||||||
|
[0, 1, 3, 2]) |
||||||
|
|
||||||
|
attn = paddle.matmul(qq, kk) |
||||||
|
attn = F.softmax(attn, axis=-1) |
||||||
|
|
||||||
|
xx = paddle.matmul(attn, vv) |
||||||
|
|
||||||
|
xx = xx.transpose([0, 1, 3, 2]).reshape([0, self.dh, H, W]) |
||||||
|
xx = self.proj(xx) |
||||||
|
return xx |
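    # A minimal shape trace, assuming dim=384 (the Base configuration, where the
    # embedding dim is 32 + 64 + 128 + 160), key_dim=16, num_heads=8, attn_ratio=2
    # and a [1, 384, 7, 7] input; the spatial size is an assumption:
    #   to_q -> [1, 8, 49, 16]    (N = H * W = 49 tokens per head)
    #   to_k -> [1, 8, 16, 49]
    #   to_v -> [1, 8, 49, 32]    (d = attn_ratio * key_dim = 32)
    #   attn = softmax(q @ k)     -> [1, 8, 49, 49]
    #   attn @ v, reshaped        -> [1, 256, 7, 7]   (dh = d * num_heads)
    #   proj                      -> [1, 384, 7, 7]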
||||||
|
|
||||||
|
|
||||||
|
class Block(nn.Layer): |
||||||
|
def __init__(self, |
||||||
|
dim, |
||||||
|
key_dim, |
||||||
|
num_heads, |
||||||
|
mlp_ratios=4., |
||||||
|
attn_ratio=2., |
||||||
|
drop=0., |
||||||
|
drop_path=0., |
||||||
|
act_layer=nn.ReLU, |
||||||
|
lr_mult=1.0): |
||||||
|
super().__init__() |
||||||
|
self.dim = dim |
||||||
|
self.num_heads = num_heads |
||||||
|
self.mlp_ratios = mlp_ratios |
||||||
|
|
||||||
|
self.attn = Attention( |
||||||
|
dim, |
||||||
|
key_dim=key_dim, |
||||||
|
num_heads=num_heads, |
||||||
|
attn_ratio=attn_ratio, |
||||||
|
activation=act_layer, |
||||||
|
lr_mult=lr_mult) |
||||||
|
|
||||||
|
# NOTE: drop path for stochastic depth, we shall see if this is better than dropout here |
||||||
|
self.drop_path = DropPath(drop_path) if drop_path > 0. else Identity() |
||||||
|
mlp_hidden_dim = int(dim * mlp_ratios) |
||||||
|
self.mlp = MLP(in_features=dim, |
||||||
|
hidden_features=mlp_hidden_dim, |
||||||
|
act_layer=act_layer, |
||||||
|
drop=drop, |
||||||
|
lr_mult=lr_mult) |
||||||
|
|
||||||
|
def forward(self, x): |
||||||
|
h = x |
||||||
|
x = self.attn(x) |
||||||
|
x = self.drop_path(x) |
||||||
|
x = h + x |
||||||
|
|
||||||
|
h = x |
||||||
|
x = self.mlp(x) |
||||||
|
x = self.drop_path(x) |
||||||
|
x = x + h |
||||||
|
return x |
||||||
|
|
||||||
|
|
||||||
|
class BasicLayer(nn.Layer): |
||||||
|
def __init__(self, |
||||||
|
block_num, |
||||||
|
embedding_dim, |
||||||
|
key_dim, |
||||||
|
num_heads, |
||||||
|
mlp_ratios=4., |
||||||
|
attn_ratio=2., |
||||||
|
drop=0., |
||||||
|
attn_drop=0., |
||||||
|
drop_path=0., |
||||||
|
act_layer=None, |
||||||
|
lr_mult=1.0): |
||||||
|
super().__init__() |
||||||
|
self.block_num = block_num |
||||||
|
|
||||||
|
self.transformer_blocks = nn.LayerList() |
||||||
|
for i in range(self.block_num): |
||||||
|
self.transformer_blocks.append( |
||||||
|
Block( |
||||||
|
embedding_dim, |
||||||
|
key_dim=key_dim, |
||||||
|
num_heads=num_heads, |
||||||
|
mlp_ratios=mlp_ratios, |
||||||
|
attn_ratio=attn_ratio, |
||||||
|
drop=drop, |
||||||
|
drop_path=drop_path[i] |
||||||
|
if isinstance(drop_path, list) else drop_path, |
||||||
|
act_layer=act_layer, |
||||||
|
lr_mult=lr_mult)) |
||||||
|
|
||||||
|
def forward(self, x): |
||||||
|
# token * N |
||||||
|
for i in range(self.block_num): |
||||||
|
x = self.transformer_blocks[i](x) |
||||||
|
return x |
||||||
|
|
||||||
|
|
||||||
|
class PyramidPoolAgg(nn.Layer): |
||||||
|
def __init__(self, stride): |
||||||
|
super().__init__() |
||||||
|
self.stride = stride |
||||||
|
self.tmp = Identity() # avoid the error of paddle.flops |
||||||
|
|
||||||
|
def forward(self, inputs): |
||||||
|
''' |
||||||
|
        # F.adaptive_avg_pool2d does not support (H, W) being Tensors,
        # so exporting the inference model would raise an error.
||||||
|
_, _, H, W = inputs[-1].shape |
||||||
|
H = (H - 1) // self.stride + 1 |
||||||
|
W = (W - 1) // self.stride + 1 |
||||||
|
return paddle.concat( |
||||||
|
[F.adaptive_avg_pool2d(inp, (H, W)) for inp in inputs], axis=1) |
||||||
|
''' |
||||||
|
out = [] |
||||||
|
ks = 2**len(inputs) |
||||||
|
stride = self.stride**len(inputs) |
||||||
|
for x in inputs: |
||||||
|
x = F.avg_pool2d(x, int(ks), int(stride)) |
||||||
|
ks /= 2 |
||||||
|
stride /= 2 |
||||||
|
out.append(x) |
||||||
|
out = paddle.concat(out, axis=1) |
||||||
|
return out |
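    # A worked example of the pooling schedule above, assuming the usual 4-level
    # token pyramid and stride=2 (the c2t_stride default): len(inputs) = 4, so ks
    # and stride both start at 2**4 = 16 and halve per level:
    #   level 0 (1/4  resolution): avg_pool2d(x, 16, 16)
    #   level 1 (1/8  resolution): avg_pool2d(x,  8,  8)
    #   level 2 (1/16 resolution): avg_pool2d(x,  4,  4)
    #   level 3 (1/32 resolution): avg_pool2d(x,  2,  2)
    # so every level ends up at 1/64 of the input resolution before the concat.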
||||||
|
|
||||||
|
|
||||||
|
class InjectionMultiSum(nn.Layer): |
||||||
|
def __init__(self, in_channels, out_channels, activations=None, |
||||||
|
lr_mult=1.0): |
||||||
|
super(InjectionMultiSum, self).__init__() |
||||||
|
|
||||||
|
self.local_embedding = ConvBNAct( |
||||||
|
in_channels, out_channels, kernel_size=1, lr_mult=lr_mult) |
||||||
|
self.global_embedding = ConvBNAct( |
||||||
|
in_channels, out_channels, kernel_size=1, lr_mult=lr_mult) |
||||||
|
self.global_act = ConvBNAct( |
||||||
|
in_channels, out_channels, kernel_size=1, lr_mult=lr_mult) |
||||||
|
self.act = HSigmoid() |
||||||
|
|
||||||
|
def forward(self, x_low, x_global): |
||||||
|
xl_hw = paddle.shape(x_low)[2:] |
||||||
|
local_feat = self.local_embedding(x_low) |
||||||
|
|
||||||
|
global_act = self.global_act(x_global) |
||||||
|
sig_act = F.interpolate( |
||||||
|
self.act(global_act), xl_hw, mode='bilinear', align_corners=False) |
||||||
|
|
||||||
|
global_feat = self.global_embedding(x_global) |
||||||
|
global_feat = F.interpolate( |
||||||
|
global_feat, xl_hw, mode='bilinear', align_corners=False) |
||||||
|
|
||||||
|
out = local_feat * sig_act + global_feat |
||||||
|
return out |
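    # In effect the forward above computes
    #   out = local_embedding(x_low) * upsample(h_sigmoid(global_act(x_global)))
    #         + upsample(global_embedding(x_global)),
    # i.e. the high-resolution tokens are gated by a sigmoid attention map derived
    # from the global semantics, then summed with the upsampled global features.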
||||||
|
|
||||||
|
|
||||||
|
class InjectionMultiSumCBR(nn.Layer): |
||||||
|
def __init__(self, in_channels, out_channels, activations=None): |
||||||
|
''' |
||||||
|
local_embedding: conv-bn-relu |
||||||
|
global_embedding: conv-bn-relu |
||||||
|
global_act: conv |
||||||
|
''' |
||||||
|
super(InjectionMultiSumCBR, self).__init__() |
||||||
|
|
||||||
|
self.local_embedding = ConvBNAct( |
||||||
|
in_channels, out_channels, kernel_size=1) |
||||||
|
self.global_embedding = ConvBNAct( |
||||||
|
in_channels, out_channels, kernel_size=1) |
||||||
|
self.global_act = ConvBNAct( |
||||||
|
in_channels, out_channels, kernel_size=1, norm=None, act=None) |
||||||
|
self.act = HSigmoid() |
||||||
|
|
||||||
|
def forward(self, x_low, x_global): |
||||||
|
        xl_hw = paddle.shape(x_low)[2:]
||||||
|
local_feat = self.local_embedding(x_low) |
||||||
|
# kernel |
||||||
|
global_act = self.global_act(x_global) |
||||||
|
global_act = F.interpolate( |
||||||
|
self.act(global_act), xl_hw, mode='bilinear', align_corners=False) |
||||||
|
# feat_h |
||||||
|
global_feat = self.global_embedding(x_global) |
||||||
|
global_feat = F.interpolate( |
||||||
|
global_feat, xl_hw, mode='bilinear', align_corners=False) |
||||||
|
out = local_feat * global_act + global_feat |
||||||
|
return out |
||||||
|
|
||||||
|
|
||||||
|
class FuseBlockSum(nn.Layer): |
||||||
|
def __init__(self, in_channels, out_channels, activations=None): |
||||||
|
super(FuseBlockSum, self).__init__() |
||||||
|
|
||||||
|
self.fuse1 = ConvBNAct( |
||||||
|
in_channels, out_channels, kernel_size=1, act=None) |
||||||
|
self.fuse2 = ConvBNAct( |
||||||
|
in_channels, out_channels, kernel_size=1, act=None) |
||||||
|
|
||||||
|
def forward(self, x_low, x_high): |
||||||
|
        xl_hw = paddle.shape(x_low)[2:]
||||||
|
inp = self.fuse1(x_low) |
||||||
|
kernel = self.fuse2(x_high) |
||||||
|
feat_h = F.interpolate( |
||||||
|
kernel, xl_hw, mode='bilinear', align_corners=False) |
||||||
|
out = inp + feat_h |
||||||
|
return out |
||||||
|
|
||||||
|
|
||||||
|
class FuseBlockMulti(nn.Layer): |
||||||
|
def __init__( |
||||||
|
self, |
||||||
|
in_channels, |
||||||
|
out_channels, |
||||||
|
stride=1, |
||||||
|
activations=None, ): |
||||||
|
super(FuseBlockMulti, self).__init__() |
||||||
|
assert stride in [1, 2], "The stride should be 1 or 2." |
||||||
|
|
||||||
|
self.fuse1 = ConvBNAct( |
||||||
|
in_channels, out_channels, kernel_size=1, act=None) |
||||||
|
self.fuse2 = ConvBNAct( |
||||||
|
in_channels, out_channels, kernel_size=1, act=None) |
||||||
|
self.act = HSigmoid() |
||||||
|
|
||||||
|
def forward(self, x_low, x_high): |
||||||
|
        xl_hw = paddle.shape(x_low)[2:]
||||||
|
inp = self.fuse1(x_low) |
||||||
|
sig_act = self.fuse2(x_high) |
||||||
|
sig_act = F.interpolate( |
||||||
|
self.act(sig_act), xl_hw, mode='bilinear', align_corners=False) |
||||||
|
out = inp * sig_act |
||||||
|
return out |
||||||
|
|
||||||
|
|
||||||
|
SIM_BLOCK = { |
||||||
|
"fuse_sum": FuseBlockSum, |
||||||
|
"fuse_multi": FuseBlockMulti, |
||||||
|
"multi_sum": InjectionMultiSum, |
||||||
|
"multi_sum_cbr": InjectionMultiSumCBR, |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
class TopTransformer(nn.Layer): |
||||||
|
def __init__(self, |
||||||
|
cfgs, |
||||||
|
injection_out_channels, |
||||||
|
encoder_out_indices, |
||||||
|
trans_out_indices=[1, 2, 3], |
||||||
|
depths=4, |
||||||
|
key_dim=16, |
||||||
|
num_heads=8, |
||||||
|
attn_ratios=2, |
||||||
|
mlp_ratios=2, |
||||||
|
c2t_stride=2, |
||||||
|
drop_path_rate=0., |
||||||
|
act_layer=nn.ReLU6, |
||||||
|
                 injection_type="multi_sum",
||||||
|
injection=True, |
||||||
|
lr_mult=1.0, |
||||||
|
in_channels=3, |
||||||
|
pretrained=None): |
||||||
|
super().__init__() |
||||||
|
self.feat_channels = [ |
||||||
|
c[2] for i, c in enumerate(cfgs) if i in encoder_out_indices |
||||||
|
] |
||||||
|
self.injection_out_channels = injection_out_channels |
||||||
|
self.injection = injection |
||||||
|
self.embed_dim = sum(self.feat_channels) |
||||||
|
self.trans_out_indices = trans_out_indices |
||||||
|
|
||||||
|
self.tpm = TokenPyramidModule( |
||||||
|
cfgs=cfgs, |
||||||
|
out_indices=encoder_out_indices, |
||||||
|
in_channels=in_channels, |
||||||
|
lr_mult=lr_mult) |
||||||
|
self.ppa = PyramidPoolAgg(stride=c2t_stride) |
||||||
|
|
||||||
|
dpr = [x.item() for x in \ |
||||||
|
paddle.linspace(0, drop_path_rate, depths)] |
||||||
|
self.trans = BasicLayer( |
||||||
|
block_num=depths, |
||||||
|
embedding_dim=self.embed_dim, |
||||||
|
key_dim=key_dim, |
||||||
|
num_heads=num_heads, |
||||||
|
mlp_ratios=mlp_ratios, |
||||||
|
attn_ratio=attn_ratios, |
||||||
|
drop=0, |
||||||
|
attn_drop=0, |
||||||
|
drop_path=dpr, |
||||||
|
act_layer=act_layer, |
||||||
|
lr_mult=lr_mult) |
||||||
|
|
||||||
|
self.SIM = nn.LayerList() |
||||||
|
inj_module = SIM_BLOCK[injection_type] |
||||||
|
if self.injection: |
||||||
|
for i in range(len(self.feat_channels)): |
||||||
|
if i in trans_out_indices: |
||||||
|
self.SIM.append( |
||||||
|
inj_module( |
||||||
|
self.feat_channels[i], |
||||||
|
injection_out_channels[i], |
||||||
|
activations=act_layer, |
||||||
|
lr_mult=lr_mult)) |
||||||
|
else: |
||||||
|
self.SIM.append(Identity()) |
||||||
|
|
||||||
|
self.pretrained = pretrained |
||||||
|
self.init_weight() |
||||||
|
|
||||||
|
def init_weight(self): |
||||||
|
if self.pretrained is not None: |
||||||
|
utils.load_entire_model(self, self.pretrained) |
||||||
|
|
||||||
|
    def forward(self, x):
        outputs = self.tpm(x)
        out = self.ppa(outputs)
        out = self.trans(out)

        if self.injection:
            xx = out.split(self.feat_channels, axis=1)
            results = []
            for i in range(len(self.feat_channels)):
                if i in self.trans_out_indices:
                    local_tokens = outputs[i]
                    global_semantics = xx[i]
                    out_ = self.SIM[i](local_tokens, global_semantics)
                    results.append(out_)
            return results
        else:
            outputs.append(out)
            return outputs
||||||
|
|
||||||
|
|
||||||
|
@manager.BACKBONES.add_component |
||||||
|
def TopTransformer_Base(**kwargs): |
||||||
|
cfgs = [ |
||||||
|
# k, t, c, s |
||||||
|
[3, 1, 16, 1], # 1/2 |
||||||
|
[3, 4, 32, 2], # 1/4 1 |
||||||
|
[3, 3, 32, 1], # |
||||||
|
[5, 3, 64, 2], # 1/8 3 |
||||||
|
[5, 3, 64, 1], # |
||||||
|
[3, 3, 128, 2], # 1/16 5 |
||||||
|
[3, 3, 128, 1], # |
||||||
|
[5, 6, 160, 2], # 1/32 7 |
||||||
|
[5, 6, 160, 1], # |
||||||
|
[3, 6, 160, 1], # |
||||||
|
] |
||||||
|
|
||||||
|
model = TopTransformer( |
||||||
|
cfgs=cfgs, |
||||||
|
injection_out_channels=[None, 256, 256, 256], |
||||||
|
encoder_out_indices=[2, 4, 6, 9], |
||||||
|
trans_out_indices=[1, 2, 3], |
||||||
|
depths=4, |
||||||
|
key_dim=16, |
||||||
|
num_heads=8, |
||||||
|
attn_ratios=2, |
||||||
|
mlp_ratios=2, |
||||||
|
c2t_stride=2, |
||||||
|
drop_path_rate=0., |
||||||
|
act_layer=nn.ReLU6, |
||||||
|
injection_type="multi_sum", |
||||||
|
injection=True, |
||||||
|
**kwargs) |
||||||
|
return model |
||||||
|
|
||||||
|
|
||||||
|
@manager.BACKBONES.add_component |
||||||
|
def TopTransformer_Small(**kwargs): |
||||||
|
cfgs = [ |
||||||
|
# k, t, c, s |
||||||
|
[3, 1, 16, 1], # 1/2 |
||||||
|
[3, 4, 24, 2], # 1/4 1 |
||||||
|
[3, 3, 24, 1], # |
||||||
|
[5, 3, 48, 2], # 1/8 3 |
||||||
|
[5, 3, 48, 1], # |
||||||
|
[3, 3, 96, 2], # 1/16 5 |
||||||
|
[3, 3, 96, 1], # |
||||||
|
[5, 6, 128, 2], # 1/32 7 |
||||||
|
[5, 6, 128, 1], # |
||||||
|
[3, 6, 128, 1], # |
||||||
|
] |
||||||
|
|
||||||
|
model = TopTransformer( |
||||||
|
cfgs=cfgs, |
||||||
|
injection_out_channels=[None, 192, 192, 192], |
||||||
|
encoder_out_indices=[2, 4, 6, 9], |
||||||
|
trans_out_indices=[1, 2, 3], |
||||||
|
depths=4, |
||||||
|
key_dim=16, |
||||||
|
num_heads=6, |
||||||
|
attn_ratios=2, |
||||||
|
mlp_ratios=2, |
||||||
|
c2t_stride=2, |
||||||
|
drop_path_rate=0., |
||||||
|
act_layer=nn.ReLU6, |
||||||
|
injection_type="multi_sum", |
||||||
|
injection=True, |
||||||
|
**kwargs) |
||||||
|
return model |
||||||
|
|
||||||
|
|
||||||
|
@manager.BACKBONES.add_component |
||||||
|
def TopTransformer_Tiny(**kwargs): |
||||||
|
cfgs = [ |
||||||
|
# k, t, c, s |
||||||
|
[3, 1, 16, 1], # 1/2 |
||||||
|
[3, 4, 16, 2], # 1/4 1 |
||||||
|
[3, 3, 16, 1], # |
||||||
|
[5, 3, 32, 2], # 1/8 3 |
||||||
|
[5, 3, 32, 1], # |
||||||
|
[3, 3, 64, 2], # 1/16 5 |
||||||
|
[3, 3, 64, 1], # |
||||||
|
[5, 6, 96, 2], # 1/32 7 |
||||||
|
[5, 6, 96, 1], # |
||||||
|
] |
||||||
|
|
||||||
|
model = TopTransformer( |
||||||
|
cfgs=cfgs, |
||||||
|
injection_out_channels=[None, 128, 128, 128], |
||||||
|
encoder_out_indices=[2, 4, 6, 8], |
||||||
|
trans_out_indices=[1, 2, 3], |
||||||
|
depths=4, |
||||||
|
key_dim=16, |
||||||
|
num_heads=4, |
||||||
|
attn_ratios=2, |
||||||
|
mlp_ratios=2, |
||||||
|
c2t_stride=2, |
||||||
|
drop_path_rate=0., |
||||||
|
act_layer=nn.ReLU6, |
||||||
|
injection_type="multi_sum", |
||||||
|
injection=True, |
||||||
|
**kwargs) |
||||||
|
return model |
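# A minimal usage sketch of the factories above; the (1, 3, 512, 512) input is an
# assumption for illustration. With injection=True the backbone returns one fused
# feature map per entry of trans_out_indices.
if __name__ == "__main__":
    backbone = TopTransformer_Tiny()
    feats = backbone(paddle.rand([1, 3, 512, 512]))
    for feat in feats:
        # e.g. [1, 128, 64, 64], [1, 128, 32, 32], [1, 128, 16, 16]
        print(feat.shape)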
@ -0,0 +1,174 @@ |
|||||||
|
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||||
|
# |
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||||
|
# you may not use this file except in compliance with the License. |
||||||
|
# You may obtain a copy of the License at |
||||||
|
# |
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
# |
||||||
|
# Unless required by applicable law or agreed to in writing, software |
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
# See the License for the specific language governing permissions and |
||||||
|
# limitations under the License. |
||||||
|
|
||||||
|
import paddle |
||||||
|
import paddle.nn as nn |
||||||
|
import paddle.nn.functional as F |
||||||
|
|
||||||
|
from paddlers.models.ppseg.cvlibs import manager |
||||||
|
from paddlers.models.ppseg.models import layers |
||||||
|
from paddlers.models.ppseg.utils import utils |
||||||
|
|
||||||
|
|
||||||
|
@manager.MODELS.add_component |
||||||
|
class CCNet(nn.Layer): |
||||||
|
""" |
||||||
|
The CCNet implementation based on PaddlePaddle. |
||||||
|
|
||||||
|
The original article refers to |
||||||
|
Zilong Huang, et al. "CCNet: Criss-Cross Attention for Semantic Segmentation" |
||||||
|
(https://arxiv.org/abs/1811.11721) |
||||||
|
|
||||||
|
Args: |
||||||
|
num_classes (int): The unique number of target classes. |
||||||
|
        backbone (paddle.nn.Layer): Backbone network, currently supporting Resnet18_vd/Resnet34_vd/Resnet50_vd/Resnet101_vd.
||||||
|
backbone_indices (tuple, list, optional): Two values in the tuple indicate the indices of output of backbone. Default: (2, 3). |
||||||
|
        enable_auxiliary_loss (bool, optional): A bool value that indicates whether to add an auxiliary loss. Default: True.
||||||
|
dropout_prob (float, optional): The probability of dropout. Default: 0.0. |
||||||
|
        recurrence (int, optional): The number of recurrent operations. Default: 1.
||||||
|
align_corners (bool, optional): An argument of F.interpolate. It should be set to False when the feature size is even, |
||||||
|
e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False. |
||||||
|
pretrained (str, optional): The path or url of pretrained model. Default: None. |
||||||
|
""" |
||||||
|
|
||||||
|
def __init__(self, |
||||||
|
num_classes, |
||||||
|
backbone, |
||||||
|
backbone_indices=(2, 3), |
||||||
|
enable_auxiliary_loss=True, |
||||||
|
dropout_prob=0.0, |
||||||
|
recurrence=1, |
||||||
|
align_corners=False, |
||||||
|
pretrained=None): |
||||||
|
super().__init__() |
||||||
|
self.enable_auxiliary_loss = enable_auxiliary_loss |
||||||
|
self.recurrence = recurrence |
||||||
|
self.align_corners = align_corners |
||||||
|
|
||||||
|
self.backbone = backbone |
||||||
|
self.backbone_indices = backbone_indices |
||||||
|
backbone_channels = [ |
||||||
|
backbone.feat_channels[i] for i in backbone_indices |
||||||
|
] |
||||||
|
|
||||||
|
if enable_auxiliary_loss: |
||||||
|
self.aux_head = layers.AuxLayer( |
||||||
|
backbone_channels[0], |
||||||
|
512, |
||||||
|
num_classes, |
||||||
|
dropout_prob=dropout_prob) |
||||||
|
self.head = RCCAModule( |
||||||
|
backbone_channels[1], |
||||||
|
512, |
||||||
|
num_classes, |
||||||
|
dropout_prob=dropout_prob, |
||||||
|
recurrence=recurrence) |
||||||
|
        self.pretrained = pretrained
        self.init_weight()
||||||
|
|
||||||
|
def init_weight(self): |
||||||
|
if self.pretrained is not None: |
||||||
|
utils.load_entire_model(self, self.pretrained) |
||||||
|
|
||||||
|
def forward(self, x): |
||||||
|
feat_list = self.backbone(x) |
||||||
|
logit_list = [] |
||||||
|
output = self.head(feat_list[self.backbone_indices[-1]]) |
||||||
|
logit_list.append(output) |
||||||
|
if self.training and self.enable_auxiliary_loss: |
||||||
|
aux_out = self.aux_head(feat_list[self.backbone_indices[-2]]) |
||||||
|
logit_list.append(aux_out) |
||||||
|
return [ |
||||||
|
F.interpolate( |
||||||
|
logit, |
||||||
|
paddle.shape(x)[2:], |
||||||
|
mode='bilinear', |
||||||
|
align_corners=self.align_corners) for logit in logit_list |
||||||
|
] |
||||||
|
|
||||||
|
|
||||||
|
class RCCAModule(nn.Layer): |
||||||
|
def __init__(self, |
||||||
|
in_channels, |
||||||
|
out_channels, |
||||||
|
num_classes, |
||||||
|
dropout_prob=0.1, |
||||||
|
recurrence=1): |
||||||
|
super().__init__() |
||||||
|
inter_channels = in_channels // 4 |
||||||
|
self.recurrence = recurrence |
||||||
|
self.conva = layers.ConvBNLeakyReLU( |
||||||
|
in_channels, inter_channels, 3, padding=1, bias_attr=False) |
||||||
|
self.cca = CrissCrossAttention(inter_channels) |
||||||
|
self.convb = layers.ConvBNLeakyReLU( |
||||||
|
inter_channels, inter_channels, 3, padding=1, bias_attr=False) |
||||||
|
self.out = layers.AuxLayer( |
||||||
|
in_channels + inter_channels, |
||||||
|
out_channels, |
||||||
|
num_classes, |
||||||
|
dropout_prob=dropout_prob) |
||||||
|
|
||||||
|
def forward(self, x): |
||||||
|
feat = self.conva(x) |
||||||
|
for i in range(self.recurrence): |
||||||
|
feat = self.cca(feat) |
||||||
|
feat = self.convb(feat) |
||||||
|
output = self.out(paddle.concat([x, feat], axis=1)) |
||||||
|
return output |
||||||
|
|
||||||
|
|
||||||
|
class CrissCrossAttention(nn.Layer): |
||||||
|
def __init__(self, in_channels): |
||||||
|
super().__init__() |
||||||
|
self.q_conv = nn.Conv2D(in_channels, in_channels // 8, kernel_size=1) |
||||||
|
self.k_conv = nn.Conv2D(in_channels, in_channels // 8, kernel_size=1) |
||||||
|
self.v_conv = nn.Conv2D(in_channels, in_channels, kernel_size=1) |
||||||
|
self.softmax = nn.Softmax(axis=3) |
||||||
|
self.gamma = self.create_parameter( |
||||||
|
shape=(1, ), default_initializer=nn.initializer.Constant(0)) |
||||||
|
self.inf_tensor = paddle.full(shape=(1, ), fill_value=float('inf')) |
||||||
|
|
||||||
|
def forward(self, x): |
||||||
|
b, c, h, w = paddle.shape(x) |
||||||
|
proj_q = self.q_conv(x) |
||||||
|
proj_q_h = proj_q.transpose([0, 3, 1, 2]).reshape( |
||||||
|
[b * w, -1, h]).transpose([0, 2, 1]) |
||||||
|
proj_q_w = proj_q.transpose([0, 2, 1, 3]).reshape( |
||||||
|
[b * h, -1, w]).transpose([0, 2, 1]) |
||||||
|
|
||||||
|
proj_k = self.k_conv(x) |
||||||
|
proj_k_h = proj_k.transpose([0, 3, 1, 2]).reshape([b * w, -1, h]) |
||||||
|
proj_k_w = proj_k.transpose([0, 2, 1, 3]).reshape([b * h, -1, w]) |
||||||
|
|
||||||
|
proj_v = self.v_conv(x) |
||||||
|
proj_v_h = proj_v.transpose([0, 3, 1, 2]).reshape([b * w, -1, h]) |
||||||
|
proj_v_w = proj_v.transpose([0, 2, 1, 3]).reshape([b * h, -1, w]) |
||||||
|
|
||||||
|
energy_h = (paddle.bmm(proj_q_h, proj_k_h) + self.Inf(b, h, w)).reshape( |
||||||
|
[b, w, h, h]).transpose([0, 2, 1, 3]) |
||||||
|
energy_w = paddle.bmm(proj_q_w, proj_k_w).reshape([b, h, w, w]) |
||||||
|
concate = self.softmax(paddle.concat([energy_h, energy_w], axis=3)) |
||||||
|
|
||||||
|
attn_h = concate[:, :, :, 0:h].transpose([0, 2, 1, 3]).reshape( |
||||||
|
[b * w, h, h]) |
||||||
|
attn_w = concate[:, :, :, h:h + w].reshape([b * h, w, w]) |
||||||
|
out_h = paddle.bmm(proj_v_h, attn_h.transpose([0, 2, 1])).reshape( |
||||||
|
[b, w, -1, h]).transpose([0, 2, 3, 1]) |
||||||
|
out_w = paddle.bmm(proj_v_w, attn_w.transpose([0, 2, 1])).reshape( |
||||||
|
[b, h, -1, w]).transpose([0, 2, 1, 3]) |
||||||
|
return self.gamma * (out_h + out_w) + x |
||||||
|
|
||||||
|
def Inf(self, B, H, W): |
||||||
|
return -paddle.tile( |
||||||
|
paddle.diag(paddle.tile(self.inf_tensor, [H]), 0).unsqueeze(0), |
||||||
|
[B * W, 1, 1]) |
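    # Note on Inf(): paddle.diag(paddle.tile(self.inf_tensor, [H])) builds an
    # H x H matrix with inf on the diagonal; negating it and tiling it to
    # [B * W, H, H] before adding it to the vertical energies masks out each
    # pixel's attention to itself along the column path, so after the softmax
    # the center position is only attended once (through the row path) where
    # the two criss-cross paths overlap.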
@ -0,0 +1,403 @@ |
|||||||
|
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||||
|
# |
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||||
|
# you may not use this file except in compliance with the License. |
||||||
|
# You may obtain a copy of the License at |
||||||
|
# |
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
# |
||||||
|
# Unless required by applicable law or agreed to in writing, software |
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
# See the License for the specific language governing permissions and |
||||||
|
# limitations under the License. |
||||||
|
|
||||||
|
import paddle |
||||||
|
import paddle.nn as nn |
||||||
|
import paddle.nn.functional as F |
||||||
|
|
||||||
|
from paddlers.models.ppseg.cvlibs import manager, param_init |
||||||
|
from paddlers.models.ppseg.models import layers |
||||||
|
from paddlers.models.ppseg.utils import utils |
||||||
|
|
||||||
|
|
||||||
|
class DualResNet(nn.Layer): |
||||||
|
""" |
||||||
|
The DDRNet implementation based on PaddlePaddle. |
||||||
|
|
||||||
|
The original article refers to |
||||||
|
Yuanduo Hong, Huihui Pan, Weichao Sun, et al. "Deep Dual-resolution Networks for Real-time and Accurate Semantic Segmentation of Road Scenes" |
||||||
|
(https://arxiv.org/abs/2101.06085) |
||||||
|
|
||||||
|
Args: |
||||||
|
num_classes (int): The unique number of target classes. |
||||||
|
in_channels (int, optional): Number of input channels. Default: 3. |
||||||
|
block_layers (list, tuple): The numbers of layers in different blocks. Default: [2, 2, 2, 2]. |
||||||
|
planes (int): Base channels in network. Default: 64. |
||||||
|
spp_planes (int): Branch channels for DAPPM. Default: 128. |
||||||
|
head_planes (int): Mid channels of segmentation head. Default: 128. |
||||||
|
        enable_auxiliary_loss (bool): Whether to use an auxiliary head for stage 3. Default: False.
||||||
|
pretrained (str, optional): The path or url of pretrained model. Default: None. |
||||||
|
""" |
||||||
|
|
||||||
|
def __init__(self, |
||||||
|
num_classes, |
||||||
|
in_channels=3, |
||||||
|
block_layers=[2, 2, 2, 2], |
||||||
|
planes=64, |
||||||
|
spp_planes=128, |
||||||
|
head_planes=128, |
||||||
|
enable_auxiliary_loss=False, |
||||||
|
pretrained=None): |
||||||
|
super().__init__() |
||||||
|
highres_planes = planes * 2 |
||||||
|
self.enable_auxiliary_loss = enable_auxiliary_loss |
||||||
|
self.conv1 = nn.Sequential( |
||||||
|
layers.ConvBNReLU( |
||||||
|
in_channels, planes, kernel_size=3, stride=2, padding=1), |
||||||
|
layers.ConvBNReLU( |
||||||
|
planes, planes, kernel_size=3, stride=2, padding=1), ) |
||||||
|
self.relu = nn.ReLU() |
||||||
|
self.layer1 = self._make_layers(BasicBlock, planes, planes, |
||||||
|
block_layers[0]) |
||||||
|
self.layer2 = self._make_layers( |
||||||
|
BasicBlock, planes, planes * 2, block_layers[1], stride=2) |
||||||
|
self.layer3 = self._make_layers( |
||||||
|
BasicBlock, planes * 2, planes * 4, block_layers[2], stride=2) |
||||||
|
self.layer4 = self._make_layers( |
||||||
|
BasicBlock, planes * 4, planes * 8, block_layers[3], stride=2) |
||||||
|
|
||||||
|
self.compression3 = layers.ConvBN( |
||||||
|
planes * 4, highres_planes, kernel_size=1, bias_attr=False) |
||||||
|
|
||||||
|
self.compression4 = layers.ConvBN( |
||||||
|
planes * 8, highres_planes, kernel_size=1, bias_attr=False) |
||||||
|
|
||||||
|
self.down3 = layers.ConvBN( |
||||||
|
highres_planes, |
||||||
|
planes * 4, |
||||||
|
kernel_size=3, |
||||||
|
stride=2, |
||||||
|
bias_attr=False) |
||||||
|
|
||||||
|
self.down4 = nn.Sequential( |
||||||
|
layers.ConvBNReLU( |
||||||
|
highres_planes, |
||||||
|
planes * 4, |
||||||
|
kernel_size=3, |
||||||
|
stride=2, |
||||||
|
padding=1, |
||||||
|
bias_attr=False), |
||||||
|
layers.ConvBN( |
||||||
|
planes * 4, |
||||||
|
planes * 8, |
||||||
|
kernel_size=3, |
||||||
|
stride=2, |
||||||
|
padding=1, |
||||||
|
bias_attr=False)) |
||||||
|
|
||||||
|
self.layer3_ = self._make_layers(BasicBlock, planes * 2, highres_planes, |
||||||
|
2) |
||||||
|
self.layer4_ = self._make_layers(BasicBlock, highres_planes, |
||||||
|
highres_planes, 2) |
||||||
|
self.layer5_ = self._make_layers(Bottleneck, highres_planes, |
||||||
|
highres_planes, 1) |
||||||
|
self.layer5 = self._make_layers( |
||||||
|
Bottleneck, planes * 8, planes * 8, 1, stride=2) |
||||||
|
|
||||||
|
self.spp = DAPPM(planes * 16, spp_planes, planes * 4) |
||||||
|
if self.enable_auxiliary_loss: |
||||||
|
self.aux_head = DDRNetHead(highres_planes, head_planes, num_classes) |
||||||
|
self.head = DDRNetHead(planes * 4, head_planes, num_classes) |
||||||
|
|
||||||
|
self.pretrained = pretrained |
||||||
|
self.init_weight() |
||||||
|
|
||||||
|
def init_weight(self): |
||||||
|
if self.pretrained is not None: |
||||||
|
utils.load_entire_model(self, self.pretrained) |
||||||
|
else: |
||||||
|
for m in self.sublayers(): |
||||||
|
if isinstance(m, nn.Conv2D): |
||||||
|
param_init.kaiming_normal_init(m.weight) |
||||||
|
elif isinstance(m, nn.BatchNorm2D): |
||||||
|
param_init.constant_init(m.weight, value=1) |
||||||
|
param_init.constant_init(m.bias, value=0) |
||||||
|
|
||||||
|
def _make_layers(self, block, inplanes, planes, blocks, stride=1): |
||||||
|
downsample = None |
||||||
|
if stride != 1 or inplanes != planes * block.expansion: |
||||||
|
downsample = nn.Sequential( |
||||||
|
nn.Conv2D( |
||||||
|
inplanes, |
||||||
|
planes * block.expansion, |
||||||
|
kernel_size=1, |
||||||
|
stride=stride, |
||||||
|
bias_attr=False), |
||||||
|
nn.BatchNorm2D(planes * block.expansion), ) |
||||||
|
layers = [] |
||||||
|
layers.append(block(inplanes, planes, stride, downsample)) |
||||||
|
inplanes = planes * block.expansion |
||||||
|
for i in range(1, blocks): |
||||||
|
if i == (blocks - 1): |
||||||
|
layers.append(block(inplanes, planes, stride=1, no_relu=True)) |
||||||
|
else: |
||||||
|
layers.append(block(inplanes, planes, stride=1, no_relu=False)) |
||||||
|
return nn.Sequential(*layers) |
||||||
|
|
||||||
|
def forward(self, x): |
||||||
|
n, c, h, w = paddle.shape(x) |
||||||
|
width_output = w // 8 |
||||||
|
height_output = h // 8 |
||||||
|
|
||||||
|
x = self.conv1(x) |
||||||
|
stage1_out = self.layer1(x) |
||||||
|
stage2_out = self.layer2(self.relu(stage1_out)) |
||||||
|
stage3_out = self.layer3(self.relu(stage2_out)) |
||||||
|
stage3_out_dual = self.layer3_(self.relu(stage2_out)) |
||||||
|
x = stage3_out + self.down3(self.relu(stage3_out_dual)) |
||||||
|
stage3_merge = stage3_out_dual + F.interpolate( |
||||||
|
self.compression3(self.relu(stage3_out)), |
||||||
|
size=[height_output, width_output], |
||||||
|
mode='bilinear') |
||||||
|
|
||||||
|
stage4_out = self.layer4(self.relu(x)) |
||||||
|
stage4_out_dual = self.layer4_(self.relu(stage3_merge)) |
||||||
|
|
||||||
|
x = stage4_out + self.down4(self.relu(stage4_out_dual)) |
||||||
|
stage4_merge = stage4_out_dual + F.interpolate( |
||||||
|
self.compression4(self.relu(stage4_out)), |
||||||
|
size=[height_output, width_output], |
||||||
|
mode='bilinear') |
||||||
|
|
||||||
|
stage5_out_dual = self.layer5_(self.relu(stage4_merge)) |
||||||
|
x = F.interpolate( |
||||||
|
self.spp(self.layer5(self.relu(x))), |
||||||
|
size=[height_output, width_output], |
||||||
|
mode='bilinear') |
||||||
|
|
||||||
|
output = self.head(x + stage5_out_dual) |
||||||
|
logit_list = [] |
||||||
|
logit_list.append(output) |
||||||
|
|
||||||
|
if self.enable_auxiliary_loss: |
||||||
|
aux_out = self.aux_head(stage3_merge) |
||||||
|
logit_list.append(aux_out) |
||||||
|
return [ |
||||||
|
F.interpolate( |
||||||
|
logit, [h, w], mode='bilinear') for logit in logit_list |
||||||
|
] |
||||||
|
|
||||||
|
|
||||||
|
class BasicBlock(nn.Layer): |
||||||
|
expansion = 1 |
||||||
|
|
||||||
|
def __init__(self, |
||||||
|
inplanes, |
||||||
|
planes, |
||||||
|
stride=1, |
||||||
|
downsample=None, |
||||||
|
no_relu=False): |
||||||
|
super().__init__() |
||||||
|
self.conv_bn_relu = layers.ConvBNReLU( |
||||||
|
inplanes, |
||||||
|
planes, |
||||||
|
kernel_size=3, |
||||||
|
stride=stride, |
||||||
|
padding=1, |
||||||
|
bias_attr=False) |
||||||
|
self.relu = nn.ReLU() |
||||||
|
self.conv_bn = layers.ConvBN( |
||||||
|
planes, planes, kernel_size=3, stride=1, padding=1, bias_attr=False) |
||||||
|
self.downsample = downsample |
||||||
|
self.stride = stride |
||||||
|
self.no_relu = no_relu |
||||||
|
|
||||||
|
def forward(self, x): |
||||||
|
residual = x |
||||||
|
out = self.conv_bn_relu(x) |
||||||
|
out = self.conv_bn(out) |
||||||
|
if self.downsample is not None: |
||||||
|
residual = self.downsample(x) |
||||||
|
out += residual |
||||||
|
if self.no_relu: |
||||||
|
return out |
||||||
|
else: |
||||||
|
return self.relu(out) |
||||||
|
|
||||||
|
|
||||||
|
class Bottleneck(nn.Layer): |
||||||
|
expansion = 2 |
||||||
|
|
||||||
|
def __init__(self, |
||||||
|
inplanes, |
||||||
|
planes, |
||||||
|
stride=1, |
||||||
|
downsample=None, |
||||||
|
no_relu=True): |
||||||
|
super().__init__() |
||||||
|
self.conv_bn_relu1 = layers.ConvBNReLU( |
||||||
|
inplanes, planes, kernel_size=1, bias_attr=False) |
||||||
|
self.conv_bn_relu2 = layers.ConvBNReLU( |
||||||
|
planes, |
||||||
|
planes, |
||||||
|
kernel_size=3, |
||||||
|
stride=stride, |
||||||
|
padding=1, |
||||||
|
bias_attr=False) |
||||||
|
self.conv_bn = layers.ConvBN( |
||||||
|
planes, planes * self.expansion, kernel_size=1, bias_attr=False) |
||||||
|
self.relu = nn.ReLU() |
||||||
|
self.downsample = downsample |
||||||
|
self.stride = stride |
||||||
|
self.no_relu = no_relu |
||||||
|
|
||||||
|
def forward(self, x): |
||||||
|
residual = x |
||||||
|
out = self.conv_bn_relu1(x) |
||||||
|
out = self.conv_bn_relu2(out) |
||||||
|
out = self.conv_bn(out) |
||||||
|
if self.downsample is not None: |
||||||
|
residual = self.downsample(x) |
||||||
|
out += residual |
||||||
|
if self.no_relu: |
||||||
|
return out |
||||||
|
else: |
||||||
|
return self.relu(out) |
||||||
|
|
||||||
|
|
||||||
|
class DAPPM(nn.Layer): |
||||||
|
def __init__(self, inplanes, branch_planes, outplanes): |
||||||
|
super().__init__() |
||||||
|
self.scale1 = nn.Sequential( |
||||||
|
nn.AvgPool2D( |
||||||
|
kernel_size=5, stride=2, padding=2), |
||||||
|
layers.SyncBatchNorm(inplanes), |
||||||
|
nn.ReLU(), |
||||||
|
nn.Conv2D( |
||||||
|
inplanes, branch_planes, kernel_size=1, bias_attr=False), ) |
||||||
|
self.scale2 = nn.Sequential( |
||||||
|
nn.AvgPool2D( |
||||||
|
kernel_size=9, stride=4, padding=4), |
||||||
|
layers.SyncBatchNorm(inplanes), |
||||||
|
nn.ReLU(), |
||||||
|
nn.Conv2D( |
||||||
|
inplanes, branch_planes, kernel_size=1, bias_attr=False), ) |
||||||
|
self.scale3 = nn.Sequential( |
||||||
|
nn.AvgPool2D( |
||||||
|
kernel_size=17, stride=8, padding=8), |
||||||
|
layers.SyncBatchNorm(inplanes), |
||||||
|
nn.ReLU(), |
||||||
|
nn.Conv2D( |
||||||
|
inplanes, branch_planes, kernel_size=1, bias_attr=False), ) |
||||||
|
self.scale4 = nn.Sequential( |
||||||
|
nn.AdaptiveAvgPool2D((1, 1)), |
||||||
|
layers.SyncBatchNorm(inplanes), |
||||||
|
nn.ReLU(), |
||||||
|
nn.Conv2D( |
||||||
|
inplanes, branch_planes, kernel_size=1, bias_attr=False), ) |
||||||
|
self.scale0 = nn.Sequential( |
||||||
|
layers.SyncBatchNorm(inplanes), |
||||||
|
nn.ReLU(), |
||||||
|
nn.Conv2D( |
||||||
|
inplanes, branch_planes, kernel_size=1, bias_attr=False), ) |
||||||
|
self.process1 = nn.Sequential( |
||||||
|
layers.SyncBatchNorm(branch_planes), |
||||||
|
nn.ReLU(), |
||||||
|
nn.Conv2D( |
||||||
|
branch_planes, |
||||||
|
branch_planes, |
||||||
|
kernel_size=3, |
||||||
|
padding=1, |
||||||
|
bias_attr=False), ) |
||||||
|
self.process2 = nn.Sequential( |
||||||
|
layers.SyncBatchNorm(branch_planes), |
||||||
|
nn.ReLU(), |
||||||
|
nn.Conv2D( |
||||||
|
branch_planes, |
||||||
|
branch_planes, |
||||||
|
kernel_size=3, |
||||||
|
padding=1, |
||||||
|
bias_attr=False), ) |
||||||
|
self.process3 = nn.Sequential( |
||||||
|
layers.SyncBatchNorm(branch_planes), |
||||||
|
nn.ReLU(), |
||||||
|
nn.Conv2D( |
||||||
|
branch_planes, |
||||||
|
branch_planes, |
||||||
|
kernel_size=3, |
||||||
|
padding=1, |
||||||
|
bias_attr=False), ) |
||||||
|
self.process4 = nn.Sequential( |
||||||
|
layers.SyncBatchNorm(branch_planes), |
||||||
|
nn.ReLU(), |
||||||
|
nn.Conv2D( |
||||||
|
branch_planes, |
||||||
|
branch_planes, |
||||||
|
kernel_size=3, |
||||||
|
padding=1, |
||||||
|
bias_attr=False), ) |
||||||
|
self.compression = nn.Sequential( |
||||||
|
layers.SyncBatchNorm(branch_planes * 5), |
||||||
|
nn.ReLU(), |
||||||
|
nn.Conv2D( |
||||||
|
branch_planes * 5, outplanes, kernel_size=1, bias_attr=False)) |
||||||
|
self.shortcut = nn.Sequential( |
||||||
|
layers.SyncBatchNorm(inplanes), |
||||||
|
nn.ReLU(), |
||||||
|
nn.Conv2D( |
||||||
|
inplanes, outplanes, kernel_size=1, bias_attr=False)) |
||||||
|
|
||||||
|
def forward(self, x): |
||||||
|
n, c, h, w = paddle.shape(x) |
||||||
|
x0 = self.scale0(x) |
||||||
|
x1 = self.process1( |
||||||
|
F.interpolate( |
||||||
|
self.scale1(x), size=[h, w], mode='bilinear') + x0) |
||||||
|
x2 = self.process2( |
||||||
|
F.interpolate( |
||||||
|
self.scale2(x), size=[h, w], mode='bilinear') + x1) |
||||||
|
x3 = self.process3( |
||||||
|
F.interpolate( |
||||||
|
self.scale3(x), size=[h, w], mode='bilinear') + x2) |
||||||
|
x4 = self.process4( |
||||||
|
F.interpolate( |
||||||
|
self.scale4(x), size=[h, w], mode='bilinear') + x3) |
||||||
|
|
||||||
|
out = self.compression(paddle.concat([x0, x1, x2, x3, x4], |
||||||
|
1)) + self.shortcut(x) |
||||||
|
return out |
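    # A worked example of the pyramid above, assuming DDRNet-23 (inplanes=1024,
    # branch_planes=128) and a [1, 1024, 8, 16] input, i.e. the 1/64-resolution
    # map of a 512x1024 image; all sizes are for illustration only:
    #   scale0: 1x1 conv                  -> [1, 128, 8, 16]
    #   scale1: 5x5  avg pool, stride 2   -> [1, 128, 4, 8]
    #   scale2: 9x9  avg pool, stride 4   -> [1, 128, 2, 4]
    #   scale3: 17x17 avg pool, stride 8  -> [1, 128, 1, 2]
    #   scale4: global average pool       -> [1, 128, 1, 1]
    # Each coarser branch is bilinearly resized back to 8x16, fused with the next
    # finer one through a 3x3 conv (process1..4), and the five maps are
    # concatenated (5 * 128 channels) before the 1x1 compression plus shortcut.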
||||||
|
|
||||||
|
|
||||||
|
class DDRNetHead(nn.Layer): |
||||||
|
def __init__(self, inplanes, interplanes, outplanes, scale_factor=None): |
||||||
|
super().__init__() |
||||||
|
self.bn1 = nn.BatchNorm2D(inplanes) |
||||||
|
self.relu = nn.ReLU() |
||||||
|
self.conv_bn_relu = layers.ConvBNReLU( |
||||||
|
inplanes, interplanes, kernel_size=3, padding=1, bias_attr=False) |
||||||
|
self.conv = nn.Conv2D( |
||||||
|
interplanes, outplanes, kernel_size=1, padding=0, bias_attr=True) |
||||||
|
|
||||||
|
self.scale_factor = scale_factor |
||||||
|
|
||||||
|
def forward(self, x): |
||||||
|
x = self.bn1(x) |
||||||
|
x = self.relu(x) |
||||||
|
x = self.conv_bn_relu(x) |
||||||
|
out = self.conv(x) |
||||||
|
|
||||||
|
if self.scale_factor is not None: |
||||||
|
out = F.interpolate( |
||||||
|
out, scale_factor=self.scale_factor, mode='bilinear') |
||||||
|
return out |
||||||
|
|
||||||
|
|
||||||
|
@manager.MODELS.add_component |
||||||
|
def DDRNet_23(**kwargs): |
||||||
|
return DualResNet( |
||||||
|
block_layers=[2, 2, 2, 2], |
||||||
|
planes=64, |
||||||
|
spp_planes=128, |
||||||
|
head_planes=128, |
||||||
|
**kwargs) |
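# A minimal usage sketch of DDRNet_23; the 19 classes and the (1, 3, 512, 1024)
# input are assumptions for illustration (a Cityscapes-like setting).
if __name__ == "__main__":
    model = DDRNet_23(num_classes=19)
    logits = model(paddle.rand([1, 3, 512, 1024]))
    # a single logit map, upsampled back to the input size (the auxiliary head
    # is only added when enable_auxiliary_loss=True)
    print([l.shape for l in logits])  # e.g. [[1, 19, 512, 1024]]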
@ -0,0 +1,198 @@ |
|||||||
|
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||||
|
# |
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||||
|
# you may not use this file except in compliance with the License. |
||||||
|
# You may obtain a copy of the License at |
||||||
|
# |
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
# |
||||||
|
# Unless required by applicable law or agreed to in writing, software |
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
# See the License for the specific language governing permissions and |
||||||
|
# limitations under the License. |
||||||
|
|
||||||
|
import paddle |
||||||
|
import paddle.nn as nn |
||||||
|
import paddle.nn.functional as F |
||||||
|
|
||||||
|
from paddlers.models.ppseg.cvlibs import manager |
||||||
|
from paddlers.models.ppseg.models import layers |
||||||
|
from paddlers.models.ppseg.utils import utils |
||||||
|
|
||||||
|
|
||||||
|
@manager.MODELS.add_component |
||||||
|
class GloRe(nn.Layer): |
||||||
|
""" |
||||||
|
The GloRe implementation based on PaddlePaddle. |
||||||
|
|
||||||
|
The original article refers to: |
||||||
|
Chen, Yunpeng, et al. "Graph-Based Global Reasoning Networks" |
||||||
|
(https://arxiv.org/pdf/1811.12814.pdf) |
||||||
|
|
||||||
|
Args: |
||||||
|
num_classes (int): The unique number of target classes. |
||||||
|
        backbone (paddle.nn.Layer): Backbone network, currently supporting Resnet50/101.
        backbone_indices (tuple, optional): Two values in the tuple indicate the indices of output of backbone. Default: (2, 3).
        gru_channels (int, optional): The number of input channels in GloRe Unit. Default: 512.
        gru_num_state (int, optional): The number of states in GloRe Unit. Default: 128.
        gru_num_node (int, optional): The number of nodes in GloRe Unit. Default: 64.
        enable_auxiliary_loss (bool, optional): A bool value that indicates whether to add an auxiliary loss. Default: True.
||||||
|
align_corners (bool, optional): An argument of F.interpolate. It should be set to False when the feature size is even, |
||||||
|
e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False. |
||||||
|
pretrained (str, optional): The path or url of pretrained model. Default: None. |
||||||
|
""" |
||||||
|
|
||||||
|
def __init__(self, |
||||||
|
num_classes, |
||||||
|
backbone, |
||||||
|
backbone_indices=(2, 3), |
||||||
|
gru_channels=512, |
||||||
|
gru_num_state=128, |
||||||
|
gru_num_node=64, |
||||||
|
enable_auxiliary_loss=True, |
||||||
|
align_corners=False, |
||||||
|
pretrained=None): |
||||||
|
super().__init__() |
||||||
|
|
||||||
|
self.backbone = backbone |
||||||
|
backbone_channels = [ |
||||||
|
backbone.feat_channels[i] for i in backbone_indices |
||||||
|
] |
||||||
|
|
||||||
|
self.head = GloReHead(num_classes, backbone_indices, backbone_channels, |
||||||
|
gru_channels, gru_num_state, gru_num_node, |
||||||
|
enable_auxiliary_loss) |
||||||
|
self.align_corners = align_corners |
||||||
|
self.pretrained = pretrained |
||||||
|
self.init_weight() |
||||||
|
|
||||||
|
def forward(self, x): |
||||||
|
feat_list = self.backbone(x) |
||||||
|
logit_list = self.head(feat_list) |
||||||
|
return [ |
||||||
|
F.interpolate( |
||||||
|
logit, |
||||||
|
paddle.shape(x)[2:], |
||||||
|
mode='bilinear', |
||||||
|
align_corners=self.align_corners) for logit in logit_list |
||||||
|
] |
||||||
|
|
||||||
|
def init_weight(self): |
||||||
|
if self.pretrained is not None: |
||||||
|
utils.load_entire_model(self, self.pretrained) |
||||||
|
|
||||||
|
|
||||||
|
class GloReHead(nn.Layer): |
||||||
|
def __init__(self, |
||||||
|
num_classes, |
||||||
|
backbone_indices, |
||||||
|
backbone_channels, |
||||||
|
gru_channels=512, |
||||||
|
gru_num_state=128, |
||||||
|
gru_num_node=64, |
||||||
|
enable_auxiliary_loss=True): |
||||||
|
super().__init__() |
||||||
|
|
||||||
|
in_channels = backbone_channels[1] |
||||||
|
self.conv_bn_relu = layers.ConvBNReLU( |
||||||
|
in_channels, gru_channels, 1, bias_attr=False) |
||||||
|
self.gru_module = GruModule( |
||||||
|
num_input=gru_channels, |
||||||
|
num_state=gru_num_state, |
||||||
|
num_node=gru_num_node) |
||||||
|
|
||||||
|
self.dropout = nn.Dropout(0.1) |
||||||
|
self.classifier = nn.Conv2D(512, num_classes, kernel_size=1) |
||||||
|
self.auxlayer = layers.AuxLayer( |
||||||
|
in_channels=backbone_channels[0], |
||||||
|
inter_channels=backbone_channels[0] // 4, |
||||||
|
out_channels=num_classes) |
||||||
|
|
||||||
|
self.backbone_indices = backbone_indices |
||||||
|
self.enable_auxiliary_loss = enable_auxiliary_loss |
||||||
|
|
||||||
|
def forward(self, feat_list): |
||||||
|
|
||||||
|
logit_list = [] |
||||||
|
x = feat_list[self.backbone_indices[1]] |
||||||
|
|
||||||
|
feature = self.conv_bn_relu(x) |
||||||
|
gru_output = self.gru_module(feature) |
||||||
|
output = self.dropout(gru_output) |
||||||
|
logit = self.classifier(output) |
||||||
|
logit_list.append(logit) |
||||||
|
|
||||||
|
if self.enable_auxiliary_loss: |
||||||
|
low_level_feat = feat_list[self.backbone_indices[0]] |
||||||
|
auxiliary_logit = self.auxlayer(low_level_feat) |
||||||
|
logit_list.append(auxiliary_logit) |
||||||
|
|
||||||
|
return logit_list |
||||||
|
|
||||||
|
|
||||||
|
class GCN(nn.Layer): |
||||||
|
def __init__(self, num_state, num_node, bias=False): |
||||||
|
super(GCN, self).__init__() |
||||||
|
self.conv1 = nn.Conv1D(num_node, num_node, kernel_size=1) |
||||||
|
self.relu = nn.ReLU() |
||||||
|
self.conv2 = nn.Conv1D( |
||||||
|
num_state, num_state, kernel_size=1, bias_attr=bias) |
||||||
|
|
||||||
|
def forward(self, x): |
||||||
|
h = self.conv1(paddle.transpose(x, perm=(0, 2, 1))) |
||||||
|
h = paddle.transpose(h, perm=(0, 2, 1)) |
||||||
|
h = h + x |
||||||
|
h = self.relu(self.conv2(h)) |
||||||
|
return h |
||||||
|
|
||||||
|
|
||||||
|
class GruModule(nn.Layer): |
||||||
|
def __init__(self, |
||||||
|
num_input=512, |
||||||
|
num_state=128, |
||||||
|
num_node=64, |
||||||
|
normalize=False): |
||||||
|
super(GruModule, self).__init__() |
||||||
|
self.normalize = normalize |
||||||
|
self.num_state = num_state |
||||||
|
self.num_node = num_node |
||||||
|
self.reduction_dim = nn.Conv2D(num_input, num_state, kernel_size=1) |
||||||
|
self.projection_mat = nn.Conv2D(num_input, num_node, kernel_size=1) |
||||||
|
self.gcn = GCN(num_state=self.num_state, num_node=self.num_node) |
||||||
|
self.extend_dim = nn.Conv2D( |
||||||
|
self.num_state, num_input, kernel_size=1, bias_attr=False) |
||||||
|
self.extend_bn = layers.SyncBatchNorm(num_input, epsilon=1e-4) |
||||||
|
|
||||||
|
def forward(self, input): |
||||||
|
n, c, h, w = input.shape |
||||||
|
# B, C, H, W |
||||||
|
reduction_dim = self.reduction_dim(input) |
||||||
|
# B, N, H, W |
||||||
|
mat_B = self.projection_mat(input) |
||||||
|
# B, C, H*W |
||||||
|
reshaped_reduction = paddle.reshape( |
||||||
|
reduction_dim, shape=[n, self.num_state, h * w]) |
||||||
|
# B, N, H*W |
||||||
|
reshaped_B = paddle.reshape(mat_B, shape=[n, self.num_node, h * w]) |
||||||
|
# B, N, H*W |
||||||
|
reproject = reshaped_B |
||||||
|
# B, C, N |
||||||
|
node_state_V = paddle.matmul( |
||||||
|
reshaped_reduction, paddle.transpose( |
||||||
|
reshaped_B, perm=[0, 2, 1])) |
||||||
|
|
||||||
|
if self.normalize: |
||||||
|
node_state_V = node_state_V * (1. / reshaped_reduction.shape[2]) |
||||||
|
|
||||||
|
# B, C, N |
||||||
|
gcn_out = self.gcn(node_state_V) |
||||||
|
# B, C, H*W |
||||||
|
Y = paddle.matmul(gcn_out, reproject) |
||||||
|
# B, C, H, W |
||||||
|
Y = paddle.reshape(Y, shape=[n, self.num_state, h, w]) |
||||||
|
Y_extend = self.extend_dim(Y) |
||||||
|
Y_extend = self.extend_bn(Y_extend) |
||||||
|
|
||||||
|
out = input + Y_extend |
||||||
|
return out |
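# Editor's note: a minimal, illustrative sketch of the GruModule shape flow
# (the sizes below are made up and this block is not part of the original file).
if __name__ == '__main__':
    import paddle

    gru = GruModule(num_input=512, num_state=128, num_node=64)
    feat = paddle.rand([2, 512, 32, 32])  # [N, C, H, W]
    out = gru(feat)  # project onto 64 graph nodes, reason with the GCN, project back
    print(out.shape)  # [2, 512, 32, 32], same as the input thanks to the residual add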
@ -0,0 +1,285 @@ |
|||||||
|
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||||
|
# |
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||||
|
# you may not use this file except in compliance with the License. |
||||||
|
# You may obtain a copy of the License at |
||||||
|
# |
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
# |
||||||
|
# Unless required by applicable law or agreed to in writing, software |
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
# See the License for the specific language governing permissions and |
||||||
|
# limitations under the License. |
||||||
|
|
||||||
|
import paddle |
||||||
|
import paddle.nn as nn |
||||||
|
import paddle.nn.functional as F |
||||||
|
from paddle import ParamAttr |
||||||
|
from paddle.nn.initializer import Constant |
||||||
|
from paddlers.models.ppseg.models import layers |
||||||
|
from paddlers.models.ppseg.models.layers import tensor_fusion_helper as helper |
||||||
|
|
||||||
|
|
||||||
|
class UAFM(nn.Layer): |
||||||
|
""" |
||||||
|
The base of Unified Attention Fusion Module. |
||||||
|
Args: |
||||||
|
x_ch (int): The channel of x tensor, which is the low level feature. |
||||||
|
y_ch (int): The channel of y tensor, which is the high level feature. |
||||||
|
out_ch (int): The channel of output tensor. |
||||||
|
ksize (int, optional): The kernel size of the conv for x tensor. Default: 3. |
||||||
|
resize_mode (str, optional): The resize mode for upsampling the y tensor. Default: bilinear. |
||||||
|
""" |
||||||
|
|
||||||
|
def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'): |
||||||
|
super().__init__() |
||||||
|
|
||||||
|
self.conv_x = layers.ConvBNReLU( |
||||||
|
x_ch, y_ch, kernel_size=ksize, padding=ksize // 2, bias_attr=False) |
||||||
|
self.conv_out = layers.ConvBNReLU( |
||||||
|
y_ch, out_ch, kernel_size=3, padding=1, bias_attr=False) |
||||||
|
self.resize_mode = resize_mode |
||||||
|
|
||||||
|
def check(self, x, y): |
||||||
|
assert x.ndim == 4 and y.ndim == 4 |
||||||
|
x_h, x_w = x.shape[2:] |
||||||
|
y_h, y_w = y.shape[2:] |
||||||
|
assert x_h >= y_h and x_w >= y_w |
||||||
|
|
||||||
|
def prepare(self, x, y): |
||||||
|
x = self.prepare_x(x, y) |
||||||
|
y = self.prepare_y(x, y) |
||||||
|
return x, y |
||||||
|
|
||||||
|
def prepare_x(self, x, y): |
||||||
|
x = self.conv_x(x) |
||||||
|
return x |
||||||
|
|
||||||
|
def prepare_y(self, x, y): |
||||||
|
y_up = F.interpolate(y, paddle.shape(x)[2:], mode=self.resize_mode) |
||||||
|
return y_up |
||||||
|
|
||||||
|
def fuse(self, x, y): |
||||||
|
out = x + y |
||||||
|
out = self.conv_out(out) |
||||||
|
return out |
||||||
|
|
||||||
|
def forward(self, x, y): |
||||||
|
""" |
||||||
|
Args: |
||||||
|
x (Tensor): The low level feature. |
||||||
|
y (Tensor): The high level feature. |
||||||
|
""" |
||||||
|
self.check(x, y) |
||||||
|
x, y = self.prepare(x, y) |
||||||
|
out = self.fuse(x, y) |
||||||
|
return out |
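# Editor's note: an illustrative sketch of the base UAFM contract (not part of
# the original file): x is the larger low-level map, y the smaller high-level
# map; y is upsampled to x's size, the two are added, and the sum is re-projected.
if __name__ == '__main__':
    import paddle

    fuse = UAFM(x_ch=64, y_ch=128, out_ch=128)
    x = paddle.rand([2, 64, 64, 64])   # low level feature, larger spatial size
    y = paddle.rand([2, 128, 32, 32])  # high level feature, smaller spatial size
    print(fuse(x, y).shape)            # [2, 128, 64, 64]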
||||||
|
|
||||||
|
|
||||||
|
class UAFM_ChAtten(UAFM): |
||||||
|
""" |
||||||
|
The UAFM with channel attention, which uses mean and max values. |
||||||
|
Args: |
||||||
|
x_ch (int): The channel of x tensor, which is the low level feature. |
||||||
|
y_ch (int): The channel of y tensor, which is the high level feature. |
||||||
|
out_ch (int): The channel of output tensor. |
||||||
|
ksize (int, optional): The kernel size of the conv for x tensor. Default: 3. |
||||||
|
resize_mode (str, optional): The resize mode for upsampling the y tensor. Default: bilinear. |
||||||
|
""" |
||||||
|
|
||||||
|
def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'): |
||||||
|
super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode) |
||||||
|
|
||||||
|
self.conv_xy_atten = nn.Sequential( |
||||||
|
layers.ConvBNAct( |
||||||
|
4 * y_ch, |
||||||
|
y_ch // 2, |
||||||
|
kernel_size=1, |
||||||
|
bias_attr=False, |
||||||
|
act_type="leakyrelu"), |
||||||
|
layers.ConvBN( |
||||||
|
y_ch // 2, y_ch, kernel_size=1, bias_attr=False)) |
||||||
|
|
||||||
|
def fuse(self, x, y): |
||||||
|
""" |
||||||
|
Args: |
||||||
|
x (Tensor): The low level feature. |
||||||
|
y (Tensor): The high level feature. |
||||||
|
""" |
||||||
|
atten = helper.avg_max_reduce_hw([x, y], self.training) |
||||||
|
atten = F.sigmoid(self.conv_xy_atten(atten)) |
||||||
|
|
||||||
|
out = x * atten + y * (1 - atten) |
||||||
|
out = self.conv_out(out) |
||||||
|
return out |
||||||
|
|
||||||
|
|
||||||
|
class UAFM_ChAtten_S(UAFM): |
||||||
|
""" |
||||||
|
The UAFM with channel attention, which uses mean values. |
||||||
|
Args: |
||||||
|
x_ch (int): The channel of x tensor, which is the low level feature. |
||||||
|
y_ch (int): The channel of y tensor, which is the high level feature. |
||||||
|
out_ch (int): The channel of output tensor. |
||||||
|
ksize (int, optional): The kernel size of the conv for x tensor. Default: 3. |
||||||
|
resize_mode (str, optional): The resize mode for upsampling the y tensor. Default: bilinear. |
||||||
|
""" |
||||||
|
|
||||||
|
def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'): |
||||||
|
super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode) |
||||||
|
|
||||||
|
self.conv_xy_atten = nn.Sequential( |
||||||
|
layers.ConvBNAct( |
||||||
|
2 * y_ch, |
||||||
|
y_ch // 2, |
||||||
|
kernel_size=1, |
||||||
|
bias_attr=False, |
||||||
|
act_type="leakyrelu"), |
||||||
|
layers.ConvBN( |
||||||
|
y_ch // 2, y_ch, kernel_size=1, bias_attr=False)) |
||||||
|
|
||||||
|
def fuse(self, x, y): |
||||||
|
""" |
||||||
|
Args: |
||||||
|
x (Tensor): The low level feature. |
||||||
|
y (Tensor): The high level feature. |
||||||
|
""" |
||||||
|
atten = helper.avg_reduce_hw([x, y]) |
||||||
|
atten = F.sigmoid(self.conv_xy_atten(atten)) |
||||||
|
|
||||||
|
out = x * atten + y * (1 - atten) |
||||||
|
out = self.conv_out(out) |
||||||
|
return out |
||||||
|
|
||||||
|
|
||||||
|
class UAFM_SpAtten(UAFM): |
||||||
|
""" |
||||||
|
The UAFM with spatial attention, which uses mean and max values. |
||||||
|
Args: |
||||||
|
x_ch (int): The channel of x tensor, which is the low level feature. |
||||||
|
y_ch (int): The channel of y tensor, which is the high level feature. |
||||||
|
out_ch (int): The channel of output tensor. |
||||||
|
ksize (int, optional): The kernel size of the conv for x tensor. Default: 3. |
||||||
|
resize_mode (str, optional): The resize mode for upsampling the y tensor. Default: bilinear. |
||||||
|
""" |
||||||
|
|
||||||
|
def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'): |
||||||
|
super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode) |
||||||
|
|
||||||
|
self.conv_xy_atten = nn.Sequential( |
||||||
|
layers.ConvBNReLU( |
||||||
|
4, 2, kernel_size=3, padding=1, bias_attr=False), |
||||||
|
layers.ConvBN( |
||||||
|
2, 1, kernel_size=3, padding=1, bias_attr=False)) |
||||||
|
self._scale = self.create_parameter( |
||||||
|
shape=[1], |
||||||
|
attr=ParamAttr(initializer=Constant(value=1.)), |
||||||
|
dtype="float32") |
||||||
|
self._scale.stop_gradient = True |
||||||
|
|
||||||
|
def fuse(self, x, y): |
||||||
|
""" |
||||||
|
Args: |
||||||
|
x (Tensor): The low level feature. |
||||||
|
y (Tensor): The high level feature. |
||||||
|
""" |
||||||
|
atten = helper.avg_max_reduce_channel([x, y]) |
||||||
|
atten = F.sigmoid(self.conv_xy_atten(atten)) |
||||||
|
|
||||||
|
out = x * atten + y * (self._scale - atten) |
||||||
|
out = self.conv_out(out) |
||||||
|
return out |
||||||
|
|
||||||
|
|
||||||
|
class UAFM_SpAtten_S(UAFM): |
||||||
|
""" |
||||||
|
The UAFM with spatial attention, which uses mean values. |
||||||
|
Args: |
||||||
|
x_ch (int): The channel of x tensor, which is the low level feature. |
||||||
|
y_ch (int): The channel of y tensor, which is the high level feature. |
||||||
|
out_ch (int): The channel of output tensor. |
||||||
|
ksize (int, optional): The kernel size of the conv for x tensor. Default: 3. |
||||||
|
resize_mode (str, optional): The resize mode for upsampling the y tensor. Default: bilinear. |
||||||
|
""" |
||||||
|
|
||||||
|
def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'): |
||||||
|
super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode) |
||||||
|
|
||||||
|
self.conv_xy_atten = nn.Sequential( |
||||||
|
layers.ConvBNReLU( |
||||||
|
2, 2, kernel_size=3, padding=1, bias_attr=False), |
||||||
|
layers.ConvBN( |
||||||
|
2, 1, kernel_size=3, padding=1, bias_attr=False)) |
||||||
|
|
||||||
|
def fuse(self, x, y): |
||||||
|
""" |
||||||
|
Args: |
||||||
|
x (Tensor): The low level feature. |
||||||
|
y (Tensor): The high level feature. |
||||||
|
""" |
||||||
|
atten = helper.avg_reduce_channel([x, y]) |
||||||
|
atten = F.sigmoid(self.conv_xy_atten(atten)) |
||||||
|
|
||||||
|
out = x * atten + y * (1 - atten) |
||||||
|
out = self.conv_out(out) |
||||||
|
return out |
||||||
|
|
||||||
|
|
||||||
|
class UAFMMobile(UAFM): |
||||||
|
""" |
||||||
|
Unified Attention Fusion Module for mobile. |
||||||
|
Args: |
||||||
|
x_ch (int): The channel of x tensor, which is the low level feature. |
||||||
|
y_ch (int): The channel of y tensor, which is the high level feature. |
||||||
|
out_ch (int): The channel of output tensor. |
||||||
|
ksize (int, optional): The kernel size of the conv for x tensor. Default: 3. |
||||||
|
resize_mode (str, optional): The resize mode for upsampling the y tensor. Default: bilinear. |
||||||
|
""" |
||||||
|
|
||||||
|
def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'): |
||||||
|
super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode) |
||||||
|
|
||||||
|
self.conv_x = layers.SeparableConvBNReLU( |
||||||
|
x_ch, y_ch, kernel_size=ksize, padding=ksize // 2, bias_attr=False) |
||||||
|
self.conv_out = layers.SeparableConvBNReLU( |
||||||
|
y_ch, out_ch, kernel_size=3, padding=1, bias_attr=False) |
||||||
|
|
||||||
|
|
||||||
|
class UAFMMobile_SpAtten(UAFM): |
||||||
|
""" |
||||||
|
Unified Attention Fusion Module with spatial attention for mobile. |
||||||
|
Args: |
||||||
|
x_ch (int): The channel of x tensor, which is the low level feature. |
||||||
|
y_ch (int): The channel of y tensor, which is the high level feature. |
||||||
|
out_ch (int): The channel of output tensor. |
||||||
|
ksize (int, optional): The kernel size of the conv for x tensor. Default: 3. |
||||||
|
resize_mode (str, optional): The resize mode for upsampling the y tensor. Default: bilinear. |
||||||
|
""" |
||||||
|
|
||||||
|
def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'): |
||||||
|
super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode) |
||||||
|
|
||||||
|
self.conv_x = layers.SeparableConvBNReLU( |
||||||
|
x_ch, y_ch, kernel_size=ksize, padding=ksize // 2, bias_attr=False) |
||||||
|
self.conv_out = layers.SeparableConvBNReLU( |
||||||
|
y_ch, out_ch, kernel_size=3, padding=1, bias_attr=False) |
||||||
|
|
||||||
|
self.conv_xy_atten = nn.Sequential( |
||||||
|
layers.ConvBNReLU( |
||||||
|
4, 2, kernel_size=3, padding=1, bias_attr=False), |
||||||
|
layers.ConvBN( |
||||||
|
2, 1, kernel_size=3, padding=1, bias_attr=False)) |
||||||
|
|
||||||
|
def fuse(self, x, y): |
||||||
|
""" |
||||||
|
Args: |
||||||
|
x (Tensor): The low level feature. |
||||||
|
y (Tensor): The high level feature. |
||||||
|
""" |
||||||
|
atten = helper.avg_max_reduce_channel([x, y]) |
||||||
|
atten = F.sigmoid(self.conv_xy_atten(atten)) |
||||||
|
|
||||||
|
out = x * atten + y * (1 - atten) |
||||||
|
out = self.conv_out(out) |
||||||
|
return out |
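# Editor's note: an illustrative comparison of the UAFM variants above (not part
# of the original file). The ChAtten variants pool over H and W and gate with a
# per-channel tensor of shape [N, y_ch, 1, 1]; the SpAtten variants pool over
# channels and gate with a per-pixel tensor of shape [N, 1, H, W]; the Mobile
# variants swap the plain convolutions for depthwise-separable ones.
if __name__ == '__main__':
    import paddle

    x = paddle.rand([2, 32, 64, 64])  # low level feature
    y = paddle.rand([2, 64, 32, 32])  # high level feature
    for cls in (UAFM_ChAtten, UAFM_ChAtten_S, UAFM_SpAtten, UAFM_SpAtten_S,
                UAFMMobile, UAFMMobile_SpAtten):
        m = cls(x_ch=32, y_ch=64, out_ch=64)
        m.eval()
        print(cls.__name__, m(x, y).shape)  # every variant yields [2, 64, 64, 64]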
@ -0,0 +1,133 @@ |
|||||||
|
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||||
|
# |
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||||
|
# you may not use this file except in compliance with the License. |
||||||
|
# You may obtain a copy of the License at |
||||||
|
# |
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
# |
||||||
|
# Unless required by applicable law or agreed to in writing, software |
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
# See the License for the specific language governing permissions and |
||||||
|
# limitations under the License. |
||||||
|
|
||||||
|
import paddle |
||||||
|
import paddle.nn as nn |
||||||
|
import paddle.nn.functional as F |
||||||
|
|
||||||
|
|
||||||
|
def avg_reduce_hw(x): |
||||||
|
# Reduce hw by avg |
||||||
|
# Return cat([avg_pool_0, avg_pool_1, ...]) |
||||||
|
if not isinstance(x, (list, tuple)): |
||||||
|
return F.adaptive_avg_pool2d(x, 1) |
||||||
|
elif len(x) == 1: |
||||||
|
return F.adaptive_avg_pool2d(x[0], 1) |
||||||
|
else: |
||||||
|
res = [] |
||||||
|
for xi in x: |
||||||
|
res.append(F.adaptive_avg_pool2d(xi, 1)) |
||||||
|
return paddle.concat(res, axis=1) |
||||||
|
|
||||||
|
|
||||||
|
def avg_max_reduce_hw_helper(x, is_training, use_concat=True): |
||||||
|
assert not isinstance(x, (list, tuple)) |
||||||
|
avg_pool = F.adaptive_avg_pool2d(x, 1) |
||||||
|
# TODO(pjc): when axis=[2, 3], the paddle.max API has a bug during training. |
||||||
|
if is_training: |
||||||
|
max_pool = F.adaptive_max_pool2d(x, 1) |
||||||
|
else: |
||||||
|
max_pool = paddle.max(x, axis=[2, 3], keepdim=True) |
||||||
|
|
||||||
|
if use_concat: |
||||||
|
res = paddle.concat([avg_pool, max_pool], axis=1) |
||||||
|
else: |
||||||
|
res = [avg_pool, max_pool] |
||||||
|
return res |
||||||
|
|
||||||
|
|
||||||
|
def avg_max_reduce_hw(x, is_training): |
||||||
|
# Reduce hw by avg and max |
||||||
|
# Return cat([avg_pool_0, avg_pool_1, ..., max_pool_0, max_pool_1, ...]) |
||||||
|
if not isinstance(x, (list, tuple)): |
||||||
|
return avg_max_reduce_hw_helper(x, is_training) |
||||||
|
elif len(x) == 1: |
||||||
|
return avg_max_reduce_hw_helper(x[0], is_training) |
||||||
|
else: |
||||||
|
res_avg = [] |
||||||
|
res_max = [] |
||||||
|
for xi in x: |
||||||
|
avg, max = avg_max_reduce_hw_helper(xi, is_training, False) |
||||||
|
res_avg.append(avg) |
||||||
|
res_max.append(max) |
||||||
|
res = res_avg + res_max |
||||||
|
return paddle.concat(res, axis=1) |
||||||
|
|
||||||
|
|
||||||
|
def avg_reduce_channel(x): |
||||||
|
# Reduce channel by avg |
||||||
|
# Return cat([avg_ch_0, avg_ch_1, ...]) |
||||||
|
if not isinstance(x, (list, tuple)): |
||||||
|
return paddle.mean(x, axis=1, keepdim=True) |
||||||
|
elif len(x) == 1: |
||||||
|
return paddle.mean(x[0], axis=1, keepdim=True) |
||||||
|
else: |
||||||
|
res = [] |
||||||
|
for xi in x: |
||||||
|
res.append(paddle.mean(xi, axis=1, keepdim=True)) |
||||||
|
return paddle.concat(res, axis=1) |
||||||
|
|
||||||
|
|
||||||
|
def max_reduce_channel(x): |
||||||
|
# Reduce channel by max |
||||||
|
# Return cat([max_ch_0, max_ch_1, ...]) |
||||||
|
if not isinstance(x, (list, tuple)): |
||||||
|
return paddle.max(x, axis=1, keepdim=True) |
||||||
|
elif len(x) == 1: |
||||||
|
return paddle.max(x[0], axis=1, keepdim=True) |
||||||
|
else: |
||||||
|
res = [] |
||||||
|
for xi in x: |
||||||
|
res.append(paddle.max(xi, axis=1, keepdim=True)) |
||||||
|
return paddle.concat(res, axis=1) |
||||||
|
|
||||||
|
|
||||||
|
def avg_max_reduce_channel_helper(x, use_concat=True): |
||||||
|
# Reduce channel by avg and max, only supports a single input |
||||||
|
assert not isinstance(x, (list, tuple)) |
||||||
|
mean_value = paddle.mean(x, axis=1, keepdim=True) |
||||||
|
max_value = paddle.max(x, axis=1, keepdim=True) |
||||||
|
|
||||||
|
if use_concat: |
||||||
|
res = paddle.concat([mean_value, max_value], axis=1) |
||||||
|
else: |
||||||
|
res = [mean_value, max_value] |
||||||
|
return res |
||||||
|
|
||||||
|
|
||||||
|
def avg_max_reduce_channel(x): |
||||||
|
# Reduce channel by avg and max |
||||||
|
# Return cat([avg_ch_0, max_ch_0, avg_ch_1, max_ch_1, ...]) |
||||||
|
if not isinstance(x, (list, tuple)): |
||||||
|
return avg_max_reduce_channel_helper(x) |
||||||
|
elif len(x) == 1: |
||||||
|
return avg_max_reduce_channel_helper(x[0]) |
||||||
|
else: |
||||||
|
res = [] |
||||||
|
for xi in x: |
||||||
|
res.extend(avg_max_reduce_channel_helper(xi, False)) |
||||||
|
return paddle.concat(res, axis=1) |
||||||
|
|
||||||
|
|
||||||
|
def cat_avg_max_reduce_channel(x): |
||||||
|
# Reduce channel by cat+avg+max |
||||||
|
assert isinstance(x, (list, tuple)) and len(x) > 1 |
||||||
|
|
||||||
|
x = paddle.concat(x, axis=1) |
||||||
|
|
||||||
|
mean_value = paddle.mean(x, axis=1, keepdim=True) |
||||||
|
max_value = paddle.max(x, axis=1, keepdim=True) |
||||||
|
res = paddle.concat([mean_value, max_value], axis=1) |
||||||
|
|
||||||
|
return res |
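# Editor's note: an illustrative shape reference for the reduce helpers above
# (not part of the original file; the tensor sizes are made up).
if __name__ == '__main__':
    a = paddle.rand([2, 8, 16, 16])
    b = paddle.rand([2, 4, 16, 16])
    print(avg_reduce_hw([a, b]).shape)               # [2, 12, 1, 1]
    print(avg_max_reduce_hw([a, b], True).shape)     # [2, 24, 1, 1], avg pools then max pools
    print(avg_reduce_channel([a, b]).shape)          # [2, 2, 16, 16], one mean map per input
    print(avg_max_reduce_channel([a, b]).shape)      # [2, 4, 16, 16], mean and max map per input
    print(cat_avg_max_reduce_channel([a, b]).shape)  # [2, 2, 16, 16], concat first, then mean and max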
@ -0,0 +1,162 @@ |
|||||||
|
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||||
|
# |
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||||
|
# you may not use this file except in compliance with the License. |
||||||
|
# You may obtain a copy of the License at |
||||||
|
# |
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
# |
||||||
|
# Unless required by applicable law or agreed to in writing, software |
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
# See the License for the specific language governing permissions and |
||||||
|
# limitations under the License. |
||||||
|
|
||||||
|
from functools import partial |
||||||
|
|
||||||
|
import paddle |
||||||
|
import paddle.nn as nn |
||||||
|
import paddle.nn.functional as F |
||||||
|
|
||||||
|
from paddlers.models.ppseg import utils |
||||||
|
from paddlers.models.ppseg.models import layers |
||||||
|
from paddlers.models.ppseg.cvlibs import manager |
||||||
|
|
||||||
|
|
||||||
|
@manager.MODELS.add_component |
||||||
|
class LRASPP(nn.Layer): |
||||||
|
""" |
||||||
|
Semantic segmentation model with a light R-ASPP head. |
||||||
|
|
||||||
|
The original article refers to |
||||||
|
Howard, Andrew, et al. "Searching for mobilenetv3." |
||||||
|
(https://arxiv.org/pdf/1909.11065.pdf) |
||||||
|
|
||||||
|
Args: |
||||||
|
num_classes (int): The number of target classes. |
||||||
|
backbone(nn.Layer): Backbone network, such as stdc1net and resnet18. The backbone must |
||||||
|
have feat_channels, of which the length is 5. |
||||||
|
backbone_indices (List(int), optional): The values indicate the indices of backbone output |
||||||
|
used as the input of the LR-ASPP head. |
||||||
|
Default: [0, 1, 3]. |
||||||
|
lraspp_head_inter_chs (List(int), optional): The intermediate channels of LR-ASPP head. |
||||||
|
Default: [32, 64]. |
||||||
|
lraspp_head_out_ch (int, optional): The output channels of each ASPP branch in the LR-ASPP head. |
||||||
|
Default: 128 |
||||||
|
resize_mode (str, optional): The resize mode for the upsampling operation in the LR-ASPP head. |
||||||
|
Default: bilinear. |
||||||
|
use_gap (bool, optional): If true, use global average pooling in the LR-ASPP head; otherwise, use |
||||||
|
a 49x49 kernel for average pooling. |
||||||
|
Default: True. |
||||||
|
pretrained (str, optional): The path or url of pretrained model. Default: None. |
||||||
|
""" |
||||||
|
|
||||||
|
def __init__(self, |
||||||
|
num_classes, |
||||||
|
backbone, |
||||||
|
backbone_indices=[0, 1, 3], |
||||||
|
lraspp_head_inter_chs=[32, 64], |
||||||
|
lraspp_head_out_ch=128, |
||||||
|
resize_mode='bilinear', |
||||||
|
use_gap=True, |
||||||
|
pretrained=None): |
||||||
|
super().__init__() |
||||||
|
|
||||||
|
# backbone |
||||||
|
assert hasattr(backbone, 'feat_channels'), \ |
||||||
|
"The backbone should has feat_channels." |
||||||
|
assert len(backbone.feat_channels) >= len(backbone_indices), \ |
||||||
|
f"The length of input backbone_indices ({len(backbone_indices)}) should not be" \ |
||||||
|
f"greater than the length of feat_channels ({len(backbone.feat_channels)})." |
||||||
|
assert len(backbone.feat_channels) > max(backbone_indices), \ |
||||||
|
f"The max value ({max(backbone_indices)}) of backbone_indices should be " \ |
||||||
|
f"less than the length of feat_channels ({len(backbone.feat_channels)})." |
||||||
|
self.backbone = backbone |
||||||
|
|
||||||
|
assert len(backbone_indices) >= 1, "The length of backbone_indices " \ |
||||||
|
"should not be lesser than 1" |
||||||
|
|
||||||
|
# head |
||||||
|
assert len(backbone_indices) == len( |
||||||
|
lraspp_head_inter_chs |
||||||
|
) + 1, "The length of backbone_indices should be 1 greater than the length of lraspp_head_inter_chs." |
||||||
|
self.backbone_indices = backbone_indices |
||||||
|
|
||||||
|
self.lraspp_head = LRASPPHead(backbone_indices, backbone.feat_channels, |
||||||
|
lraspp_head_inter_chs, lraspp_head_out_ch, |
||||||
|
num_classes, resize_mode, use_gap) |
||||||
|
|
||||||
|
# pretrained |
||||||
|
self.pretrained = pretrained |
||||||
|
self.init_weight() |
||||||
|
|
||||||
|
def forward(self, x): |
||||||
|
x_hw = paddle.shape(x)[2:] |
||||||
|
|
||||||
|
feats_backbone = self.backbone(x) |
||||||
|
assert len(feats_backbone) >= len(self.backbone_indices), \ |
||||||
|
f"The nums of backbone feats ({len(feats_backbone)}) should be greater or " \ |
||||||
|
f"equal than the nums of backbone_indices ({len(self.backbone_indices)})" |
||||||
|
|
||||||
|
y = self.lraspp_head(feats_backbone) |
||||||
|
y = F.interpolate(y, x_hw, mode='bilinear', align_corners=False) |
||||||
|
logit_list = [y] |
||||||
|
|
||||||
|
return logit_list |
||||||
|
|
||||||
|
def init_weight(self): |
||||||
|
if self.pretrained is not None: |
||||||
|
utils.load_entire_model(self, self.pretrained) |
||||||
|
|
||||||
|
|
||||||
|
class LRASPPHead(nn.Layer): |
||||||
|
def __init__(self, |
||||||
|
indices, |
||||||
|
in_chs, |
||||||
|
mid_chs, |
||||||
|
out_ch, |
||||||
|
n_classes, |
||||||
|
resize_mode, |
||||||
|
use_gap, |
||||||
|
align_corners=False): |
||||||
|
super().__init__() |
||||||
|
|
||||||
|
self.indices = indices[-2::-1] |
||||||
|
self.in_chs = [in_chs[i] for i in indices[::-1]] |
||||||
|
self.mid_chs = mid_chs[::-1] |
||||||
|
self.convs = nn.LayerList() |
||||||
|
self.conv_ups = nn.LayerList() |
||||||
|
for in_ch, mid_ch in zip(self.in_chs[1:], self.mid_chs): |
||||||
|
self.convs.append( |
||||||
|
nn.Conv2D( |
||||||
|
in_ch, mid_ch, kernel_size=1, bias_attr=False)) |
||||||
|
self.conv_ups.append(layers.ConvBNReLU(out_ch + mid_ch, out_ch, 1)) |
||||||
|
self.conv_w = nn.Sequential( |
||||||
|
nn.AvgPool2D( |
||||||
|
kernel_size=(49, 49), stride=(16, 20)) |
||||||
|
if not use_gap else nn.AdaptiveAvgPool2D(1), |
||||||
|
nn.Conv2D( |
||||||
|
self.in_chs[0], out_ch, 1, bias_attr=False), |
||||||
|
nn.Sigmoid()) |
||||||
|
self.conv_v = layers.ConvBNReLU(self.in_chs[0], out_ch, 1) |
||||||
|
self.conv_t = nn.Conv2D(out_ch, out_ch, kernel_size=1, bias_attr=False) |
||||||
|
self.conv_out = nn.Conv2D( |
||||||
|
out_ch, n_classes, kernel_size=1, bias_attr=False) |
||||||
|
|
||||||
|
self.interp = partial( |
||||||
|
F.interpolate, mode=resize_mode, align_corners=align_corners) |
||||||
|
|
||||||
|
def forward(self, in_feat_list): |
||||||
|
x = in_feat_list[-1] |
||||||
|
|
||||||
|
x = self.conv_v(x) * self.interp(self.conv_w(x), paddle.shape(x)[2:]) |
||||||
|
y = self.conv_t(x) |
||||||
|
|
||||||
|
for idx, conv, conv_up in zip(self.indices, self.convs, self.conv_ups): |
||||||
|
feat = in_feat_list[idx] |
||||||
|
y = self.interp(y, paddle.shape(feat)[2:]) |
||||||
|
y = paddle.concat([y, conv(feat)], axis=1) |
||||||
|
y = conv_up(y) |
||||||
|
|
||||||
|
y = self.conv_out(y) |
||||||
|
return y |
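# Editor's note: an illustrative sketch of what LRASPPHead consumes (not part of
# the original file). The channel counts mimic a hypothetical backbone with
# feat_channels = [16, 24, 32, 64, 96]; only the entries selected by `indices`
# are used, and the last feature in the list must be the deepest one.
if __name__ == '__main__':
    head = LRASPPHead(
        indices=[0, 1, 3],
        in_chs=[16, 24, 32, 64, 96],
        mid_chs=[32, 64],
        out_ch=128,
        n_classes=19,
        resize_mode='bilinear',
        use_gap=True)
    head.eval()
    feats = [
        paddle.rand([1, 16, 64, 64]),  # stride 4
        paddle.rand([1, 24, 32, 32]),  # stride 8
        paddle.rand([1, 32, 16, 16]),  # stride 16, unused by indices [0, 1, 3]
        paddle.rand([1, 64, 8, 8]),    # stride 32, consumed first by the head
    ]
    print(head(feats).shape)           # [1, 19, 64, 64], i.e. back at stride 4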
@ -0,0 +1,289 @@ |
|||||||
|
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||||
|
# |
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||||
|
# you may not use this file except in compliance with the License. |
||||||
|
# You may obtain a copy of the License at |
||||||
|
# |
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
# |
||||||
|
# Unless required by applicable law or agreed to in writing, software |
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
# See the License for the specific language governing permissions and |
||||||
|
# limitations under the License. |
||||||
|
|
||||||
|
import paddle |
||||||
|
import paddle.nn as nn |
||||||
|
import paddle.nn.functional as F |
||||||
|
|
||||||
|
from paddlers.models.ppseg import utils |
||||||
|
from paddlers.models.ppseg.models import layers |
||||||
|
from paddlers.models.ppseg.cvlibs import manager |
||||||
|
|
||||||
|
|
||||||
|
@manager.MODELS.add_component |
||||||
|
class MobileSeg(nn.Layer): |
||||||
|
""" |
||||||
|
The semantic segmentation models for mobile devices. |
||||||
|
|
||||||
|
Args: |
||||||
|
num_classes (int): The number of target classes. |
||||||
|
backbone(nn.Layer): Backbone network, such as stdc1net and resnet18. The backbone must |
||||||
|
have feat_channels, of which the length is 5. |
||||||
|
backbone_indices (List(int), optional): The values indicate the indices of output of backbone. |
||||||
|
Default: [1, 2, 3]. |
||||||
|
cm_bin_sizes (List(int), optional): The bin size of context module. Default: [1, 2]. |
||||||
|
cm_out_ch (int, optional): The output channel of the last context module. Default: 64. |
||||||
|
arm_type (str, optional): The type of attention refinement module. Default: UAFMMobile. |
||||||
|
arm_out_chs (List(int), optional): The out channels of each arm module. Default: [32, 48, 64]. |
||||||
|
seg_head_inter_chs (List(int), optional): The intermediate channels of segmentation head. |
||||||
|
Default: [32, 32, 32]. |
||||||
|
resize_mode (str, optional): The resize mode for the upsampling operation in decoder. |
||||||
|
Default: bilinear. |
||||||
|
use_last_fuse (bool, optional): Whether to fuse the multi-scale outputs at the end. Default: False. |
||||||
|
pretrained (str, optional): The path or url of pretrained model. Default: None. |
||||||
|
""" |
||||||
|
|
||||||
|
def __init__(self, |
||||||
|
num_classes, |
||||||
|
backbone, |
||||||
|
backbone_indices=[1, 2, 3], |
||||||
|
cm_bin_sizes=[1, 2], |
||||||
|
cm_out_ch=64, |
||||||
|
arm_type='UAFMMobile', |
||||||
|
arm_out_chs=[32, 48, 64], |
||||||
|
seg_head_inter_chs=[32, 32, 32], |
||||||
|
resize_mode='bilinear', |
||||||
|
use_last_fuse=False, |
||||||
|
pretrained=None): |
||||||
|
super().__init__() |
||||||
|
|
||||||
|
# backbone |
||||||
|
assert hasattr(backbone, 'feat_channels'), \ |
||||||
|
"The backbone should has feat_channels." |
||||||
|
assert len(backbone.feat_channels) >= len(backbone_indices), \ |
||||||
|
f"The length of input backbone_indices ({len(backbone_indices)}) should not be" \ |
||||||
|
f"greater than the length of feat_channels ({len(backbone.feat_channels)})." |
||||||
|
assert len(backbone.feat_channels) > max(backbone_indices), \ |
||||||
|
f"The max value ({max(backbone_indices)}) of backbone_indices should be " \ |
||||||
|
f"less than the length of feat_channels ({len(backbone.feat_channels)})." |
||||||
|
self.backbone = backbone |
||||||
|
|
||||||
|
assert len(backbone_indices) >= 1, "The length of backbone_indices " \ |
||||||
|
"should not be lesser than 1" |
||||||
|
self.backbone_indices = backbone_indices # [..., x16_id, x32_id] |
||||||
|
backbone_out_chs = [backbone.feat_channels[i] for i in backbone_indices] |
||||||
|
|
||||||
|
# head |
||||||
|
if len(arm_out_chs) == 1: |
||||||
|
arm_out_chs = arm_out_chs * len(backbone_indices) |
||||||
|
assert len(arm_out_chs) == len(backbone_indices), "The length of " \ |
||||||
|
"arm_out_chs and backbone_indices should be equal" |
||||||
|
|
||||||
|
self.ppseg_head = MobileSegHead(backbone_out_chs, arm_out_chs, |
||||||
|
cm_bin_sizes, cm_out_ch, arm_type, |
||||||
|
resize_mode, use_last_fuse) |
||||||
|
|
||||||
|
if len(seg_head_inter_chs) == 1: |
||||||
|
seg_head_inter_chs = seg_head_inter_chs * len(backbone_indices) |
||||||
|
assert len(seg_head_inter_chs) == len(backbone_indices), "The length of " \ |
||||||
|
"seg_head_inter_chs and backbone_indices should be equal" |
||||||
|
self.seg_heads = nn.LayerList() # [..., head_16, head32] |
||||||
|
for in_ch, mid_ch in zip(arm_out_chs, seg_head_inter_chs): |
||||||
|
self.seg_heads.append(SegHead(in_ch, mid_ch, num_classes)) |
||||||
|
|
||||||
|
# pretrained |
||||||
|
self.pretrained = pretrained |
||||||
|
self.init_weight() |
||||||
|
|
||||||
|
def forward(self, x): |
||||||
|
x_hw = paddle.shape(x)[2:] |
||||||
|
|
||||||
|
feats_backbone = self.backbone(x) # [x4, x8, x16, x32] |
||||||
|
assert len(feats_backbone) >= len(self.backbone_indices), \ |
||||||
|
f"The nums of backbone feats ({len(feats_backbone)}) should be greater or " \ |
||||||
|
f"equal than the nums of backbone_indices ({len(self.backbone_indices)})" |
||||||
|
|
||||||
|
feats_selected = [feats_backbone[i] for i in self.backbone_indices] |
||||||
|
feats_head = self.ppseg_head(feats_selected) # [..., x8, x16, x32] |
||||||
|
|
||||||
|
if self.training: |
||||||
|
logit_list = [] |
||||||
|
for x, seg_head in zip(feats_head, self.seg_heads): |
||||||
|
x = seg_head(x) |
||||||
|
logit_list.append(x) |
||||||
|
logit_list = [ |
||||||
|
F.interpolate( |
||||||
|
x, x_hw, mode='bilinear', align_corners=False) |
||||||
|
for x in logit_list |
||||||
|
] |
||||||
|
else: |
||||||
|
x = self.seg_heads[0](feats_head[0]) |
||||||
|
x = F.interpolate(x, x_hw, mode='bilinear', align_corners=False) |
||||||
|
logit_list = [x] |
||||||
|
|
||||||
|
return logit_list |
||||||
|
|
||||||
|
def init_weight(self): |
||||||
|
if self.pretrained is not None: |
||||||
|
utils.load_entire_model(self, self.pretrained) |
||||||
|
|
||||||
|
|
||||||
|
class MobileSegHead(nn.Layer): |
||||||
|
""" |
||||||
|
The head of MobileSeg. |
||||||
|
|
||||||
|
Args: |
||||||
|
backbone_out_chs (List(int)): The channels of the output tensors in the backbone. |
||||||
|
arm_out_chs (List(int)): The out channels of each arm module. |
||||||
|
cm_bin_sizes (List(int)): The bin size of context module. |
||||||
|
cm_out_ch (int): The output channel of the last context module. |
||||||
|
arm_type (str): The type of attention refinement module. |
||||||
|
resize_mode (str): The resize mode for the upsampling operation in decoder. |
||||||
|
""" |
||||||
|
|
||||||
|
def __init__(self, backbone_out_chs, arm_out_chs, cm_bin_sizes, cm_out_ch, |
||||||
|
arm_type, resize_mode, use_last_fuse): |
||||||
|
super().__init__() |
||||||
|
|
||||||
|
self.cm = MobileContextModule(backbone_out_chs[-1], cm_out_ch, |
||||||
|
cm_out_ch, cm_bin_sizes) |
||||||
|
|
||||||
|
assert hasattr(layers, arm_type), \ |
||||||
|
"Not support arm_type ({})".format(arm_type) |
||||||
|
arm_class = getattr(layers, arm_type) |
||||||
|
|
||||||
|
self.arm_list = nn.LayerList() # [..., arm8, arm16, arm32] |
||||||
|
for i in range(len(backbone_out_chs)): |
||||||
|
low_chs = backbone_out_chs[i] |
||||||
|
high_ch = cm_out_ch if i == len( |
||||||
|
backbone_out_chs) - 1 else arm_out_chs[i + 1] |
||||||
|
out_ch = arm_out_chs[i] |
||||||
|
arm = arm_class( |
||||||
|
low_chs, high_ch, out_ch, ksize=3, resize_mode=resize_mode) |
||||||
|
self.arm_list.append(arm) |
||||||
|
|
||||||
|
self.use_last_fuse = use_last_fuse |
||||||
|
if self.use_last_fuse: |
||||||
|
self.fuse_convs = nn.LayerList() |
||||||
|
for i in range(1, len(arm_out_chs)): |
||||||
|
conv = layers.SeparableConvBNReLU( |
||||||
|
arm_out_chs[i], |
||||||
|
arm_out_chs[0], |
||||||
|
kernel_size=3, |
||||||
|
bias_attr=False) |
||||||
|
self.fuse_convs.append(conv) |
||||||
|
self.last_conv = layers.SeparableConvBNReLU( |
||||||
|
len(arm_out_chs) * arm_out_chs[0], |
||||||
|
arm_out_chs[0], |
||||||
|
kernel_size=3, |
||||||
|
bias_attr=False) |
||||||
|
|
||||||
|
def forward(self, in_feat_list): |
||||||
|
""" |
||||||
|
Args: |
||||||
|
in_feat_list (List(Tensor)): Such as [x2, x4, x8, x16, x32]. |
||||||
|
x2, x4 and x8 are optional. |
||||||
|
Returns: |
||||||
|
out_feat_list (List(Tensor)): Such as [x2, x4, x8, x16, x32]. |
||||||
|
x2, x4 and x8 are optional. |
||||||
|
The lengths of in_feat_list and out_feat_list are the same. |
||||||
|
""" |
||||||
|
|
||||||
|
high_feat = self.cm(in_feat_list[-1]) |
||||||
|
out_feat_list = [] |
||||||
|
|
||||||
|
for i in reversed(range(len(in_feat_list))): |
||||||
|
low_feat = in_feat_list[i] |
||||||
|
arm = self.arm_list[i] |
||||||
|
high_feat = arm(low_feat, high_feat) |
||||||
|
out_feat_list.insert(0, high_feat) |
||||||
|
|
||||||
|
if self.use_last_fuse: |
||||||
|
x_list = [out_feat_list[0]] |
||||||
|
size = paddle.shape(out_feat_list[0])[2:] |
||||||
|
for i, (x, conv |
||||||
|
) in enumerate(zip(out_feat_list[1:], self.fuse_convs)): |
||||||
|
x = conv(x) |
||||||
|
x = F.interpolate( |
||||||
|
x, size=size, mode='bilinear', align_corners=False) |
||||||
|
x_list.append(x) |
||||||
|
x = paddle.concat(x_list, axis=1) |
||||||
|
x = self.last_conv(x) |
||||||
|
out_feat_list[0] = x |
||||||
|
|
||||||
|
return out_feat_list |
||||||
|
|
||||||
|
|
||||||
|
class MobileContextModule(nn.Layer): |
||||||
|
""" |
||||||
|
Context Module for Mobile Model. |
||||||
|
|
||||||
|
Args: |
||||||
|
in_channels (int): The number of input channels to pyramid pooling module. |
||||||
|
inter_channels (int): The number of inter channels to pyramid pooling module. |
||||||
|
out_channels (int): The number of output channels after pyramid pooling module. |
||||||
|
bin_sizes (tuple): The output sizes of the pooled feature maps. |
||||||
|
align_corners (bool): An argument of F.interpolate. It should be set to False |
||||||
|
when the output size of feature is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. |
||||||
|
""" |
||||||
|
|
||||||
|
def __init__(self, |
||||||
|
in_channels, |
||||||
|
inter_channels, |
||||||
|
out_channels, |
||||||
|
bin_sizes, |
||||||
|
align_corners=False): |
||||||
|
super().__init__() |
||||||
|
|
||||||
|
self.stages = nn.LayerList([ |
||||||
|
self._make_stage(in_channels, inter_channels, size) |
||||||
|
for size in bin_sizes |
||||||
|
]) |
||||||
|
|
||||||
|
self.conv_out = layers.SeparableConvBNReLU( |
||||||
|
in_channels=inter_channels, |
||||||
|
out_channels=out_channels, |
||||||
|
kernel_size=3, |
||||||
|
bias_attr=False) |
||||||
|
|
||||||
|
self.align_corners = align_corners |
||||||
|
|
||||||
|
def _make_stage(self, in_channels, out_channels, size): |
||||||
|
prior = nn.AdaptiveAvgPool2D(output_size=size) |
||||||
|
conv = layers.ConvBNReLU( |
||||||
|
in_channels=in_channels, out_channels=out_channels, kernel_size=1) |
||||||
|
return nn.Sequential(prior, conv) |
||||||
|
|
||||||
|
def forward(self, input): |
||||||
|
out = None |
||||||
|
input_shape = paddle.shape(input)[2:] |
||||||
|
|
||||||
|
for stage in self.stages: |
||||||
|
x = stage(input) |
||||||
|
x = F.interpolate( |
||||||
|
x, |
||||||
|
input_shape, |
||||||
|
mode='bilinear', |
||||||
|
align_corners=self.align_corners) |
||||||
|
if out is None: |
||||||
|
out = x |
||||||
|
else: |
||||||
|
out += x |
||||||
|
|
||||||
|
out = self.conv_out(out) |
||||||
|
return out |
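# Editor's note: an illustrative sketch of the context module above (not part of
# the original file). Each bin pools the input to a fixed grid, a 1x1 conv remaps
# the channels, and the upsampled results are summed before a final separable
# conv; the PPContextModule used by PPLiteSeg follows the same recipe.
if __name__ == '__main__':
    import paddle

    cm = MobileContextModule(
        in_channels=96, inter_channels=64, out_channels=64, bin_sizes=[1, 2])
    cm.eval()
    feat = paddle.rand([2, 96, 16, 16])
    print(cm(feat).shape)  # [2, 64, 16, 16]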
||||||
|
|
||||||
|
|
||||||
|
class SegHead(nn.Layer): |
||||||
|
def __init__(self, in_chan, mid_chan, n_classes): |
||||||
|
super().__init__() |
||||||
|
self.conv = layers.SeparableConvBNReLU( |
||||||
|
in_chan, mid_chan, kernel_size=3, bias_attr=False) |
||||||
|
self.conv_out = nn.Conv2D( |
||||||
|
mid_chan, n_classes, kernel_size=1, bias_attr=False) |
||||||
|
|
||||||
|
def forward(self, x): |
||||||
|
x = self.conv(x) |
||||||
|
x = self.conv_out(x) |
||||||
|
return x |
@ -0,0 +1,273 @@ |
|||||||
|
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||||
|
# |
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||||
|
# you may not use this file except in compliance with the License. |
||||||
|
# You may obtain a copy of the License at |
||||||
|
# |
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
# |
||||||
|
# Unless required by applicable law or agreed to in writing, software |
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
# See the License for the specific language governing permissions and |
||||||
|
# limitations under the License. |
||||||
|
|
||||||
|
import paddle |
||||||
|
import paddle.nn as nn |
||||||
|
import paddle.nn.functional as F |
||||||
|
|
||||||
|
from paddlers.models.ppseg import utils |
||||||
|
from paddlers.models.ppseg.models import layers |
||||||
|
from paddlers.models.ppseg.cvlibs import manager |
||||||
|
from paddlers.models.ppseg.utils import utils |
||||||
|
|
||||||
|
|
||||||
|
@manager.MODELS.add_component |
||||||
|
class PPLiteSeg(nn.Layer): |
||||||
|
""" |
||||||
|
The PP_LiteSeg implementation based on PaddlePaddle. |
||||||
|
|
||||||
|
The original article refers to "Juncai Peng, Yi Liu, Shiyu Tang, Yuying Hao, Lutao Chu, |
||||||
|
Guowei Chen, Zewu Wu, Zeyu Chen, Zhiliang Yu, Yuning Du, Qingqing Dang,Baohua Lai, |
||||||
|
Qiwen Liu, Xiaoguang Hu, Dianhai Yu, Yanjun Ma. PP-LiteSeg: A Superior Real-Time Semantic |
||||||
|
Segmentation Model. https://arxiv.org/abs/2204.02681". |
||||||
|
|
||||||
|
Args: |
||||||
|
num_classes (int): The number of target classes. |
||||||
|
backbone(nn.Layer): Backbone network, such as stdc1net and resnet18. The backbone must |
||||||
|
have feat_channels, of which the length is 5. |
||||||
|
backbone_indices (List(int), optional): The values indicate the indices of output of backbone. |
||||||
|
Default: [2, 3, 4]. |
||||||
|
arm_type (str, optional): The type of attention refinement module. Default: UAFM_SpAtten. |
||||||
|
cm_bin_sizes (List(int), optional): The bin size of context module. Default: [1,2,4]. |
||||||
|
cm_out_ch (int, optional): The output channel of the last context module. Default: 128. |
||||||
|
arm_out_chs (List(int), optional): The out channels of each arm module. Default: [64, 96, 128]. |
||||||
|
seg_head_inter_chs (List(int), optional): The intermediate channels of segmentation head. |
||||||
|
Default: [64, 64, 64]. |
||||||
|
resize_mode (str, optional): The resize mode for the upsampling operation in decoder. |
||||||
|
Default: bilinear. |
||||||
|
pretrained (str, optional): The path or url of pretrained model. Default: None. |
||||||
|
|
||||||
|
""" |
||||||
|
|
||||||
|
def __init__(self, |
||||||
|
num_classes, |
||||||
|
backbone, |
||||||
|
backbone_indices=[2, 3, 4], |
||||||
|
arm_type='UAFM_SpAtten', |
||||||
|
cm_bin_sizes=[1, 2, 4], |
||||||
|
cm_out_ch=128, |
||||||
|
arm_out_chs=[64, 96, 128], |
||||||
|
seg_head_inter_chs=[64, 64, 64], |
||||||
|
resize_mode='bilinear', |
||||||
|
pretrained=None): |
||||||
|
super().__init__() |
||||||
|
|
||||||
|
# backbone |
||||||
|
assert hasattr(backbone, 'feat_channels'), \ |
||||||
|
"The backbone should has feat_channels." |
||||||
|
assert len(backbone.feat_channels) >= len(backbone_indices), \ |
||||||
|
f"The length of input backbone_indices ({len(backbone_indices)}) should not be" \ |
||||||
|
f"greater than the length of feat_channels ({len(backbone.feat_channels)})." |
||||||
|
assert len(backbone.feat_channels) > max(backbone_indices), \ |
||||||
|
f"The max value ({max(backbone_indices)}) of backbone_indices should be " \ |
||||||
|
f"less than the length of feat_channels ({len(backbone.feat_channels)})." |
||||||
|
self.backbone = backbone |
||||||
|
|
||||||
|
assert len(backbone_indices) > 1, "The length of backbone_indices " \ |
||||||
|
"should be greater than 1" |
||||||
|
self.backbone_indices = backbone_indices # [..., x16_id, x32_id] |
||||||
|
backbone_out_chs = [backbone.feat_channels[i] for i in backbone_indices] |
||||||
|
|
||||||
|
# head |
||||||
|
if len(arm_out_chs) == 1: |
||||||
|
arm_out_chs = arm_out_chs * len(backbone_indices) |
||||||
|
assert len(arm_out_chs) == len(backbone_indices), "The length of " \ |
||||||
|
"arm_out_chs and backbone_indices should be equal" |
||||||
|
|
||||||
|
self.ppseg_head = PPLiteSegHead(backbone_out_chs, arm_out_chs, |
||||||
|
cm_bin_sizes, cm_out_ch, arm_type, |
||||||
|
resize_mode) |
||||||
|
|
||||||
|
if len(seg_head_inter_chs) == 1: |
||||||
|
seg_head_inter_chs = seg_head_inter_chs * len(backbone_indices) |
||||||
|
assert len(seg_head_inter_chs) == len(backbone_indices), "The length of " \ |
||||||
|
"seg_head_inter_chs and backbone_indices should be equal" |
||||||
|
self.seg_heads = nn.LayerList() # [..., head_16, head32] |
||||||
|
for in_ch, mid_ch in zip(arm_out_chs, seg_head_inter_chs): |
||||||
|
self.seg_heads.append(SegHead(in_ch, mid_ch, num_classes)) |
||||||
|
|
||||||
|
# pretrained |
||||||
|
self.pretrained = pretrained |
||||||
|
self.init_weight() |
||||||
|
|
||||||
|
def forward(self, x): |
||||||
|
x_hw = paddle.shape(x)[2:] |
||||||
|
|
||||||
|
feats_backbone = self.backbone(x) # [x2, x4, x8, x16, x32] |
||||||
|
assert len(feats_backbone) >= len(self.backbone_indices), \ |
||||||
|
f"The nums of backbone feats ({len(feats_backbone)}) should be greater or " \ |
||||||
|
f"equal than the nums of backbone_indices ({len(self.backbone_indices)})" |
||||||
|
|
||||||
|
feats_selected = [feats_backbone[i] for i in self.backbone_indices] |
||||||
|
|
||||||
|
feats_head = self.ppseg_head(feats_selected) # [..., x8, x16, x32] |
||||||
|
|
||||||
|
if self.training: |
||||||
|
logit_list = [] |
||||||
|
|
||||||
|
for x, seg_head in zip(feats_head, self.seg_heads): |
||||||
|
x = seg_head(x) |
||||||
|
logit_list.append(x) |
||||||
|
|
||||||
|
logit_list = [ |
||||||
|
F.interpolate( |
||||||
|
x, x_hw, mode='bilinear', align_corners=False) |
||||||
|
for x in logit_list |
||||||
|
] |
||||||
|
else: |
||||||
|
x = self.seg_heads[0](feats_head[0]) |
||||||
|
x = F.interpolate(x, x_hw, mode='bilinear', align_corners=False) |
||||||
|
logit_list = [x] |
||||||
|
|
||||||
|
return logit_list |
||||||
|
|
||||||
|
def init_weight(self): |
||||||
|
if self.pretrained is not None: |
||||||
|
utils.load_entire_model(self, self.pretrained) |
||||||
|
|
||||||
|
|
||||||
|
class PPLiteSegHead(nn.Layer): |
||||||
|
""" |
||||||
|
The head of PPLiteSeg. |
||||||
|
|
||||||
|
Args: |
||||||
|
backbone_out_chs (List(int)): The channels of the output tensors in the backbone. |
||||||
|
arm_out_chs (List(int)): The out channels of each arm module. |
||||||
|
cm_bin_sizes (List(int)): The bin size of context module. |
||||||
|
cm_out_ch (int): The output channel of the last context module. |
||||||
|
arm_type (str): The type of attention refinement module. |
||||||
|
resize_mode (str): The resize mode for the upsampling operation in decoder. |
||||||
|
""" |
||||||
|
|
||||||
|
def __init__(self, backbone_out_chs, arm_out_chs, cm_bin_sizes, cm_out_ch, |
||||||
|
arm_type, resize_mode): |
||||||
|
super().__init__() |
||||||
|
|
||||||
|
self.cm = PPContextModule(backbone_out_chs[-1], cm_out_ch, cm_out_ch, |
||||||
|
cm_bin_sizes) |
||||||
|
|
||||||
|
assert hasattr(layers, arm_type), \ |
||||||
|
"Not support arm_type ({})".format(arm_type) |
||||||
|
arm_class = getattr(layers, arm_type) |
||||||
|
|
||||||
|
self.arm_list = nn.LayerList() # [..., arm8, arm16, arm32] |
||||||
|
for i in range(len(backbone_out_chs)): |
||||||
|
low_chs = backbone_out_chs[i] |
||||||
|
high_ch = cm_out_ch if i == len( |
||||||
|
backbone_out_chs) - 1 else arm_out_chs[i + 1] |
||||||
|
out_ch = arm_out_chs[i] |
||||||
|
arm = arm_class( |
||||||
|
low_chs, high_ch, out_ch, ksize=3, resize_mode=resize_mode) |
||||||
|
self.arm_list.append(arm) |
||||||
|
|
||||||
|
def forward(self, in_feat_list): |
||||||
|
""" |
||||||
|
Args: |
||||||
|
in_feat_list (List(Tensor)): Such as [x2, x4, x8, x16, x32]. |
||||||
|
x2, x4 and x8 are optional. |
||||||
|
Returns: |
||||||
|
out_feat_list (List(Tensor)): Such as [x2, x4, x8, x16, x32]. |
||||||
|
x2, x4 and x8 are optional. |
||||||
|
The lengths of in_feat_list and out_feat_list are the same. |
||||||
|
""" |
||||||
|
|
||||||
|
high_feat = self.cm(in_feat_list[-1]) |
||||||
|
out_feat_list = [] |
||||||
|
|
||||||
|
for i in reversed(range(len(in_feat_list))): |
||||||
|
low_feat = in_feat_list[i] |
||||||
|
arm = self.arm_list[i] |
||||||
|
high_feat = arm(low_feat, high_feat) |
||||||
|
out_feat_list.insert(0, high_feat) |
||||||
|
|
||||||
|
return out_feat_list |
||||||
|
|
||||||
|
|
||||||
|
class PPContextModule(nn.Layer): |
||||||
|
""" |
||||||
|
Simple Context module. |
||||||
|
|
||||||
|
Args: |
||||||
|
in_channels (int): The number of input channels to pyramid pooling module. |
||||||
|
inter_channels (int): The number of inter channels to pyramid pooling module. |
||||||
|
out_channels (int): The number of output channels after pyramid pooling module. |
||||||
|
bin_sizes (tuple): The output sizes of the pooled feature maps. |
||||||
|
align_corners (bool): An argument of F.interpolate. It should be set to False |
||||||
|
when the output size of feature is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. |
||||||
|
""" |
||||||
|
|
||||||
|
def __init__(self, |
||||||
|
in_channels, |
||||||
|
inter_channels, |
||||||
|
out_channels, |
||||||
|
bin_sizes, |
||||||
|
align_corners=False): |
||||||
|
super().__init__() |
||||||
|
|
||||||
|
self.stages = nn.LayerList([ |
||||||
|
self._make_stage(in_channels, inter_channels, size) |
||||||
|
for size in bin_sizes |
||||||
|
]) |
||||||
|
|
||||||
|
self.conv_out = layers.ConvBNReLU( |
||||||
|
in_channels=inter_channels, |
||||||
|
out_channels=out_channels, |
||||||
|
kernel_size=3, |
||||||
|
padding=1) |
||||||
|
|
||||||
|
self.align_corners = align_corners |
||||||
|
|
||||||
|
def _make_stage(self, in_channels, out_channels, size): |
||||||
|
prior = nn.AdaptiveAvgPool2D(output_size=size) |
||||||
|
conv = layers.ConvBNReLU( |
||||||
|
in_channels=in_channels, out_channels=out_channels, kernel_size=1) |
||||||
|
return nn.Sequential(prior, conv) |
||||||
|
|
||||||
|
def forward(self, input): |
||||||
|
out = None |
||||||
|
input_shape = paddle.shape(input)[2:] |
||||||
|
|
||||||
|
for stage in self.stages: |
||||||
|
x = stage(input) |
||||||
|
x = F.interpolate( |
||||||
|
x, |
||||||
|
input_shape, |
||||||
|
mode='bilinear', |
||||||
|
align_corners=self.align_corners) |
||||||
|
if out is None: |
||||||
|
out = x |
||||||
|
else: |
||||||
|
out += x |
||||||
|
|
||||||
|
out = self.conv_out(out) |
||||||
|
return out |
||||||
|
|
||||||
|
|
||||||
|
class SegHead(nn.Layer): |
||||||
|
def __init__(self, in_chan, mid_chan, n_classes): |
||||||
|
super().__init__() |
||||||
|
self.conv = layers.ConvBNReLU( |
||||||
|
in_chan, |
||||||
|
mid_chan, |
||||||
|
kernel_size=3, |
||||||
|
stride=1, |
||||||
|
padding=1, |
||||||
|
bias_attr=False) |
||||||
|
self.conv_out = nn.Conv2D( |
||||||
|
mid_chan, n_classes, kernel_size=1, bias_attr=False) |
||||||
|
|
||||||
|
def forward(self, x): |
||||||
|
x = self.conv(x) |
||||||
|
x = self.conv_out(x) |
||||||
|
return x |
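# Editor's note: an illustrative end-to-end sketch (not part of the original
# file). _ToyBackbone is a made-up stand-in for the STDC-style backbones that
# PP-LiteSeg normally uses; it only needs a feat_channels attribute and a
# forward that returns the per-stage feature maps.
if __name__ == '__main__':
    import paddle
    import paddle.nn as nn

    class _ToyBackbone(nn.Layer):
        feat_channels = [16, 32, 64, 128, 256]

        def __init__(self):
            super().__init__()
            chs = [3] + self.feat_channels
            self.stages = nn.LayerList([
                nn.Conv2D(chs[i], chs[i + 1], 3, stride=2, padding=1)
                for i in range(5)
            ])

        def forward(self, x):
            feats = []
            for stage in self.stages:
                x = stage(x)
                feats.append(x)
            return feats  # [x2, x4, x8, x16, x32]

    model = PPLiteSeg(num_classes=19, backbone=_ToyBackbone())
    model.eval()
    logits = model(paddle.rand([1, 3, 512, 512]))
    print(logits[0].shape)  # [1, 19, 512, 512]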
@ -0,0 +1,449 @@ |
|||||||
|
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||||
|
# |
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||||
|
# you may not use this file except in compliance with the License. |
||||||
|
# You may obtain a copy of the License at |
||||||
|
# |
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
# |
||||||
|
# Unless required by applicable law or agreed to in writing, software |
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
# See the License for the specific language governing permissions and |
||||||
|
# limitations under the License. |
||||||
|
|
||||||
|
# Refer to the original implementation: https://github.com/clovaai/c3_sinet/blob/master/models/SINet.py |
||||||
|
|
||||||
|
import paddle |
||||||
|
import paddle.nn as nn |
||||||
|
import paddle.nn.functional as F |
||||||
|
|
||||||
|
from paddlers.models.ppseg.models import layers |
||||||
|
from paddlers.models.ppseg.cvlibs import manager |
||||||
|
from paddlers.models.ppseg.utils import utils |
||||||
|
|
||||||
|
CFG = [[[3, 1], [5, 1]], [[3, 1], [3, 1]], [[3, 1], [5, 1]], [[3, 1], [3, 1]], |
||||||
|
[[5, 1], [3, 2]], [[5, 2], [3, 4]], [[3, 1], [3, 1]], [[5, 1], [5, 1]], |
||||||
|
[[3, 2], [3, 4]], [[3, 1], [5, 2]]] |
||||||
|
|
||||||
|
|
||||||
|
@manager.MODELS.add_component |
||||||
|
class SINet(nn.Layer): |
||||||
|
""" |
||||||
|
The SINet implementation based on PaddlePaddle. |
||||||
|
|
||||||
|
The original article refers to |
||||||
|
Hyojin Park, Lars Lowe Sjösund, YoungJoon Yoo, Nicolas Monet, Jihwan Bang, Nojun Kwak |
||||||
|
"SINet: Extreme Lightweight Portrait Segmentation Networks with Spatial Squeeze Modules |
||||||
|
and Information Blocking Decoder", (https://arxiv.org/abs/1911.09099). |
||||||
|
|
||||||
|
Args: |
||||||
|
num_classes (int): The number of target classes. |
||||||
|
config (List, optional): The config for SINet. Default: CFG. |
||||||
|
stage2_blocks (int, optional): The num of blocks in stage2. Default: 2. |
||||||
|
stage3_blocks (int, optional): The num of blocks in stage3. Default: 8. |
||||||
|
in_channels (int, optional): The channels of input image. Default: 3. |
||||||
|
pretrained (str, optional): The path or url of pretrained model. Default: None. |
||||||
|
""" |
||||||
|
|
||||||
|
def __init__(self, |
||||||
|
num_classes=2, |
||||||
|
config=CFG, |
||||||
|
stage2_blocks=2, |
||||||
|
stage3_blocks=8, |
||||||
|
in_channels=3, |
||||||
|
pretrained=None): |
||||||
|
super().__init__() |
||||||
|
dim1 = 16 |
||||||
|
dim2 = 48 |
||||||
|
dim3 = 96 |
||||||
|
|
||||||
|
self.encoder = SINetEncoder(config, in_channels, num_classes, |
||||||
|
stage2_blocks, stage3_blocks) |
||||||
|
|
||||||
|
self.up = nn.UpsamplingBilinear2D(scale_factor=2) |
||||||
|
self.bn_3 = nn.BatchNorm(num_classes) |
||||||
|
|
||||||
|
self.level2_C = CBR(dim2, num_classes, 1, 1) |
||||||
|
self.bn_2 = nn.BatchNorm(num_classes) |
||||||
|
|
||||||
|
self.classifier = nn.Sequential( |
||||||
|
nn.UpsamplingBilinear2D(scale_factor=2), |
||||||
|
nn.Conv2D( |
||||||
|
num_classes, num_classes, 3, 1, 1, bias_attr=False)) |
||||||
|
|
||||||
|
self.pretrained = pretrained |
||||||
|
self.init_weight() |
||||||
|
|
||||||
|
def init_weight(self): |
||||||
|
if self.pretrained is not None: |
||||||
|
utils.load_entire_model(self, self.pretrained) |
||||||
|
|
||||||
|
def forward(self, input): |
||||||
|
output1 = self.encoder.level1(input) # x2 |
||||||
|
|
||||||
|
output2_0 = self.encoder.level2_0(output1) # x4 |
||||||
|
for i, layer in enumerate(self.encoder.level2): |
||||||
|
if i == 0: |
||||||
|
output2 = layer(output2_0) |
||||||
|
else: |
||||||
|
output2 = layer(output2) |
||||||
|
output2_cat = self.encoder.BR2(paddle.concat([output2_0, output2], 1)) |
||||||
|
|
||||||
|
output3_0 = self.encoder.level3_0(output2_cat) # x8 |
||||||
|
for i, layer in enumerate(self.encoder.level3): |
||||||
|
if i == 0: |
||||||
|
output3 = layer(output3_0) |
||||||
|
else: |
||||||
|
output3 = layer(output3) |
||||||
|
output3_cat = self.encoder.BR3(paddle.concat([output3_0, output3], 1)) |
||||||
|
enc_final = self.encoder.classifier(output3_cat) # x8 |
||||||
|
|
||||||
|
dec_stage1 = self.bn_3(self.up(enc_final)) # x4 |
||||||
|
stage1_confidence = paddle.max(F.softmax(dec_stage1), axis=1) |
||||||
|
stage1_gate = (1 - stage1_confidence).unsqueeze(1) |
||||||
|
|
||||||
|
dec_stage2_0 = self.level2_C(output2) # x4 |
||||||
|
dec_stage2 = self.bn_2( |
||||||
|
self.up(dec_stage2_0 * stage1_gate + dec_stage1)) # x2 |
||||||
|
|
||||||
|
out = self.classifier(dec_stage2) # x |
||||||
|
|
||||||
|
return [out] |
||||||
|
|
||||||
|
|
||||||
|
def channel_shuffle(x, groups): |
||||||
|
x_shape = paddle.shape(x) |
||||||
|
batch_size, height, width = x_shape[0], x_shape[2], x_shape[3] |
||||||
|
num_channels = x.shape[1] |
||||||
|
channels_per_group = num_channels // groups |
||||||
|
|
||||||
|
# reshape |
||||||
|
x = paddle.reshape( |
||||||
|
x=x, shape=[batch_size, groups, channels_per_group, height, width]) |
||||||
|
|
||||||
|
# transpose |
||||||
|
x = paddle.transpose(x=x, perm=[0, 2, 1, 3, 4]) |
||||||
|
|
||||||
|
# flatten |
||||||
|
x = paddle.reshape(x=x, shape=[batch_size, num_channels, height, width]) |
||||||
|
|
||||||
|
return x |
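# Editor's note: a tiny worked example of channel_shuffle (not part of the
# original file): with 6 channels and groups=2, the channel order
# [0, 1, 2, 3, 4, 5] becomes [0, 3, 1, 4, 2, 5].
if __name__ == '__main__':
    x = paddle.arange(6, dtype='float32').reshape([1, 6, 1, 1])
    y = channel_shuffle(x, groups=2)
    print(y.flatten().tolist())  # [0.0, 3.0, 1.0, 4.0, 2.0, 5.0]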
||||||
|
|
||||||
|
|
||||||
|
class CBR(nn.Layer): |
||||||
|
''' |
||||||
|
This class defines the convolution layer with batch normalization and PReLU activation |
||||||
|
''' |
||||||
|
|
||||||
|
def __init__(self, nIn, nOut, kSize, stride=1): |
||||||
|
super().__init__() |
||||||
|
padding = int((kSize - 1) / 2) |
||||||
|
|
||||||
|
self.conv = nn.Conv2D( |
||||||
|
nIn, |
||||||
|
nOut, (kSize, kSize), |
||||||
|
stride=stride, |
||||||
|
padding=(padding, padding), |
||||||
|
bias_attr=False) |
||||||
|
self.bn = nn.BatchNorm(nOut) |
||||||
|
self.act = nn.PReLU(nOut) |
||||||
|
|
||||||
|
def forward(self, input): |
||||||
|
output = self.conv(input) |
||||||
|
output = self.bn(output) |
||||||
|
output = self.act(output) |
||||||
|
return output |
||||||
|
|
||||||
|
|
||||||
|
class SeparableCBR(nn.Layer): |
||||||
|
''' |
||||||
|
This class defines the depthwise separable convolution layer with batch normalization and PReLU activation |
||||||
|
''' |
||||||
|
|
||||||
|
def __init__(self, nIn, nOut, kSize, stride=1): |
||||||
|
super().__init__() |
||||||
|
padding = int((kSize - 1) / 2) |
||||||
|
|
||||||
|
self.conv = nn.Sequential( |
||||||
|
nn.Conv2D( |
||||||
|
nIn, |
||||||
|
nIn, (kSize, kSize), |
||||||
|
stride=stride, |
||||||
|
padding=(padding, padding), |
||||||
|
groups=nIn, |
||||||
|
bias_attr=False), |
||||||
|
nn.Conv2D( |
||||||
|
nIn, nOut, kernel_size=1, stride=1, bias_attr=False), ) |
||||||
|
self.bn = nn.BatchNorm(nOut) |
||||||
|
self.act = nn.PReLU(nOut) |
||||||
|
|
||||||
|
def forward(self, input): |
||||||
|
output = self.conv(input) |
||||||
|
output = self.bn(output) |
||||||
|
output = self.act(output) |
||||||
|
return output |
||||||
|
|
||||||
|
|
||||||
|
class SqueezeBlock(nn.Layer): |
||||||
|
def __init__(self, exp_size, divide=4.0): |
||||||
|
super(SqueezeBlock, self).__init__() |
||||||
|
|
||||||
|
if divide > 1: |
||||||
|
self.dense = nn.Sequential( |
||||||
|
nn.Linear(exp_size, int(exp_size / divide)), |
||||||
|
nn.PReLU(int(exp_size / divide)), |
||||||
|
nn.Linear(int(exp_size / divide), exp_size), |
||||||
|
nn.PReLU(exp_size), ) |
||||||
|
else: |
||||||
|
self.dense = nn.Sequential( |
||||||
|
nn.Linear(exp_size, exp_size), nn.PReLU(exp_size)) |
||||||
|
|
||||||
|
def forward(self, x): |
||||||
|
alpha = F.adaptive_avg_pool2d(x, [1, 1]) |
||||||
|
alpha = paddle.squeeze(alpha, axis=[2, 3]) |
||||||
|
alpha = self.dense(alpha) |
||||||
|
alpha = paddle.unsqueeze(alpha, axis=[2, 3]) |
||||||
|
out = x * alpha |
||||||
|
return out |
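SqueezeBlock is a squeeze-and-excitation gate: global average pooling squeezes each channel to a scalar, a small bottleneck MLP produces per-channel weights, and the input is rescaled channel-wise. A minimal usage sketch with illustrative sizes:

# Hedged sketch: the output keeps the input shape, only the per-channel scaling changes.
se = SqueezeBlock(exp_size=16, divide=4.0)   # bottleneck MLP: 16 -> 4 -> 16
x = paddle.rand([1, 16, 8, 8])
y = se(x)
print(y.shape)  # [1, 16, 8, 8]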
||||||
|
|
||||||
|
|
||||||
|
class SESeparableCBR(nn.Layer): |
||||||
|
''' |
||||||
|
This class defines the depthwise separable convolution layer with squeeze-and-excitation, batch normalization and PReLU activation |
||||||
|
''' |
||||||
|
|
||||||
|
def __init__(self, nIn, nOut, kSize, stride=1, divide=2.0): |
||||||
|
super().__init__() |
||||||
|
padding = int((kSize - 1) / 2) |
||||||
|
|
||||||
|
self.conv = nn.Sequential( |
||||||
|
nn.Conv2D( |
||||||
|
nIn, |
||||||
|
nIn, (kSize, kSize), |
||||||
|
stride=stride, |
||||||
|
padding=(padding, padding), |
||||||
|
groups=nIn, |
||||||
|
bias_attr=False), |
||||||
|
SqueezeBlock( |
||||||
|
nIn, divide=divide), |
||||||
|
nn.Conv2D( |
||||||
|
nIn, nOut, kernel_size=1, stride=1, bias_attr=False), ) |
||||||
|
|
||||||
|
self.bn = nn.BatchNorm(nOut) |
||||||
|
self.act = nn.PReLU(nOut) |
||||||
|
|
||||||
|
def forward(self, input): |
||||||
|
output = self.conv(input) |
||||||
|
output = self.bn(output) |
||||||
|
output = self.act(output) |
||||||
|
return output |
||||||
|
|
||||||
|
|
||||||
|
class BR(nn.Layer): |
||||||
|
''' |
||||||
|
This class groups the batch normalization and PReLU activation |
||||||
|
''' |
||||||
|
|
||||||
|
def __init__(self, nOut): |
||||||
|
super().__init__() |
||||||
|
self.bn = nn.BatchNorm(nOut) |
||||||
|
self.act = nn.PReLU(nOut) |
||||||
|
|
||||||
|
def forward(self, input): |
||||||
|
output = self.bn(input) |
||||||
|
output = self.act(output) |
||||||
|
return output |
||||||
|
|
||||||
|
|
||||||
|
class CB(nn.Layer): |
||||||
|
''' |
||||||
|
This class groups the convolution and batch normalization |
||||||
|
''' |
||||||
|
|
||||||
|
def __init__(self, nIn, nOut, kSize, stride=1): |
||||||
|
super().__init__() |
||||||
|
padding = int((kSize - 1) / 2) |
||||||
|
self.conv = nn.Conv2D( |
||||||
|
nIn, |
||||||
|
nOut, (kSize, kSize), |
||||||
|
stride=stride, |
||||||
|
padding=(padding, padding), |
||||||
|
bias_attr=False) |
||||||
|
self.bn = nn.BatchNorm(nOut) |
||||||
|
|
||||||
|
def forward(self, input): |
||||||
|
output = self.conv(input) |
||||||
|
output = self.bn(output) |
||||||
|
return output |
||||||
|
|
||||||
|
|
||||||
|
class C(nn.Layer): |
||||||
|
''' |
||||||
|
This class is for a convolutional layer. |
||||||
|
''' |
||||||
|
|
||||||
|
def __init__(self, nIn, nOut, kSize, stride=1, group=1): |
||||||
|
super().__init__() |
||||||
|
padding = int((kSize - 1) / 2) |
||||||
|
self.conv = nn.Conv2D( |
||||||
|
nIn, |
||||||
|
nOut, (kSize, kSize), |
||||||
|
stride=stride, |
||||||
|
padding=(padding, padding), |
||||||
|
bias_attr=False, |
||||||
|
groups=group) |
||||||
|
|
||||||
|
def forward(self, input): |
||||||
|
output = self.conv(input) |
||||||
|
return output |
||||||
|
|
||||||
|
|
||||||
|
class S2block(nn.Layer): |
||||||
|
''' |
||||||
|
This class defines a depthwise convolution applied at an optionally reduced resolution (average pooling, then bilinear upsampling), followed by a pointwise convolution. |
||||||
|
''' |
||||||
|
|
||||||
|
def __init__(self, nIn, nOut, kSize, avgsize): |
||||||
|
super().__init__() |
||||||
|
|
||||||
|
self.resolution_down = False |
||||||
|
if avgsize > 1: |
||||||
|
self.resolution_down = True |
||||||
|
self.down_res = nn.AvgPool2D(avgsize, avgsize) |
||||||
|
self.up_res = nn.UpsamplingBilinear2D(scale_factor=avgsize) |
||||||
|
self.avgsize = avgsize |
||||||
|
|
||||||
|
padding = int((kSize - 1) / 2) |
||||||
|
self.conv = nn.Sequential( |
||||||
|
nn.Conv2D( |
||||||
|
nIn, |
||||||
|
nIn, |
||||||
|
kernel_size=(kSize, kSize), |
||||||
|
stride=1, |
||||||
|
padding=(padding, padding), |
||||||
|
groups=nIn, |
||||||
|
bias_attr=False), |
||||||
|
nn.BatchNorm(nIn)) |
||||||
|
|
||||||
|
self.act_conv1x1 = nn.Sequential( |
||||||
|
nn.PReLU(nIn), |
||||||
|
nn.Conv2D( |
||||||
|
nIn, nOut, kernel_size=1, stride=1, bias_attr=False), ) |
||||||
|
|
||||||
|
self.bn = nn.BatchNorm(nOut) |
||||||
|
|
||||||
|
def forward(self, input): |
||||||
|
if self.resolution_down: |
||||||
|
input = self.down_res(input) |
||||||
|
output = self.conv(input) |
||||||
|
|
||||||
|
output = self.act_conv1x1(output) |
||||||
|
if self.resolution_down: |
||||||
|
output = self.up_res(output) |
||||||
|
return self.bn(output) |
||||||
|
|
||||||
|
|
||||||
|
class S2module(nn.Layer): |
||||||
|
''' |
||||||
|
This class defines the ESP block, which is based on the following principle |
||||||
|
Reduce ---> Split ---> Transform --> Merge |
||||||
|
''' |
||||||
|
|
||||||
|
def __init__(self, nIn, nOut, add=True, config=[[3, 1], [5, 1]]): |
||||||
|
super().__init__() |
||||||
|
|
||||||
|
group_n = len(config) |
||||||
|
assert group_n == 2 |
||||||
|
n = int(nOut / group_n) |
||||||
|
n1 = nOut - group_n * n |
||||||
|
|
||||||
|
self.c1 = C(nIn, n, 1, 1, group=group_n) |
||||||
|
# self.c1 = C(nIn, n, 1, 1) |
||||||
|
|
||||||
|
for i in range(group_n): |
||||||
|
if i == 0: |
||||||
|
self.layer_0 = S2block( |
||||||
|
n, n + n1, kSize=config[i][0], avgsize=config[i][1]) |
||||||
|
else: |
||||||
|
self.layer_1 = S2block( |
||||||
|
n, n, kSize=config[i][0], avgsize=config[i][1]) |
||||||
|
|
||||||
|
self.BR = BR(nOut) |
||||||
|
self.add = add |
||||||
|
self.group_n = group_n |
||||||
|
|
||||||
|
def forward(self, input): |
||||||
|
output1 = self.c1(input) |
||||||
|
output1 = channel_shuffle(output1, self.group_n) |
||||||
|
res_0 = self.layer_0(output1) |
||||||
|
res_1 = self.layer_1(output1) |
||||||
|
combine = paddle.concat([res_0, res_1], 1) |
||||||
|
|
||||||
|
if self.add: |
||||||
|
combine = input + combine |
||||||
|
output = self.BR(combine) |
||||||
|
return output |
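Tying the forward pass above back to the Reduce -> Split -> Transform -> Merge principle: the grouped 1x1 conv (c1) reduces channels, the shuffled feature feeds two S2blocks with different kernel sizes and pooling rates (transform), and their outputs are concatenated and optionally added to the input (merge). A usage sketch with an illustrative configuration, not one taken from this diff:

# Hedged sketch; the [kSize, avgsize] pairs below are placeholders.
m = S2module(48, 48, add=True, config=[[3, 1], [5, 2]])
x = paddle.rand([1, 48, 56, 56])
y = m(x)
print(y.shape)  # [1, 48, 56, 56], same resolution and channel count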
||||||
|
|
||||||
|
|
||||||
|
class SINetEncoder(nn.Layer): |
||||||
|
def __init__(self, |
||||||
|
config, |
||||||
|
in_channels=3, |
||||||
|
num_classes=2, |
||||||
|
stage2_blocks=2, |
||||||
|
stage3_blocks=8): |
||||||
|
super().__init__() |
||||||
|
assert stage2_blocks == 2 |
||||||
|
dim1 = 16 |
||||||
|
dim2 = 48 |
||||||
|
dim3 = 96 |
||||||
|
|
||||||
|
self.level1 = CBR(in_channels, 12, 3, 2) |
||||||
|
|
||||||
|
self.level2_0 = SESeparableCBR(12, dim1, 3, 2, divide=1) |
||||||
|
|
||||||
|
self.level2 = nn.LayerList() |
||||||
|
for i in range(0, stage2_blocks): |
||||||
|
if i == 0: |
||||||
|
self.level2.append( |
||||||
|
S2module( |
||||||
|
dim1, dim2, config=config[i], add=False)) |
||||||
|
else: |
||||||
|
self.level2.append(S2module(dim2, dim2, config=config[i])) |
||||||
|
self.BR2 = BR(dim2 + dim1) |
||||||
|
|
||||||
|
self.level3_0 = SESeparableCBR(dim2 + dim1, dim2, 3, 2, divide=2) |
||||||
|
self.level3 = nn.LayerList() |
||||||
|
for i in range(0, stage3_blocks): |
||||||
|
if i == 0: |
||||||
|
self.level3.append( |
||||||
|
S2module( |
||||||
|
dim2, dim3, config=config[2 + i], add=False)) |
||||||
|
else: |
||||||
|
self.level3.append(S2module(dim3, dim3, config=config[2 + i])) |
||||||
|
self.BR3 = BR(dim3 + dim2) |
||||||
|
|
||||||
|
self.classifier = C(dim3 + dim2, num_classes, 1, 1) |
||||||
|
|
||||||
|
def forward(self, input): |
||||||
|
output1 = self.level1(input) # x2 |
||||||
|
|
||||||
|
output2_0 = self.level2_0(output1) # x4 |
||||||
|
for i, layer in enumerate(self.level2): |
||||||
|
if i == 0: |
||||||
|
output2 = layer(output2_0) |
||||||
|
else: |
||||||
|
output2 = layer(output2) |
||||||
|
|
||||||
|
output3_0 = self.level3_0( |
||||||
|
self.BR2(paddle.concat([output2_0, output2], 1))) # x8 |
||||||
|
for i, layer in enumerate(self.level3): |
||||||
|
if i == 0: |
||||||
|
output3 = layer(output3_0) |
||||||
|
else: |
||||||
|
output3 = layer(output3) |
||||||
|
|
||||||
|
output3_cat = self.BR3(paddle.concat([output3_0, output3], 1)) |
||||||
|
classifier = self.classifier(output3_cat) |
||||||
|
return classifier |
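A rough end-to-end shape check for the encoder, assuming a 10-entry configuration (2 S2modules for level 2 plus 8 for level 3); the real configuration is defined where SINet is instantiated and is not shown in this hunk:

# Hedged sketch: the config values are placeholders with the expected structure.
cfg = [[[3, 1], [5, 1]] for _ in range(10)]   # one [kSize, avgsize] pair per S2block branch
encoder = SINetEncoder(cfg, in_channels=3, num_classes=2)
x = paddle.rand([1, 3, 224, 224])
out = encoder(x)
print(out.shape)  # [1, 2, 28, 28], i.e. 1/8 of the input resolution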
@ -0,0 +1,155 @@ |
|||||||
|
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||||
|
# |
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||||
|
# you may not use this file except in compliance with the License. |
||||||
|
# You may obtain a copy of the License at |
||||||
|
# |
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
# |
||||||
|
# Unless required by applicable law or agreed to in writing, software |
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
# See the License for the specific language governing permissions and |
||||||
|
# limitations under the License. |
||||||
|
|
||||||
|
import warnings |
||||||
|
|
||||||
|
import paddle |
||||||
|
import paddle.nn as nn |
||||||
|
import paddle.nn.functional as F |
||||||
|
|
||||||
|
from paddlers.models.ppseg.cvlibs import manager |
||||||
|
from paddlers.models.ppseg.models import layers |
||||||
|
from paddlers.models.ppseg.utils import utils |
||||||
|
from paddlers.models.ppseg.models.backbones.top_transformer import ConvBNAct |
||||||
|
|
||||||
|
|
||||||
|
@manager.MODELS.add_component |
||||||
|
class TopFormer(nn.Layer): |
||||||
|
""" |
||||||
|
The Token Pyramid Transformer(TopFormer) implementation based on PaddlePaddle. |
||||||
|
|
||||||
|
The original article refers to |
||||||
|
Zhang, Wenqiang, Zilong Huang, Guozhong Luo, Tao Chen, Xinggang Wang, Wenyu Liu, Gang Yu, |
||||||
|
and Chunhua Shen. "TopFormer: Token Pyramid Transformer for Mobile Semantic Segmentation." |
||||||
|
In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition, |
||||||
|
pp. 12083-12093. 2022. |
||||||
|
|
||||||
|
This model refers to https://github.com/hustvl/TopFormer. |
||||||
|
|
||||||
|
Args: |
||||||
|
num_classes (int, optional): The unique number of target classes. |
||||||
|
backbone(nn.Layer): Backbone network. |
||||||
|
head_use_dw (bool, optional): Whether the head uses depthwise convolutions. Default: False. |
||||||
|
align_corners (bool, optional): Set the align_corners in resizing. Default: False. |
||||||
|
pretrained (str, optional): The path or url of pretrained model. Default: None. |
||||||
|
""" |
||||||
|
|
||||||
|
def __init__(self, |
||||||
|
num_classes, |
||||||
|
backbone, |
||||||
|
head_use_dw=False, |
||||||
|
align_corners=False, |
||||||
|
pretrained=None): |
||||||
|
super().__init__() |
||||||
|
self.backbone = backbone |
||||||
|
|
||||||
|
head_in_channels = [ |
||||||
|
i for i in backbone.injection_out_channels if i is not None |
||||||
|
] |
||||||
|
self.decode_head = TopFormerHead( |
||||||
|
num_classes=num_classes, |
||||||
|
in_channels=head_in_channels, |
||||||
|
use_dw=head_use_dw, |
||||||
|
align_corners=align_corners) |
||||||
|
|
||||||
|
self.align_corners = align_corners |
||||||
|
self.pretrained = pretrained |
||||||
|
self.init_weight() |
||||||
|
|
||||||
|
def init_weight(self): |
||||||
|
if self.pretrained is not None: |
||||||
|
utils.load_entire_model(self, self.pretrained) |
||||||
|
|
||||||
|
def forward(self, x): |
||||||
|
x_hw = paddle.shape(x)[2:] |
||||||
|
x = self.backbone(x) # len=3, 1/8,1/16,1/32 |
||||||
|
x = self.decode_head(x) |
||||||
|
x = F.interpolate( |
||||||
|
x, x_hw, mode='bilinear', align_corners=self.align_corners) |
||||||
|
|
||||||
|
return [x] |
||||||
|
|
||||||
|
|
||||||
|
class TopFormerHead(nn.Layer): |
||||||
|
def __init__(self, |
||||||
|
num_classes, |
||||||
|
in_channels, |
||||||
|
in_index=[0, 1, 2], |
||||||
|
in_transform='multiple_select', |
||||||
|
use_dw=False, |
||||||
|
dropout_ratio=0.1, |
||||||
|
align_corners=False): |
||||||
|
super().__init__() |
||||||
|
|
||||||
|
self.in_index = in_index |
||||||
|
self.in_transform = in_transform |
||||||
|
self.align_corners = align_corners |
||||||
|
|
||||||
|
self._init_inputs(in_channels, in_index, in_transform) |
||||||
|
self.linear_fuse = ConvBNAct( |
||||||
|
in_channels=self.last_channels, |
||||||
|
out_channels=self.last_channels, |
||||||
|
kernel_size=1, |
||||||
|
stride=1, |
||||||
|
groups=self.last_channels if use_dw else 1, |
||||||
|
act=nn.ReLU) |
||||||
|
self.dropout = nn.Dropout2D(dropout_ratio) |
||||||
|
self.conv_seg = nn.Conv2D( |
||||||
|
self.last_channels, num_classes, kernel_size=1) |
||||||
|
|
||||||
|
def _init_inputs(self, in_channels, in_index, in_transform): |
||||||
|
assert in_transform in [None, 'resize_concat', 'multiple_select'] |
||||||
|
if in_transform is not None: |
||||||
|
assert len(in_channels) == len(in_index) |
||||||
|
if in_transform == 'resize_concat': |
||||||
|
self.last_channels = sum(in_channels) |
||||||
|
else: |
||||||
|
self.last_channels = in_channels[0] |
||||||
|
else: |
||||||
|
assert isinstance(in_channels, int) |
||||||
|
assert isinstance(in_index, int) |
||||||
|
self.last_channels = in_channels |
||||||
|
|
||||||
|
def _transform_inputs(self, inputs): |
||||||
|
if self.in_transform == 'resize_concat': |
||||||
|
inputs = [inputs[i] for i in self.in_index] |
||||||
|
inputs = [ |
||||||
|
F.interpolate( |
||||||
|
x, |
||||||
|
size=paddle.shape(inputs[0])[2:], |
||||||
|
mode='bilinear', |
||||||
|
align_corners=self.align_corners) for x in inputs |
||||||
|
] |
||||||
|
inputs = paddle.concat(inputs, axis=1) |
||||||
|
elif self.in_transform == 'multiple_select': |
||||||
|
inputs_tmp = [inputs[i] for i in self.in_index] |
||||||
|
inputs = inputs_tmp[0] |
||||||
|
for x in inputs_tmp[1:]: |
||||||
|
x = F.interpolate( |
||||||
|
x, |
||||||
|
size=paddle.shape(inputs)[2:], |
||||||
|
mode='bilinear', |
||||||
|
align_corners=self.align_corners) |
||||||
|
inputs += x |
||||||
|
else: |
||||||
|
inputs = inputs[self.in_index] |
||||||
|
|
||||||
|
return inputs |
||||||
|
|
||||||
|
def forward(self, x): |
||||||
|
x = self._transform_inputs(x) |
||||||
|
x = self.linear_fuse(x) |
||||||
|
x = self.dropout(x) |
||||||
|
x = self.conv_seg(x) |
||||||
|
return x |
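With the default 'multiple_select' transform, the head resizes every selected pyramid level to the spatial size of the first one and sums them before the 1x1 fuse and segmentation convolutions. A small stand-alone sketch of that fusion, with illustrative channel and spatial sizes:

# Hedged sketch mirroring the 'multiple_select' branch of _transform_inputs.
feats = [paddle.rand([1, 64, 32, 32]),   # 1/8
         paddle.rand([1, 64, 16, 16]),   # 1/16
         paddle.rand([1, 64, 8, 8])]     # 1/32
fused = feats[0]
for f in feats[1:]:
    fused = fused + F.interpolate(f, size=fused.shape[2:], mode='bilinear')
print(fused.shape)  # [1, 64, 32, 32]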
@ -0,0 +1,173 @@ |
|||||||
|
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||||
|
# |
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||||
|
# you may not use this file except in compliance with the License. |
||||||
|
# You may obtain a copy of the License at |
||||||
|
# |
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
# |
||||||
|
# Unless required by applicable law or agreed to in writing, software |
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
# See the License for the specific language governing permissions and |
||||||
|
# limitations under the License. |
||||||
|
|
||||||
|
import paddle |
||||||
|
import paddle.nn as nn |
||||||
|
import paddle.nn.functional as F |
||||||
|
|
||||||
|
from paddlers.models.ppseg import utils |
||||||
|
from paddlers.models.ppseg.cvlibs import manager |
||||||
|
from paddlers.models.ppseg.models import layers |
||||||
|
|
||||||
|
|
||||||
|
@manager.MODELS.add_component |
||||||
|
class UPerNet(nn.Layer): |
||||||
|
""" |
||||||
|
The UPerNet implementation based on PaddlePaddle. |
||||||
|
|
||||||
|
The original article refers to |
||||||
|
Tete Xiao, et al. "Unified Perceptual Parsing for Scene Understanding" |
||||||
|
(https://arxiv.org/abs/1807.10221). |
||||||
|
|
||||||
|
Args: |
||||||
|
num_classes (int): The unique number of target classes. |
||||||
|
backbone (Paddle.nn.Layer): Backbone network, currently supports ResNet50/101. |
||||||
|
backbone_indices (tuple): Four values in the tuple indicate the indices of output of backbone. |
||||||
|
channels (int): The channels of inter layers. Default: 512. |
||||||
|
enable_auxiliary_loss (bool, optional): A bool value that indicates whether to add an auxiliary loss. Default: False. |
||||||
|
align_corners (bool, optional): An argument of F.interpolate. It should be set to False when the feature size is even, |
||||||
|
e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False. |
||||||
|
dropout_prob (float): Dropout ratio for the UPerNet head. Default: 0.1. |
||||||
|
pretrained (str, optional): The path or url of pretrained model. Default: None. |
||||||
|
""" |
||||||
|
|
||||||
|
def __init__(self, |
||||||
|
num_classes, |
||||||
|
backbone, |
||||||
|
backbone_indices, |
||||||
|
channels=512, |
||||||
|
enable_auxiliary_loss=False, |
||||||
|
align_corners=False, |
||||||
|
dropout_prob=0.1, |
||||||
|
pretrained=None): |
||||||
|
super().__init__() |
||||||
|
self.backbone = backbone |
||||||
|
self.backbone_indices = backbone_indices |
||||||
|
self.in_channels = [ |
||||||
|
self.backbone.feat_channels[i] for i in backbone_indices |
||||||
|
] |
||||||
|
self.align_corners = align_corners |
||||||
|
self.pretrained = pretrained |
||||||
|
self.enable_auxiliary_loss = enable_auxiliary_loss |
||||||
|
|
||||||
|
fpn_inplanes = [ |
||||||
|
self.backbone.feat_channels[i] for i in backbone_indices |
||||||
|
] |
||||||
|
self.head = UPerNetHead( |
||||||
|
num_classes=num_classes, |
||||||
|
fpn_inplanes=fpn_inplanes, |
||||||
|
dropout_prob=dropout_prob, |
||||||
|
channels=channels, |
||||||
|
enable_auxiliary_loss=self.enable_auxiliary_loss) |
||||||
|
self.init_weight() |
||||||
|
|
||||||
|
def forward(self, x): |
||||||
|
feats = self.backbone(x) |
||||||
|
feats = [feats[i] for i in self.backbone_indices] |
||||||
|
logit_list = self.head(feats) |
||||||
|
logit_list = [ |
||||||
|
F.interpolate( |
||||||
|
logit, |
||||||
|
paddle.shape(x)[2:], |
||||||
|
mode='bilinear', |
||||||
|
align_corners=self.align_corners) for logit in logit_list |
||||||
|
] |
||||||
|
return logit_list |
||||||
|
|
||||||
|
def init_weight(self): |
||||||
|
if self.pretrained is not None: |
||||||
|
utils.load_entire_model(self, self.pretrained) |
||||||
|
|
||||||
|
|
||||||
|
class UPerNetHead(nn.Layer): |
||||||
|
def __init__(self, |
||||||
|
num_classes, |
||||||
|
fpn_inplanes, |
||||||
|
channels, |
||||||
|
dropout_prob=0.1, |
||||||
|
enable_auxiliary_loss=False, |
||||||
|
align_corners=True): |
||||||
|
super(UPerNetHead, self).__init__() |
||||||
|
self.align_corners = align_corners |
||||||
|
self.ppm = layers.PPModule( |
||||||
|
in_channels=fpn_inplanes[-1], |
||||||
|
out_channels=channels, |
||||||
|
bin_sizes=(1, 2, 3, 6), |
||||||
|
dim_reduction=True, |
||||||
|
align_corners=True) |
||||||
|
self.enable_auxiliary_loss = enable_auxiliary_loss |
||||||
|
self.lateral_convs = nn.LayerList() |
||||||
|
self.fpn_convs = nn.LayerList() |
||||||
|
|
||||||
|
for fpn_inplane in fpn_inplanes[:-1]: |
||||||
|
self.lateral_convs.append( |
||||||
|
layers.ConvBNReLU(fpn_inplane, channels, 1)) |
||||||
|
self.fpn_convs.append( |
||||||
|
layers.ConvBNReLU( |
||||||
|
channels, channels, 3, bias_attr=False)) |
||||||
|
|
||||||
|
if self.enable_auxiliary_loss: |
||||||
|
self.aux_head = layers.AuxLayer( |
||||||
|
fpn_inplanes[2], |
||||||
|
fpn_inplanes[2], |
||||||
|
num_classes, |
||||||
|
dropout_prob=dropout_prob) |
||||||
|
|
||||||
|
self.fpn_bottleneck = layers.ConvBNReLU( |
||||||
|
len(fpn_inplanes) * channels, channels, 3, padding=1) |
||||||
|
|
||||||
|
self.conv_last = nn.Sequential( |
||||||
|
layers.ConvBNReLU( |
||||||
|
len(fpn_inplanes) * channels, channels, 3, bias_attr=False), |
||||||
|
nn.Conv2D( |
||||||
|
channels, num_classes, kernel_size=1)) |
||||||
|
self.conv_seg = nn.Conv2D(channels, num_classes, kernel_size=1) |
||||||
|
|
||||||
|
def forward(self, inputs): |
||||||
|
laterals = [] |
||||||
|
for i, lateral_conv in enumerate(self.lateral_convs): |
||||||
|
laterals.append(lateral_conv(inputs[i])) |
||||||
|
|
||||||
|
laterals.append(self.ppm(inputs[-1])) |
||||||
|
fpn_levels = len(laterals) |
||||||
|
for i in range(fpn_levels - 1, 0, -1): |
||||||
|
prev_shape = paddle.shape(laterals[i - 1]) |
||||||
|
laterals[i - 1] = laterals[i - 1] + F.interpolate( |
||||||
|
laterals[i], |
||||||
|
size=prev_shape[2:], |
||||||
|
mode='bilinear', |
||||||
|
align_corners=self.align_corners) |
||||||
|
|
||||||
|
fpn_outs = [] |
||||||
|
for i in range(fpn_levels - 1): |
||||||
|
fpn_outs.append(self.fpn_convs[i](laterals[i])) |
||||||
|
fpn_outs.append(laterals[-1]) |
||||||
|
|
||||||
|
for i in range(fpn_levels - 1, 0, -1): |
||||||
|
fpn_outs[i] = F.interpolate( |
||||||
|
fpn_outs[i], |
||||||
|
size=paddle.shape(fpn_outs[0])[2:], |
||||||
|
mode='bilinear', |
||||||
|
align_corners=self.align_corners) |
||||||
|
fuse_out = paddle.concat(fpn_outs, axis=1) |
||||||
|
x = self.fpn_bottleneck(fuse_out) |
||||||
|
|
||||||
|
x = self.conv_seg(x) |
||||||
|
logits_list = [x] |
||||||
|
if self.enable_auxiliary_loss: |
||||||
|
aux_out = self.aux_head(inputs[2]) |
||||||
|
logits_list.append(aux_out) |
||||||
|
return logits_list |
||||||
|
else: |
||||||
|
return logits_list |
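The head above is a standard FPN-style decoder: PPM on the coarsest feature, lateral 1x1 convs, a top-down pass that adds each upsampled coarser level to the next finer one, and finally all levels are resized to the finest resolution and concatenated. A minimal sketch of the top-down step only, with illustrative sizes:

# Hedged sketch of the top-down fusion loop in UPerNetHead.forward.
laterals = [paddle.rand([1, 512, 64, 64]),
            paddle.rand([1, 512, 32, 32]),
            paddle.rand([1, 512, 16, 16])]
for i in range(len(laterals) - 1, 0, -1):
    laterals[i - 1] = laterals[i - 1] + F.interpolate(
        laterals[i], size=laterals[i - 1].shape[2:], mode='bilinear')
print([tuple(t.shape) for t in laterals])  # finest map now carries coarse context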
File diff suppressed because it is too large
@ -1,59 +0,0 @@ |
|||||||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. |
|
||||||
# |
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
|
||||||
# you may not use this file except in compliance with the License. |
|
||||||
# You may obtain a copy of the License at |
|
||||||
# |
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0 |
|
||||||
# |
|
||||||
# Unless required by applicable law or agreed to in writing, software |
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS, |
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
||||||
# See the License for the specific language governing permissions and |
|
||||||
# limitations under the License. |
|
||||||
|
|
||||||
import numpy as np |
|
||||||
|
|
||||||
|
|
||||||
def config_check(cfg, train_dataset=None, val_dataset=None): |
|
||||||
""" |
|
||||||
To check config。 |
|
||||||
|
|
||||||
Args: |
|
||||||
cfg (paddleseg.cvlibs.Config): An object of paddleseg.cvlibs.Config. |
|
||||||
train_dataset (paddle.io.Dataset): Used to read and process training datasets. |
|
||||||
val_dataset (paddle.io.Dataset, optional): Used to read and process validation datasets. |
|
||||||
""" |
|
||||||
|
|
||||||
num_classes_check(cfg, train_dataset, val_dataset) |
|
||||||
|
|
||||||
|
|
||||||
def num_classes_check(cfg, train_dataset, val_dataset): |
|
||||||
"""" |
|
||||||
Check that the num_classes in model, train_dataset and val_dataset is consistent. |
|
||||||
""" |
|
||||||
num_classes_set = set() |
|
||||||
if train_dataset and hasattr(train_dataset, 'num_classes'): |
|
||||||
num_classes_set.add(train_dataset.num_classes) |
|
||||||
if val_dataset and hasattr(val_dataset, 'num_classes'): |
|
||||||
num_classes_set.add(val_dataset.num_classes) |
|
||||||
if cfg.dic.get('model', None) and cfg.dic['model'].get('num_classes', None): |
|
||||||
num_classes_set.add(cfg.dic['model'].get('num_classes')) |
|
||||||
if (not cfg.train_dataset) and (not cfg.val_dataset): |
|
||||||
raise ValueError( |
|
||||||
'One of `train_dataset` or `val_dataset should be given, but there are none.' |
|
||||||
) |
|
||||||
if len(num_classes_set) == 0: |
|
||||||
raise ValueError( |
|
||||||
'`num_classes` is not found. Please set it in model, train_dataset or val_dataset' |
|
||||||
) |
|
||||||
elif len(num_classes_set) > 1: |
|
||||||
raise ValueError( |
|
||||||
'`num_classes` is not consistent: {}. Please set it consistently in model or train_dataset or val_dataset' |
|
||||||
.format(num_classes_set)) |
|
||||||
else: |
|
||||||
num_classes = num_classes_set.pop() |
|
||||||
if train_dataset: |
|
||||||
train_dataset.num_classes = num_classes |
|
||||||
if val_dataset: |
|
||||||
val_dataset.num_classes = num_classes |
|
@ -1,442 +1,442 @@ |
|||||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||||
# |
# |
||||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||||
# you may not use this file except in compliance with the License. |
# you may not use this file except in compliance with the License. |
||||||
# You may obtain a copy of the License at |
# You may obtain a copy of the License at |
||||||
# |
# |
||||||
# http://www.apache.org/licenses/LICENSE-2.0 |
# http://www.apache.org/licenses/LICENSE-2.0 |
||||||
# |
# |
||||||
# Unless required by applicable law or agreed to in writing, software |
# Unless required by applicable law or agreed to in writing, software |
||||||
# distributed under the License is distributed on an "AS IS" BASIS, |
# distributed under the License is distributed on an "AS IS" BASIS, |
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
# See the License for the specific language governing permissions and |
# See the License for the specific language governing permissions and |
||||||
# limitations under the License. |
# limitations under the License. |
||||||
""" |
""" |
||||||
This code is based on https://github.com/AgentMaker/Paddle-Image-Models |
This code is based on https://github.com/AgentMaker/Paddle-Image-Models |
||||||
The copyright of AgentMaker/Paddle-Image-Models is as follows: |
The copyright of AgentMaker/Paddle-Image-Models is as follows: |
||||||
Apache License [see LICENSE for details] |
Apache License [see LICENSE for details] |
||||||
""" |
""" |
||||||
|
|
||||||
import paddle |
import paddle |
||||||
import paddle.nn as nn |
import paddle.nn as nn |
||||||
|
|
||||||
__all__ = ["CondenseNetV2_a", "CondenseNetV2_b", "CondenseNetV2_c"] |
__all__ = ["CondenseNetV2_A", "CondenseNetV2_B", "CondenseNetV2_C"] |
||||||
|
|
||||||
|
|
||||||
class SELayer(nn.Layer): |
class SELayer(nn.Layer): |
||||||
def __init__(self, inplanes, reduction=16): |
def __init__(self, inplanes, reduction=16): |
||||||
super(SELayer, self).__init__() |
super(SELayer, self).__init__() |
||||||
self.avg_pool = nn.AdaptiveAvgPool2D(1) |
self.avg_pool = nn.AdaptiveAvgPool2D(1) |
||||||
self.fc = nn.Sequential( |
self.fc = nn.Sequential( |
||||||
nn.Linear( |
nn.Linear( |
||||||
inplanes, inplanes // reduction, bias_attr=False), |
inplanes, inplanes // reduction, bias_attr=False), |
||||||
nn.ReLU(), |
nn.ReLU(), |
||||||
nn.Linear( |
nn.Linear( |
||||||
inplanes // reduction, inplanes, bias_attr=False), |
inplanes // reduction, inplanes, bias_attr=False), |
||||||
nn.Sigmoid(), ) |
nn.Sigmoid(), ) |
||||||
|
|
||||||
def forward(self, x): |
def forward(self, x): |
||||||
b, c, _, _ = x.shape |
b, c, _, _ = x.shape |
||||||
y = self.avg_pool(x).reshape((b, c)) |
y = self.avg_pool(x).reshape((b, c)) |
||||||
y = self.fc(y).reshape((b, c, 1, 1)) |
y = self.fc(y).reshape((b, c, 1, 1)) |
||||||
return x * paddle.expand(y, shape=x.shape) |
return x * paddle.expand(y, shape=x.shape) |
||||||
|
|
||||||
|
|
||||||
class HS(nn.Layer): |
class HS(nn.Layer): |
||||||
def __init__(self): |
def __init__(self): |
||||||
super(HS, self).__init__() |
super(HS, self).__init__() |
||||||
self.relu6 = nn.ReLU6() |
self.relu6 = nn.ReLU6() |
||||||
|
|
||||||
def forward(self, inputs): |
def forward(self, inputs): |
||||||
return inputs * self.relu6(inputs + 3) / 6 |
return inputs * self.relu6(inputs + 3) / 6 |
||||||
|
|
||||||
|
|
||||||
class Conv(nn.Sequential): |
class Conv(nn.Sequential): |
||||||
def __init__( |
def __init__( |
||||||
self, |
self, |
||||||
in_channels, |
in_channels, |
||||||
out_channels, |
out_channels, |
||||||
kernel_size, |
kernel_size, |
||||||
stride=1, |
stride=1, |
||||||
padding=0, |
padding=0, |
||||||
groups=1, |
groups=1, |
||||||
activation="ReLU", |
activation="ReLU", |
||||||
bn_momentum=0.9, ): |
bn_momentum=0.9, ): |
||||||
super(Conv, self).__init__() |
super(Conv, self).__init__() |
||||||
self.add_sublayer( |
self.add_sublayer( |
||||||
"norm", nn.BatchNorm2D( |
"norm", nn.BatchNorm2D( |
||||||
in_channels, momentum=bn_momentum)) |
in_channels, momentum=bn_momentum)) |
||||||
if activation == "ReLU": |
if activation == "ReLU": |
||||||
self.add_sublayer("activation", nn.ReLU()) |
self.add_sublayer("activation", nn.ReLU()) |
||||||
elif activation == "HS": |
elif activation == "HS": |
||||||
self.add_sublayer("activation", HS()) |
self.add_sublayer("activation", HS()) |
||||||
else: |
else: |
||||||
raise NotImplementedError |
raise NotImplementedError |
||||||
self.add_sublayer( |
self.add_sublayer( |
||||||
"conv", |
"conv", |
||||||
nn.Conv2D( |
nn.Conv2D( |
||||||
in_channels, |
in_channels, |
||||||
out_channels, |
out_channels, |
||||||
kernel_size=kernel_size, |
kernel_size=kernel_size, |
||||||
stride=stride, |
stride=stride, |
||||||
padding=padding, |
padding=padding, |
||||||
bias_attr=False, |
bias_attr=False, |
||||||
groups=groups, ), ) |
groups=groups, ), ) |
||||||
|
|
||||||
|
|
||||||
def ShuffleLayer(x, groups): |
def ShuffleLayer(x, groups): |
||||||
batchsize, num_channels, height, width = x.shape |
batchsize, num_channels, height, width = x.shape |
||||||
channels_per_group = num_channels // groups |
channels_per_group = num_channels // groups |
||||||
# Reshape |
# Reshape |
||||||
x = x.reshape((batchsize, groups, channels_per_group, height, width)) |
x = x.reshape((batchsize, groups, channels_per_group, height, width)) |
||||||
# Transpose |
# Transpose |
||||||
x = x.transpose((0, 2, 1, 3, 4)) |
x = x.transpose((0, 2, 1, 3, 4)) |
||||||
# Reshape |
# Reshape |
||||||
x = x.reshape((batchsize, groups * channels_per_group, height, width)) |
x = x.reshape((batchsize, groups * channels_per_group, height, width)) |
||||||
return x |
return x |
||||||
|
|
||||||
|
|
||||||
def ShuffleLayerTrans(x, groups): |
def ShuffleLayerTrans(x, groups): |
||||||
batchsize, num_channels, height, width = x.shape |
batchsize, num_channels, height, width = x.shape |
||||||
channels_per_group = num_channels // groups |
channels_per_group = num_channels // groups |
||||||
# Reshape |
# Reshape |
||||||
x = x.reshape((batchsize, channels_per_group, groups, height, width)) |
x = x.reshape((batchsize, channels_per_group, groups, height, width)) |
||||||
# Transpose |
# Transpose |
||||||
x = x.transpose((0, 2, 1, 3, 4)) |
x = x.transpose((0, 2, 1, 3, 4)) |
||||||
# Reshape |
# Reshape |
||||||
x = x.reshape((batchsize, channels_per_group * groups, height, width)) |
x = x.reshape((batchsize, channels_per_group * groups, height, width)) |
||||||
return x |
return x |
||||||
|
|
||||||
|
|
||||||
class CondenseLGC(nn.Layer): |
class CondenseLGC(nn.Layer): |
||||||
def __init__( |
def __init__( |
||||||
self, |
self, |
||||||
in_channels, |
in_channels, |
||||||
out_channels, |
out_channels, |
||||||
kernel_size, |
kernel_size, |
||||||
stride=1, |
stride=1, |
||||||
padding=0, |
padding=0, |
||||||
groups=1, |
groups=1, |
||||||
activation="ReLU", ): |
activation="ReLU", ): |
||||||
super(CondenseLGC, self).__init__() |
super(CondenseLGC, self).__init__() |
||||||
self.in_channels = in_channels |
self.in_channels = in_channels |
||||||
self.out_channels = out_channels |
self.out_channels = out_channels |
||||||
self.groups = groups |
self.groups = groups |
||||||
self.norm = nn.BatchNorm2D(self.in_channels) |
self.norm = nn.BatchNorm2D(self.in_channels) |
||||||
if activation == "ReLU": |
if activation == "ReLU": |
||||||
self.activation = nn.ReLU() |
self.activation = nn.ReLU() |
||||||
elif activation == "HS": |
elif activation == "HS": |
||||||
self.activation = HS() |
self.activation = HS() |
||||||
else: |
else: |
||||||
raise NotImplementedError |
raise NotImplementedError |
||||||
self.conv = nn.Conv2D( |
self.conv = nn.Conv2D( |
||||||
self.in_channels, |
self.in_channels, |
||||||
self.out_channels, |
self.out_channels, |
||||||
kernel_size=kernel_size, |
kernel_size=kernel_size, |
||||||
stride=stride, |
stride=stride, |
||||||
padding=padding, |
padding=padding, |
||||||
groups=self.groups, |
groups=self.groups, |
||||||
bias_attr=False, ) |
bias_attr=False, ) |
||||||
self.register_buffer( |
self.register_buffer( |
||||||
"index", paddle.zeros( |
"index", paddle.zeros( |
||||||
(self.in_channels, ), dtype="int64")) |
(self.in_channels, ), dtype="int64")) |
||||||
|
|
||||||
def forward(self, x): |
def forward(self, x): |
||||||
x = paddle.index_select(x, self.index, axis=1) |
x = paddle.index_select(x, self.index, axis=1) |
||||||
x = self.norm(x) |
x = self.norm(x) |
||||||
x = self.activation(x) |
x = self.activation(x) |
||||||
x = self.conv(x) |
x = self.conv(x) |
||||||
x = ShuffleLayer(x, self.groups) |
x = ShuffleLayer(x, self.groups) |
||||||
return x |
return x |
||||||
|
|
||||||
|
|
||||||
class CondenseSFR(nn.Layer): |
class CondenseSFR(nn.Layer): |
||||||
def __init__( |
def __init__( |
||||||
self, |
self, |
||||||
in_channels, |
in_channels, |
||||||
out_channels, |
out_channels, |
||||||
kernel_size, |
kernel_size, |
||||||
stride=1, |
stride=1, |
||||||
padding=0, |
padding=0, |
||||||
groups=1, |
groups=1, |
||||||
activation="ReLU", ): |
activation="ReLU", ): |
||||||
super(CondenseSFR, self).__init__() |
super(CondenseSFR, self).__init__() |
||||||
self.in_channels = in_channels |
self.in_channels = in_channels |
||||||
self.out_channels = out_channels |
self.out_channels = out_channels |
||||||
self.groups = groups |
self.groups = groups |
||||||
self.norm = nn.BatchNorm2D(self.in_channels) |
self.norm = nn.BatchNorm2D(self.in_channels) |
||||||
if activation == "ReLU": |
if activation == "ReLU": |
||||||
self.activation = nn.ReLU() |
self.activation = nn.ReLU() |
||||||
elif activation == "HS": |
elif activation == "HS": |
||||||
self.activation = HS() |
self.activation = HS() |
||||||
else: |
else: |
||||||
raise NotImplementedError |
raise NotImplementedError |
||||||
self.conv = nn.Conv2D( |
self.conv = nn.Conv2D( |
||||||
self.in_channels, |
self.in_channels, |
||||||
self.out_channels, |
self.out_channels, |
||||||
kernel_size=kernel_size, |
kernel_size=kernel_size, |
||||||
padding=padding, |
padding=padding, |
||||||
groups=self.groups, |
groups=self.groups, |
||||||
bias_attr=False, |
bias_attr=False, |
||||||
stride=stride, ) |
stride=stride, ) |
||||||
self.register_buffer("index", |
self.register_buffer("index", |
||||||
paddle.zeros( |
paddle.zeros( |
||||||
(self.out_channels, self.out_channels))) |
(self.out_channels, self.out_channels))) |
||||||
|
|
||||||
def forward(self, x): |
def forward(self, x): |
||||||
x = self.norm(x) |
x = self.norm(x) |
||||||
x = self.activation(x) |
x = self.activation(x) |
||||||
x = ShuffleLayerTrans(x, self.groups) |
x = ShuffleLayerTrans(x, self.groups) |
||||||
x = self.conv(x) # SIZE: N, C, H, W |
x = self.conv(x) # SIZE: N, C, H, W |
||||||
N, C, H, W = x.shape |
N, C, H, W = x.shape |
||||||
x = x.reshape((N, C, H * W)) |
x = x.reshape((N, C, H * W)) |
||||||
x = x.transpose((0, 2, 1)) # SIZE: N, HW, C |
x = x.transpose((0, 2, 1)) # SIZE: N, HW, C |
||||||
# x SIZE: N, HW, C; self.index SIZE: C, C; OUTPUT SIZE: N, HW, C |
# x SIZE: N, HW, C; self.index SIZE: C, C; OUTPUT SIZE: N, HW, C |
||||||
x = paddle.matmul(x, self.index) |
x = paddle.matmul(x, self.index) |
||||||
x = x.transpose((0, 2, 1)) # SIZE: N, C, HW |
x = x.transpose((0, 2, 1)) # SIZE: N, C, HW |
||||||
x = x.reshape((N, C, H, W)) # SIZE: N, C, H, W |
x = x.reshape((N, C, H, W)) # SIZE: N, C, H, W |
||||||
return x |
return x |
||||||
|
|
||||||
|
|
||||||
class _SFR_DenseLayer(nn.Layer): |
class _SFR_DenseLayer(nn.Layer): |
||||||
def __init__( |
def __init__( |
||||||
self, |
self, |
||||||
in_channels, |
in_channels, |
||||||
growth_rate, |
growth_rate, |
||||||
group_1x1, |
group_1x1, |
||||||
group_3x3, |
group_3x3, |
||||||
group_trans, |
group_trans, |
||||||
bottleneck, |
bottleneck, |
||||||
activation, |
activation, |
||||||
use_se=False, ): |
use_se=False, ): |
||||||
super(_SFR_DenseLayer, self).__init__() |
super(_SFR_DenseLayer, self).__init__() |
||||||
self.group_1x1 = group_1x1 |
self.group_1x1 = group_1x1 |
||||||
self.group_3x3 = group_3x3 |
self.group_3x3 = group_3x3 |
||||||
self.group_trans = group_trans |
self.group_trans = group_trans |
||||||
self.use_se = use_se |
self.use_se = use_se |
||||||
# 1x1 conv i --> b*k |
# 1x1 conv i --> b*k |
||||||
self.conv_1 = CondenseLGC( |
self.conv_1 = CondenseLGC( |
||||||
in_channels, |
in_channels, |
||||||
bottleneck * growth_rate, |
bottleneck * growth_rate, |
||||||
kernel_size=1, |
kernel_size=1, |
||||||
groups=self.group_1x1, |
groups=self.group_1x1, |
||||||
activation=activation, ) |
activation=activation, ) |
||||||
# 3x3 conv b*k --> k |
# 3x3 conv b*k --> k |
||||||
self.conv_2 = Conv( |
self.conv_2 = Conv( |
||||||
bottleneck * growth_rate, |
bottleneck * growth_rate, |
||||||
growth_rate, |
growth_rate, |
||||||
kernel_size=3, |
kernel_size=3, |
||||||
padding=1, |
padding=1, |
||||||
groups=self.group_3x3, |
groups=self.group_3x3, |
||||||
activation=activation, ) |
activation=activation, ) |
||||||
# 1x1 res conv k(8-16-32)--> i (k*l) |
# 1x1 res conv k(8-16-32)--> i (k*l) |
||||||
self.sfr = CondenseSFR( |
self.sfr = CondenseSFR( |
||||||
growth_rate, |
growth_rate, |
||||||
in_channels, |
in_channels, |
||||||
kernel_size=1, |
kernel_size=1, |
||||||
groups=self.group_trans, |
groups=self.group_trans, |
||||||
activation=activation, ) |
activation=activation, ) |
||||||
if self.use_se: |
if self.use_se: |
||||||
self.se = SELayer(inplanes=growth_rate, reduction=1) |
self.se = SELayer(inplanes=growth_rate, reduction=1) |
||||||
|
|
||||||
def forward(self, x): |
def forward(self, x): |
||||||
x_ = x |
x_ = x |
||||||
x = self.conv_1(x) |
x = self.conv_1(x) |
||||||
x = self.conv_2(x) |
x = self.conv_2(x) |
||||||
if self.use_se: |
if self.use_se: |
||||||
x = self.se(x) |
x = self.se(x) |
||||||
sfr_feature = self.sfr(x) |
sfr_feature = self.sfr(x) |
||||||
y = x_ + sfr_feature |
y = x_ + sfr_feature |
||||||
return paddle.concat([y, x], 1) |
return paddle.concat([y, x], 1) |
||||||
|
|
||||||
|
|
||||||
class _SFR_DenseBlock(nn.Sequential): |
class _SFR_DenseBlock(nn.Sequential): |
||||||
def __init__( |
def __init__( |
||||||
self, |
self, |
||||||
num_layers, |
num_layers, |
||||||
in_channels, |
in_channels, |
||||||
growth_rate, |
growth_rate, |
||||||
group_1x1, |
group_1x1, |
||||||
group_3x3, |
group_3x3, |
||||||
group_trans, |
group_trans, |
||||||
bottleneck, |
bottleneck, |
||||||
activation, |
activation, |
||||||
use_se, ): |
use_se, ): |
||||||
super(_SFR_DenseBlock, self).__init__() |
super(_SFR_DenseBlock, self).__init__() |
||||||
for i in range(num_layers): |
for i in range(num_layers): |
||||||
layer = _SFR_DenseLayer( |
layer = _SFR_DenseLayer( |
||||||
in_channels + i * growth_rate, |
in_channels + i * growth_rate, |
||||||
growth_rate, |
growth_rate, |
||||||
group_1x1, |
group_1x1, |
||||||
group_3x3, |
group_3x3, |
||||||
group_trans, |
group_trans, |
||||||
bottleneck, |
bottleneck, |
||||||
activation, |
activation, |
||||||
use_se, ) |
use_se, ) |
||||||
self.add_sublayer("denselayer_%d" % (i + 1), layer) |
self.add_sublayer("denselayer_%d" % (i + 1), layer) |
||||||
|
|
||||||
|
|
||||||
class _Transition(nn.Layer): |
class _Transition(nn.Layer): |
||||||
def __init__(self): |
def __init__(self): |
||||||
super(_Transition, self).__init__() |
super(_Transition, self).__init__() |
||||||
self.pool = nn.AvgPool2D(kernel_size=2, stride=2) |
self.pool = nn.AvgPool2D(kernel_size=2, stride=2) |
||||||
|
|
||||||
def forward(self, x): |
def forward(self, x): |
||||||
x = self.pool(x) |
x = self.pool(x) |
||||||
return x |
return x |
||||||
|
|
||||||
|
|
||||||
class CondenseNetV2(nn.Layer): |
class CondenseNetV2(nn.Layer): |
||||||
def __init__( |
def __init__( |
||||||
self, |
self, |
||||||
stages, |
stages, |
||||||
growth, |
growth, |
||||||
HS_start_block, |
HS_start_block, |
||||||
SE_start_block, |
SE_start_block, |
||||||
fc_channel, |
fc_channel, |
||||||
group_1x1, |
group_1x1, |
||||||
group_3x3, |
group_3x3, |
||||||
group_trans, |
group_trans, |
||||||
bottleneck, |
bottleneck, |
||||||
last_se_reduction, |
last_se_reduction, |
||||||
in_channels=3, |
in_channels=3, |
||||||
class_num=1000, ): |
class_num=1000, ): |
||||||
super(CondenseNetV2, self).__init__() |
super(CondenseNetV2, self).__init__() |
||||||
self.stages = stages |
self.stages = stages |
||||||
self.growth = growth |
self.growth = growth |
||||||
self.in_channels = in_channels |
self.in_channels = in_channels |
||||||
self.class_num = class_num |
self.class_num = class_num |
||||||
self.last_se_reduction = last_se_reduction |
self.last_se_reduction = last_se_reduction |
||||||
assert len(self.stages) == len(self.growth) |
assert len(self.stages) == len(self.growth) |
||||||
self.progress = 0.0 |
self.progress = 0.0 |
||||||
|
|
||||||
self.init_stride = 2 |
self.init_stride = 2 |
||||||
self.pool_size = 7 |
self.pool_size = 7 |
||||||
|
|
||||||
self.features = nn.Sequential() |
self.features = nn.Sequential() |
||||||
# Initial nChannels should be 3 |
# Initial nChannels should be 3 |
||||||
self.num_features = 2 * self.growth[0] |
self.num_features = 2 * self.growth[0] |
||||||
# Dense-block 1 (224x224) |
# Dense-block 1 (224x224) |
||||||
self.features.add_sublayer( |
self.features.add_sublayer( |
||||||
"init_conv", |
"init_conv", |
||||||
nn.Conv2D( |
nn.Conv2D( |
||||||
in_channels, |
in_channels, |
||||||
self.num_features, |
self.num_features, |
||||||
kernel_size=3, |
kernel_size=3, |
||||||
stride=self.init_stride, |
stride=self.init_stride, |
||||||
padding=1, |
padding=1, |
||||||
bias_attr=False, ), ) |
bias_attr=False, ), ) |
||||||
for i in range(len(self.stages)): |
for i in range(len(self.stages)): |
||||||
activation = "HS" if i >= HS_start_block else "ReLU" |
activation = "HS" if i >= HS_start_block else "ReLU" |
||||||
use_se = True if i >= SE_start_block else False |
use_se = True if i >= SE_start_block else False |
||||||
# Dense-block i |
# Dense-block i |
||||||
self.add_block(i, group_1x1, group_3x3, group_trans, bottleneck, |
self.add_block(i, group_1x1, group_3x3, group_trans, bottleneck, |
||||||
activation, use_se) |
activation, use_se) |
||||||
|
|
||||||
self.fc = nn.Linear(self.num_features, fc_channel) |
self.fc = nn.Linear(self.num_features, fc_channel) |
||||||
self.fc_act = HS() |
self.fc_act = HS() |
||||||
|
|
||||||
# Classifier layer |
# Classifier layer |
||||||
if class_num > 0: |
if class_num > 0: |
||||||
self.classifier = nn.Linear(fc_channel, class_num) |
self.classifier = nn.Linear(fc_channel, class_num) |
||||||
self._initialize() |
self._initialize() |
||||||
|
|
||||||
def add_block(self, i, group_1x1, group_3x3, group_trans, bottleneck, |
def add_block(self, i, group_1x1, group_3x3, group_trans, bottleneck, |
||||||
activation, use_se): |
activation, use_se): |
||||||
# Check if ith is the last one |
# Check if ith is the last one |
||||||
last = i == len(self.stages) - 1 |
last = i == len(self.stages) - 1 |
||||||
block = _SFR_DenseBlock( |
block = _SFR_DenseBlock( |
||||||
num_layers=self.stages[i], |
num_layers=self.stages[i], |
||||||
in_channels=self.num_features, |
in_channels=self.num_features, |
||||||
growth_rate=self.growth[i], |
growth_rate=self.growth[i], |
||||||
group_1x1=group_1x1, |
group_1x1=group_1x1, |
||||||
group_3x3=group_3x3, |
group_3x3=group_3x3, |
||||||
group_trans=group_trans, |
group_trans=group_trans, |
||||||
bottleneck=bottleneck, |
bottleneck=bottleneck, |
||||||
activation=activation, |
activation=activation, |
||||||
use_se=use_se, ) |
use_se=use_se, ) |
||||||
self.features.add_sublayer("denseblock_%d" % (i + 1), block) |
self.features.add_sublayer("denseblock_%d" % (i + 1), block) |
||||||
self.num_features += self.stages[i] * self.growth[i] |
self.num_features += self.stages[i] * self.growth[i] |
||||||
if not last: |
if not last: |
||||||
trans = _Transition() |
trans = _Transition() |
||||||
self.features.add_sublayer("transition_%d" % (i + 1), trans) |
self.features.add_sublayer("transition_%d" % (i + 1), trans) |
||||||
else: |
else: |
||||||
self.features.add_sublayer("norm_last", |
self.features.add_sublayer("norm_last", |
||||||
nn.BatchNorm2D(self.num_features)) |
nn.BatchNorm2D(self.num_features)) |
||||||
self.features.add_sublayer("relu_last", nn.ReLU()) |
self.features.add_sublayer("relu_last", nn.ReLU()) |
||||||
self.features.add_sublayer("pool_last", |
self.features.add_sublayer("pool_last", |
||||||
nn.AvgPool2D(self.pool_size)) |
nn.AvgPool2D(self.pool_size)) |
||||||
# if useSE: |
# if useSE: |
||||||
self.features.add_sublayer( |
self.features.add_sublayer( |
||||||
"se_last", |
"se_last", |
||||||
SELayer( |
SELayer( |
||||||
self.num_features, reduction=self.last_se_reduction)) |
self.num_features, reduction=self.last_se_reduction)) |
||||||
|
|
||||||
def forward(self, x): |
def forward(self, x): |
||||||
features = self.features(x) |
features = self.features(x) |
||||||
out = features.reshape((features.shape[0], features.shape[1] * |
out = features.reshape((features.shape[0], features.shape[1] * |
||||||
features.shape[2] * features.shape[3])) |
features.shape[2] * features.shape[3])) |
||||||
out = self.fc(out) |
out = self.fc(out) |
||||||
out = self.fc_act(out) |
out = self.fc_act(out) |
||||||
|
|
||||||
if self.class_num > 0: |
if self.class_num > 0: |
||||||
out = self.classifier(out) |
out = self.classifier(out) |
||||||
|
|
||||||
return out |
return out |
||||||
|
|
||||||
def _initialize(self): |
def _initialize(self): |
||||||
# Initialize |
# Initialize |
||||||
for m in self.sublayers(): |
for m in self.sublayers(): |
||||||
if isinstance(m, nn.Conv2D): |
if isinstance(m, nn.Conv2D): |
||||||
nn.initializer.KaimingNormal()(m.weight) |
nn.initializer.KaimingNormal()(m.weight) |
||||||
elif isinstance(m, nn.BatchNorm2D): |
elif isinstance(m, nn.BatchNorm2D): |
||||||
nn.initializer.Constant(value=1.0)(m.weight) |
nn.initializer.Constant(value=1.0)(m.weight) |
||||||
nn.initializer.Constant(value=0.0)(m.bias) |
nn.initializer.Constant(value=0.0)(m.bias) |
||||||
|
|
||||||
|
|
||||||
def CondenseNetV2_a(**kwargs): |
def CondenseNetV2_A(**kwargs): |
||||||
model = CondenseNetV2( |
model = CondenseNetV2( |
||||||
stages=[1, 1, 4, 6, 8], |
stages=[1, 1, 4, 6, 8], |
||||||
growth=[8, 8, 16, 32, 64], |
growth=[8, 8, 16, 32, 64], |
||||||
HS_start_block=2, |
HS_start_block=2, |
||||||
SE_start_block=3, |
SE_start_block=3, |
||||||
fc_channel=828, |
fc_channel=828, |
||||||
group_1x1=8, |
group_1x1=8, |
||||||
group_3x3=8, |
group_3x3=8, |
||||||
group_trans=8, |
group_trans=8, |
||||||
bottleneck=4, |
bottleneck=4, |
||||||
last_se_reduction=16, |
last_se_reduction=16, |
||||||
**kwargs) |
**kwargs) |
||||||
return model |
return model |
||||||
|
|
||||||
|
|
||||||
def CondenseNetV2_b(**kwargs): |
def CondenseNetV2_B(**kwargs): |
||||||
model = CondenseNetV2( |
model = CondenseNetV2( |
||||||
stages=[2, 4, 6, 8, 6], |
stages=[2, 4, 6, 8, 6], |
||||||
growth=[6, 12, 24, 48, 96], |
growth=[6, 12, 24, 48, 96], |
||||||
HS_start_block=2, |
HS_start_block=2, |
||||||
SE_start_block=3, |
SE_start_block=3, |
||||||
fc_channel=1024, |
fc_channel=1024, |
||||||
group_1x1=6, |
group_1x1=6, |
||||||
group_3x3=6, |
group_3x3=6, |
||||||
group_trans=6, |
group_trans=6, |
||||||
bottleneck=4, |
bottleneck=4, |
||||||
last_se_reduction=16, |
last_se_reduction=16, |
||||||
**kwargs) |
**kwargs) |
||||||
return model |
return model |
||||||
|
|
||||||
|
|
||||||
def CondenseNetV2_c(**kwargs): |
def CondenseNetV2_C(**kwargs): |
||||||
model = CondenseNetV2( |
model = CondenseNetV2( |
||||||
stages=[4, 6, 8, 10, 8], |
stages=[4, 6, 8, 10, 8], |
||||||
growth=[8, 16, 32, 64, 128], |
growth=[8, 16, 32, 64, 128], |
||||||
HS_start_block=2, |
HS_start_block=2, |
||||||
SE_start_block=3, |
SE_start_block=3, |
||||||
fc_channel=1024, |
fc_channel=1024, |
||||||
group_1x1=8, |
group_1x1=8, |
||||||
group_3x3=8, |
group_3x3=8, |
||||||
group_trans=8, |
group_trans=8, |
||||||
bottleneck=4, |
bottleneck=4, |
||||||
last_se_reduction=16, |
last_se_reduction=16, |
||||||
**kwargs) |
**kwargs) |
||||||
return model |
return model |
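This hunk only renames the CondenseNet V2 factory functions from lowercase suffixes (_a/_b/_c) to uppercase (_A/_B/_C); the architectures themselves are unchanged. A hedged usage sketch of the renamed entry point, where the class count and input size are assumptions rather than values from this diff:

# Hedged sketch: builds the smallest variant and checks the classifier output shape.
model = CondenseNetV2_A(class_num=1000)
x = paddle.rand([1, 3, 224, 224])   # pool_size=7 assumes a 224x224 input
logits = model(x)
print(logits.shape)  # [1, 1000]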
@ -1,8 +0,0 @@ |
|||||||
# Basic configurations of BIT |
|
||||||
|
|
||||||
_base_: ../_base_/airchange.yaml |
|
||||||
|
|
||||||
save_dir: ./test_tipc/output/cd/bit/ |
|
||||||
|
|
||||||
model: !Node |
|
||||||
type: BIT |
|
Some files were not shown because too many files have changed in this diff.