Merge pull request #40 from Bobholamovic/update_ppseg

[Feat] Update ppseg and Add CondenseNet V2
Authored by cc 2 years ago; committed by GitHub
commit ebceda8419
Changed files (number of changed lines in parentheses):

  1. docs/apis/train.md (2)
  2. docs/intro/model_zoo.md (25)
  3. examples/README.md (1)
  4. examples/rs_research/config_utils.py (1)
  5. examples/rs_research/run_task.py (7)
  6. paddlers/deploy/predictor.py (38)
  7. paddlers/models/hash.txt (1)
  8. paddlers/models/ppseg/__init__.py (2)
  9. paddlers/models/ppseg/core/infer.py (107)
  10. paddlers/models/ppseg/core/predict.py (30)
  11. paddlers/models/ppseg/core/train.py (71)
  12. paddlers/models/ppseg/core/val.py (102)
  13. paddlers/models/ppseg/cvlibs/config.py (205)
  14. paddlers/models/ppseg/cvlibs/param_init.py (26)
  15. paddlers/models/ppseg/datasets/__init__.py (1)
  16. paddlers/models/ppseg/datasets/ade.py (24)
  17. paddlers/models/ppseg/datasets/dataset.py (54)
  18. paddlers/models/ppseg/datasets/pssl.py (135)
  19. paddlers/models/ppseg/models/__init__.py (9)
  20. paddlers/models/ppseg/models/attention_unet.py (6)
  21. paddlers/models/ppseg/models/backbones/__init__.py (4)
  22. paddlers/models/ppseg/models/backbones/ghostnet.py (318)
  23. paddlers/models/ppseg/models/backbones/hrnet.py (4)
  24. paddlers/models/ppseg/models/backbones/lite_hrnet.py (974)
  25. paddlers/models/ppseg/models/backbones/mix_transformer.py (6)
  26. paddlers/models/ppseg/models/backbones/mobilenetv2.py (329)
  27. paddlers/models/ppseg/models/backbones/mobilenetv3.py (496)
  28. paddlers/models/ppseg/models/backbones/resnet_vd.py (7)
  29. paddlers/models/ppseg/models/backbones/shufflenetv2.py (315)
  30. paddlers/models/ppseg/models/backbones/stdcnet.py (180)
  31. paddlers/models/ppseg/models/backbones/swin_transformer.py (8)
  32. paddlers/models/ppseg/models/backbones/top_transformer.py (716)
  33. paddlers/models/ppseg/models/backbones/transformer_utils.py (4)
  34. paddlers/models/ppseg/models/backbones/vision_transformer.py (6)
  35. paddlers/models/ppseg/models/backbones/xception_deeplab.py (11)
  36. paddlers/models/ppseg/models/bisenet.py (18)
  37. paddlers/models/ppseg/models/ccnet.py (174)
  38. paddlers/models/ppseg/models/ddrnet.py (403)
  39. paddlers/models/ppseg/models/emanet.py (4)
  40. paddlers/models/ppseg/models/enet.py (4)
  41. paddlers/models/ppseg/models/fast_scnn.py (16)
  42. paddlers/models/ppseg/models/ginet.py (2)
  43. paddlers/models/ppseg/models/glore.py (198)
  44. paddlers/models/ppseg/models/hardnet.py (4)
  45. paddlers/models/ppseg/models/layers/__init__.py (3)
  46. paddlers/models/ppseg/models/layers/attention.py (126)
  47. paddlers/models/ppseg/models/layers/layer_libs.py (57)
  48. paddlers/models/ppseg/models/layers/tensor_fusion.py (285)
  49. paddlers/models/ppseg/models/layers/tensor_fusion_helper.py (133)
  50. paddlers/models/ppseg/models/losses/binary_cross_entropy_loss.py (2)
  51. paddlers/models/ppseg/models/losses/cross_entropy_loss.py (4)
  52. paddlers/models/ppseg/models/losses/decoupledsegnet_relax_boundary_loss.py (2)
  53. paddlers/models/ppseg/models/losses/detail_aggregate_loss.py (2)
  54. paddlers/models/ppseg/models/losses/dice_loss.py (65)
  55. paddlers/models/ppseg/models/losses/focal_loss.py (122)
  56. paddlers/models/ppseg/models/losses/l1_loss.py (22)
  57. paddlers/models/ppseg/models/losses/lovasz_loss.py (16)
  58. paddlers/models/ppseg/models/losses/ohem_cross_entropy_loss.py (2)
  59. paddlers/models/ppseg/models/losses/pixel_contrast_cross_entropy_loss.py (5)
  60. paddlers/models/ppseg/models/losses/semantic_connectivity_loss.py (12)
  61. paddlers/models/ppseg/models/lraspp.py (162)
  62. paddlers/models/ppseg/models/mla_transformer.py (2)
  63. paddlers/models/ppseg/models/mobileseg.py (289)
  64. paddlers/models/ppseg/models/pointrend.py (2)
  65. paddlers/models/ppseg/models/portraitnet.py (16)
  66. paddlers/models/ppseg/models/pp_liteseg.py (273)
  67. paddlers/models/ppseg/models/pphumanseg_lite.py (8)
  68. paddlers/models/ppseg/models/segformer.py (48)
  69. paddlers/models/ppseg/models/segnet.py (4)
  70. paddlers/models/ppseg/models/sinet.py (449)
  71. paddlers/models/ppseg/models/stdcseg.py (2)
  72. paddlers/models/ppseg/models/topformer.py (155)
  73. paddlers/models/ppseg/models/u2net.py (10)
  74. paddlers/models/ppseg/models/unet.py (10)
  75. paddlers/models/ppseg/models/unet_plusplus.py (4)
  76. paddlers/models/ppseg/models/upernet.py (173)
  77. paddlers/models/ppseg/transforms/functional.py (9)
  78. paddlers/models/ppseg/transforms/transforms.py (756)
  79. paddlers/models/ppseg/utils/__init__.py (1)
  80. paddlers/models/ppseg/utils/config_check.py (59)
  81. paddlers/models/ppseg/utils/env/__init__.py (2)
  82. paddlers/models/ppseg/utils/env/seg_env.py (2)
  83. paddlers/models/ppseg/utils/env/sys_env.py (12)
  84. paddlers/models/ppseg/utils/metrics.py (70)
  85. paddlers/models/ppseg/utils/train_profiler.py (2)
  86. paddlers/models/ppseg/utils/utils.py (2)
  87. paddlers/models/ppseg/utils/visualize.py (40)
  88. paddlers/rs_models/cd/losses/fccdn_loss.py (63)
  89. paddlers/rs_models/clas/__init__.py (2)
  90. paddlers/rs_models/clas/condensenetv2.py (884)
  91. paddlers/tasks/change_detector.py (2)
  92. paddlers/tasks/classifier.py (24)
  93. paddlers/tasks/restorer.py (2)
  94. paddlers/tasks/segmenter.py (32)
  95. paddlers/utils/checkpoint.py (9)
  96. test_tipc/README.md (4)
  97. test_tipc/config_utils.py (1)
  98. test_tipc/configs/cd/_base_/airchange.yaml (2)
  99. test_tipc/configs/cd/_base_/levircd.yaml (2)
  100. test_tipc/configs/cd/bit/bit.yaml (8)

Some files were not shown because too many files have changed in this diff.

@ -34,7 +34,7 @@
### Initializing a `BaseSegmenter` subclass object
- Most models support the `in_channels`, `num_classes`, and `use_mixed_loss` parameters, which set the number of input channels, the number of output classes, and whether to use the preset mixed loss, respectively. Some models, such as `FarSeg`, do not yet support the `in_channels` parameter.
- Most models support the `in_channels`, `num_classes`, and `use_mixed_loss` parameters, which set the number of input channels, the number of output classes, and whether to use the preset mixed loss, respectively.
- The `use_mixed_loss` parameter will be deprecated in the future, so its use is not recommended.
- The loss function used during training can be specified via the `losses` parameter. `losses` must be a dictionary whose `'types'` and `'coef'` keys map to two equal-length lists giving the loss function objects (callables) and the loss weights, respectively. For example, `losses={'types': [LossType1(), LossType2()], 'coef': [1.0, 0.5]}` is equivalent, during training, to computing the loss `1.0*LossType1()(logits, labels)+0.5*LossType2()(logits, labels)`, where `logits` and `labels` are the model output and the ground-truth labels, respectively (a minimal sketch follows this list).
- Each subclass supports model-specific input parameters. For details, see the [model definitions](https://github.com/PaddlePaddle/PaddleRS/blob/develop/paddlers/rs_models/seg) and [trainer definitions](https://github.com/PaddlePaddle/PaddleRS/blob/develop/paddlers/tasks/segmentor.py).
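A minimal sketch of the `losses` dictionary in use, assuming a `UNet` trainer exposed as `paddlers.tasks.seg.UNet` and loss classes importable from `paddlers.models.ppseg.models.losses` (both import paths are assumptions, not verified against this exact revision):

```python
# Hedged sketch: train-time loss = 1.0 * cross-entropy + 0.5 * Dice.
import paddlers
from paddlers.models.ppseg.models.losses import CrossEntropyLoss, DiceLoss

model = paddlers.tasks.seg.UNet(
    in_channels=3,
    num_classes=2,
    losses={
        'types': [CrossEntropyLoss(), DiceLoss()],  # callable loss objects
        'coef': [1.0, 0.5],                         # matching weights
    })
# Equivalent during training to
# 1.0 * CrossEntropyLoss()(logits, labels) + 0.5 * DiceLoss()(logits, labels).
```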

@ -20,18 +20,21 @@ All models currently supported by PaddleRS are listed below (models marked with \* are remote sensing-specific
| Change Detection | \*FCCDN | Yes |
| Change Detection | \*SNUNet | Yes |
| Change Detection | \*STANet | Yes |
| Scene Classification | CondenseNetV2 | Yes |
| Scene Classification | HRNet | |
| Scene Classification | MobileNetV3 | |
| Scene Classification | ResNet50-vd | |
| Scene Classification | CondenseNet V2 | Yes |
| Scene Classification | HRNet | |
| Scene Classification | MobileNetV3 | |
| Scene Classification | ResNet50-vd | |
| Image Restoration | DRN | No |
| Image Restoration | ESRGAN | |
| Image Restoration | ESRGAN | |
| Image Restoration | LESRCNN | No |
| Object Detection | Faster R-CNN | Yes |
| Object Detection | PP-YOLO | Yes |
| Object Detection | PP-YOLO Tiny | Yes |
| Object Detection | PP-YOLOv2 | Yes |
| Object Detection | YOLOv3 | Yes |
| Object Detection | Faster R-CNN | No |
| Object Detection | PP-YOLO | No |
| Object Detection | PP-YOLO Tiny | No |
| Object Detection | PP-YOLOv2 | No |
| Object Detection | YOLOv3 | No |
| Image Segmentation | BiSeNet V2 | Yes |
| Image Segmentation | DeepLab V3+ | Yes |
| Image Segmentation | \*FarSeg | No |
| Image Segmentation | \*FarSeg | Yes |
| Image Segmentation | Fast-SCNN | Yes |
| Image Segmentation | HRNet | Yes |
| Image Segmentation | UNet | Yes |

@ -53,3 +53,4 @@ PaddleRS provides rich examples ranging from scientific research to industrial applications, in the hope of helping remote sensing
|[[Official] The 11th China Software Cup, Baidu Remote Sensing Track: Object Detection](https://aistudio.baidu.com/aistudio/projectdetail/3792609)|古代飞|Competition|Object detection, contest baseline|
|[[11th Software Cup] Remote Sensing Interpretation Track: Change Detection Task, 4th-Place Preliminary-Round Solution](https://aistudio.baidu.com/aistudio/projectdetail/4116895)|lzzzzzm|Competition|Change detection, high-score solution|
|[[Solution Sharing] The 11th China Software Cup College Software Design Contest, Remote Sensing Interpretation Track](https://aistudio.baidu.com/aistudio/projectdetail/4146154)|trainer|Competition|Change detection, high-score solution|
|[Remote Sensing Change Detection for Construction Progress Monitoring in Credit Scenarios](https://aistudio.baidu.com/aistudio/projectdetail/4543160)|古代飞|Industrial case|Change detection, financial risk control|

@ -133,6 +133,7 @@ def parse_args(*args, **kwargs):
# Global settings
parser.add_argument('cmd', choices=['train', 'eval'])
parser.add_argument('task', choices=['cd', 'clas', 'det', 'res', 'seg'])
parser.add_argument('--seed', type=int, default=None)
# Data
parser.add_argument('--datasets', type=dict, default={})

@ -15,7 +15,9 @@
# limitations under the License.
import os
import random
import numpy as np
# Import cv2 and sklearn before paddlers to solve the
# "ImportError: dlopen: cannot load any more object with static TLS" issue.
import cv2
@ -62,6 +64,11 @@ if __name__ == '__main__':
cfg = parse_args()
print(format_cfg(cfg))
if cfg['seed'] is not None:
random.seed(cfg['seed'])
np.random.seed(cfg['seed'])
paddle.seed(cfg['seed'])
# Automatically download data
if cfg['download_on']:
paddlers.utils.download_and_decompress(

@ -103,11 +103,11 @@ class Predictor(object):
config.enable_use_gpu(200, gpu_id)
config.switch_ir_optim(True)
if use_trt:
if self._model.model_type == 'segmenter':
if self.model_type == 'segmenter':
logging.warning(
"Semantic segmentation models do not support TensorRT acceleration, "
"TensorRT is forcibly disabled.")
elif self._model.model_type == 'detector' and 'RCNN' in self._model.__class__.__name__:
elif self.model_type == 'detector' and 'RCNN' in self._model.__class__.__name__:
logging.warning(
"RCNN models do not support TensorRT acceleration, "
"TensorRT is forcibly disabled.")
@ -150,30 +150,29 @@ class Predictor(object):
def preprocess(self, images, transforms):
preprocessed_samples = self._model.preprocess(
images, transforms, to_tensor=False)
if self._model.model_type == 'classifier':
if self.model_type == 'classifier':
preprocessed_samples = {'image': preprocessed_samples[0]}
elif self._model.model_type == 'segmenter':
elif self.model_type == 'segmenter':
preprocessed_samples = {
'image': preprocessed_samples[0],
'ori_shape': preprocessed_samples[1]
}
elif self._model.model_type == 'detector':
elif self.model_type == 'detector':
pass
elif self._model.model_type == 'change_detector':
elif self.model_type == 'change_detector':
preprocessed_samples = {
'image': preprocessed_samples[0],
'image2': preprocessed_samples[1],
'ori_shape': preprocessed_samples[2]
}
elif self._model.model_type == 'restorer':
elif self.model_type == 'restorer':
preprocessed_samples = {
'image': preprocessed_samples[0],
'tar_shape': preprocessed_samples[1]
}
else:
logging.error(
"Invalid model type {}".format(self._model.model_type),
exit=True)
"Invalid model type {}".format(self.model_type), exit=True)
return preprocessed_samples
def postprocess(self,
@ -182,7 +181,7 @@ class Predictor(object):
ori_shape=None,
tar_shape=None,
transforms=None):
if self._model.model_type == 'classifier':
if self.model_type == 'classifier':
true_topk = min(self._model.num_classes, topk)
if self._model.postprocess is None:
self._model.build_postprocess_from_labels(topk)
@ -198,7 +197,7 @@ class Predictor(object):
'scores_map': s,
'label_names_map': n,
} for l, s, n in zip(class_ids, scores, label_names)]
elif self._model.model_type in ('segmenter', 'change_detector'):
elif self.model_type in ('segmenter', 'change_detector'):
label_map, score_map = self._model.postprocess(
net_outputs,
batch_origin_shape=ori_shape,
@ -207,13 +206,13 @@ class Predictor(object):
'label_map': l,
'score_map': s
} for l, s in zip(label_map, score_map)]
elif self._model.model_type == 'detector':
elif self.model_type == 'detector':
net_outputs = {
k: v
for k, v in zip(['bbox', 'bbox_num', 'mask'], net_outputs)
}
preds = self._model.postprocess(net_outputs)
elif self._model.model_type == 'restorer':
elif self.model_type == 'restorer':
res_maps = self._model.postprocess(
net_outputs[0],
batch_tar_shape=tar_shape,
@ -221,8 +220,7 @@ class Predictor(object):
preds = [{'res_map': res_map} for res_map in res_maps]
else:
logging.error(
"Invalid model type {}.".format(self._model.model_type),
exit=True)
"Invalid model type {}.".format(self.model_type), exit=True)
return preds
@ -360,6 +358,12 @@ class Predictor(object):
batch_size (int, optional): Batch size used in inference. Defaults to 1.
quiet (bool, optional): If True, disable the progress bar. Defaults to False.
"""
if self.model_type not in ('segmenter', 'change_detector'):
raise RuntimeError(
"Model type is {}, which does not support inference with sliding windows.".
format(self.model_type))
slider_predict(
partial(
self.predict, quiet=True),
@ -375,3 +379,7 @@ class Predictor(object):
def batch_predict(self, image_list, **params):
return self.predict(img_file=image_list, **params)
@property
def model_type(self):
return self._model.model_type

@ -0,0 +1 @@
ppseg f6c73b478cdf00f40ae69edd35bf6bce2a1687ef

@ -1,4 +1,4 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.

@ -21,88 +21,16 @@ import paddle
import paddle.nn.functional as F
def get_reverse_list(ori_shape, transforms):
"""
get reverse list of transform.
Args:
ori_shape (list): Origin shape of image.
transforms (list): List of transform.
Returns:
list: List of tuple, there are two format:
('resize', (h, w)) The image shape before resize,
('padding', (h, w)) The image shape before padding.
"""
reverse_list = []
h, w = ori_shape[0], ori_shape[1]
for op in transforms:
if op.__class__.__name__ in ['Resize']:
reverse_list.append(('resize', (h, w)))
h, w = op.target_size[0], op.target_size[1]
if op.__class__.__name__ in ['ResizeByLong']:
reverse_list.append(('resize', (h, w)))
long_edge = max(h, w)
short_edge = min(h, w)
short_edge = int(round(short_edge * op.long_size / long_edge))
long_edge = op.long_size
if h > w:
h = long_edge
w = short_edge
else:
w = long_edge
h = short_edge
if op.__class__.__name__ in ['ResizeByShort']:
reverse_list.append(('resize', (h, w)))
long_edge = max(h, w)
short_edge = min(h, w)
long_edge = int(round(long_edge * op.short_size / short_edge))
short_edge = op.short_size
if h > w:
h = long_edge
w = short_edge
else:
w = long_edge
h = short_edge
if op.__class__.__name__ in ['Pad']:
reverse_list.append(('padding', (h, w)))
w, h = op.target_size[0], op.target_size[1]
if op.__class__.__name__ in ['PadByAspectRatio']:
reverse_list.append(('padding', (h, w)))
ratio = w / h
if ratio == op.aspect_ratio:
pass
elif ratio > op.aspect_ratio:
h = int(w / op.aspect_ratio)
else:
w = int(h * op.aspect_ratio)
if op.__class__.__name__ in ['LimitLong']:
long_edge = max(h, w)
short_edge = min(h, w)
if ((op.max_long is not None) and (long_edge > op.max_long)):
reverse_list.append(('resize', (h, w)))
long_edge = op.max_long
short_edge = int(round(short_edge * op.max_long / long_edge))
elif ((op.min_long is not None) and (long_edge < op.min_long)):
reverse_list.append(('resize', (h, w)))
long_edge = op.min_long
short_edge = int(round(short_edge * op.min_long / long_edge))
if h > w:
h = long_edge
w = short_edge
else:
w = long_edge
h = short_edge
return reverse_list
def reverse_transform(pred, ori_shape, transforms, mode='nearest'):
def reverse_transform(pred, trans_info, mode='nearest'):
"""recover pred to origin shape"""
reverse_list = get_reverse_list(ori_shape, transforms)
intTypeList = [paddle.int8, paddle.int16, paddle.int32, paddle.int64]
dtype = pred.dtype
for item in reverse_list[::-1]:
if item[0] == 'resize':
for item in trans_info[::-1]:
if isinstance(item[0], list):
trans_mode = item[0][0]
else:
trans_mode = item[0]
if trans_mode == 'resize':
h, w = item[1][0], item[1][1]
if paddle.get_device() == 'cpu' and dtype in intTypeList:
pred = paddle.cast(pred, 'float32')
@ -110,7 +38,7 @@ def reverse_transform(pred, ori_shape, transforms, mode='nearest'):
pred = paddle.cast(pred, dtype)
else:
pred = F.interpolate(pred, (h, w), mode=mode)
elif item[0] == 'padding':
elif trans_mode == 'padding':
h, w = item[1][0], item[1][1]
pred = pred[:, :, 0:h, 0:w]
else:
@ -205,8 +133,7 @@ def slide_inference(model, im, crop_size, stride):
def inference(model,
im,
ori_shape=None,
transforms=None,
trans_info=None,
is_slide=False,
stride=None,
crop_size=None):
@ -216,8 +143,7 @@ def inference(model,
Args:
model (paddle.nn.Layer): model to get logits of image.
im (Tensor): the input image.
ori_shape (list): Origin shape of image.
transforms (list): Transforms for image.
trans_info (list): Records of how the image shape changed during preprocessing. Default: None.
is_slide (bool): Whether to infer by sliding window. Default: False.
crop_size (tuple|list). The size of sliding window, (w, h). It should be provided if is_slide is True.
stride (tuple|list). The size of stride, (w, h). It should be provided if is_slide is True.
@ -239,8 +165,8 @@ def inference(model,
logit = slide_inference(model, im, crop_size=crop_size, stride=stride)
if hasattr(model, 'data_format') and model.data_format == 'NHWC':
logit = logit.transpose((0, 3, 1, 2))
if ori_shape is not None:
logit = reverse_transform(logit, ori_shape, transforms, mode='bilinear')
if trans_info is not None:
logit = reverse_transform(logit, trans_info, mode='bilinear')
pred = paddle.argmax(logit, axis=1, keepdim=True, dtype='int32')
return pred, logit
else:
@ -249,8 +175,7 @@ def inference(model,
def aug_inference(model,
im,
ori_shape,
transforms,
trans_info,
scales=1.0,
flip_horizontal=False,
flip_vertical=False,
@ -263,8 +188,7 @@ def aug_inference(model,
Args:
model (paddle.nn.Layer): model to get logits of image.
im (Tensor): the input image.
ori_shape (list): Origin shape of image.
transforms (list): Transforms for image.
trans_info (list): Records of how the image shape changed during preprocessing.
scales (float|tuple|list): Scales for resize. Default: 1.
flip_horizontal (bool): Whether to flip horizontally. Default: False.
flip_vertical (bool): Whether to flip vertically. Default: False.
@ -302,8 +226,7 @@ def aug_inference(model,
logit = F.softmax(logit, axis=1)
final_logit = final_logit + logit
final_logit = reverse_transform(
final_logit, ori_shape, transforms, mode='bilinear')
final_logit = reverse_transform(final_logit, trans_info, mode='bilinear')
pred = paddle.argmax(final_logit, axis=1, keepdim=True, dtype='int32')
return pred, final_logit
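The hunks above retire `get_reverse_list(ori_shape, transforms)`: the transforms now record a `trans_info` list while preprocessing, and `reverse_transform` simply walks it backwards. A hedged illustration of the record format (shapes below are made up):

```python
# Each entry is (op, (h, w)), where (h, w) is the shape *before* that op ran.
import paddle
import paddle.nn.functional as F

pred = paddle.zeros([1, 1, 512, 512])  # network output after Resize then Pad
trans_info = [
    ('resize', (480, 640)),            # shape before a Resize to 500x500
    ('padding', (500, 500)),           # shape before a Pad to 512x512
]
for op, (h, w) in trans_info[::-1]:    # undo padding first, then the resize
    if op == 'resize':
        pred = F.interpolate(pred, (h, w), mode='bilinear')
    elif op == 'padding':
        pred = pred[:, :, :h, :w]
print(pred.shape)                      # [1, 1, 480, 640]
```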

@ -36,6 +36,15 @@ def partition_list(arr, m):
return [arr[i:i + n] for i in range(0, len(arr), n)]
def preprocess(im_path, transforms):
data = {}
data['img'] = im_path
data = transforms(data)
data['img'] = data['img'][np.newaxis, ...]
data['img'] = paddle.to_tensor(data['img'])
return data
def predict(model,
model_path,
transforms,
@ -89,18 +98,13 @@ def predict(model,
color_map = visualize.get_color_map_list(256, custom_color=custom_color)
with paddle.no_grad():
for i, im_path in enumerate(img_lists[local_rank]):
im = cv2.imread(im_path)
ori_shape = im.shape[:2]
im, _ = transforms(im)
im = im[np.newaxis, ...]
im = paddle.to_tensor(im)
data = preprocess(im_path, transforms)
if aug_pred:
pred, _ = infer.aug_inference(
model,
im,
ori_shape=ori_shape,
transforms=transforms.transforms,
data['img'],
trans_info=data['trans_info'],
scales=scales,
flip_horizontal=flip_horizontal,
flip_vertical=flip_vertical,
@ -110,9 +114,8 @@ def predict(model,
else:
pred, _ = infer.inference(
model,
im,
ori_shape=ori_shape,
transforms=transforms.transforms,
data['img'],
trans_info=data['trans_info'],
is_slide=is_slide,
stride=stride,
crop_size=crop_size)
@ -141,9 +144,4 @@ def predict(model,
mkdir(pred_saved_path)
pred_mask.save(pred_saved_path)
# pred_im = utils.visualize(im_path, pred, weight=0.0)
# pred_saved_path = os.path.join(pred_saved_dir, im_file)
# mkdir(pred_saved_path)
# cv2.imwrite(pred_saved_path, pred_im)
progbar_pred.update(i + 1)

@ -35,17 +35,15 @@ def check_logits_losses(logits_list, losses):
.format(len_logits, len_losses))
def loss_computation(logits_list, labels, losses, edges=None):
def loss_computation(logits_list, labels, edges, losses):
check_logits_losses(logits_list, losses)
loss_list = []
for i in range(len(logits_list)):
logits = logits_list[i]
loss_i = losses['types'][i]
coef_i = losses['coef'][i]
if loss_i.__class__.__name__ in ('BCELoss', 'FocalLoss'
) and loss_i.edge_label:
# If use edges as labels According to loss type.
if loss_i.__class__.__name__ in ('BCELoss', ) and loss_i.edge_label:
# Use edges as labels, according to the loss type.
loss_list.append(coef_i * loss_i(logits, edges))
elif loss_i.__class__.__name__ == 'MixedLoss':
mixed_loss_list = loss_i(logits, labels)
@ -75,13 +73,14 @@ def train(model,
keep_checkpoint_max=5,
test_config=None,
precision='fp32',
amp_level='O1',
profiler_options=None,
to_static_training=False):
"""
Launch training.
Args:
model (nn.Layer): A sementic segmentation model.
model (nn.Layer): A semantic segmentation model.
train_dataset (paddle.io.Dataset): Used to read and process training datasets.
val_dataset (paddle.io.Dataset, optional): Used to read and process validation datasets.
optimizer (paddle.optimizer.Optimizer): The optimizer.
@ -98,6 +97,9 @@ def train(model,
keep_checkpoint_max (int, optional): Maximum number of checkpoints to save. Default: 5.
test_config(dict, optional): Evaluation config.
precision (str, optional): Use AMP if precision='fp16'. If precision='fp32', the training is normal.
amp_level (str, optional): Auto mixed precision level. Accepted values are 'O1' and 'O2': with 'O1' (mixed precision),
the input data type of each operator is cast according to the white and black lists; with 'O2' (pure fp16), all operator
parameters and input data are cast to fp16, except for operators in the black list, operators without fp16 kernels, and batch norm. Default: 'O1'.
profiler_options (str, optional): The option of train profiler.
to_static_training (bool, optional): Whether to use @to_static for training.
"""
@ -112,7 +114,18 @@ def train(model,
if not os.path.isdir(save_dir):
if os.path.exists(save_dir):
os.remove(save_dir)
os.makedirs(save_dir)
os.makedirs(save_dir, exist_ok=True)
# use amp
if precision == 'fp16':
logger.info('use AMP to train. AMP level = {}'.format(amp_level))
scaler = paddle.amp.GradScaler(init_loss_scaling=1024)
if amp_level == 'O2':
model, optimizer = paddle.amp.decorate(
models=model,
optimizers=optimizer,
level='O2',
save_dtype='float32')
if nranks > 1:
paddle.distributed.fleet.init(is_collective=True)
@ -130,18 +143,13 @@ def train(model,
return_list=True,
worker_init_fn=worker_init_fn, )
# use amp
if precision == 'fp16':
logger.info('use amp to train')
scaler = paddle.amp.GradScaler(init_loss_scaling=1024)
if use_vdl:
from visualdl import LogWriter
log_writer = LogWriter(save_dir)
if to_static_training:
model = paddle.jit.to_static(model)
logger.info("Successfully to apply @to_static")
logger.info("Successfully applied @to_static")
avg_loss = 0.0
avg_loss_list = []
@ -164,30 +172,29 @@ def train(model,
else:
break
reader_cost_averager.record(time.time() - batch_start)
images = data[0]
labels = data[1].astype('int64')
images = data['img']
labels = data['label'].astype('int64')
edges = None
if len(data) == 3:
edges = data[2].astype('int64')
if 'edge' in data.keys():
edges = data['edge'].astype('int64')
if hasattr(model, 'data_format') and model.data_format == 'NHWC':
images = images.transpose((0, 2, 3, 1))
if precision == 'fp16':
with paddle.amp.auto_cast(
level=amp_level,
enable=True,
custom_white_list={
"elementwise_add", "batch_norm", "sync_batch_norm"
},
custom_black_list={'bilinear_interp_v2'}):
if nranks > 1:
logits_list = ddp_model(images)
else:
logits_list = model(images)
logits_list = ddp_model(images) if nranks > 1 else model(
images)
loss_list = loss_computation(
logits_list=logits_list,
labels=labels,
losses=losses,
edges=edges)
edges=edges,
losses=losses)
loss = sum(loss_list)
scaled = scaler.scale(loss) # scale the loss
@ -197,15 +204,12 @@ def train(model,
else:
scaler.minimize(optimizer, scaled) # update parameters
else:
if nranks > 1:
logits_list = ddp_model(images)
else:
logits_list = model(images)
logits_list = ddp_model(images) if nranks > 1 else model(images)
loss_list = loss_computation(
logits_list=logits_list,
labels=labels,
losses=losses,
edges=edges)
edges=edges,
losses=losses)
loss = sum(loss_list)
loss.backward()
# If the optimizer is ReduceOnPlateau, the loss is the one that has been passed into step().
@ -278,7 +282,12 @@ def train(model,
test_config = {}
mean_iou, acc, _, _, _ = evaluate(
model, val_dataset, num_workers=num_workers, **test_config)
model,
val_dataset,
num_workers=num_workers,
precision=precision,
amp_level=amp_level,
**test_config)
model.train()
@ -314,7 +323,7 @@ def train(model,
batch_start = time.time()
# Calculate flops.
if local_rank == 0:
if local_rank == 0 and not (precision == 'fp16' and amp_level == 'O2'):
_, c, h, w = images.shape
_ = paddle.flops(
model, [1, c, h, w],
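A condensed, self-contained sketch of the AMP flow wired into `train` above; the toy model, data, and loss are stand-ins, not PaddleSeg code:

```python
import paddle
import paddle.nn.functional as F

# Toy stand-ins so the sketch runs end to end.
model = paddle.nn.Conv2D(3, 2, 3, padding=1)   # pretend segmentation network
optimizer = paddle.optimizer.SGD(parameters=model.parameters())
images = paddle.randn([2, 3, 32, 32])
labels = paddle.randint(0, 2, [2, 32, 32])

scaler = paddle.amp.GradScaler(init_loss_scaling=1024)
amp_level = 'O1'
# For 'O2', parameters are additionally cast up front:
# model, optimizer = paddle.amp.decorate(
#     models=model, optimizers=optimizer, level='O2', save_dtype='float32')

with paddle.amp.auto_cast(
        level=amp_level,
        enable=True,
        custom_white_list={'elementwise_add', 'batch_norm', 'sync_batch_norm'},
        custom_black_list={'bilinear_interp_v2'}):
    logits = model(images)                     # NCHW logits
    loss = F.cross_entropy(logits, labels, axis=1)

scaled = scaler.scale(loss)          # scale the loss to avoid fp16 underflow
scaled.backward()
scaler.minimize(optimizer, scaled)   # unscale gradients and update parameters
```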

@ -34,6 +34,8 @@ def evaluate(model,
is_slide=False,
stride=None,
crop_size=None,
precision='fp32',
amp_level='O1',
num_workers=0,
print_detail=True,
auc_roc=False):
@ -41,7 +43,7 @@ def evaluate(model,
Launch evaluation.
Args:
model (nn.Layer): A sementic segmentation model.
model (nn.Layer): A semantic segmentation model.
eval_dataset (paddle.io.Dataset): Used to read and process validation datasets.
aug_eval (bool, optional): Whether to use multi-scale and flip augmentation for evaluation. Default: False.
scales (list|float, optional): Scales for augment. It is valid when `aug_eval` is True. Default: 1.0.
@ -52,6 +54,8 @@ def evaluate(model,
It should be provided when `is_slide` is True.
crop_size (tuple|list, optional): The crop size of sliding window, the first is width and the second is height.
It should be provided when `is_slide` is True.
precision (str, optional): Use AMP if precision='fp16'. If precision='fp32', the evaluation is normal.
amp_level (str, optional): Auto mixed precision level. Accepted values are 'O1' and 'O2': with 'O1' (mixed precision), the input data type of each operator is cast according to the white and black lists; with 'O2' (pure fp16), all operator parameters and input data are cast to fp16, except for operators in the black list, operators without fp16 kernels, and batch norm. Default: 'O1'.
num_workers (int, optional): Num workers for data loader. Default: 0.
print_detail (bool, optional): Whether to print detailed information about the evaluation process. Default: True.
auc_roc (bool, optional): Whether to add the AUC-ROC metric.
@ -93,32 +97,66 @@ def evaluate(model,
batch_cost_averager = TimeAverager()
batch_start = time.time()
with paddle.no_grad():
for iter, (im, label) in enumerate(loader):
for iter, data in enumerate(loader):
reader_cost_averager.record(time.time() - batch_start)
label = label.astype('int64')
label = data['label'].astype('int64')
ori_shape = label.shape[-2:]
if aug_eval:
pred, logits = infer.aug_inference(
model,
im,
ori_shape=ori_shape,
transforms=eval_dataset.transforms.transforms,
scales=scales,
flip_horizontal=flip_horizontal,
flip_vertical=flip_vertical,
is_slide=is_slide,
stride=stride,
crop_size=crop_size)
if precision == 'fp16':
with paddle.amp.auto_cast(
level=amp_level,
enable=True,
custom_white_list={
"elementwise_add", "batch_norm",
"sync_batch_norm"
},
custom_black_list={'bilinear_interp_v2'}):
pred, logits = infer.aug_inference(
model,
data['img'],
trans_info=data['trans_info'],
scales=scales,
flip_horizontal=flip_horizontal,
flip_vertical=flip_vertical,
is_slide=is_slide,
stride=stride,
crop_size=crop_size)
else:
pred, logits = infer.aug_inference(
model,
data['img'],
trans_info=data['trans_info'],
scales=scales,
flip_horizontal=flip_horizontal,
flip_vertical=flip_vertical,
is_slide=is_slide,
stride=stride,
crop_size=crop_size)
else:
pred, logits = infer.inference(
model,
im,
ori_shape=ori_shape,
transforms=eval_dataset.transforms.transforms,
is_slide=is_slide,
stride=stride,
crop_size=crop_size)
if precision == 'fp16':
with paddle.amp.auto_cast(
level=amp_level,
enable=True,
custom_white_list={
"elementwise_add", "batch_norm",
"sync_batch_norm"
},
custom_black_list={'bilinear_interp_v2'}):
pred, logits = infer.inference(
model,
data['img'],
trans_info=data['trans_info'],
is_slide=is_slide,
stride=stride,
crop_size=crop_size)
else:
pred, logits = infer.inference(
model,
data['img'],
trans_info=data['trans_info'],
is_slide=is_slide,
stride=stride,
crop_size=crop_size)
intersect_area, pred_area, label_area = metrics.calculate_area(
pred,
@ -175,12 +213,12 @@ def evaluate(model,
batch_cost_averager.reset()
batch_start = time.time()
class_iou, miou = metrics.mean_iou(intersect_area_all, pred_area_all,
label_area_all)
class_acc, acc = metrics.accuracy(intersect_area_all, pred_area_all)
kappa = metrics.kappa(intersect_area_all, pred_area_all, label_area_all)
class_dice, mdice = metrics.dice(intersect_area_all, pred_area_all,
label_area_all)
metrics_input = (intersect_area_all, pred_area_all, label_area_all)
class_iou, miou = metrics.mean_iou(*metrics_input)
acc, class_precision, class_recall = metrics.class_measurement(
*metrics_input)
kappa = metrics.kappa(*metrics_input)
class_dice, mdice = metrics.dice(*metrics_input)
if auc_roc:
auc_roc = metrics.auc_roc(
@ -193,5 +231,7 @@ def evaluate(model,
infor = infor + auc_infor if auc_roc else infor
logger.info(infor)
logger.info("[EVAL] Class IoU: \n" + str(np.round(class_iou, 4)))
logger.info("[EVAL] Class Acc: \n" + str(np.round(class_acc, 4)))
return miou, acc, class_iou, class_acc, kappa
logger.info("[EVAL] Class Precision: \n" + str(
np.round(class_precision, 4)))
logger.info("[EVAL] Class Recall: \n" + str(np.round(class_recall, 4)))
return miou, acc, class_iou, class_precision, kappa

@ -15,9 +15,12 @@
import codecs
import os
from typing import Any, Dict, Generic
import warnings
from ast import literal_eval
import paddle
import yaml
import six
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.utils import logger
@ -69,7 +72,8 @@ class Config(object):
path: str,
learning_rate: float=None,
batch_size: int=None,
iters: int=None):
iters: int=None,
opts: list=None):
if not path:
raise ValueError('Please specify the configuration file path.')
@ -84,7 +88,18 @@ class Config(object):
raise RuntimeError('Config file should be in yaml format!')
self.update(
learning_rate=learning_rate, batch_size=batch_size, iters=iters)
learning_rate=learning_rate,
batch_size=batch_size,
iters=iters,
opts=opts)
model_cfg = self.dic.get('model', None)
if model_cfg is None:
raise RuntimeError('No model specified in the configuration file.')
if (not self.train_dataset_config) and (not self.val_dataset_config):
raise ValueError(
'Either `train_dataset` or `val_dataset` should be given, but there are none.'
)
def _update_dic(self, dic, base_dic):
"""
@ -121,7 +136,8 @@ class Config(object):
def update(self,
learning_rate: float=None,
batch_size: int=None,
iters: int=None):
iters: int=None,
opts: list=None):
'''Update config'''
if learning_rate:
if 'lr_scheduler' in self.dic:
@ -135,6 +151,27 @@ class Config(object):
if iters:
self.dic['iters'] = iters
# Override parameters with the --opts command-line options.
if opts is not None:
if len(opts) % 2 != 0 or len(opts) == 0:
raise ValueError(
"Command line options config `--opts` format error! It should be even length like: k1 v1 k2 v2 ... Please check it: {}".
format(opts))
for key, value in zip(opts[0::2], opts[1::2]):
if isinstance(value, six.string_types):
try:
value = literal_eval(value)
except ValueError:
pass
except SyntaxError:
pass
key_list = key.split('.')
dic = self.dic
for subkey in key_list[:-1]:
dic.setdefault(subkey, dict())
dic = dic[subkey]
dic[key_list[-1]] = value
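A hedged usage sketch for the new `--opts` override: dotted keys address nested config entries, and string values are parsed with `ast.literal_eval` where possible (the config path below is hypothetical):

```python
from paddlers.models.ppseg.cvlibs import Config

cfg = Config(
    'configs/demo.yml',  # hypothetical config file
    opts=['batch_size', '4', 'model.num_classes', '2'])
# Equivalent to cfg.dic['batch_size'] = 4 and
# cfg.dic['model']['num_classes'] = 2.
```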
@property
def batch_size(self) -> int:
return self.dic.get('batch_size', 1)
@ -153,13 +190,32 @@ class Config(object):
'No `lr_scheduler` specified in the configuration file.')
params = self.dic.get('lr_scheduler')
use_warmup = False
if 'warmup_iters' in params:
use_warmup = True
warmup_iters = params.pop('warmup_iters')
assert 'warmup_start_lr' in params, \
"When use warmup, please set warmup_start_lr and warmup_iters in lr_scheduler"
warmup_start_lr = params.pop('warmup_start_lr')
end_lr = params['learning_rate']
lr_type = params.pop('type')
if lr_type == 'PolynomialDecay':
params.setdefault('decay_steps', self.iters)
iters = self.iters - warmup_iters if use_warmup else self.iters
iters = max(iters, 1)
params.setdefault('decay_steps', iters)
params.setdefault('end_lr', 0)
params.setdefault('power', 0.9)
lr_sche = getattr(paddle.optimizer.lr, lr_type)(**params)
if use_warmup:
lr_sche = paddle.optimizer.lr.LinearWarmup(
learning_rate=lr_sche,
warmup_steps=warmup_iters,
start_lr=warmup_start_lr,
end_lr=end_lr)
return getattr(paddle.optimizer.lr, lr_type)(**params)
return lr_sche
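With warmup configured, the decay scheduler now runs for `iters - warmup_iters` steps and is wrapped in `LinearWarmup`. A standalone sketch of the equivalent construction (numbers are illustrative):

```python
import paddle

iters, warmup_iters = 1000, 100
decay = paddle.optimizer.lr.PolynomialDecay(
    learning_rate=0.01,
    decay_steps=iters - warmup_iters,  # decay starts after warmup ends
    end_lr=0,
    power=0.9)
sched = paddle.optimizer.lr.LinearWarmup(
    learning_rate=decay,
    warmup_steps=warmup_iters,
    start_lr=1.0e-5,
    end_lr=0.01)  # warmup ends at the base learning rate
```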
@property
def learning_rate(self) -> paddle.optimizer.lr.LRScheduler:
@ -202,15 +258,33 @@ class Config(object):
args = self.optimizer_args
optimizer_type = args.pop('type')
params = self.model.parameters()
if 'backbone_lr_mult' in args:
if not hasattr(self.model, 'backbone'):
logger.warning('`backbone_lr_mult` is not effective because'
' the model does not have a backbone')
else:
backbone_lr_mult = args.pop('backbone_lr_mult')
backbone_params = self.model.backbone.parameters()
backbone_params_id = [id(x) for x in backbone_params]
other_params = [
x for x in params if id(x) not in backbone_params_id
]
params = [{
'params': backbone_params,
'learning_rate': backbone_lr_mult
}, {
'params': other_params
}]
if optimizer_type == 'sgd':
return paddle.optimizer.Momentum(
lr, parameters=self.model.parameters(), **args)
return paddle.optimizer.Momentum(lr, parameters=params, **args)
elif optimizer_type == 'adam':
return paddle.optimizer.Adam(
lr, parameters=self.model.parameters(), **args)
return paddle.optimizer.Adam(lr, parameters=params, **args)
elif optimizer_type in paddle.optimizer.__all__:
return getattr(paddle.optimizer, optimizer_type)(
lr, parameters=self.model.parameters(), **args)
return getattr(paddle.optimizer, optimizer_type)(lr,
parameters=params,
**args)
raise RuntimeError('Unknown optimizer type {}.'.format(optimizer_type))
@ -295,24 +369,6 @@ class Config(object):
@property
def model(self) -> paddle.nn.Layer:
model_cfg = self.dic.get('model').copy()
if not model_cfg:
raise RuntimeError('No model specified in the configuration file.')
if not 'num_classes' in model_cfg:
num_classes = None
if self.train_dataset_config:
if hasattr(self.train_dataset_class, 'NUM_CLASSES'):
num_classes = self.train_dataset_class.NUM_CLASSES
elif hasattr(self.train_dataset, 'num_classes'):
num_classes = self.train_dataset.num_classes
elif self.val_dataset_config:
if hasattr(self.val_dataset_class, 'NUM_CLASSES'):
num_classes = self.val_dataset_class.NUM_CLASSES
elif hasattr(self.val_dataset, 'num_classes'):
num_classes = self.val_dataset.num_classes
if num_classes is not None:
model_cfg['num_classes'] = num_classes
if not self._model:
self._model = self._load_object(model_cfg)
return self._model
@ -401,3 +457,94 @@ class Config(object):
def __str__(self) -> str:
return yaml.dump(self.dic)
@property
def val_transforms(self) -> list:
"""Get val_transform from val_dataset"""
_val_dataset = self.val_dataset_config
if not _val_dataset:
return []
_transforms = _val_dataset.get('transforms', [])
transforms = []
for i in _transforms:
transforms.append(self._load_object(i))
return transforms
def check_sync_info(self) -> None:
"""
Check and sync the info, such as num_classes and img_channels,
between the config of model, train_dataset and val_dataset.
"""
self._check_sync_num_classes()
self._check_sync_img_channels()
def _check_sync_num_classes(self):
num_classes_set = set()
if self.dic['model'].get('num_classes', None) is not None:
num_classes_set.add(self.dic['model'].get('num_classes'))
if self.train_dataset_config:
if hasattr(self.train_dataset_class, 'NUM_CLASSES'):
num_classes_set.add(self.train_dataset_class.NUM_CLASSES)
elif 'num_classes' in self.train_dataset_config:
num_classes_set.add(self.train_dataset_config['num_classes'])
if self.val_dataset_config:
if hasattr(self.val_dataset_class, 'NUM_CLASSES'):
num_classes_set.add(self.val_dataset_class.NUM_CLASSES)
elif 'num_classes' in self.val_dataset_config:
num_classes_set.add(self.val_dataset_config['num_classes'])
if len(num_classes_set) == 0:
raise ValueError(
'`num_classes` is not found. Please set it in model, train_dataset or val_dataset'
)
elif len(num_classes_set) > 1:
raise ValueError(
'`num_classes` is not consistent: {}. Please set it consistently in model, train_dataset, and val_dataset'
.format(num_classes_set))
num_classes = num_classes_set.pop()
self.dic['model']['num_classes'] = num_classes
if self.train_dataset_config and \
(not hasattr(self.train_dataset_class, 'NUM_CLASSES')):
self.dic['train_dataset']['num_classes'] = num_classes
if self.val_dataset_config and \
(not hasattr(self.val_dataset_class, 'NUM_CLASSES')):
self.dic['val_dataset']['num_classes'] = num_classes
def _check_sync_img_channels(self):
img_channels_set = set()
model_cfg = self.dic['model']
# If the model has a backbone, in_channels is an input parameter of the backbone.
# Otherwise, in_channels is an input parameter of the model.
if 'backbone' in model_cfg:
x = model_cfg['backbone'].get('in_channels', None)
if x is not None:
img_channels_set.add(x)
elif model_cfg.get('in_channels', None) is not None:
img_channels_set.add(model_cfg.get('in_channels'))
if self.train_dataset_config and \
('img_channels' in self.train_dataset_config):
img_channels_set.add(self.train_dataset_config['img_channels'])
if self.val_dataset_config and \
('img_channels' in self.val_dataset_config):
img_channels_set.add(self.val_dataset_config['img_channels'])
if len(img_channels_set) > 1:
raise ValueError(
'`img_channels` is not consistent: {}. Please set it consistently in model, train_dataset, and val_dataset'
.format(img_channels_set))
img_channels = 3 if len(img_channels_set) == 0 \
else img_channels_set.pop()
if 'backbone' in model_cfg:
self.dic['model']['backbone']['in_channels'] = img_channels
else:
self.dic['model']['in_channels'] = img_channels
if self.train_dataset_config and \
self.train_dataset_config['type'] == "Dataset":
self.dic['train_dataset']['img_channels'] = img_channels
if self.val_dataset_config and \
self.val_dataset_config['type'] == "Dataset":
self.dic['val_dataset']['img_channels'] = img_channels
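A hedged usage note for the new consistency check (config path hypothetical):

```python
from paddlers.models.ppseg.cvlibs import Config

cfg = Config('configs/demo.yml')  # hypothetical config file
# Raises ValueError if model, train_dataset, and val_dataset disagree on
# `num_classes` or `img_channels`; otherwise propagates the agreed values
# (img_channels falls back to 3 when unset everywhere).
cfg.check_sync_info()
```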

@ -118,3 +118,29 @@ def kaiming_uniform(param, **kwargs):
initializer = nn.initializer.KaimingUniform(**kwargs)
initializer(param, param.block)
def xavier_uniform(param, **kwargs):
r"""
This implements the Xavier weight initializer from the paper
`Understanding the difficulty of training deep feedforward neural
networks <http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf>`_
by Xavier Glorot and Yoshua Bengio.
This initializer is designed to keep the scale of the gradients
approximately same in all the layers. In case of Uniform distribution,
the range is [-x, x], where
.. math::
x = \sqrt{\frac{6.0}{fan\_in + fan\_out}}
Args:
param (Tensor): Tensor that needs to be initialized.
Examples:
from paddlers.models.ppseg.cvlibs import param_init
import paddle.nn as nn
linear = nn.Linear(2, 4)
param_init.xavier_uniform(linear.weight)
"""
initializer = nn.initializer.XavierUniform(**kwargs)
initializer(param, param.block)

@ -27,3 +27,4 @@ from .drive import DRIVE
from .hrf import HRF
from .chase_db1 import CHASEDB1
from .pp_humanseg14k import PPHumanSeg14K
from .pssl import PSSLDataset

@ -89,23 +89,31 @@ class ADE20K(Dataset):
self.file_list.append([img_path, label_path])
def __getitem__(self, idx):
data = {}
data['trans_info'] = []
image_path, label_path = self.file_list[idx]
data['img'] = image_path
data['gt_fields'] = [
] # Keys in gt_fields are transformed synchronously with the image.
if self.mode == 'val':
im, _ = self.transforms(im=image_path)
data = self.transforms(data)
label = np.asarray(Image.open(label_path))
# Class 0 is ignored, and it becomes 255 after
# subtracting 1 because the label dtype is uint8.
label = label - 1
label = label[np.newaxis, :, :]
return im, label
data['label'] = label
return data
else:
im, label = self.transforms(im=image_path, label=label_path)
label = label - 1
data['label'] = label_path
data['gt_fields'].append('label')
data = self.transforms(data)
data['label'] = data['label'] - 1
# Recover the ignore pixels added by the transforms
label[label == 254] = 255
data['label'][data['label'] == 254] = 255
if self.edge:
edge_mask = F.mask_to_binary_edge(
label, radius=2, num_classes=self.num_classes)
return im, label, edge_mask
else:
return im, label
data['edge'] = edge_mask
return data

@ -46,10 +46,10 @@ class Dataset(paddle.io.Dataset):
Examples:
import paddlers.models.ppseg.transforms as T
from paddlers.models.ppseg.datasets import Dataset
transforms = [T.RandomPadCrop(crop_size=(512,512)), T.Normalize()]
transforms = [T.RandomPaddingCrop(crop_size=(512,512)), T.Normalize()]
dataset_root = 'dataset_root_path'
train_path = 'train_path'
num_classes = 2
@ -62,10 +62,11 @@ class Dataset(paddle.io.Dataset):
"""
def __init__(self,
transforms,
mode,
dataset_root,
transforms,
num_classes,
mode='train',
img_channels=3,
train_path=None,
val_path=None,
test_path=None,
@ -73,10 +74,11 @@ class Dataset(paddle.io.Dataset):
ignore_index=255,
edge=False):
self.dataset_root = dataset_root
self.transforms = Compose(transforms)
self.transforms = Compose(transforms, img_channels=img_channels)
self.file_list = list()
self.mode = mode.lower()
self.num_classes = num_classes
self.img_channels = img_channels
self.ignore_index = ignore_index
self.edge = edge
@ -84,13 +86,18 @@ class Dataset(paddle.io.Dataset):
raise ValueError(
"mode should be 'train', 'val' or 'test', but got {}.".format(
self.mode))
if self.transforms is None:
raise ValueError("`transforms` is necessary, but it is None.")
if not os.path.exists(self.dataset_root):
raise FileNotFoundError('The `dataset_root` does not exist: {}.'.format(
self.dataset_root))
if self.transforms is None:
raise ValueError("`transforms` is necessary, but it is None.")
if num_classes < 1:
raise ValueError(
"`num_classes` should be greater than 1, but got {}".format(
num_classes))
if img_channels not in [1, 3]:
raise ValueError("`img_channels` should in [1, 3], but got {}".
format(img_channels))
if self.mode == 'train':
if train_path is None:
@ -139,24 +146,25 @@ class Dataset(paddle.io.Dataset):
self.file_list.append([image_path, label_path])
def __getitem__(self, idx):
data = {}
data['trans_info'] = []
image_path, label_path = self.file_list[idx]
if self.mode == 'test':
im, _ = self.transforms(im=image_path)
im = im[np.newaxis, ...]
return im, image_path
elif self.mode == 'val':
im, _ = self.transforms(im=image_path)
label = np.asarray(Image.open(label_path))
label = label[np.newaxis, :, :]
return im, label
data['img'] = image_path
data['label'] = label_path
# Keys in gt_fields are transformed synchronously with the image.
data['gt_fields'] = []
if self.mode == 'val':
data = self.transforms(data)
data['label'] = data['label'][np.newaxis, :, :]
else:
im, label = self.transforms(im=image_path, label=label_path)
data['gt_fields'].append('label')
data = self.transforms(data)
if self.edge:
edge_mask = F.mask_to_binary_edge(
label, radius=2, num_classes=self.num_classes)
return im, label, edge_mask
else:
return im, label
data['label'], radius=2, num_classes=self.num_classes)
data['edge'] = edge_mask
return data
def __len__(self):
return len(self.file_list)
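With the dict-based pipeline above, each sample is a dictionary rather than a tuple. A hedged sketch mirroring the docstring example (paths are placeholders):

```python
import paddlers.models.ppseg.transforms as T
from paddlers.models.ppseg.datasets import Dataset

dataset = Dataset(
    mode='train',
    dataset_root='dataset_root_path',   # placeholder directory
    transforms=[T.RandomPaddingCrop(crop_size=(512, 512)), T.Normalize()],
    num_classes=2,
    img_channels=3,
    train_path='train_path')            # placeholder file list
sample = dataset[0]
# sample['img']: transformed image; sample['label']: label mask;
# sample['trans_info']: shape-change records used by reverse_transform;
# sample['edge'] is present only when edge=True.
```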

@ -0,0 +1,135 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
from paddlers.models.ppseg.datasets import Dataset
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.transforms import Compose
@manager.DATASETS.add_component
class PSSLDataset(Dataset):
"""
The PSSL dataset for segmentation. PSSL is short for Pseudo Semantic Segmentation Labels, where the pseudo label
is computed by the Consensus explanation algorithm.
The PSSL refers to "Distilling Ensemble of Explanations for Weakly-Supervised Pre-Training of Image Segmentation
Models" (https://arxiv.org/abs/2207.03335).
The Consensus explanation refers to "Cross-Model Consensus of Explanations and Beyond for Image Classification
Models: An Empirical Study" (https://arxiv.org/abs/2109.00707).
To use this dataset, we need to additionally prepare the original ImageNet dataset, which has the folder structure
as follows:
imagenet_root
|
|--train
| |--n01440764
| | |--n01440764_10026.JPEG
| | |--...
| |--nxxxxxxxx
| |--...
where only the "train" set is needed.
The PSSL dataset has the folder structure as follows:
pssl_root
|
|--train
| |--n01440764
| | |--n01440764_10026.JPEG_eiseg.npz
| | |--...
| |--nxxxxxxxx
| |--...
|
|--imagenet_lsvrc_2015_synsets.txt
|--train.txt
where "train.txt" and "imagenet_lsvrc_2015_synsets.txt" are included in the PSSL dataset.
Args:
transforms (list): Transforms for image.
imagenet_root (str): The path to the original ImageNet dataset.
pssl_root (str): The path to the PSSL dataset.
mode (str, optional): Which part of the dataset to use. It is one of ('train', 'val', 'test'). Default: 'train'.
edge (bool, optional): Whether to compute edge while training. Default: False.
"""
ignore_index = 1001 # 0~999 is target class, 1000 is bg
NUM_CLASSES = 1001 # consider target class and bg
def __init__(self,
transforms,
imagenet_root,
pssl_root,
mode='train',
edge=False):
mode = mode.lower()
if mode not in ['train']:
raise ValueError("mode should be 'train', but got {}.".format(mode))
if transforms is None:
raise ValueError("`transforms` is necessary, but it is None.")
self.transforms = Compose(transforms)
self.mode = mode
self.edge = edge
self.num_classes = self.NUM_CLASSES
self.ignore_index = self.num_classes # 1001
self.file_list = []
self.class_id_dict = {}
if imagenet_root is None or not os.path.isdir(pssl_root):
raise ValueError(
"The dataset is not Found or the folder structure is nonconfoumance."
)
train_list_file = os.path.join(pssl_root, "train.txt")
if not os.path.exists(train_list_file):
raise ValueError("Train list file isn't exists.")
for idx, line in enumerate(open(train_list_file)):
# line: train/n04118776/n04118776_45912.JPEG_eiseg.npz
label_path = line.strip()
img_path = label_path.split('.JPEG')[0] + '.JPEG'
label_path = os.path.join(pssl_root, label_path)
img_path = os.path.join(imagenet_root, img_path)
self.file_list.append([img_path, label_path])
# mapping class name to class id.
class_id_file = os.path.join(pssl_root,
"imagenet_lsvrc_2015_synsets.txt")
if not os.path.exists(class_id_file):
raise ValueError("Class id file isn't exists.")
for idx, line in enumerate(open(class_id_file)):
class_name = line.strip()
self.class_id_dict[class_name] = idx
def __getitem__(self, idx):
image_path, label_path = self.file_list[idx]
# transform label
class_name = (image_path.split('/')[-1]).split('_')[0]
class_id = self.class_id_dict[class_name]
pssl_seg = np.load(label_path)['arr_0']
gt_semantic_seg = np.zeros_like(pssl_seg, dtype=np.int64) + 1000
# [0, 999] for ImageNet classes, 1000 for background; other values (-1) are ignored during training.
gt_semantic_seg[pssl_seg == 1] = class_id
im, label = self.transforms(im=image_path, label=gt_semantic_seg)
return im, label
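A tiny worked example of the pseudo-label mapping in `__getitem__` above:

```python
import numpy as np

pssl_seg = np.array([[1, 0], [1, 1]])  # consensus mask for one image
class_id = 7                           # hypothetical class index
gt = np.zeros_like(pssl_seg, dtype=np.int64) + 1000  # background everywhere
gt[pssl_seg == 1] = class_id
# gt -> [[7, 1000], [7, 7]]; 1001 serves as the ignore index in training.
```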

@ -49,9 +49,18 @@ from .segnet import SegNet
from .encnet import ENCNet
from .hrnet_contrast import HRNetW48Contrast
from .espnet import ESPNetV2
from .pp_liteseg import PPLiteSeg
from .dmnet import DMNet
from .espnetv1 import ESPNetV1
from .enet import ENet
from .bisenetv1 import BiseNetV1
from .fastfcn import FastFCN
from .pfpnnet import PFPNNet
from .glore import GloRe
from .ddrnet import DDRNet_23
from .ccnet import CCNet
from .mobileseg import MobileSeg
from .upernet import UPerNet
from .sinet import SINet
from .lraspp import LRASPP
from .topformer import TopFormer

@ -35,13 +35,13 @@ class AttentionUNet(nn.Layer):
Args:
num_classes (int): The unique number of target classes.
in_channels (int, optional): The channels of input image. Default: 3.
pretrained (str, optional): The path or url of pretrained model. Default: None.
"""
def __init__(self, num_classes, pretrained=None):
def __init__(self, num_classes, in_channels=3, pretrained=None):
super().__init__()
n_channels = 3
self.encoder = Encoder(n_channels, [64, 128, 256, 512])
self.encoder = Encoder(in_channels, [64, 128, 256, 512])
filters = np.array([64, 128, 256, 512, 1024])
self.up5 = UpConv(ch_in=filters[4], ch_out=filters[3])
self.att5 = AttentionBlock(

@ -21,3 +21,7 @@ from .swin_transformer import *
from .mobilenetv2 import *
from .mix_transformer import *
from .stdcnet import *
from .lite_hrnet import *
from .shufflenetv2 import *
from .ghostnet import *
from .top_transformer import *

@ -0,0 +1,318 @@
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Code was based on https://github.com/huawei-noah/CV-Backbones/tree/master/ghostnet_pytorch
import math
import paddle
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn import Conv2D, BatchNorm, AdaptiveAvgPool2D, Linear
from paddle.regularizer import L2Decay
from paddle.nn.initializer import Uniform, KaimingNormal
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.utils import utils, logger
__all__ = ["GhostNet_x0_5", "GhostNet_x1_0", "GhostNet_x1_3"]
class ConvBNLayer(nn.Layer):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
groups=1,
act="relu",
name=None):
super(ConvBNLayer, self).__init__()
self._conv = Conv2D(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=(kernel_size - 1) // 2,
groups=groups,
weight_attr=ParamAttr(
initializer=KaimingNormal(), name=name + "_weights"),
bias_attr=False)
bn_name = name + "_bn"
self._batch_norm = BatchNorm(
num_channels=out_channels,
act=act,
param_attr=ParamAttr(
name=bn_name + "_scale", regularizer=L2Decay(0.0)),
bias_attr=ParamAttr(
name=bn_name + "_offset", regularizer=L2Decay(0.0)),
moving_mean_name=bn_name + "_mean",
moving_variance_name=bn_name + "_variance")
def forward(self, inputs):
y = self._conv(inputs)
y = self._batch_norm(y)
return y
class SEBlock(nn.Layer):
def __init__(self, num_channels, reduction_ratio=4, name=None):
super(SEBlock, self).__init__()
self.pool2d_gap = AdaptiveAvgPool2D(1)
self._num_channels = num_channels
stdv = 1.0 / math.sqrt(num_channels * 1.0)
med_ch = num_channels // reduction_ratio
self.squeeze = Linear(
num_channels,
med_ch,
weight_attr=ParamAttr(
initializer=Uniform(-stdv, stdv), name=name + "_1_weights"),
bias_attr=ParamAttr(name=name + "_1_offset"))
stdv = 1.0 / math.sqrt(med_ch * 1.0)
self.excitation = Linear(
med_ch,
num_channels,
weight_attr=ParamAttr(
initializer=Uniform(-stdv, stdv), name=name + "_2_weights"),
bias_attr=ParamAttr(name=name + "_2_offset"))
def forward(self, inputs):
pool = self.pool2d_gap(inputs)
pool = paddle.squeeze(pool, axis=[2, 3])
squeeze = self.squeeze(pool)
squeeze = F.relu(squeeze)
excitation = self.excitation(squeeze)
excitation = paddle.clip(x=excitation, min=0, max=1)
excitation = paddle.unsqueeze(excitation, axis=[2, 3])
out = paddle.multiply(inputs, excitation)
return out
class GhostModule(nn.Layer):
def __init__(self,
in_channels,
output_channels,
kernel_size=1,
ratio=2,
dw_size=3,
stride=1,
relu=True,
name=None):
super(GhostModule, self).__init__()
init_channels = int(math.ceil(output_channels / ratio))
new_channels = int(init_channels * (ratio - 1))
self.primary_conv = ConvBNLayer(
in_channels=in_channels,
out_channels=init_channels,
kernel_size=kernel_size,
stride=stride,
groups=1,
act="relu" if relu else None,
name=name + "_primary_conv")
self.cheap_operation = ConvBNLayer(
in_channels=init_channels,
out_channels=new_channels,
kernel_size=dw_size,
stride=1,
groups=init_channels,
act="relu" if relu else None,
name=name + "_cheap_operation")
def forward(self, inputs):
x = self.primary_conv(inputs)
y = self.cheap_operation(x)
out = paddle.concat([x, y], axis=1)
return out
class GhostBottleneck(nn.Layer):
def __init__(self,
in_channels,
hidden_dim,
output_channels,
kernel_size,
stride,
use_se,
name=None):
super(GhostBottleneck, self).__init__()
self._stride = stride
self._use_se = use_se
self._num_channels = in_channels
self._output_channels = output_channels
self.ghost_module_1 = GhostModule(
in_channels=in_channels,
output_channels=hidden_dim,
kernel_size=1,
stride=1,
relu=True,
name=name + "_ghost_module_1")
if stride == 2:
self.depthwise_conv = ConvBNLayer(
in_channels=hidden_dim,
out_channels=hidden_dim,
kernel_size=kernel_size,
stride=stride,
groups=hidden_dim,
act=None,
name=name +
"_depthwise_depthwise" # looks strange due to an old typo, will be fixed later.
)
if use_se:
self.se_block = SEBlock(num_channels=hidden_dim, name=name + "_se")
self.ghost_module_2 = GhostModule(
in_channels=hidden_dim,
output_channels=output_channels,
kernel_size=1,
relu=False,
name=name + "_ghost_module_2")
if stride != 1 or in_channels != output_channels:
self.shortcut_depthwise = ConvBNLayer(
in_channels=in_channels,
out_channels=in_channels,
kernel_size=kernel_size,
stride=stride,
groups=in_channels,
act=None,
name=name +
"_shortcut_depthwise_depthwise" # looks strange due to an old typo, will be fixed later.
)
self.shortcut_conv = ConvBNLayer(
in_channels=in_channels,
out_channels=output_channels,
kernel_size=1,
stride=1,
groups=1,
act=None,
name=name + "_shortcut_conv")
def forward(self, inputs):
x = self.ghost_module_1(inputs)
if self._stride == 2:
x = self.depthwise_conv(x)
if self._use_se:
x = self.se_block(x)
x = self.ghost_module_2(x)
if self._stride == 1 and self._num_channels == self._output_channels:
shortcut = inputs
else:
shortcut = self.shortcut_depthwise(inputs)
shortcut = self.shortcut_conv(shortcut)
return paddle.add(x=x, y=shortcut)
class GhostNet(nn.Layer):
def __init__(self, scale, in_channels=3, pretrained=None):
super(GhostNet, self).__init__()
self.cfgs = [
# k, t, c, SE, s
[3, 16, 16, 0, 1],
[3, 48, 24, 0, 2],
[3, 72, 24, 0, 1], # x4
[5, 72, 40, 1, 2],
[5, 120, 40, 1, 1], # x8
[3, 240, 80, 0, 2],
[3, 200, 80, 0, 1],
[3, 184, 80, 0, 1],
[3, 184, 80, 0, 1],
[3, 480, 112, 1, 1],
[3, 672, 112, 1, 1], # x16
[5, 672, 160, 1, 2],
[5, 960, 160, 0, 1],
[5, 960, 160, 1, 1],
[5, 960, 160, 0, 1],
[5, 960, 160, 1, 1] # x32
]
self.scale = scale
self.pretrained = pretrained
output_channels = int(self._make_divisible(16 * self.scale, 4))
self.conv1 = ConvBNLayer(
in_channels=in_channels,
out_channels=output_channels,
kernel_size=3,
stride=2,
groups=1,
act="relu",
name="conv1")
# build inverted residual blocks
self.out_index = [2, 4, 10, 15]
self.feat_channels = []
self.ghost_bottleneck_list = []
for idx, (k, exp_size, c, use_se, s) in enumerate(self.cfgs):
in_channels = output_channels
output_channels = int(self._make_divisible(c * self.scale, 4))
hidden_dim = int(self._make_divisible(exp_size * self.scale, 4))
ghost_bottleneck = self.add_sublayer(
name="_ghostbottleneck_" + str(idx),
sublayer=GhostBottleneck(
in_channels=in_channels,
hidden_dim=hidden_dim,
output_channels=output_channels,
kernel_size=k,
stride=s,
use_se=use_se,
name="_ghostbottleneck_" + str(idx)))
self.ghost_bottleneck_list.append(ghost_bottleneck)
if idx in self.out_index:
self.feat_channels.append(output_channels)
self.init_weight()
def init_weight(self):
if self.pretrained is not None:
utils.load_entire_model(self, self.pretrained)
def forward(self, inputs):
feat_list = []
x = self.conv1(inputs)
for idx, ghost_bottleneck in enumerate(self.ghost_bottleneck_list):
x = ghost_bottleneck(x)
if idx in self.out_index:
feat_list.append(x)
return feat_list
def _make_divisible(self, v, divisor, min_value=None):
"""
This function is taken from the original tf repo.
    It ensures that all layers have a channel number that is divisible by 8.
It can be seen here:
https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
"""
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
# Make sure that round down does not go down by more than 10%.
if new_v < 0.9 * v:
new_v += divisor
return new_v
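# Worked example (illustrative addition): with scale=1.3 and divisor=4,
# _make_divisible(40 * 1.3, 4) evaluates to int(52.0 + 2) // 4 * 4 = 52;
# since 52 >= 0.9 * 52, no correction is needed and the block gets 52
# channels. A value that would round down by more than 10% is bumped up
# by one divisor instead.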
@manager.BACKBONES.add_component
def GhostNet_x0_5(**kwargs):
model = GhostNet(scale=0.5, **kwargs)
return model
@manager.BACKBONES.add_component
def GhostNet_x1_0(**kwargs):
model = GhostNet(scale=1.0, **kwargs)
return model
@manager.BACKBONES.add_component
def GhostNet_x1_3(**kwargs):
model = GhostNet(scale=1.3, **kwargs)
return model
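# --- Usage sketch (an illustrative addition, not part of the upstream file;
# assumes a working PaddlePaddle install). With out_index = [2, 4, 10, 15],
# the backbone returns features at strides 4, 8, 16 and 32; for scale=1.0 the
# channel counts are [24, 40, 112, 160] (see model.feat_channels).
if __name__ == "__main__":
    model = GhostNet_x1_0()
    x = paddle.randn([1, 3, 224, 224])
    feats = model(x)
    for feat, ch in zip(feats, model.feat_channels):
        assert feat.shape[1] == ch
        print(feat.shape)  # [1, 24, 56, 56] ... [1, 160, 7, 7]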

@@ -37,6 +37,7 @@ class HRNet(nn.Layer):
(https://arxiv.org/pdf/1908.07919.pdf).
Args:
in_channels (int, optional): The channels of input image. Default: 3.
pretrained (str, optional): The path of pretrained model.
stage1_num_modules (int, optional): Number of modules for stage1. Default 1.
stage1_num_blocks (list, optional): Number of blocks per module for stage1. Default (4).
@@ -56,6 +57,7 @@ class HRNet(nn.Layer):
"""
def __init__(self,
in_channels=3,
pretrained=None,
stage1_num_modules=1,
stage1_num_blocks=(4, ),
@@ -91,7 +93,7 @@ class HRNet(nn.Layer):
self.feat_channels = [sum(stage4_num_channels)]
self.conv_layer1_1 = layers.ConvBNReLU(
in_channels=3,
in_channels=in_channels,
out_channels=64,
kernel_size=3,
stride=2,

@@ -0,0 +1,974 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on
https://github.com/HRNet/Lite-HRNet/blob/hrnet/models/backbones/litehrnet.py
"""
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from numbers import Integral
from paddle import ParamAttr
from paddle.regularizer import L2Decay
from paddle.nn.initializer import Normal, Constant
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg import utils
__all__ = [
"Lite_HRNet_18", "Lite_HRNet_30", "Lite_HRNet_naive",
"Lite_HRNet_wider_naive", "LiteHRNet"
]
def Conv2d(in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
dilation=1,
groups=1,
bias=True,
weight_init=Normal(std=0.001),
bias_init=Constant(0.)):
weight_attr = paddle.framework.ParamAttr(initializer=weight_init)
if bias:
bias_attr = paddle.framework.ParamAttr(initializer=bias_init)
else:
bias_attr = False
conv = nn.Conv2D(
in_channels,
out_channels,
kernel_size,
stride,
padding,
dilation,
groups,
weight_attr=weight_attr,
bias_attr=bias_attr)
return conv
def channel_shuffle(x, groups):
x_shape = paddle.shape(x)
batch_size, height, width = x_shape[0], x_shape[2], x_shape[3]
num_channels = x.shape[1]
channels_per_group = num_channels // groups
x = paddle.reshape(
x=x, shape=[batch_size, groups, channels_per_group, height, width])
x = paddle.transpose(x=x, perm=[0, 2, 1, 3, 4])
x = paddle.reshape(x=x, shape=[batch_size, num_channels, height, width])
return x
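# Worked example (illustrative addition): for channels [c0, c1, c2, c3] and
# groups=2, the first reshape groups them as [[c0, c1], [c2, c3]], the
# transpose pairs them as [[c0, c2], [c1, c3]], and the final reshape flattens
# this to [c0, c2, c1, c3], interleaving the two groups so that information
# mixes across group boundaries in the next grouped operation.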
class ConvNormLayer(nn.Layer):
def __init__(self,
ch_in,
ch_out,
filter_size,
stride=1,
groups=1,
norm_type=None,
norm_groups=32,
norm_decay=0.,
freeze_norm=False,
act=None):
super(ConvNormLayer, self).__init__()
self.act = act
norm_lr = 0. if freeze_norm else 1.
if norm_type is not None:
assert norm_type in ['bn', 'sync_bn', 'gn'], \
"norm_type should be one of ['bn', 'sync_bn', 'gn'], but got {}".format(norm_type)
param_attr = ParamAttr(
initializer=Constant(1.0),
learning_rate=norm_lr,
regularizer=L2Decay(norm_decay), )
bias_attr = ParamAttr(
learning_rate=norm_lr, regularizer=L2Decay(norm_decay))
global_stats = True if freeze_norm else None
if norm_type in ['bn', 'sync_bn']:
self.norm = nn.BatchNorm2D(
ch_out,
weight_attr=param_attr,
bias_attr=bias_attr,
use_global_stats=global_stats, )
elif norm_type == 'gn':
self.norm = nn.GroupNorm(
num_groups=norm_groups,
num_channels=ch_out,
weight_attr=param_attr,
bias_attr=bias_attr)
norm_params = self.norm.parameters()
if freeze_norm:
for param in norm_params:
param.stop_gradient = True
conv_bias_attr = False
else:
conv_bias_attr = True
self.norm = None
self.conv = nn.Conv2D(
in_channels=ch_in,
out_channels=ch_out,
kernel_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=groups,
weight_attr=ParamAttr(initializer=Normal(
mean=0., std=0.001)),
bias_attr=conv_bias_attr)
def forward(self, inputs):
out = self.conv(inputs)
if self.norm is not None:
out = self.norm(out)
if self.act == 'relu':
out = F.relu(out)
elif self.act == 'sigmoid':
out = F.sigmoid(out)
return out
class DepthWiseSeparableConvNormLayer(nn.Layer):
def __init__(self,
ch_in,
ch_out,
filter_size,
stride=1,
dw_norm_type=None,
pw_norm_type=None,
norm_decay=0.,
freeze_norm=False,
dw_act=None,
pw_act=None):
super(DepthWiseSeparableConvNormLayer, self).__init__()
self.depthwise_conv = ConvNormLayer(
ch_in=ch_in,
ch_out=ch_in,
filter_size=filter_size,
stride=stride,
groups=ch_in,
norm_type=dw_norm_type,
act=dw_act,
norm_decay=norm_decay,
freeze_norm=freeze_norm, )
self.pointwise_conv = ConvNormLayer(
ch_in=ch_in,
ch_out=ch_out,
filter_size=1,
stride=1,
norm_type=pw_norm_type,
act=pw_act,
norm_decay=norm_decay,
freeze_norm=freeze_norm, )
def forward(self, x):
x = self.depthwise_conv(x)
x = self.pointwise_conv(x)
return x
class CrossResolutionWeightingModule(nn.Layer):
def __init__(self,
channels,
ratio=16,
norm_type='bn',
freeze_norm=False,
norm_decay=0.):
super(CrossResolutionWeightingModule, self).__init__()
self.channels = channels
total_channel = sum(channels)
self.conv1 = ConvNormLayer(
ch_in=total_channel,
ch_out=total_channel // ratio,
filter_size=1,
stride=1,
norm_type=norm_type,
act='relu',
freeze_norm=freeze_norm,
norm_decay=norm_decay)
self.conv2 = ConvNormLayer(
ch_in=total_channel // ratio,
ch_out=total_channel,
filter_size=1,
stride=1,
norm_type=norm_type,
act='sigmoid',
freeze_norm=freeze_norm,
norm_decay=norm_decay)
    def forward(self, x):
        out = []
        # Average-pool every branch down to the resolution of the smallest one.
        for idx, xi in enumerate(x[:-1]):
            kernel_size = stride = pow(2, len(x) - idx - 1)
            xi = F.avg_pool2d(xi, kernel_size=kernel_size, stride=stride)
            out.append(xi)
        out.append(x[-1])
        # Compute channel weights jointly across all resolutions: a 1x1 conv
        # bottleneck with ReLU, then a 1x1 conv with sigmoid.
        out = paddle.concat(out, 1)
        out = self.conv1(out)
        out = self.conv2(out)
        # Split the weights back per branch, upsample each to its branch's
        # resolution, and rescale the original features.
        out = paddle.split(out, self.channels, 1)
        out = [
            s * F.interpolate(
                a, paddle.shape(s)[-2:], mode='nearest') for s, a in zip(x, out)
        ]
        return out
class SpatialWeightingModule(nn.Layer):
def __init__(self, in_channel, ratio=16, freeze_norm=False, norm_decay=0.):
super(SpatialWeightingModule, self).__init__()
self.global_avgpooling = nn.AdaptiveAvgPool2D(1)
self.conv1 = ConvNormLayer(
ch_in=in_channel,
ch_out=in_channel // ratio,
filter_size=1,
stride=1,
act='relu',
freeze_norm=freeze_norm,
norm_decay=norm_decay)
self.conv2 = ConvNormLayer(
ch_in=in_channel // ratio,
ch_out=in_channel,
filter_size=1,
stride=1,
act='sigmoid',
freeze_norm=freeze_norm,
norm_decay=norm_decay)
def forward(self, x):
out = self.global_avgpooling(x)
out = self.conv1(out)
out = self.conv2(out)
return x * out
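# Note (illustrative addition): this is a squeeze-and-excitation style gate.
# Global average pooling followed by a 1x1 bottleneck (ReLU) and a 1x1
# expansion (sigmoid) yields per-channel weights in [0, 1] that rescale the
# input feature map.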
class ConditionalChannelWeightingBlock(nn.Layer):
def __init__(self,
in_channels,
stride,
reduce_ratio,
norm_type='bn',
freeze_norm=False,
norm_decay=0.):
super(ConditionalChannelWeightingBlock, self).__init__()
assert stride in [1, 2]
branch_channels = [channel // 2 for channel in in_channels]
self.cross_resolution_weighting = CrossResolutionWeightingModule(
branch_channels,
ratio=reduce_ratio,
norm_type=norm_type,
freeze_norm=freeze_norm,
norm_decay=norm_decay)
self.depthwise_convs = nn.LayerList([
ConvNormLayer(
channel,
channel,
filter_size=3,
stride=stride,
groups=channel,
norm_type=norm_type,
freeze_norm=freeze_norm,
norm_decay=norm_decay) for channel in branch_channels
])
self.spatial_weighting = nn.LayerList([
SpatialWeightingModule(
channel,
ratio=4,
freeze_norm=freeze_norm,
norm_decay=norm_decay) for channel in branch_channels
])
    def forward(self, x):
        # Split every branch channel-wise; one half passes through unchanged.
        x = [s.chunk(2, axis=1) for s in x]
        x1 = [s[0] for s in x]
        x2 = [s[1] for s in x]
        # The other half is reweighted across resolutions, depthwise-convolved,
        # and spatially gated before the halves are recombined and shuffled.
        x2 = self.cross_resolution_weighting(x2)
        x2 = [dw(s) for s, dw in zip(x2, self.depthwise_convs)]
        x2 = [sw(s) for s, sw in zip(x2, self.spatial_weighting)]
        out = [paddle.concat([s1, s2], axis=1) for s1, s2 in zip(x1, x2)]
        out = [channel_shuffle(s, groups=2) for s in out]
        return out
class ShuffleUnit(nn.Layer):
def __init__(self,
in_channel,
out_channel,
stride,
norm_type='bn',
freeze_norm=False,
norm_decay=0.):
super(ShuffleUnit, self).__init__()
branch_channel = out_channel // 2
self.stride = stride
if self.stride == 1:
            assert in_channel == branch_channel * 2, \
                "when stride=1, in_channel {} should equal branch_channel * 2 ({})".format(in_channel, branch_channel * 2)
if stride > 1:
self.branch1 = nn.Sequential(
ConvNormLayer(
ch_in=in_channel,
ch_out=in_channel,
filter_size=3,
stride=self.stride,
groups=in_channel,
norm_type=norm_type,
freeze_norm=freeze_norm,
norm_decay=norm_decay),
ConvNormLayer(
ch_in=in_channel,
ch_out=branch_channel,
filter_size=1,
stride=1,
norm_type=norm_type,
act='relu',
freeze_norm=freeze_norm,
norm_decay=norm_decay), )
self.branch2 = nn.Sequential(
ConvNormLayer(
ch_in=branch_channel if stride == 1 else in_channel,
ch_out=branch_channel,
filter_size=1,
stride=1,
norm_type=norm_type,
act='relu',
freeze_norm=freeze_norm,
norm_decay=norm_decay),
ConvNormLayer(
ch_in=branch_channel,
ch_out=branch_channel,
filter_size=3,
stride=self.stride,
groups=branch_channel,
norm_type=norm_type,
freeze_norm=freeze_norm,
norm_decay=norm_decay),
ConvNormLayer(
ch_in=branch_channel,
ch_out=branch_channel,
filter_size=1,
stride=1,
norm_type=norm_type,
act='relu',
freeze_norm=freeze_norm,
norm_decay=norm_decay), )
def forward(self, x):
if self.stride > 1:
x1 = self.branch1(x)
x2 = self.branch2(x)
else:
x1, x2 = x.chunk(2, axis=1)
x2 = self.branch2(x2)
out = paddle.concat([x1, x2], axis=1)
out = channel_shuffle(out, groups=2)
return out
class IterativeHead(nn.Layer):
def __init__(self,
in_channels,
norm_type='bn',
freeze_norm=False,
norm_decay=0.):
super(IterativeHead, self).__init__()
num_branches = len(in_channels)
self.in_channels = in_channels[::-1]
projects = []
for i in range(num_branches):
if i != num_branches - 1:
projects.append(
DepthWiseSeparableConvNormLayer(
ch_in=self.in_channels[i],
ch_out=self.in_channels[i + 1],
filter_size=3,
stride=1,
dw_act=None,
pw_act='relu',
dw_norm_type=norm_type,
pw_norm_type=norm_type,
freeze_norm=freeze_norm,
norm_decay=norm_decay))
else:
projects.append(
DepthWiseSeparableConvNormLayer(
ch_in=self.in_channels[i],
ch_out=self.in_channels[i],
filter_size=3,
stride=1,
dw_act=None,
pw_act='relu',
dw_norm_type=norm_type,
pw_norm_type=norm_type,
freeze_norm=freeze_norm,
norm_decay=norm_decay))
self.projects = nn.LayerList(projects)
def forward(self, x):
x = x[::-1]
y = []
last_x = None
for i, s in enumerate(x):
if last_x is not None:
last_x = F.interpolate(
last_x,
size=paddle.shape(s)[-2:],
mode='bilinear',
align_corners=True)
s = s + last_x
s = self.projects[i](s)
y.append(s)
last_x = s
return y[::-1]
class Stem(nn.Layer):
def __init__(self,
in_channel,
stem_channel,
out_channel,
expand_ratio,
norm_type='bn',
freeze_norm=False,
norm_decay=0.):
super(Stem, self).__init__()
self.conv1 = ConvNormLayer(
in_channel,
stem_channel,
filter_size=3,
stride=2,
norm_type=norm_type,
act='relu',
freeze_norm=freeze_norm,
norm_decay=norm_decay)
mid_channel = int(round(stem_channel * expand_ratio))
branch_channel = stem_channel // 2
if stem_channel == out_channel:
inc_channel = out_channel - branch_channel
else:
inc_channel = out_channel - stem_channel
self.branch1 = nn.Sequential(
ConvNormLayer(
ch_in=branch_channel,
ch_out=branch_channel,
filter_size=3,
stride=2,
groups=branch_channel,
norm_type=norm_type,
freeze_norm=freeze_norm,
norm_decay=norm_decay),
ConvNormLayer(
ch_in=branch_channel,
ch_out=inc_channel,
filter_size=1,
stride=1,
norm_type=norm_type,
act='relu',
freeze_norm=freeze_norm,
norm_decay=norm_decay), )
self.expand_conv = ConvNormLayer(
ch_in=branch_channel,
ch_out=mid_channel,
filter_size=1,
stride=1,
norm_type=norm_type,
act='relu',
freeze_norm=freeze_norm,
norm_decay=norm_decay)
self.depthwise_conv = ConvNormLayer(
ch_in=mid_channel,
ch_out=mid_channel,
filter_size=3,
stride=2,
groups=mid_channel,
norm_type=norm_type,
freeze_norm=freeze_norm,
norm_decay=norm_decay)
self.linear_conv = ConvNormLayer(
ch_in=mid_channel,
ch_out=branch_channel
if stem_channel == out_channel else stem_channel,
filter_size=1,
stride=1,
norm_type=norm_type,
act='relu',
freeze_norm=freeze_norm,
norm_decay=norm_decay)
def forward(self, x):
x = self.conv1(x)
x1, x2 = x.chunk(2, axis=1)
x1 = self.branch1(x1)
x2 = self.expand_conv(x2)
x2 = self.depthwise_conv(x2)
x2 = self.linear_conv(x2)
out = paddle.concat([x1, x2], axis=1)
out = channel_shuffle(out, groups=2)
return out
class LiteHRNetModule(nn.Layer):
def __init__(self,
num_branches,
num_blocks,
in_channels,
reduce_ratio,
module_type,
multiscale_output=False,
with_fuse=True,
norm_type='bn',
freeze_norm=False,
norm_decay=0.):
super(LiteHRNetModule, self).__init__()
        assert num_branches == len(in_channels),\
            "num_branches {} should equal len(in_channels) {}".format(num_branches, len(in_channels))
assert module_type in [
'LITE', 'NAIVE'
], "module_type should be one of ['LITE', 'NAIVE']"
self.num_branches = num_branches
self.in_channels = in_channels
self.multiscale_output = multiscale_output
self.with_fuse = with_fuse
self.norm_type = 'bn'
self.module_type = module_type
if self.module_type == 'LITE':
self.layers = self._make_weighting_blocks(
num_blocks,
reduce_ratio,
freeze_norm=freeze_norm,
norm_decay=norm_decay)
elif self.module_type == 'NAIVE':
self.layers = self._make_naive_branches(
num_branches,
num_blocks,
freeze_norm=freeze_norm,
norm_decay=norm_decay)
if self.with_fuse:
self.fuse_layers = self._make_fuse_layers(
freeze_norm=freeze_norm, norm_decay=norm_decay)
self.relu = nn.ReLU()
def _make_weighting_blocks(self,
num_blocks,
reduce_ratio,
stride=1,
freeze_norm=False,
norm_decay=0.):
layers = []
for i in range(num_blocks):
layers.append(
ConditionalChannelWeightingBlock(
self.in_channels,
stride=stride,
reduce_ratio=reduce_ratio,
norm_type=self.norm_type,
freeze_norm=freeze_norm,
norm_decay=norm_decay))
return nn.Sequential(*layers)
def _make_naive_branches(self,
num_branches,
num_blocks,
freeze_norm=False,
norm_decay=0.):
branches = []
for branch_idx in range(num_branches):
layers = []
for i in range(num_blocks):
layers.append(
ShuffleUnit(
self.in_channels[branch_idx],
self.in_channels[branch_idx],
stride=1,
norm_type=self.norm_type,
freeze_norm=freeze_norm,
norm_decay=norm_decay))
branches.append(nn.Sequential(*layers))
return nn.LayerList(branches)
def _make_fuse_layers(self, freeze_norm=False, norm_decay=0.):
if self.num_branches == 1:
return None
fuse_layers = []
num_out_branches = self.num_branches if self.multiscale_output else 1
for i in range(num_out_branches):
fuse_layer = []
for j in range(self.num_branches):
if j > i:
fuse_layer.append(
nn.Sequential(
Conv2d(
self.in_channels[j],
self.in_channels[i],
kernel_size=1,
stride=1,
padding=0,
bias=False, ),
nn.BatchNorm2D(self.in_channels[i]),
nn.Upsample(
scale_factor=2**(j - i), mode='nearest')))
elif j == i:
fuse_layer.append(None)
else:
conv_downsamples = []
for k in range(i - j):
if k == i - j - 1:
conv_downsamples.append(
nn.Sequential(
Conv2d(
self.in_channels[j],
self.in_channels[j],
kernel_size=3,
stride=2,
padding=1,
groups=self.in_channels[j],
bias=False, ),
nn.BatchNorm2D(self.in_channels[j]),
Conv2d(
self.in_channels[j],
self.in_channels[i],
kernel_size=1,
stride=1,
padding=0,
bias=False, ),
nn.BatchNorm2D(self.in_channels[i])))
else:
conv_downsamples.append(
nn.Sequential(
Conv2d(
self.in_channels[j],
self.in_channels[j],
kernel_size=3,
stride=2,
padding=1,
groups=self.in_channels[j],
bias=False, ),
nn.BatchNorm2D(self.in_channels[j]),
Conv2d(
self.in_channels[j],
self.in_channels[j],
kernel_size=1,
stride=1,
padding=0,
bias=False, ),
nn.BatchNorm2D(self.in_channels[j]),
nn.ReLU()))
fuse_layer.append(nn.Sequential(*conv_downsamples))
fuse_layers.append(nn.LayerList(fuse_layer))
return nn.LayerList(fuse_layers)
def forward(self, x):
if self.num_branches == 1:
return [self.layers[0](x[0])]
if self.module_type == 'LITE':
out = self.layers(x)
elif self.module_type == 'NAIVE':
for i in range(self.num_branches):
x[i] = self.layers[i](x[i])
out = x
if self.with_fuse:
out_fuse = []
for i in range(len(self.fuse_layers)):
y = out[0] if i == 0 else self.fuse_layers[i][0](out[0])
for j in range(self.num_branches):
                    if j == 0:
                        # Mirrors the reference implementation: this doubles the
                        # first branch's contribution rather than adding out[0] once.
                        y += y
elif i == j:
y += out[j]
else:
y += self.fuse_layers[i][j](out[j])
if i == 0:
out[i] = y
out_fuse.append(self.relu(y))
out = out_fuse
elif not self.multiscale_output:
out = [out[0]]
return out
class LiteHRNet(nn.Layer):
"""
@inproceedings{Yulitehrnet21,
title={Lite-HRNet: A Lightweight High-Resolution Network},
author={Yu, Changqian and Xiao, Bin and Gao, Changxin and Yuan, Lu and Zhang, Lei and Sang, Nong and Wang, Jingdong},
booktitle={CVPR},year={2021}
}
Args:
        network_type (str): The network type, which should be one of ["lite_18", "lite_30", "naive", "wider_naive"].
            "naive": Simply combines the shuffle block in ShuffleNet with the high-resolution design pattern in HRNet.
            "wider_naive": The naive network with wider channels in each block.
            "lite_18": Lite-HRNet-18, which replaces the pointwise convolution in a shuffle block with conditional channel weighting.
            "lite_30": Lite-HRNet-30, with more blocks than Lite-HRNet-18.
        in_channels (int, optional): The channels of input image. Default: 3.
        freeze_at (int): The stage to freeze.
        freeze_norm (bool): Whether to freeze the normalization layers in the network.
        norm_decay (float): The weight decay for normalization layer weights.
        return_idx (list): The indices of the stages to return.
"""
def __init__(self,
network_type,
in_channels=3,
freeze_at=0,
freeze_norm=True,
norm_decay=0.,
return_idx=[0, 1, 2, 3],
use_head=False,
pretrained=None):
super(LiteHRNet, self).__init__()
if isinstance(return_idx, Integral):
return_idx = [return_idx]
assert network_type in ["lite_18", "lite_30", "naive", "wider_naive"], \
"the network_type should be one of [lite_18, lite_30, naive, wider_naive]"
assert len(return_idx) > 0, "need one or more return index"
self.freeze_at = freeze_at
self.freeze_norm = freeze_norm
self.norm_decay = norm_decay
self.return_idx = return_idx
self.norm_type = 'bn'
self.use_head = use_head
self.pretrained = pretrained
self.module_configs = {
"lite_18": {
"num_modules": [2, 4, 2],
"num_branches": [2, 3, 4],
"num_blocks": [2, 2, 2],
"module_type": ["LITE", "LITE", "LITE"],
"reduce_ratios": [8, 8, 8],
"num_channels": [[40, 80], [40, 80, 160], [40, 80, 160, 320]],
},
"lite_30": {
"num_modules": [3, 8, 3],
"num_branches": [2, 3, 4],
"num_blocks": [2, 2, 2],
"module_type": ["LITE", "LITE", "LITE"],
"reduce_ratios": [8, 8, 8],
"num_channels": [[40, 80], [40, 80, 160], [40, 80, 160, 320]],
},
"naive": {
"num_modules": [2, 4, 2],
"num_branches": [2, 3, 4],
"num_blocks": [2, 2, 2],
"module_type": ["NAIVE", "NAIVE", "NAIVE"],
"reduce_ratios": [1, 1, 1],
"num_channels": [[30, 60], [30, 60, 120], [30, 60, 120, 240]],
},
"wider_naive": {
"num_modules": [2, 4, 2],
"num_branches": [2, 3, 4],
"num_blocks": [2, 2, 2],
"module_type": ["NAIVE", "NAIVE", "NAIVE"],
"reduce_ratios": [1, 1, 1],
"num_channels": [[40, 80], [40, 80, 160], [40, 80, 160, 320]],
},
}
self.stages_config = self.module_configs[network_type]
self.stem = Stem(in_channels, 32, 32, 1)
num_channels_pre_layer = [32]
for stage_idx in range(3):
num_channels = self.stages_config["num_channels"][stage_idx]
setattr(self, 'transition{}'.format(stage_idx),
self._make_transition_layer(num_channels_pre_layer,
num_channels, self.freeze_norm,
self.norm_decay))
stage, num_channels_pre_layer = self._make_stage(
self.stages_config, stage_idx, num_channels, True,
self.freeze_norm, self.norm_decay)
setattr(self, 'stage{}'.format(stage_idx), stage)
num_channels = self.stages_config["num_channels"][-1]
self.feat_channels = num_channels
if self.use_head:
self.head_layer = IterativeHead(num_channels_pre_layer, 'bn',
self.freeze_norm, self.norm_decay)
self.feat_channels = [num_channels[0]]
for i in range(1, len(num_channels)):
self.feat_channels.append(num_channels[i] // 2)
self.init_weight()
def init_weight(self):
if self.pretrained is not None:
utils.load_entire_model(self, self.pretrained)
def _make_transition_layer(self,
num_channels_pre_layer,
num_channels_cur_layer,
freeze_norm=False,
norm_decay=0.):
num_branches_pre = len(num_channels_pre_layer)
num_branches_cur = len(num_channels_cur_layer)
transition_layers = []
for i in range(num_branches_cur):
if i < num_branches_pre:
if num_channels_cur_layer[i] != num_channels_pre_layer[i]:
transition_layers.append(
nn.Sequential(
Conv2d(
num_channels_pre_layer[i],
num_channels_pre_layer[i],
kernel_size=3,
stride=1,
padding=1,
groups=num_channels_pre_layer[i],
bias=False),
nn.BatchNorm2D(num_channels_pre_layer[i]),
Conv2d(
num_channels_pre_layer[i],
num_channels_cur_layer[i],
kernel_size=1,
stride=1,
padding=0,
bias=False, ),
nn.BatchNorm2D(num_channels_cur_layer[i]),
nn.ReLU()))
else:
transition_layers.append(None)
else:
conv_downsamples = []
for j in range(i + 1 - num_branches_pre):
conv_downsamples.append(
nn.Sequential(
Conv2d(
num_channels_pre_layer[-1],
num_channels_pre_layer[-1],
groups=num_channels_pre_layer[-1],
kernel_size=3,
stride=2,
padding=1,
bias=False, ),
nn.BatchNorm2D(num_channels_pre_layer[-1]),
Conv2d(
num_channels_pre_layer[-1],
num_channels_cur_layer[i]
if j == i - num_branches_pre else
num_channels_pre_layer[-1],
kernel_size=1,
stride=1,
padding=0,
bias=False, ),
nn.BatchNorm2D(num_channels_cur_layer[i]
if j == i - num_branches_pre else
num_channels_pre_layer[-1]),
nn.ReLU()))
transition_layers.append(nn.Sequential(*conv_downsamples))
return nn.LayerList(transition_layers)
def _make_stage(self,
stages_config,
stage_idx,
in_channels,
multiscale_output,
freeze_norm=False,
norm_decay=0.):
num_modules = stages_config["num_modules"][stage_idx]
num_branches = stages_config["num_branches"][stage_idx]
num_blocks = stages_config["num_blocks"][stage_idx]
reduce_ratio = stages_config['reduce_ratios'][stage_idx]
module_type = stages_config['module_type'][stage_idx]
modules = []
for i in range(num_modules):
if not multiscale_output and i == num_modules - 1:
reset_multiscale_output = False
else:
reset_multiscale_output = True
modules.append(
LiteHRNetModule(
num_branches,
num_blocks,
in_channels,
reduce_ratio,
module_type,
multiscale_output=reset_multiscale_output,
with_fuse=True,
freeze_norm=freeze_norm,
norm_decay=norm_decay))
in_channels = modules[-1].in_channels
return nn.Sequential(*modules), in_channels
def forward(self, x):
x = self.stem(x)
y_list = [x]
for stage_idx in range(3):
x_list = []
transition = getattr(self, 'transition{}'.format(stage_idx))
for j in range(self.stages_config["num_branches"][stage_idx]):
if transition[j] is not None:
if j >= len(y_list):
x_list.append(transition[j](y_list[-1]))
else:
x_list.append(transition[j](y_list[j]))
else:
x_list.append(y_list[j])
y_list = getattr(self, 'stage{}'.format(stage_idx))(x_list)
if self.use_head:
y_list = self.head_layer(y_list)
res = []
for i, layer in enumerate(y_list):
if i == self.freeze_at:
layer.stop_gradient = True
if i in self.return_idx:
res.append(layer)
return res
@manager.BACKBONES.add_component
def Lite_HRNet_18(**kwargs):
model = LiteHRNet(network_type="lite_18", **kwargs)
return model
@manager.BACKBONES.add_component
def Lite_HRNet_30(**kwargs):
model = LiteHRNet(network_type="lite_30", **kwargs)
return model
@manager.BACKBONES.add_component
def Lite_HRNet_naive(**kwargs):
model = LiteHRNet(network_type="naive", **kwargs)
return model
@manager.BACKBONES.add_component
def Lite_HRNet_wider_naive(**kwargs):
model = LiteHRNet(network_type="wider_naive", **kwargs)
return model
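# --- Usage sketch (an illustrative addition, not part of the upstream file;
# assumes a working PaddlePaddle install). Lite-HRNet-18 with the default
# return_idx=[0, 1, 2, 3] and use_head=False returns four branches with
# channels [40, 80, 160, 320] at strides 4, 8, 16 and 32.
if __name__ == "__main__":
    model = Lite_HRNet_18()
    x = paddle.randn([1, 3, 224, 224])
    for feat in model(x):
        print(feat.shape)  # [1, 40, 56, 56] ... [1, 320, 7, 7]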

@@ -1,4 +1,4 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -260,7 +260,7 @@ class MixVisionTransformer(nn.Layer):
def __init__(self,
img_size=224,
patch_size=16,
in_chans=3,
in_channels=3,
num_classes=1000,
embed_dims=[64, 128, 256, 512],
num_heads=[1, 2, 4, 8],
@@ -284,7 +284,7 @@ class MixVisionTransformer(nn.Layer):
img_size=img_size,
patch_size=7,
stride=4,
in_chans=in_chans,
in_chans=in_channels,
embed_dim=embed_dims[0])
self.patch_embed2 = OverlapPatchEmbed(
img_size=img_size // 4,

@@ -1,4 +1,4 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -12,13 +12,26 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg import utils
__all__ = [
"MobileNetV2_x0_25",
"MobileNetV2_x0_5",
"MobileNetV2_x0_75",
"MobileNetV2_x1_0",
"MobileNetV2_x1_5",
"MobileNetV2_x2_0",
]
@manager.BACKBONES.add_component
class MobileNetV2(nn.Layer):
"""
The MobileNetV2 implementation based on PaddlePaddle.
@@ -29,69 +42,70 @@ class MobileNetV2(nn.Layer):
(https://arxiv.org/abs/1801.04381).
Args:
        channel_ratio (float, optional): The ratio of channels. Default: 1.0.
        min_channel (int, optional): The minimum channel number. Default: 16.
        scale (float, optional): The scale of channels. Default: 1.0.
        in_channels (int, optional): The channels of input image. Default: 3.
        pretrained (str, optional): The path or URL of the pretrained model. Default: None.
"""
def __init__(self, channel_ratio=1.0, min_channel=16, pretrained=None):
super(MobileNetV2, self).__init__()
self.channel_ratio = channel_ratio
self.min_channel = min_channel
def __init__(self, scale=1.0, in_channels=3, pretrained=None):
super().__init__()
self.scale = scale
self.pretrained = pretrained
prefix_name = ""
self.stage0 = conv_bn(3, self.depth(32), 3, 2)
self.stage1 = InvertedResidual(self.depth(32), self.depth(16), 1, 1)
self.stage2 = nn.Sequential(
InvertedResidual(self.depth(16), self.depth(24), 2, 6),
InvertedResidual(self.depth(24), self.depth(24), 1, 6), )
self.stage3 = nn.Sequential(
InvertedResidual(self.depth(24), self.depth(32), 2, 6),
InvertedResidual(self.depth(32), self.depth(32), 1, 6),
InvertedResidual(self.depth(32), self.depth(32), 1, 6), )
bottleneck_params_list = [
(1, 16, 1, 1),
(6, 24, 2, 2), # x4
(6, 32, 3, 2), # x8
(6, 64, 4, 2),
(6, 96, 3, 1), # x16
(6, 160, 3, 2),
(6, 320, 1, 1), # x32
]
self.out_index = [1, 2, 4, 6]
self.stage4 = nn.Sequential(
InvertedResidual(self.depth(32), self.depth(64), 2, 6),
InvertedResidual(self.depth(64), self.depth(64), 1, 6),
InvertedResidual(self.depth(64), self.depth(64), 1, 6),
InvertedResidual(self.depth(64), self.depth(64), 1, 6), )
self.conv1 = ConvBNLayer(
num_channels=in_channels,
num_filters=int(32 * scale),
filter_size=3,
stride=2,
padding=1,
name=prefix_name + "conv1_1")
self.stage5 = nn.Sequential(
InvertedResidual(self.depth(64), self.depth(96), 1, 6),
InvertedResidual(self.depth(96), self.depth(96), 1, 6),
InvertedResidual(self.depth(96), self.depth(96), 1, 6), )
self.block_list = []
i = 1
in_c = int(32 * scale)
for layer_setting in bottleneck_params_list:
t, c, n, s = layer_setting
i += 1
block = self.add_sublayer(
prefix_name + "conv" + str(i),
sublayer=InvresiBlocks(
in_c=in_c,
t=t,
c=int(c * scale),
n=n,
s=s,
name=prefix_name + "conv" + str(i)))
self.block_list.append(block)
in_c = int(c * scale)
self.stage6 = nn.Sequential(
InvertedResidual(self.depth(96), self.depth(160), 2, 6),
InvertedResidual(self.depth(160), self.depth(160), 1, 6),
InvertedResidual(self.depth(160), self.depth(160), 1, 6), )
self.stage7 = InvertedResidual(self.depth(160), self.depth(320), 1, 6)
out_channels = [
bottleneck_params_list[idx][1] for idx in self.out_index
]
self.feat_channels = [int(c * scale) for c in out_channels]
self.init_weight()
def depth(self, channels):
min_channel = min(channels, self.min_channel)
return max(min_channel, int(channels * self.channel_ratio))
def forward(self, x):
def forward(self, inputs):
feat_list = []
feature_1_2 = self.stage0(x)
feature_1_2 = self.stage1(feature_1_2)
feature_1_4 = self.stage2(feature_1_2)
feature_1_8 = self.stage3(feature_1_4)
feature_1_16 = self.stage4(feature_1_8)
feature_1_16 = self.stage5(feature_1_16)
feature_1_32 = self.stage6(feature_1_16)
feature_1_32 = self.stage7(feature_1_32)
feat_list.append(feature_1_4)
feat_list.append(feature_1_8)
feat_list.append(feature_1_16)
feat_list.append(feature_1_32)
y = self.conv1(inputs, if_act=True)
for idx, block in enumerate(self.block_list):
y = block(y)
if idx in self.out_index:
feat_list.append(y)
return feat_list
def init_weight(self):
@@ -99,66 +113,153 @@ class MobileNetV2(nn.Layer):
utils.load_entire_model(self, self.pretrained)
def conv_bn(inp, oup, kernel, stride):
return nn.Sequential(
nn.Conv2D(
in_channels=inp,
out_channels=oup,
kernel_size=kernel,
class ConvBNLayer(nn.Layer):
def __init__(self,
num_channels,
filter_size,
num_filters,
stride,
padding,
channels=None,
num_groups=1,
name=None,
use_cudnn=True):
super(ConvBNLayer, self).__init__()
self._conv = Conv2D(
in_channels=num_channels,
out_channels=num_filters,
kernel_size=filter_size,
stride=stride,
padding=padding,
groups=num_groups,
weight_attr=ParamAttr(name=name + "_weights"),
bias_attr=False)
self._batch_norm = BatchNorm(
num_filters,
param_attr=ParamAttr(name=name + "_bn_scale"),
bias_attr=ParamAttr(name=name + "_bn_offset"),
moving_mean_name=name + "_bn_mean",
moving_variance_name=name + "_bn_variance")
def forward(self, inputs, if_act=True):
y = self._conv(inputs)
y = self._batch_norm(y)
if if_act:
y = F.relu6(y)
return y
class InvertedResidualUnit(nn.Layer):
def __init__(self, num_channels, num_in_filter, num_filters, stride,
filter_size, padding, expansion_factor, name):
super(InvertedResidualUnit, self).__init__()
num_expfilter = int(round(num_in_filter * expansion_factor))
self._expand_conv = ConvBNLayer(
num_channels=num_channels,
num_filters=num_expfilter,
filter_size=1,
stride=1,
padding=0,
num_groups=1,
name=name + "_expand")
self._bottleneck_conv = ConvBNLayer(
num_channels=num_expfilter,
num_filters=num_expfilter,
filter_size=filter_size,
stride=stride,
padding=(kernel - 1) // 2,
bias_attr=False),
nn.BatchNorm2D(
num_features=oup, epsilon=1e-05, momentum=0.1),
nn.ReLU())
class InvertedResidual(nn.Layer):
def __init__(self, inp, oup, stride, expand_ratio, dilation=1):
super(InvertedResidual, self).__init__()
self.stride = stride
assert stride in [1, 2]
self.use_res_connect = self.stride == 1 and inp == oup
self.conv = nn.Sequential(
nn.Conv2D(
inp,
inp * expand_ratio,
kernel_size=1,
stride=1,
padding=0,
dilation=1,
groups=1,
bias_attr=False),
nn.BatchNorm2D(
num_features=inp * expand_ratio, epsilon=1e-05, momentum=0.1),
nn.ReLU(),
nn.Conv2D(
inp * expand_ratio,
inp * expand_ratio,
kernel_size=3,
stride=stride,
padding=dilation,
dilation=dilation,
groups=inp * expand_ratio,
bias_attr=False),
nn.BatchNorm2D(
num_features=inp * expand_ratio, epsilon=1e-05, momentum=0.1),
nn.ReLU(),
nn.Conv2D(
inp * expand_ratio,
oup,
kernel_size=1,
stride=1,
padding=0,
dilation=1,
groups=1,
bias_attr=False),
nn.BatchNorm2D(
num_features=oup, epsilon=1e-05, momentum=0.1), )
def forward(self, x):
if self.use_res_connect:
return x + self.conv(x)
else:
return self.conv(x)
padding=padding,
num_groups=num_expfilter,
use_cudnn=False,
name=name + "_dwise")
self._linear_conv = ConvBNLayer(
num_channels=num_expfilter,
num_filters=num_filters,
filter_size=1,
stride=1,
padding=0,
num_groups=1,
name=name + "_linear")
def forward(self, inputs, ifshortcut):
y = self._expand_conv(inputs, if_act=True)
y = self._bottleneck_conv(y, if_act=True)
y = self._linear_conv(y, if_act=False)
if ifshortcut:
y = paddle.add(inputs, y)
return y
class InvresiBlocks(nn.Layer):
def __init__(self, in_c, t, c, n, s, name):
super(InvresiBlocks, self).__init__()
self._first_block = InvertedResidualUnit(
num_channels=in_c,
num_in_filter=in_c,
num_filters=c,
stride=s,
filter_size=3,
padding=1,
expansion_factor=t,
name=name + "_1")
self._block_list = []
for i in range(1, n):
block = self.add_sublayer(
name + "_" + str(i + 1),
sublayer=InvertedResidualUnit(
num_channels=c,
num_in_filter=c,
num_filters=c,
stride=1,
filter_size=3,
padding=1,
expansion_factor=t,
name=name + "_" + str(i + 1)))
self._block_list.append(block)
def forward(self, inputs):
y = self._first_block(inputs, ifshortcut=False)
for block in self._block_list:
y = block(y, ifshortcut=True)
return y
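# Illustrative reading of the (t, c, n, s) settings consumed above: a row such
# as (6, 24, 2, 2) in bottleneck_params_list uses an expansion factor of 6,
# outputs int(24 * scale) channels, stacks n=2 inverted residual units, and
# applies stride 2 only in the first unit; the remaining units keep stride 1
# and use the additive shortcut.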
@manager.BACKBONES.add_component
def MobileNetV2_x0_25(**kwargs):
model = MobileNetV2(scale=0.25, **kwargs)
return model
@manager.BACKBONES.add_component
def MobileNetV2_x0_5(**kwargs):
model = MobileNetV2(scale=0.5, **kwargs)
return model
@manager.BACKBONES.add_component
def MobileNetV2_x0_75(**kwargs):
model = MobileNetV2(scale=0.75, **kwargs)
return model
@manager.BACKBONES.add_component
def MobileNetV2_x1_0(**kwargs):
model = MobileNetV2(scale=1.0, **kwargs)
return model
@manager.BACKBONES.add_component
def MobileNetV2_x1_5(**kwargs):
model = MobileNetV2(scale=1.5, **kwargs)
return model
@manager.BACKBONES.add_component
def MobileNetV2_x2_0(**kwargs):
model = MobileNetV2(scale=2.0, **kwargs)
return model
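# --- Usage sketch (an illustrative addition, not part of the upstream file;
# assumes a working PaddlePaddle install). With out_index = [1, 2, 4, 6], the
# backbone returns features at strides 4, 8, 16 and 32; for scale=1.0 the
# channel counts are [24, 32, 96, 320] (see model.feat_channels).
if __name__ == "__main__":
    model = MobileNetV2_x1_0()
    x = paddle.randn([1, 3, 224, 224])
    for feat in model(x):
        print(feat.shape)  # [1, 24, 56, 56] ... [1, 320, 7, 7]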

@@ -1,4 +1,4 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -14,10 +14,12 @@
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
from paddle.regularizer import L2Decay
from paddle.nn import AdaptiveAvgPool2D, BatchNorm, Conv2D, Dropout, Linear
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.utils import utils
from paddlers.models.ppseg.utils import utils, logger
from paddlers.models.ppseg.models import layers
__all__ = [
@@ -28,8 +30,92 @@ __all__ = [
"MobileNetV3_large_x1_0", "MobileNetV3_large_x1_25"
]
def make_divisible(v, divisor=8, min_value=None):
MODEL_STAGES_PATTERN = {
"MobileNetV3_small": ["blocks[0]", "blocks[2]", "blocks[7]", "blocks[10]"],
"MobileNetV3_large":
["blocks[0]", "blocks[2]", "blocks[5]", "blocks[11]", "blocks[14]"]
}
# "large", "small" is just for MobinetV3_large, MobileNetV3_small respectively.
# The type of "large" or "small" config is a list. Each element(list) represents a depthwise block, which is composed of k, exp, se, act, s.
# k: kernel_size
# exp: middle channel number in depthwise block
# c: output channel number in depthwise block
# se: whether to use SE block
# act: which activation to use
# s: stride in depthwise block
# d: dilation rate in depthwise block
NET_CONFIG = {
"large": [
# k, exp, c, se, act, s
[3, 16, 16, False, "relu", 1],
[3, 64, 24, False, "relu", 2],
[3, 72, 24, False, "relu", 1], # x4
[5, 72, 40, True, "relu", 2],
[5, 120, 40, True, "relu", 1],
[5, 120, 40, True, "relu", 1], # x8
[3, 240, 80, False, "hardswish", 2],
[3, 200, 80, False, "hardswish", 1],
[3, 184, 80, False, "hardswish", 1],
[3, 184, 80, False, "hardswish", 1],
[3, 480, 112, True, "hardswish", 1],
[3, 672, 112, True, "hardswish", 1], # x16
[5, 672, 160, True, "hardswish", 2],
[5, 960, 160, True, "hardswish", 1],
[5, 960, 160, True, "hardswish", 1], # x32
],
"small": [
# k, exp, c, se, act, s
[3, 16, 16, True, "relu", 2],
[3, 72, 24, False, "relu", 2],
[3, 88, 24, False, "relu", 1],
[5, 96, 40, True, "hardswish", 2],
[5, 240, 40, True, "hardswish", 1],
[5, 240, 40, True, "hardswish", 1],
[5, 120, 48, True, "hardswish", 1],
[5, 144, 48, True, "hardswish", 1],
[5, 288, 96, True, "hardswish", 2],
[5, 576, 96, True, "hardswish", 1],
[5, 576, 96, True, "hardswish", 1],
],
"large_os8": [
# k, exp, c, se, act, s, {d}
[3, 16, 16, False, "relu", 1],
[3, 64, 24, False, "relu", 2],
[3, 72, 24, False, "relu", 1], # x4
[5, 72, 40, True, "relu", 2],
[5, 120, 40, True, "relu", 1],
[5, 120, 40, True, "relu", 1], # x8
[3, 240, 80, False, "hardswish", 1],
[3, 200, 80, False, "hardswish", 1, 2],
[3, 184, 80, False, "hardswish", 1, 2],
[3, 184, 80, False, "hardswish", 1, 2],
[3, 480, 112, True, "hardswish", 1, 2],
[3, 672, 112, True, "hardswish", 1, 2],
[5, 672, 160, True, "hardswish", 1, 2],
[5, 960, 160, True, "hardswish", 1, 4],
[5, 960, 160, True, "hardswish", 1, 4],
],
"small_os8": [
# k, exp, c, se, act, s, {d}
[3, 16, 16, True, "relu", 2],
[3, 72, 24, False, "relu", 2],
[3, 88, 24, False, "relu", 1],
[5, 96, 40, True, "hardswish", 1],
[5, 240, 40, True, "hardswish", 1, 2],
[5, 240, 40, True, "hardswish", 1, 2],
[5, 120, 48, True, "hardswish", 1, 2],
[5, 144, 48, True, "hardswish", 1, 2],
[5, 288, 96, True, "hardswish", 1, 2],
[5, 576, 96, True, "hardswish", 1, 4],
[5, 576, 96, True, "hardswish", 1, 4],
]
}
OUT_INDEX = {"large": [2, 5, 11, 14], "small": [0, 2, 7, 10]}
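# Illustrative reading of one config row: [5, 72, 40, True, "relu", 2] in
# NET_CONFIG["large"] builds a depthwise block with a 5x5 kernel, 72 expansion
# channels, 40 output channels, an SE block, ReLU activation and stride 2; the
# "*_os8" rows may carry a seventh value, the dilation rate, e.g.
# [5, 960, 160, True, "hardswish", 1, 4].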
def _make_divisible(v, divisor=8, min_value=None):
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
@@ -38,156 +124,113 @@ def make_divisible(v, divisor=8, min_value=None):
return new_v
class MobileNetV3(nn.Layer):
"""
The MobileNetV3 implementation based on PaddlePaddle.
def _create_act(act):
if act == "hardswish":
return nn.Hardswish()
elif act == "relu":
return nn.ReLU()
elif act is None:
return None
else:
raise RuntimeError(
"The activation function is not supported: {}".format(act))
    The original article refers to
    Andrew Howard, et al. "Searching for MobileNetV3"
(https://arxiv.org/pdf/1905.02244.pdf).
class MobileNetV3(nn.Layer):
"""
MobileNetV3
Args:
pretrained (str, optional): The path of pretrained model.
        scale (float, optional): The scale of channels. Default: 1.0.
        model_name (str, optional): Model name. It determines the type of MobileNetV3. The value is 'small' or 'large'. Default: 'small'.
        output_stride (int, optional): The stride of output features compared to input images. The value should be one of (2, 4, 8, 16, 32). Default: None.
        config (list): The MobileNetV3 depthwise block config (see NET_CONFIG).
        in_channels (int, optional): The channels of input image. Default: 3.
        scale (float, optional): The coefficient that controls the size of network parameters. Default: 1.0.
    Returns:
        model: nn.Layer. A specific MobileNetV3 model, depending on the args.
"""
def __init__(self,
pretrained=None,
config,
stages_pattern,
out_index,
in_channels=3,
scale=1.0,
model_name="small",
output_stride=None):
super(MobileNetV3, self).__init__()
pretrained=None):
super().__init__()
self.cfg = config
self.out_index = out_index
self.scale = scale
self.pretrained = pretrained
inplanes = 16
if model_name == "large":
self.cfg = [
# k, exp, c, se, nl, s,
[3, 16, 16, False, "relu", 1],
[3, 64, 24, False, "relu", 2],
[3, 72, 24, False, "relu", 1], # output 1 -> out_index=2
[5, 72, 40, True, "relu", 2],
[5, 120, 40, True, "relu", 1],
[5, 120, 40, True, "relu", 1], # output 2 -> out_index=5
[3, 240, 80, False, "hard_swish", 2],
[3, 200, 80, False, "hard_swish", 1],
[3, 184, 80, False, "hard_swish", 1],
[3, 184, 80, False, "hard_swish", 1],
[3, 480, 112, True, "hard_swish", 1],
[3, 672, 112, True, "hard_swish",
1], # output 3 -> out_index=11
[5, 672, 160, True, "hard_swish", 2],
[5, 960, 160, True, "hard_swish", 1],
[5, 960, 160, True, "hard_swish",
1], # output 3 -> out_index=14
]
self.out_indices = [2, 5, 11, 14]
self.feat_channels = [
make_divisible(i * scale) for i in [24, 40, 112, 160]
]
self.cls_ch_squeeze = 960
self.cls_ch_expand = 1280
elif model_name == "small":
self.cfg = [
# k, exp, c, se, nl, s,
[3, 16, 16, True, "relu", 2], # output 1 -> out_index=0
[3, 72, 24, False, "relu", 2],
[3, 88, 24, False, "relu", 1], # output 2 -> out_index=3
[5, 96, 40, True, "hard_swish", 2],
[5, 240, 40, True, "hard_swish", 1],
[5, 240, 40, True, "hard_swish", 1],
[5, 120, 48, True, "hard_swish", 1],
[5, 144, 48, True, "hard_swish", 1], # output 3 -> out_index=7
[5, 288, 96, True, "hard_swish", 2],
[5, 576, 96, True, "hard_swish", 1],
[5, 576, 96, True, "hard_swish", 1], # output 4 -> out_index=10
]
self.out_indices = [0, 3, 7, 10]
self.feat_channels = [
make_divisible(i * scale) for i in [16, 24, 48, 96]
]
self.cls_ch_squeeze = 576
self.cls_ch_expand = 1280
else:
raise NotImplementedError(
"mode[{}_model] is not implemented!".format(model_name))
###################################################
# modify stride and dilation based on output_stride
self.dilation_cfg = [1] * len(self.cfg)
self.modify_bottle_params(output_stride=output_stride)
###################################################
self.conv1 = ConvBNLayer(
in_c=3,
out_c=make_divisible(inplanes * scale),
self.conv = ConvBNLayer(
in_c=in_channels,
out_c=_make_divisible(inplanes * self.scale),
filter_size=3,
stride=2,
padding=1,
num_groups=1,
if_act=True,
act="hard_swish")
self.block_list = []
inplanes = make_divisible(inplanes * scale)
for i, (k, exp, c, se, nl, s) in enumerate(self.cfg):
######################################
# add dilation rate
dilation_rate = self.dilation_cfg[i]
######################################
self.block_list.append(
ResidualUnit(
in_c=inplanes,
mid_c=make_divisible(scale * exp),
out_c=make_divisible(scale * c),
filter_size=k,
stride=s,
dilation=dilation_rate,
use_se=se,
act=nl,
name="conv" + str(i + 2)))
self.add_sublayer(
sublayer=self.block_list[-1], name="conv" + str(i + 2))
inplanes = make_divisible(scale * c)
self.pretrained = pretrained
act="hardswish")
self.blocks = nn.Sequential(*[
ResidualUnit(
in_c=_make_divisible(inplanes * self.scale if i == 0 else
self.cfg[i - 1][2] * self.scale),
mid_c=_make_divisible(self.scale * exp),
out_c=_make_divisible(self.scale * c),
filter_size=k,
stride=s,
use_se=se,
act=act,
dilation=td[0] if td else 1)
for i, (k, exp, c, se, act, s, *td) in enumerate(self.cfg)
])
out_channels = [config[idx][2] for idx in self.out_index]
self.feat_channels = [
_make_divisible(self.scale * c) for c in out_channels
]
self.init_res(stages_pattern)
self.init_weight()
def modify_bottle_params(self, output_stride=None):
if output_stride is not None and output_stride % 2 != 0:
raise ValueError("output stride must to be even number")
if output_stride is not None:
stride = 2
rate = 1
for i, _cfg in enumerate(self.cfg):
stride = stride * _cfg[-1]
if stride > output_stride:
rate = rate * _cfg[-1]
self.cfg[i][-1] = 1
def init_weight(self):
if self.pretrained is not None:
utils.load_entire_model(self, self.pretrained)
def init_res(self, stages_pattern, return_patterns=None,
return_stages=None):
if return_patterns and return_stages:
msg = f"The 'return_patterns' would be ignored when 'return_stages' is set."
logger.warning(msg)
return_stages = None
if return_stages is True:
return_patterns = stages_pattern
# return_stages is int or bool
if type(return_stages) is int:
return_stages = [return_stages]
if isinstance(return_stages, list):
if max(return_stages) > len(stages_pattern) or min(
return_stages) < 0:
msg = f"The 'return_stages' set error. Illegal value(s) have been ignored. The stages' pattern list is {stages_pattern}."
logger.warning(msg)
return_stages = [
val for val in return_stages
if val >= 0 and val < len(stages_pattern)
]
return_patterns = [stages_pattern[i] for i in return_stages]
self.dilation_cfg[i] = rate
def forward(self, x):
x = self.conv(x)
def forward(self, inputs, label=None):
x = self.conv1(inputs)
        # A feature list that collects the feature of each downsampling stage.
feat_list = []
for i, block in enumerate(self.block_list):
for idx, block in enumerate(self.blocks):
x = block(x)
if i in self.out_indices:
if idx in self.out_index:
feat_list.append(x)
return feat_list
def init_weight(self):
if self.pretrained is not None:
utils.load_pretrained_model(self, self.pretrained)
class ConvBNLayer(nn.Layer):
def __init__(self,
@@ -196,36 +239,34 @@ class ConvBNLayer(nn.Layer):
filter_size,
stride,
padding,
dilation=1,
num_groups=1,
if_act=True,
act=None):
super(ConvBNLayer, self).__init__()
self.if_act = if_act
self.act = act
act=None,
dilation=1):
super().__init__()
self.conv = nn.Conv2D(
self.conv = Conv2D(
in_channels=in_c,
out_channels=out_c,
kernel_size=filter_size,
stride=stride,
padding=padding,
dilation=dilation,
groups=num_groups,
bias_attr=False)
self.bn = layers.SyncBatchNorm(
num_features=out_c,
weight_attr=paddle.ParamAttr(
regularizer=paddle.regularizer.L2Decay(0.0)),
bias_attr=paddle.ParamAttr(
regularizer=paddle.regularizer.L2Decay(0.0)))
self._act_op = layers.Activation(act='hardswish')
bias_attr=False,
dilation=dilation)
self.bn = BatchNorm(
num_channels=out_c,
act=None,
param_attr=ParamAttr(regularizer=L2Decay(0.0)),
bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
self.if_act = if_act
self.act = _create_act(act)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
if self.if_act:
x = self._act_op(x)
x = self.act(x)
return x
@@ -237,10 +278,9 @@ class ResidualUnit(nn.Layer):
filter_size,
stride,
use_se,
dilation=1,
act=None,
name=''):
super(ResidualUnit, self).__init__()
dilation=1):
super().__init__()
self.if_shortcut = stride == 1 and in_c == out_c
self.if_se = use_se
@@ -252,19 +292,18 @@ class ResidualUnit(nn.Layer):
padding=0,
if_act=True,
act=act)
self.bottleneck_conv = ConvBNLayer(
in_c=mid_c,
out_c=mid_c,
filter_size=filter_size,
stride=stride,
padding='same',
dilation=dilation,
padding=int((filter_size - 1) // 2) * dilation,
num_groups=mid_c,
if_act=True,
act=act)
act=act,
dilation=dilation)
if self.if_se:
self.mid_se = SEModule(mid_c, name=name + "_se")
self.mid_se = SEModule(mid_c)
self.linear_conv = ConvBNLayer(
in_c=mid_c,
out_c=out_c,
@@ -273,92 +312,187 @@ class ResidualUnit(nn.Layer):
padding=0,
if_act=False,
act=None)
self.dilation = dilation
def forward(self, inputs):
x = self.expand_conv(inputs)
def forward(self, x):
identity = x
x = self.expand_conv(x)
x = self.bottleneck_conv(x)
if self.if_se:
x = self.mid_se(x)
x = self.linear_conv(x)
if self.if_shortcut:
x = inputs + x
x = paddle.add(identity, x)
return x
# nn.Hardsigmoid does not expose the "slope" and "offset" arguments that
# nn.functional.hardsigmoid accepts, so wrap the functional form in a Layer.
class Hardsigmoid(nn.Layer):
def __init__(self, slope=0.2, offset=0.5):
super().__init__()
self.slope = slope
self.offset = offset
def forward(self, x):
return nn.functional.hardsigmoid(
x, slope=self.slope, offset=self.offset)
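# Note (illustrative addition): with the defaults used here the gate computes
# y = clip(0.2 * x + 0.5, 0, 1), i.e. paddle's hardsigmoid with slope=0.2 and
# offset=0.5.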
class SEModule(nn.Layer):
def __init__(self, channel, reduction=4, name=""):
super(SEModule, self).__init__()
self.avg_pool = nn.AdaptiveAvgPool2D(1)
self.conv1 = nn.Conv2D(
def __init__(self, channel, reduction=4):
super().__init__()
self.avg_pool = AdaptiveAvgPool2D(1)
self.conv1 = Conv2D(
in_channels=channel,
out_channels=channel // reduction,
kernel_size=1,
stride=1,
padding=0)
self.conv2 = nn.Conv2D(
self.relu = nn.ReLU()
self.conv2 = Conv2D(
in_channels=channel // reduction,
out_channels=channel,
kernel_size=1,
stride=1,
padding=0)
self.hardsigmoid = Hardsigmoid(slope=0.2, offset=0.5)
def forward(self, inputs):
outputs = self.avg_pool(inputs)
outputs = self.conv1(outputs)
outputs = F.relu(outputs)
outputs = self.conv2(outputs)
outputs = F.hardsigmoid(outputs)
return paddle.multiply(x=inputs, y=outputs)
def forward(self, x):
identity = x
x = self.avg_pool(x)
x = self.conv1(x)
x = self.relu(x)
x = self.conv2(x)
x = self.hardsigmoid(x)
return paddle.multiply(x=identity, y=x)
@manager.BACKBONES.add_component
def MobileNetV3_small_x0_35(**kwargs):
model = MobileNetV3(model_name="small", scale=0.35, **kwargs)
model = MobileNetV3(
config=NET_CONFIG["small"],
scale=0.35,
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
out_index=OUT_INDEX["small"],
**kwargs)
return model
@manager.BACKBONES.add_component
def MobileNetV3_small_x0_5(**kwargs):
model = MobileNetV3(model_name="small", scale=0.5, **kwargs)
model = MobileNetV3(
config=NET_CONFIG["small"],
scale=0.5,
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
out_index=OUT_INDEX["small"],
**kwargs)
return model
@manager.BACKBONES.add_component
def MobileNetV3_small_x0_75(**kwargs):
model = MobileNetV3(model_name="small", scale=0.75, **kwargs)
model = MobileNetV3(
config=NET_CONFIG["small"],
scale=0.75,
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
out_index=OUT_INDEX["small"],
**kwargs)
return model
@manager.BACKBONES.add_component
def MobileNetV3_small_x1_0(**kwargs):
model = MobileNetV3(model_name="small", scale=1.0, **kwargs)
model = MobileNetV3(
config=NET_CONFIG["small"],
scale=1.0,
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
out_index=OUT_INDEX["small"],
**kwargs)
return model
@manager.BACKBONES.add_component
def MobileNetV3_small_x1_25(**kwargs):
model = MobileNetV3(model_name="small", scale=1.25, **kwargs)
model = MobileNetV3(
config=NET_CONFIG["small"],
scale=1.25,
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
out_index=OUT_INDEX["small"],
**kwargs)
return model
@manager.BACKBONES.add_component
def MobileNetV3_large_x0_35(**kwargs):
model = MobileNetV3(model_name="large", scale=0.35, **kwargs)
model = MobileNetV3(
config=NET_CONFIG["large"],
scale=0.35,
        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
out_index=OUT_INDEX["large"],
**kwargs)
return model
@manager.BACKBONES.add_component
def MobileNetV3_large_x0_5(**kwargs):
model = MobileNetV3(model_name="large", scale=0.5, **kwargs)
model = MobileNetV3(
config=NET_CONFIG["large"],
scale=0.5,
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
out_index=OUT_INDEX["large"],
**kwargs)
return model
@manager.BACKBONES.add_component
def MobileNetV3_large_x0_75(**kwargs):
model = MobileNetV3(model_name="large", scale=0.75, **kwargs)
model = MobileNetV3(
config=NET_CONFIG["large"],
scale=0.75,
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
out_index=OUT_INDEX["large"],
**kwargs)
return model
@manager.BACKBONES.add_component
def MobileNetV3_large_x1_0(**kwargs):
model = MobileNetV3(model_name="large", scale=1.0, **kwargs)
model = MobileNetV3(
config=NET_CONFIG["large"],
scale=1.0,
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
out_index=OUT_INDEX["large"],
**kwargs)
return model
@manager.BACKBONES.add_component
def MobileNetV3_large_x1_25(**kwargs):
model = MobileNetV3(model_name="large", scale=1.25, **kwargs)
model = MobileNetV3(
config=NET_CONFIG["large"],
scale=1.25,
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
out_index=OUT_INDEX["large"],
**kwargs)
return model
@manager.BACKBONES.add_component
def MobileNetV3_large_x1_0_os8(**kwargs):
model = MobileNetV3(
config=NET_CONFIG["large_os8"],
scale=1.0,
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
out_index=OUT_INDEX["large"],
**kwargs)
return model
@manager.BACKBONES.add_component
def MobileNetV3_small_x1_0_os8(**kwargs):
model = MobileNetV3(
config=NET_CONFIG["small_os8"],
scale=1.0,
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
out_index=OUT_INDEX["small"],
**kwargs)
return model
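# --- Usage sketch (an illustrative addition, not part of the upstream file;
# assumes a working PaddlePaddle install). With OUT_INDEX["large"] =
# [2, 5, 11, 14], the large model returns features at strides 4, 8, 16 and 32;
# the "*_os8" variants instead keep stride 8 from stage 3 onward by switching
# later blocks to dilated convolutions.
if __name__ == "__main__":
    model = MobileNetV3_large_x1_0()
    x = paddle.randn([1, 3, 224, 224])
    for feat in model(x):
        print(feat.shape)  # [1, 24, 56, 56] ... [1, 160, 7, 7]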

@@ -1,4 +1,4 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -206,15 +206,16 @@ class ResNet_vd(nn.Layer):
layers (int, optional): The layers of ResNet_vd. The supported layers are (18, 34, 50, 101, 152, 200). Default: 50.
output_stride (int, optional): The stride of output features compared to input images. It is 8 or 16. Default: 8.
multi_grid (tuple|list, optional): The grid of stage4. Defult: (1, 1, 1).
in_channels (int, optional): The channels of input image. Default: 3.
pretrained (str, optional): The path of pretrained model.
"""
def __init__(self,
input_channel=3,
layers=50,
output_stride=8,
multi_grid=(1, 1, 1),
in_channels=3,
pretrained=None,
data_format='NCHW'):
super(ResNet_vd, self).__init__()
@@ -252,7 +253,7 @@ class ResNet_vd(nn.Layer):
dilation_dict = {3: 2}
self.conv1_1 = ConvBNLayer(
in_channels=input_channel,
in_channels=in_channels,
out_channels=32,
kernel_size=3,
stride=2,

@@ -0,0 +1,315 @@
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
from paddle import ParamAttr, reshape, transpose, concat, split
from paddle.nn import Layer, Conv2D, MaxPool2D, AdaptiveAvgPool2D, BatchNorm, Linear
from paddle.nn.initializer import KaimingNormal
from paddle.nn.functional import swish
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.utils import utils, logger
__all__ = [
'ShuffleNetV2_x0_25', 'ShuffleNetV2_x0_33', 'ShuffleNetV2_x0_5',
'ShuffleNetV2_x1_0', 'ShuffleNetV2_x1_5', 'ShuffleNetV2_x2_0',
'ShuffleNetV2_swish'
]
def channel_shuffle(x, groups):
x_shape = paddle.shape(x)
batch_size, height, width = x_shape[0], x_shape[2], x_shape[3]
num_channels = x.shape[1]
channels_per_group = num_channels // groups
# reshape
x = reshape(
x=x, shape=[batch_size, groups, channels_per_group, height, width])
# transpose
x = transpose(x=x, perm=[0, 2, 1, 3, 4])
# flatten
x = reshape(x=x, shape=[batch_size, num_channels, height, width])
return x
class ConvBNLayer(Layer):
def __init__(
self,
in_channels,
out_channels,
kernel_size,
stride,
padding,
groups=1,
act=None,
name=None, ):
super(ConvBNLayer, self).__init__()
self._conv = Conv2D(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
groups=groups,
weight_attr=ParamAttr(
initializer=KaimingNormal(), name=name + "_weights"),
bias_attr=False)
self._batch_norm = BatchNorm(
out_channels,
param_attr=ParamAttr(name=name + "_bn_scale"),
bias_attr=ParamAttr(name=name + "_bn_offset"),
act=act,
moving_mean_name=name + "_bn_mean",
moving_variance_name=name + "_bn_variance")
def forward(self, inputs):
y = self._conv(inputs)
y = self._batch_norm(y)
return y
class InvertedResidual(Layer):
def __init__(self, in_channels, out_channels, stride, act="relu",
name=None):
super(InvertedResidual, self).__init__()
self._conv_pw = ConvBNLayer(
in_channels=in_channels // 2,
out_channels=out_channels // 2,
kernel_size=1,
stride=1,
padding=0,
groups=1,
act=act,
name='stage_' + name + '_conv1')
self._conv_dw = ConvBNLayer(
in_channels=out_channels // 2,
out_channels=out_channels // 2,
kernel_size=3,
stride=stride,
padding=1,
groups=out_channels // 2,
act=None,
name='stage_' + name + '_conv2')
self._conv_linear = ConvBNLayer(
in_channels=out_channels // 2,
out_channels=out_channels // 2,
kernel_size=1,
stride=1,
padding=0,
groups=1,
act=act,
name='stage_' + name + '_conv3')
def forward(self, inputs):
x1, x2 = split(
inputs,
num_or_sections=[inputs.shape[1] // 2, inputs.shape[1] // 2],
axis=1)
x2 = self._conv_pw(x2)
x2 = self._conv_dw(x2)
x2 = self._conv_linear(x2)
out = concat([x1, x2], axis=1)
return channel_shuffle(out, 2)
class InvertedResidualDS(Layer):
def __init__(self, in_channels, out_channels, stride, act="relu",
name=None):
super(InvertedResidualDS, self).__init__()
# branch1
self._conv_dw_1 = ConvBNLayer(
in_channels=in_channels,
out_channels=in_channels,
kernel_size=3,
stride=stride,
padding=1,
groups=in_channels,
act=None,
name='stage_' + name + '_conv4')
self._conv_linear_1 = ConvBNLayer(
in_channels=in_channels,
out_channels=out_channels // 2,
kernel_size=1,
stride=1,
padding=0,
groups=1,
act=act,
name='stage_' + name + '_conv5')
# branch2
self._conv_pw_2 = ConvBNLayer(
in_channels=in_channels,
out_channels=out_channels // 2,
kernel_size=1,
stride=1,
padding=0,
groups=1,
act=act,
name='stage_' + name + '_conv1')
self._conv_dw_2 = ConvBNLayer(
in_channels=out_channels // 2,
out_channels=out_channels // 2,
kernel_size=3,
stride=stride,
padding=1,
groups=out_channels // 2,
act=None,
name='stage_' + name + '_conv2')
self._conv_linear_2 = ConvBNLayer(
in_channels=out_channels // 2,
out_channels=out_channels // 2,
kernel_size=1,
stride=1,
padding=0,
groups=1,
act=act,
name='stage_' + name + '_conv3')
def forward(self, inputs):
x1 = self._conv_dw_1(inputs)
x1 = self._conv_linear_1(x1)
x2 = self._conv_pw_2(inputs)
x2 = self._conv_dw_2(x2)
x2 = self._conv_linear_2(x2)
out = concat([x1, x2], axis=1)
return channel_shuffle(out, 2)
class ShuffleNet(Layer):
def __init__(self, scale=1.0, act="relu", in_channels=3, pretrained=None):
super(ShuffleNet, self).__init__()
self.scale = scale
self.pretrained = pretrained
stage_repeats = [4, 8, 4]
if scale == 0.25:
stage_out_channels = [-1, 24, 24, 48, 96, 512]
elif scale == 0.33:
stage_out_channels = [-1, 24, 32, 64, 128, 512]
elif scale == 0.5:
stage_out_channels = [-1, 24, 48, 96, 192, 1024]
elif scale == 1.0:
stage_out_channels = [-1, 24, 116, 232, 464, 1024]
elif scale == 1.5:
stage_out_channels = [-1, 24, 176, 352, 704, 1024]
elif scale == 2.0:
stage_out_channels = [-1, 24, 224, 488, 976, 2048]
else:
raise NotImplementedError("This scale size:[" + str(scale) +
"] is not implemented!")
self.out_index = [3, 11, 15]
self.feat_channels = stage_out_channels[1:5]
# 1. conv1
self._conv1 = ConvBNLayer(
in_channels=in_channels,
out_channels=stage_out_channels[1],
kernel_size=3,
stride=2,
padding=1,
act=act,
name='stage1_conv')
self._max_pool = MaxPool2D(kernel_size=3, stride=2, padding=1)
# 2. bottleneck sequences
self._block_list = []
for stage_id, num_repeat in enumerate(stage_repeats):
for i in range(num_repeat):
if i == 0:
block = self.add_sublayer(
name=str(stage_id + 2) + '_' + str(i + 1),
sublayer=InvertedResidualDS(
in_channels=stage_out_channels[stage_id + 1],
out_channels=stage_out_channels[stage_id + 2],
stride=2,
act=act,
name=str(stage_id + 2) + '_' + str(i + 1)))
else:
block = self.add_sublayer(
name=str(stage_id + 2) + '_' + str(i + 1),
sublayer=InvertedResidual(
in_channels=stage_out_channels[stage_id + 2],
out_channels=stage_out_channels[stage_id + 2],
stride=1,
act=act,
name=str(stage_id + 2) + '_' + str(i + 1)))
self._block_list.append(block)
self.init_weight()
def init_weight(self):
if self.pretrained is not None:
utils.load_entire_model(self, self.pretrained)
def forward(self, inputs):
feat_list = []
y = self._conv1(inputs)
y = self._max_pool(y)
feat_list.append(y)
for idx, inv in enumerate(self._block_list):
y = inv(y)
if idx in self.out_index:
feat_list.append(y)
return feat_list
@manager.BACKBONES.add_component
def ShuffleNetV2_x0_25(**kwargs):
model = ShuffleNet(scale=0.25, **kwargs)
return model
@manager.BACKBONES.add_component
def ShuffleNetV2_x0_33(**kwargs):
model = ShuffleNet(scale=0.33, **kwargs)
return model
@manager.BACKBONES.add_component
def ShuffleNetV2_x0_5(**kwargs):
model = ShuffleNet(scale=0.5, **kwargs)
return model
@manager.BACKBONES.add_component
def ShuffleNetV2_x1_0(**kwargs):
model = ShuffleNet(scale=1.0, **kwargs)
return model
@manager.BACKBONES.add_component
def ShuffleNetV2_x1_5(**kwargs):
model = ShuffleNet(scale=1.5, **kwargs)
return model
@manager.BACKBONES.add_component
def ShuffleNetV2_x2_0(**kwargs):
model = ShuffleNet(scale=2.0, **kwargs)
return model
@manager.BACKBONES.add_component
def ShuffleNetV2_swish(**kwargs):
model = ShuffleNet(scale=1.0, act="swish", **kwargs)
return model
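# Minimal usage sketch (not part of the original file), assuming the
# module-level paddle import. The backbone is fully convolutional, so the
# input size below is only an example.
if __name__ == "__main__":
    model = ShuffleNetV2_x1_0()
    feats = model(paddle.randn([1, 3, 224, 224]))
    # Four feature maps at strides 4, 8, 16 and 32; their channel counts
    # follow model.feat_channels ([24, 116, 232, 464] for the x1_0 variant).
    print([f.shape for f in feats])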

@ -37,9 +37,9 @@ class STDCNet(nn.Layer):
        layers (list, optional): The number of STDC blocks in each of stages 3/4/5. Default: [4, 5, 3].
        block_num (int, optional): The number of convolution layers in each features block. Default: 4.
        type (str, optional): The feature fusion method, "cat" or "add". Default: "cat".
        num_classes (int, optional): The class number for image classification. Default: 1000.
        dropout (float, optional): The dropout ratio, applied only when greater than 0. Default: 0.20.
        use_conv_last (bool, optional): Whether to use the last ConvBNReLU layer. Default: False.
        relative_lr (float, optional): Parameters here receive a different learning rate when updating. The effective
            learning rate is the product of relative_lr and the global learning rate. Default: 1.0.
        in_channels (int, optional): The channels of input image. Default: 3.
        pretrained (str, optional): The path of pretrained model. Default: None.
"""
@ -48,34 +48,18 @@ class STDCNet(nn.Layer):
layers=[4, 5, 3],
block_num=4,
type="cat",
num_classes=1000,
dropout=0.20,
use_conv_last=False,
relative_lr=1.0,
in_channels=3,
pretrained=None):
super(STDCNet, self).__init__()
if type == "cat":
block = CatBottleneck
elif type == "add":
block = AddBottleneck
self.use_conv_last = use_conv_last
self.features = self._make_layers(base, layers, block_num, block)
self.conv_last = ConvBNRelu(base * 16, max(1024, base * 16), 1, 1)
if (layers == [4, 5, 3]): #stdc1446
self.x2 = nn.Sequential(self.features[:1])
self.x4 = nn.Sequential(self.features[1:2])
self.x8 = nn.Sequential(self.features[2:6])
self.x16 = nn.Sequential(self.features[6:11])
self.x32 = nn.Sequential(self.features[11:])
elif (layers == [2, 2, 2]): #stdc813
self.x2 = nn.Sequential(self.features[:1])
self.x4 = nn.Sequential(self.features[1:2])
self.x8 = nn.Sequential(self.features[2:4])
self.x16 = nn.Sequential(self.features[4:6])
self.x32 = nn.Sequential(self.features[6:])
else:
raise NotImplementedError(
"model with layers:{} is not implemented!".format(layers))
self.layers = layers
self.feat_channels = [base // 2, base, base * 4, base * 8, base * 16]
self.features = self._make_layers(in_channels, base, layers, block_num,
block, relative_lr)
self.pretrained = pretrained
self.init_weight()
@ -84,32 +68,42 @@ class STDCNet(nn.Layer):
"""
forward function for feature extract.
"""
feat2 = self.x2(x)
feat4 = self.x4(feat2)
feat8 = self.x8(feat4)
feat16 = self.x16(feat8)
feat32 = self.x32(feat16)
if self.use_conv_last:
feat32 = self.conv_last(feat32)
return feat2, feat4, feat8, feat16, feat32
def _make_layers(self, base, layers, block_num, block):
out_feats = []
x = self.features[0](x)
out_feats.append(x)
x = self.features[1](x)
out_feats.append(x)
idx = [[2, 2 + self.layers[0]],
[2 + self.layers[0], 2 + sum(self.layers[0:2])],
[2 + sum(self.layers[0:2]), 2 + sum(self.layers)]]
for start_idx, end_idx in idx:
for i in range(start_idx, end_idx):
x = self.features[i](x)
out_feats.append(x)
return out_feats
def _make_layers(self, in_channels, base, layers, block_num, block,
relative_lr):
features = []
features += [ConvBNRelu(3, base // 2, 3, 2)]
features += [ConvBNRelu(base // 2, base, 3, 2)]
features += [ConvBNRelu(in_channels, base // 2, 3, 2, relative_lr)]
features += [ConvBNRelu(base // 2, base, 3, 2, relative_lr)]
for i, layer in enumerate(layers):
for j in range(layer):
if i == 0 and j == 0:
features.append(block(base, base * 4, block_num, 2))
features.append(
block(base, base * 4, block_num, 2, relative_lr))
elif j == 0:
features.append(
block(base * int(math.pow(2, i + 1)), base * int(
math.pow(2, i + 2)), block_num, 2))
math.pow(2, i + 2)), block_num, 2, relative_lr))
else:
features.append(
block(base * int(math.pow(2, i + 2)), base * int(
math.pow(2, i + 2)), block_num, 1))
math.pow(2, i + 2)), block_num, 1, relative_lr))
return nn.Sequential(*features)
@ -125,16 +119,24 @@ class STDCNet(nn.Layer):
class ConvBNRelu(nn.Layer):
def __init__(self, in_planes, out_planes, kernel=3, stride=1):
def __init__(self,
in_planes,
out_planes,
kernel=3,
stride=1,
relative_lr=1.0):
super(ConvBNRelu, self).__init__()
param_attr = paddle.ParamAttr(learning_rate=relative_lr)
self.conv = nn.Conv2D(
in_planes,
out_planes,
kernel_size=kernel,
stride=stride,
padding=kernel // 2,
weight_attr=param_attr,
bias_attr=False)
self.bn = SyncBatchNorm(out_planes, data_format='NCHW')
self.bn = nn.BatchNorm2D(
out_planes, weight_attr=param_attr, bias_attr=param_attr)
self.relu = nn.ReLU()
def forward(self, x):
@ -143,11 +145,17 @@ class ConvBNRelu(nn.Layer):
class AddBottleneck(nn.Layer):
def __init__(self, in_planes, out_planes, block_num=3, stride=1):
def __init__(self,
in_planes,
out_planes,
block_num=3,
stride=1,
relative_lr=1.0):
super(AddBottleneck, self).__init__()
assert block_num > 1, "block number should be larger than 1."
self.conv_list = nn.LayerList()
self.stride = stride
param_attr = paddle.ParamAttr(learning_rate=relative_lr)
if stride == 2:
self.avd_layer = nn.Sequential(
nn.Conv2D(
@ -157,8 +165,12 @@ class AddBottleneck(nn.Layer):
stride=2,
padding=1,
groups=out_planes // 2,
weight_attr=param_attr,
bias_attr=False),
nn.BatchNorm2D(out_planes // 2), )
nn.BatchNorm2D(
out_planes // 2,
weight_attr=param_attr,
bias_attr=param_attr), )
self.skip = nn.Sequential(
nn.Conv2D(
in_planes,
@ -167,34 +179,53 @@ class AddBottleneck(nn.Layer):
stride=2,
padding=1,
groups=in_planes,
weight_attr=param_attr,
bias_attr=False),
nn.BatchNorm2D(in_planes),
nn.BatchNorm2D(
in_planes, weight_attr=param_attr, bias_attr=param_attr),
nn.Conv2D(
in_planes, out_planes, kernel_size=1, bias_attr=False),
nn.BatchNorm2D(out_planes), )
in_planes,
out_planes,
kernel_size=1,
bias_attr=False,
weight_attr=param_attr),
nn.BatchNorm2D(
out_planes, weight_attr=param_attr, bias_attr=param_attr), )
stride = 1
for idx in range(block_num):
if idx == 0:
self.conv_list.append(
ConvBNRelu(
in_planes, out_planes // 2, kernel=1))
in_planes,
out_planes // 2,
kernel=1,
relative_lr=relative_lr))
elif idx == 1 and block_num == 2:
self.conv_list.append(
ConvBNRelu(
out_planes // 2, out_planes // 2, stride=stride))
out_planes // 2,
out_planes // 2,
stride=stride,
relative_lr=relative_lr))
elif idx == 1 and block_num > 2:
self.conv_list.append(
ConvBNRelu(
out_planes // 2, out_planes // 4, stride=stride))
out_planes // 2,
out_planes // 4,
stride=stride,
relative_lr=relative_lr))
elif idx < block_num - 1:
self.conv_list.append(
ConvBNRelu(out_planes // int(math.pow(2, idx)), out_planes
// int(math.pow(2, idx + 1))))
ConvBNRelu(
out_planes // int(math.pow(2, idx)),
out_planes // int(math.pow(2, idx + 1)),
relative_lr=relative_lr))
else:
self.conv_list.append(
ConvBNRelu(out_planes // int(math.pow(2, idx)), out_planes
// int(math.pow(2, idx))))
                    ConvBNRelu(
                        out_planes // int(math.pow(2, idx)),
                        out_planes // int(math.pow(2, idx)),
                        relative_lr=relative_lr))
def forward(self, x):
out_list = []
@ -211,11 +242,17 @@ class AddBottleneck(nn.Layer):
class CatBottleneck(nn.Layer):
def __init__(self, in_planes, out_planes, block_num=3, stride=1):
def __init__(self,
in_planes,
out_planes,
block_num=3,
stride=1,
relative_lr=1.0):
super(CatBottleneck, self).__init__()
assert block_num > 1, "block number should be larger than 1."
self.conv_list = nn.LayerList()
self.stride = stride
param_attr = paddle.ParamAttr(learning_rate=relative_lr)
if stride == 2:
self.avd_layer = nn.Sequential(
nn.Conv2D(
@ -225,8 +262,12 @@ class CatBottleneck(nn.Layer):
stride=2,
padding=1,
groups=out_planes // 2,
weight_attr=param_attr,
bias_attr=False),
nn.BatchNorm2D(out_planes // 2), )
nn.BatchNorm2D(
out_planes // 2,
weight_attr=param_attr,
bias_attr=param_attr), )
self.skip = nn.AvgPool2D(kernel_size=3, stride=2, padding=1)
stride = 1
@ -234,23 +275,36 @@ class CatBottleneck(nn.Layer):
if idx == 0:
self.conv_list.append(
ConvBNRelu(
in_planes, out_planes // 2, kernel=1))
in_planes,
out_planes // 2,
kernel=1,
relative_lr=relative_lr))
elif idx == 1 and block_num == 2:
self.conv_list.append(
ConvBNRelu(
out_planes // 2, out_planes // 2, stride=stride))
out_planes // 2,
out_planes // 2,
stride=stride,
relative_lr=relative_lr))
elif idx == 1 and block_num > 2:
self.conv_list.append(
ConvBNRelu(
out_planes // 2, out_planes // 4, stride=stride))
out_planes // 2,
out_planes // 4,
stride=stride,
relative_lr=relative_lr))
elif idx < block_num - 1:
self.conv_list.append(
ConvBNRelu(out_planes // int(math.pow(2, idx)), out_planes
// int(math.pow(2, idx + 1))))
ConvBNRelu(
out_planes // int(math.pow(2, idx)),
out_planes // int(math.pow(2, idx + 1)),
relative_lr=relative_lr))
else:
self.conv_list.append(
ConvBNRelu(out_planes // int(math.pow(2, idx)), out_planes
// int(math.pow(2, idx))))
ConvBNRelu(
out_planes // int(math.pow(2, idx)),
out_planes // int(math.pow(2, idx)),
relative_lr=relative_lr))
def forward(self, x):
out_list = []

@ -1,4 +1,4 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -531,7 +531,7 @@ class SwinTransformer(nn.Layer):
Args:
pretrain_img_size (int): Input image size for training the pretrained model, used in absolute postion embedding. Default: 224.
patch_size (int | tuple(int)): Patch size. Default: 4.
in_chans (int): Number of input image channels. Default: 3.
in_channels (int): Number of input image channels. Default: 3.
embed_dim (int): Number of linear projection output channels. Default: 96.
depths (tuple[int]): Depths of each Swin Transformer stage.
num_heads (tuple[int]): Number of attention head of each stage.
@ -553,7 +553,7 @@ class SwinTransformer(nn.Layer):
def __init__(self,
pretrain_img_size=224,
patch_size=4,
in_chans=3,
in_channels=3,
embed_dim=96,
depths=[2, 2, 6, 2],
num_heads=[3, 6, 12, 24],
@ -583,7 +583,7 @@ class SwinTransformer(nn.Layer):
# split image into non-overlapping patches
self.patch_embed = PatchEmbed(
patch_size=patch_size,
in_chans=in_chans,
in_chans=in_channels,
embed_dim=embed_dim,
norm_layer=norm_layer if self.patch_norm else None)

@ -0,0 +1,716 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This file refers to https://github.com/hustvl/TopFormer and https://github.com/BR-IDL/PaddleViT
"""
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg import utils
from paddlers.models.ppseg.models.backbones.transformer_utils import Identity, DropPath
__all__ = ["TopTransformer_Base", "TopTransformer_Small", "TopTransformer_Tiny"]
def make_divisible(val, divisor, min_value=None):
"""
This function is taken from the original tf repo.
It ensures that all layers have a channel number that is divisible by 8
It can be seen here:
https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
"""
if min_value is None:
min_value = divisor
new_v = max(min_value, int(val + divisor / 2) // divisor * divisor)
# Make sure that round down does not go down by more than 10%.
if new_v < 0.9 * val:
new_v += divisor
return new_v
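# Worked examples (sketch) of the rounding rule above:
#   make_divisible(30, 8) -> 32   (rounded to the nearest multiple of 8)
#   make_divisible(22, 8) -> 24
#   make_divisible(7, 8)  -> 8    (clamped to min_value, which defaults to the divisor)
#   make_divisible(10, 8) -> 16   (8 would be a >10% drop from 10, so one divisor is added back)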
class HSigmoid(nn.Layer):
def __init__(self, inplace=True):
super().__init__()
self.relu = nn.ReLU6()
def forward(self, x):
return self.relu(x + 3) / 6
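# HSigmoid above is the piecewise-linear "hard" sigmoid used by
# MobileNetV3-style gates: relu6(x + 3) / 6, which is 0 for x <= -3, 1 for
# x >= 3, and linear in between. The `inplace` argument is accepted for API
# parity with other implementations and is unused here.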
class Conv2DBN(nn.Layer):
def __init__(self,
in_channels,
out_channels,
ks=1,
stride=1,
pad=0,
dilation=1,
groups=1,
bn_weight_init=1,
lr_mult=1.0):
super().__init__()
conv_weight_attr = paddle.ParamAttr(learning_rate=lr_mult)
self.c = nn.Conv2D(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=ks,
stride=stride,
padding=pad,
dilation=dilation,
groups=groups,
weight_attr=conv_weight_attr,
bias_attr=False)
bn_weight_attr = paddle.ParamAttr(
initializer=nn.initializer.Constant(bn_weight_init),
learning_rate=lr_mult)
bn_bias_attr = paddle.ParamAttr(
initializer=nn.initializer.Constant(0), learning_rate=lr_mult)
self.bn = nn.BatchNorm2D(
out_channels, weight_attr=bn_weight_attr, bias_attr=bn_bias_attr)
def forward(self, inputs):
out = self.c(inputs)
out = self.bn(out)
return out
class ConvBNAct(nn.Layer):
def __init__(self,
in_channels,
out_channels,
kernel_size=1,
stride=1,
padding=0,
groups=1,
norm=nn.BatchNorm2D,
act=None,
bias_attr=False,
lr_mult=1.0):
super(ConvBNAct, self).__init__()
param_attr = paddle.ParamAttr(learning_rate=lr_mult)
self.conv = nn.Conv2D(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
groups=groups,
weight_attr=param_attr,
bias_attr=param_attr if bias_attr else False)
self.act = act() if act is not None else Identity()
self.bn = norm(out_channels, weight_attr=param_attr, bias_attr=param_attr) \
if norm is not None else Identity()
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.act(x)
return x
class MLP(nn.Layer):
def __init__(self,
in_features,
hidden_features=None,
out_features=None,
act_layer=nn.ReLU,
drop=0.,
lr_mult=1.0):
super().__init__()
out_features = out_features or in_features
hidden_features = hidden_features or in_features
self.fc1 = Conv2DBN(in_features, hidden_features, lr_mult=lr_mult)
param_attr = paddle.ParamAttr(learning_rate=lr_mult)
self.dwconv = nn.Conv2D(
hidden_features,
hidden_features,
3,
1,
1,
groups=hidden_features,
weight_attr=param_attr,
bias_attr=param_attr)
self.act = act_layer()
self.fc2 = Conv2DBN(hidden_features, out_features, lr_mult=lr_mult)
self.drop = nn.Dropout(drop)
def forward(self, x):
x = self.fc1(x)
x = self.dwconv(x)
x = self.act(x)
x = self.drop(x)
x = self.fc2(x)
x = self.drop(x)
return x
class InvertedResidual(nn.Layer):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride,
expand_ratio,
activations=None,
lr_mult=1.0):
super(InvertedResidual, self).__init__()
assert stride in [1, 2], "The stride should be 1 or 2."
if activations is None:
activations = nn.ReLU
hidden_dim = int(round(in_channels * expand_ratio))
self.use_res_connect = stride == 1 and in_channels == out_channels
layers = []
if expand_ratio != 1:
layers.append(
Conv2DBN(
in_channels, hidden_dim, ks=1, lr_mult=lr_mult))
layers.append(activations())
layers.extend([
Conv2DBN(
hidden_dim,
hidden_dim,
ks=kernel_size,
stride=stride,
pad=kernel_size // 2,
groups=hidden_dim,
lr_mult=lr_mult), activations(), Conv2DBN(
hidden_dim, out_channels, ks=1, lr_mult=lr_mult)
])
self.conv = nn.Sequential(*layers)
self.out_channels = out_channels
def forward(self, x):
if self.use_res_connect:
return x + self.conv(x)
else:
return self.conv(x)
class TokenPyramidModule(nn.Layer):
def __init__(self,
cfgs,
out_indices,
in_channels=3,
inp_channel=16,
activation=nn.ReLU,
width_mult=1.,
lr_mult=1.):
super().__init__()
self.out_indices = out_indices
self.stem = nn.Sequential(
Conv2DBN(
in_channels, inp_channel, 3, 2, 1, lr_mult=lr_mult),
activation())
self.layers = []
for i, (k, t, c, s) in enumerate(cfgs):
output_channel = make_divisible(c * width_mult, 8)
exp_size = t * inp_channel
exp_size = make_divisible(exp_size * width_mult, 8)
layer_name = 'layer{}'.format(i + 1)
layer = InvertedResidual(
inp_channel,
output_channel,
kernel_size=k,
stride=s,
expand_ratio=t,
activations=activation,
lr_mult=lr_mult)
self.add_sublayer(layer_name, layer)
self.layers.append(layer_name)
inp_channel = output_channel
def forward(self, x):
outs = []
x = self.stem(x)
for i, layer_name in enumerate(self.layers):
layer = getattr(self, layer_name)
x = layer(x)
if i in self.out_indices:
outs.append(x)
return outs
class Attention(nn.Layer):
def __init__(self,
dim,
key_dim,
num_heads,
attn_ratio=4,
activation=None,
lr_mult=1.0):
super().__init__()
self.num_heads = num_heads
self.scale = key_dim**-0.5
self.key_dim = key_dim
self.nh_kd = nh_kd = key_dim * num_heads
self.d = int(attn_ratio * key_dim)
self.dh = int(attn_ratio * key_dim) * num_heads
self.attn_ratio = attn_ratio
self.to_q = Conv2DBN(dim, nh_kd, 1, lr_mult=lr_mult)
self.to_k = Conv2DBN(dim, nh_kd, 1, lr_mult=lr_mult)
self.to_v = Conv2DBN(dim, self.dh, 1, lr_mult=lr_mult)
self.proj = nn.Sequential(
activation(),
Conv2DBN(
self.dh, dim, bn_weight_init=0, lr_mult=lr_mult))
def forward(self, x):
x_shape = paddle.shape(x)
H, W = x_shape[2], x_shape[3]
qq = self.to_q(x).reshape(
[0, self.num_heads, self.key_dim, -1]).transpose([0, 1, 3, 2])
kk = self.to_k(x).reshape([0, self.num_heads, self.key_dim, -1])
vv = self.to_v(x).reshape([0, self.num_heads, self.d, -1]).transpose(
[0, 1, 3, 2])
attn = paddle.matmul(qq, kk)
attn = F.softmax(attn, axis=-1)
xx = paddle.matmul(attn, vv)
xx = xx.transpose([0, 1, 3, 2]).reshape([0, self.dh, H, W])
xx = self.proj(xx)
return xx
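# Shape walk-through for Attention.forward (sketch), with B batches and
# N = H * W spatial tokens:
#   qq: [B, num_heads, N, key_dim]       kk: [B, num_heads, key_dim, N]
#   vv: [B, num_heads, N, d]             attn = softmax(qq @ kk): [B, num_heads, N, N]
#   xx = attn @ vv: [B, num_heads, N, d] -> transposed/reshaped to [B, dh, H, W]
# The 0 entries in the reshape targets keep the corresponding input dimension
# (Paddle's reshape convention), so the batch size stays symbolic. Note that
# self.scale (key_dim ** -0.5) is defined but not applied to the attention
# logits in this implementation.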
class Block(nn.Layer):
def __init__(self,
dim,
key_dim,
num_heads,
mlp_ratios=4.,
attn_ratio=2.,
drop=0.,
drop_path=0.,
act_layer=nn.ReLU,
lr_mult=1.0):
super().__init__()
self.dim = dim
self.num_heads = num_heads
self.mlp_ratios = mlp_ratios
self.attn = Attention(
dim,
key_dim=key_dim,
num_heads=num_heads,
attn_ratio=attn_ratio,
activation=act_layer,
lr_mult=lr_mult)
# NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
self.drop_path = DropPath(drop_path) if drop_path > 0. else Identity()
mlp_hidden_dim = int(dim * mlp_ratios)
self.mlp = MLP(in_features=dim,
hidden_features=mlp_hidden_dim,
act_layer=act_layer,
drop=drop,
lr_mult=lr_mult)
def forward(self, x):
h = x
x = self.attn(x)
x = self.drop_path(x)
x = h + x
h = x
x = self.mlp(x)
x = self.drop_path(x)
x = x + h
return x
class BasicLayer(nn.Layer):
def __init__(self,
block_num,
embedding_dim,
key_dim,
num_heads,
mlp_ratios=4.,
attn_ratio=2.,
drop=0.,
attn_drop=0.,
drop_path=0.,
act_layer=None,
lr_mult=1.0):
super().__init__()
self.block_num = block_num
self.transformer_blocks = nn.LayerList()
for i in range(self.block_num):
self.transformer_blocks.append(
Block(
embedding_dim,
key_dim=key_dim,
num_heads=num_heads,
mlp_ratios=mlp_ratios,
attn_ratio=attn_ratio,
drop=drop,
drop_path=drop_path[i]
if isinstance(drop_path, list) else drop_path,
act_layer=act_layer,
lr_mult=lr_mult))
def forward(self, x):
# token * N
for i in range(self.block_num):
x = self.transformer_blocks[i](x)
return x
class PyramidPoolAgg(nn.Layer):
def __init__(self, stride):
super().__init__()
self.stride = stride
self.tmp = Identity() # avoid the error of paddle.flops
def forward(self, inputs):
'''
# The F.adaptive_avg_pool2d does not support the (H, W) be Tensor,
# so exporting the inference model will raise error.
_, _, H, W = inputs[-1].shape
H = (H - 1) // self.stride + 1
W = (W - 1) // self.stride + 1
return paddle.concat(
[F.adaptive_avg_pool2d(inp, (H, W)) for inp in inputs], axis=1)
'''
out = []
ks = 2**len(inputs)
stride = self.stride**len(inputs)
for x in inputs:
x = F.avg_pool2d(x, int(ks), int(stride))
ks /= 2
stride /= 2
out.append(x)
out = paddle.concat(out, axis=1)
return out
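# Sketch of the fixed pooling schedule above for the default 4-stage input
# and c2t_stride=2: the token pyramid arrives at strides 4/8/16/32 w.r.t. the
# image and is average-pooled with kernel and stride 16/8/4/2 respectively,
# so every map lands on the same stride-64 grid before channel-wise
# concatenation. The static schedule replaces F.adaptive_avg_pool2d to keep
# the graph exportable, as the commented-out block explains.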
class InjectionMultiSum(nn.Layer):
def __init__(self, in_channels, out_channels, activations=None,
lr_mult=1.0):
super(InjectionMultiSum, self).__init__()
self.local_embedding = ConvBNAct(
in_channels, out_channels, kernel_size=1, lr_mult=lr_mult)
self.global_embedding = ConvBNAct(
in_channels, out_channels, kernel_size=1, lr_mult=lr_mult)
self.global_act = ConvBNAct(
in_channels, out_channels, kernel_size=1, lr_mult=lr_mult)
self.act = HSigmoid()
def forward(self, x_low, x_global):
xl_hw = paddle.shape(x_low)[2:]
local_feat = self.local_embedding(x_low)
global_act = self.global_act(x_global)
sig_act = F.interpolate(
self.act(global_act), xl_hw, mode='bilinear', align_corners=False)
global_feat = self.global_embedding(x_global)
global_feat = F.interpolate(
global_feat, xl_hw, mode='bilinear', align_corners=False)
out = local_feat * sig_act + global_feat
return out
class InjectionMultiSumCBR(nn.Layer):
def __init__(self, in_channels, out_channels, activations=None):
'''
local_embedding: conv-bn-relu
global_embedding: conv-bn-relu
global_act: conv
'''
super(InjectionMultiSumCBR, self).__init__()
self.local_embedding = ConvBNAct(
in_channels, out_channels, kernel_size=1)
self.global_embedding = ConvBNAct(
in_channels, out_channels, kernel_size=1)
self.global_act = ConvBNAct(
in_channels, out_channels, kernel_size=1, norm=None, act=None)
self.act = HSigmoid()
def forward(self, x_low, x_global):
        xl_hw = paddle.shape(x_low)[2:]
local_feat = self.local_embedding(x_low)
# kernel
global_act = self.global_act(x_global)
global_act = F.interpolate(
self.act(global_act), xl_hw, mode='bilinear', align_corners=False)
# feat_h
global_feat = self.global_embedding(x_global)
global_feat = F.interpolate(
global_feat, xl_hw, mode='bilinear', align_corners=False)
out = local_feat * global_act + global_feat
return out
class FuseBlockSum(nn.Layer):
def __init__(self, in_channels, out_channels, activations=None):
super(FuseBlockSum, self).__init__()
self.fuse1 = ConvBNAct(
in_channels, out_channels, kernel_size=1, act=None)
self.fuse2 = ConvBNAct(
in_channels, out_channels, kernel_size=1, act=None)
def forward(self, x_low, x_high):
        xl_hw = paddle.shape(x_low)[2:]
inp = self.fuse1(x_low)
kernel = self.fuse2(x_high)
feat_h = F.interpolate(
kernel, xl_hw, mode='bilinear', align_corners=False)
out = inp + feat_h
return out
class FuseBlockMulti(nn.Layer):
def __init__(
self,
in_channels,
out_channels,
stride=1,
activations=None, ):
super(FuseBlockMulti, self).__init__()
assert stride in [1, 2], "The stride should be 1 or 2."
self.fuse1 = ConvBNAct(
in_channels, out_channels, kernel_size=1, act=None)
self.fuse2 = ConvBNAct(
in_channels, out_channels, kernel_size=1, act=None)
self.act = HSigmoid()
def forward(self, x_low, x_high):
        xl_hw = paddle.shape(x_low)[2:]
inp = self.fuse1(x_low)
sig_act = self.fuse2(x_high)
sig_act = F.interpolate(
self.act(sig_act), xl_hw, mode='bilinear', align_corners=False)
out = inp * sig_act
return out
SIM_BLOCK = {
"fuse_sum": FuseBlockSum,
"fuse_multi": FuseBlockMulti,
"multi_sum": InjectionMultiSum,
"multi_sum_cbr": InjectionMultiSumCBR,
}
class TopTransformer(nn.Layer):
def __init__(self,
cfgs,
injection_out_channels,
encoder_out_indices,
trans_out_indices=[1, 2, 3],
depths=4,
key_dim=16,
num_heads=8,
attn_ratios=2,
mlp_ratios=2,
c2t_stride=2,
drop_path_rate=0.,
act_layer=nn.ReLU6,
injection_type="muli_sum",
injection=True,
lr_mult=1.0,
in_channels=3,
pretrained=None):
super().__init__()
self.feat_channels = [
c[2] for i, c in enumerate(cfgs) if i in encoder_out_indices
]
self.injection_out_channels = injection_out_channels
self.injection = injection
self.embed_dim = sum(self.feat_channels)
self.trans_out_indices = trans_out_indices
self.tpm = TokenPyramidModule(
cfgs=cfgs,
out_indices=encoder_out_indices,
in_channels=in_channels,
lr_mult=lr_mult)
self.ppa = PyramidPoolAgg(stride=c2t_stride)
dpr = [x.item() for x in \
paddle.linspace(0, drop_path_rate, depths)]
self.trans = BasicLayer(
block_num=depths,
embedding_dim=self.embed_dim,
key_dim=key_dim,
num_heads=num_heads,
mlp_ratios=mlp_ratios,
attn_ratio=attn_ratios,
drop=0,
attn_drop=0,
drop_path=dpr,
act_layer=act_layer,
lr_mult=lr_mult)
self.SIM = nn.LayerList()
inj_module = SIM_BLOCK[injection_type]
if self.injection:
for i in range(len(self.feat_channels)):
if i in trans_out_indices:
self.SIM.append(
inj_module(
self.feat_channels[i],
injection_out_channels[i],
activations=act_layer,
lr_mult=lr_mult))
else:
self.SIM.append(Identity())
self.pretrained = pretrained
self.init_weight()
def init_weight(self):
if self.pretrained is not None:
utils.load_entire_model(self, self.pretrained)
    def forward(self, x):
        outputs = self.tpm(x)
        out = self.ppa(outputs)
        out = self.trans(out)
        if self.injection:
            xx = out.split(self.feat_channels, axis=1)
            results = []
            for i in range(len(self.feat_channels)):
                if i in self.trans_out_indices:
                    local_tokens = outputs[i]
                    global_semantics = xx[i]
                    out_ = self.SIM[i](local_tokens, global_semantics)
                    results.append(out_)
            return results
        else:
            outputs.append(out)
            return outputs
@manager.BACKBONES.add_component
def TopTransformer_Base(**kwargs):
cfgs = [
# k, t, c, s
[3, 1, 16, 1], # 1/2
[3, 4, 32, 2], # 1/4 1
[3, 3, 32, 1], #
[5, 3, 64, 2], # 1/8 3
[5, 3, 64, 1], #
[3, 3, 128, 2], # 1/16 5
[3, 3, 128, 1], #
[5, 6, 160, 2], # 1/32 7
[5, 6, 160, 1], #
[3, 6, 160, 1], #
]
model = TopTransformer(
cfgs=cfgs,
injection_out_channels=[None, 256, 256, 256],
encoder_out_indices=[2, 4, 6, 9],
trans_out_indices=[1, 2, 3],
depths=4,
key_dim=16,
num_heads=8,
attn_ratios=2,
mlp_ratios=2,
c2t_stride=2,
drop_path_rate=0.,
act_layer=nn.ReLU6,
injection_type="multi_sum",
injection=True,
**kwargs)
return model
@manager.BACKBONES.add_component
def TopTransformer_Small(**kwargs):
cfgs = [
# k, t, c, s
[3, 1, 16, 1], # 1/2
[3, 4, 24, 2], # 1/4 1
[3, 3, 24, 1], #
[5, 3, 48, 2], # 1/8 3
[5, 3, 48, 1], #
[3, 3, 96, 2], # 1/16 5
[3, 3, 96, 1], #
[5, 6, 128, 2], # 1/32 7
[5, 6, 128, 1], #
[3, 6, 128, 1], #
]
model = TopTransformer(
cfgs=cfgs,
injection_out_channels=[None, 192, 192, 192],
encoder_out_indices=[2, 4, 6, 9],
trans_out_indices=[1, 2, 3],
depths=4,
key_dim=16,
num_heads=6,
attn_ratios=2,
mlp_ratios=2,
c2t_stride=2,
drop_path_rate=0.,
act_layer=nn.ReLU6,
injection_type="multi_sum",
injection=True,
**kwargs)
return model
@manager.BACKBONES.add_component
def TopTransformer_Tiny(**kwargs):
cfgs = [
# k, t, c, s
[3, 1, 16, 1], # 1/2
[3, 4, 16, 2], # 1/4 1
[3, 3, 16, 1], #
[5, 3, 32, 2], # 1/8 3
[5, 3, 32, 1], #
[3, 3, 64, 2], # 1/16 5
[3, 3, 64, 1], #
[5, 6, 96, 2], # 1/32 7
[5, 6, 96, 1], #
]
model = TopTransformer(
cfgs=cfgs,
injection_out_channels=[None, 128, 128, 128],
encoder_out_indices=[2, 4, 6, 8],
trans_out_indices=[1, 2, 3],
depths=4,
key_dim=16,
num_heads=4,
attn_ratios=2,
mlp_ratios=2,
c2t_stride=2,
drop_path_rate=0.,
act_layer=nn.ReLU6,
injection_type="multi_sum",
injection=True,
**kwargs)
return model
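# Minimal usage sketch (not part of the original file), assuming the
# module-level paddle import. Input sides should be divisible by 64 so the
# fixed pooling in PyramidPoolAgg tiles the deepest feature map exactly.
if __name__ == "__main__":
    backbone = TopTransformer_Tiny()
    outs = backbone(paddle.randn([1, 3, 512, 512]))
    # With injection=True, three injected feature maps are returned for
    # trans_out_indices [1, 2, 3], i.e. strides 8/16/32 with 128 channels.
    print([o.shape for o in outs])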

@ -1,4 +1,4 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -35,7 +35,7 @@ def drop_path(x, drop_prob=0., training=False):
return x
keep_prob = paddle.to_tensor(1 - drop_prob)
shape = (paddle.shape(x)[0], ) + (1, ) * (x.ndim - 1)
random_tensor = keep_prob + paddle.rand(shape, dtype=x.dtype)
random_tensor = keep_prob + paddle.rand(shape).astype(x.dtype)
random_tensor = paddle.floor(random_tensor) # binarize
output = x.divide(keep_prob) * random_tensor
return output
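# Behavioural sketch of drop_path (stochastic depth): in training, each
# sample's residual branch is zeroed with probability drop_prob and the
# survivors are rescaled by 1 / keep_prob so the expectation is unchanged.
#   x = paddle.ones([4, 8])
#   y = drop_path(x, drop_prob=0.5, training=True)
#   # each row of y is, independently, either all 0.0 or all 2.0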

@ -1,4 +1,4 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -154,7 +154,7 @@ class VisionTransformer(nn.Layer):
def __init__(self,
img_size=224,
patch_size=16,
in_chans=3,
in_channels=3,
embed_dim=768,
depth=12,
num_heads=12,
@ -176,7 +176,7 @@ class VisionTransformer(nn.Layer):
self.patch_embed = PatchEmbed(
img_size=img_size,
patch_size=patch_size,
in_chans=in_chans,
in_chans=in_channels,
embed_dim=embed_dim)
self.pos_w = self.patch_embed.num_patches_in_w
self.pos_h = self.patch_embed.num_patches_in_h

@ -1,4 +1,4 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -255,12 +255,17 @@ class XceptionDeeplab(nn.Layer):
Args:
backbone (str): Which type of Xception_DeepLab to select. It should be one of ('xception_41', 'xception_65', 'xception_71').
in_channels (int, optional): The channels of input image. Default: 3.
pretrained (str, optional): The path of pretrained model.
output_stride (int, optional): The stride of output features compared to input images. It is 8 or 16. Default: 16.
"""
def __init__(self, backbone, pretrained=None, output_stride=16):
def __init__(self,
backbone,
in_channels=3,
pretrained=None,
output_stride=16):
super(XceptionDeeplab, self).__init__()
@ -269,7 +274,7 @@ class XceptionDeeplab(nn.Layer):
self.feat_channels = [128, 2048]
self._conv1 = ConvBNLayer(
3,
in_channels,
32,
3,
stride=2,

@ -35,6 +35,7 @@ class BiSeNetV2(nn.Layer):
Args:
num_classes (int): The unique number of target classes.
lambd (float, optional): A factor for controlling the size of semantic branch channels. Default: 0.25.
in_channels (int, optional): The channels of input image. Default: 3.
pretrained (str, optional): The path or url of pretrained model. Default: None.
"""
@ -42,6 +43,7 @@ class BiSeNetV2(nn.Layer):
num_classes,
lambd=0.25,
align_corners=False,
in_channels=3,
pretrained=None):
super().__init__()
@ -51,8 +53,8 @@ class BiSeNetV2(nn.Layer):
sb_channels = (C1, C3, C4, C5)
mid_channels = 128
self.db = DetailBranch(db_channels)
self.sb = SemanticBranch(sb_channels)
self.db = DetailBranch(in_channels, db_channels)
self.sb = SemanticBranch(in_channels, sb_channels)
self.bga = BGA(mid_channels, align_corners)
self.aux_head1 = SegHead(C1, C1, num_classes)
@ -189,15 +191,15 @@ class GatherAndExpansionLayer2(nn.Layer):
class DetailBranch(nn.Layer):
"""The detail branch of BiSeNet, which has wide channels but shallow layers."""
def __init__(self, in_channels):
def __init__(self, in_channels, feature_channels):
super().__init__()
C1, C2, C3 = in_channels
C1, C2, C3 = feature_channels
self.convs = nn.Sequential(
# stage 1
layers.ConvBNReLU(
3, C1, 3, stride=2),
in_channels, C1, 3, stride=2),
layers.ConvBNReLU(C1, C1, 3),
# stage 2
layers.ConvBNReLU(
@ -217,11 +219,11 @@ class DetailBranch(nn.Layer):
class SemanticBranch(nn.Layer):
"""The semantic branch of BiSeNet, which has narrow channels but deep layers."""
def __init__(self, in_channels):
def __init__(self, in_channels, feature_channels):
super().__init__()
C1, C3, C4, C5 = in_channels
C1, C3, C4, C5 = feature_channels
self.stem = StemBlock(3, C1)
self.stem = StemBlock(in_channels, C1)
self.stage3 = nn.Sequential(
GatherAndExpansionLayer2(C1, C3, 6),

@ -0,0 +1,174 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.models import layers
from paddlers.models.ppseg.utils import utils
@manager.MODELS.add_component
class CCNet(nn.Layer):
"""
The CCNet implementation based on PaddlePaddle.
The original article refers to
Zilong Huang, et al. "CCNet: Criss-Cross Attention for Semantic Segmentation"
(https://arxiv.org/abs/1811.11721)
Args:
num_classes (int): The unique number of target classes.
backbone (paddle.nn.Layer): Backbone network, currently support Resnet18_vd/Resnet34_vd/Resnet50_vd/Resnet101_vd.
backbone_indices (tuple, list, optional): Two values in the tuple indicate the indices of output of backbone. Default: (2, 3).
enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss. Default: True.
dropout_prob (float, optional): The probability of dropout. Default: 0.0.
        recurrence (int, optional): The number of recurrent criss-cross attention operations. Default: 1.
align_corners (bool, optional): An argument of F.interpolate. It should be set to False when the feature size is even,
e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False.
pretrained (str, optional): The path or url of pretrained model. Default: None.
"""
def __init__(self,
num_classes,
backbone,
backbone_indices=(2, 3),
enable_auxiliary_loss=True,
dropout_prob=0.0,
recurrence=1,
align_corners=False,
pretrained=None):
super().__init__()
self.enable_auxiliary_loss = enable_auxiliary_loss
self.recurrence = recurrence
self.align_corners = align_corners
self.backbone = backbone
self.backbone_indices = backbone_indices
backbone_channels = [
backbone.feat_channels[i] for i in backbone_indices
]
if enable_auxiliary_loss:
self.aux_head = layers.AuxLayer(
backbone_channels[0],
512,
num_classes,
dropout_prob=dropout_prob)
self.head = RCCAModule(
backbone_channels[1],
512,
num_classes,
dropout_prob=dropout_prob,
recurrence=recurrence)
        self.pretrained = pretrained
        self.init_weight()
def init_weight(self):
if self.pretrained is not None:
utils.load_entire_model(self, self.pretrained)
def forward(self, x):
feat_list = self.backbone(x)
logit_list = []
output = self.head(feat_list[self.backbone_indices[-1]])
logit_list.append(output)
if self.training and self.enable_auxiliary_loss:
aux_out = self.aux_head(feat_list[self.backbone_indices[-2]])
logit_list.append(aux_out)
return [
F.interpolate(
logit,
paddle.shape(x)[2:],
mode='bilinear',
align_corners=self.align_corners) for logit in logit_list
]
class RCCAModule(nn.Layer):
def __init__(self,
in_channels,
out_channels,
num_classes,
dropout_prob=0.1,
recurrence=1):
super().__init__()
inter_channels = in_channels // 4
self.recurrence = recurrence
self.conva = layers.ConvBNLeakyReLU(
in_channels, inter_channels, 3, padding=1, bias_attr=False)
self.cca = CrissCrossAttention(inter_channels)
self.convb = layers.ConvBNLeakyReLU(
inter_channels, inter_channels, 3, padding=1, bias_attr=False)
self.out = layers.AuxLayer(
in_channels + inter_channels,
out_channels,
num_classes,
dropout_prob=dropout_prob)
def forward(self, x):
feat = self.conva(x)
for i in range(self.recurrence):
feat = self.cca(feat)
feat = self.convb(feat)
output = self.out(paddle.concat([x, feat], axis=1))
return output
class CrissCrossAttention(nn.Layer):
def __init__(self, in_channels):
super().__init__()
self.q_conv = nn.Conv2D(in_channels, in_channels // 8, kernel_size=1)
self.k_conv = nn.Conv2D(in_channels, in_channels // 8, kernel_size=1)
self.v_conv = nn.Conv2D(in_channels, in_channels, kernel_size=1)
self.softmax = nn.Softmax(axis=3)
self.gamma = self.create_parameter(
shape=(1, ), default_initializer=nn.initializer.Constant(0))
self.inf_tensor = paddle.full(shape=(1, ), fill_value=float('inf'))
def forward(self, x):
b, c, h, w = paddle.shape(x)
proj_q = self.q_conv(x)
proj_q_h = proj_q.transpose([0, 3, 1, 2]).reshape(
[b * w, -1, h]).transpose([0, 2, 1])
proj_q_w = proj_q.transpose([0, 2, 1, 3]).reshape(
[b * h, -1, w]).transpose([0, 2, 1])
proj_k = self.k_conv(x)
proj_k_h = proj_k.transpose([0, 3, 1, 2]).reshape([b * w, -1, h])
proj_k_w = proj_k.transpose([0, 2, 1, 3]).reshape([b * h, -1, w])
proj_v = self.v_conv(x)
proj_v_h = proj_v.transpose([0, 3, 1, 2]).reshape([b * w, -1, h])
proj_v_w = proj_v.transpose([0, 2, 1, 3]).reshape([b * h, -1, w])
energy_h = (paddle.bmm(proj_q_h, proj_k_h) + self.Inf(b, h, w)).reshape(
[b, w, h, h]).transpose([0, 2, 1, 3])
energy_w = paddle.bmm(proj_q_w, proj_k_w).reshape([b, h, w, w])
concate = self.softmax(paddle.concat([energy_h, energy_w], axis=3))
attn_h = concate[:, :, :, 0:h].transpose([0, 2, 1, 3]).reshape(
[b * w, h, h])
attn_w = concate[:, :, :, h:h + w].reshape([b * h, w, w])
out_h = paddle.bmm(proj_v_h, attn_h.transpose([0, 2, 1])).reshape(
[b, w, -1, h]).transpose([0, 2, 3, 1])
out_w = paddle.bmm(proj_v_w, attn_w.transpose([0, 2, 1])).reshape(
[b, h, -1, w]).transpose([0, 2, 1, 3])
return self.gamma * (out_h + out_w) + x
def Inf(self, B, H, W):
return -paddle.tile(
paddle.diag(paddle.tile(self.inf_tensor, [H]), 0).unsqueeze(0),
[B * W, 1, 1])
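# Usage sketch (not part of the original file): criss-cross attention gathers
# context only along each position's row and column, costing
# O(H * W * (H + W)) instead of the O((H * W) ** 2) of full non-local
# attention; setting recurrence=2 in RCCAModule lets information reach every
# position in two hops.
#   cca = CrissCrossAttention(64)
#   y = cca(paddle.randn([2, 64, 32, 32]))
#   # y.shape == [2, 64, 32, 32]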

@ -0,0 +1,403 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg.cvlibs import manager, param_init
from paddlers.models.ppseg.models import layers
from paddlers.models.ppseg.utils import utils
class DualResNet(nn.Layer):
"""
The DDRNet implementation based on PaddlePaddle.
The original article refers to
Yuanduo Hong, Huihui Pan, Weichao Sun, et al. "Deep Dual-resolution Networks for Real-time and Accurate Semantic Segmentation of Road Scenes"
(https://arxiv.org/abs/2101.06085)
Args:
num_classes (int): The unique number of target classes.
in_channels (int, optional): Number of input channels. Default: 3.
block_layers (list, tuple): The numbers of layers in different blocks. Default: [2, 2, 2, 2].
planes (int): Base channels in network. Default: 64.
spp_planes (int): Branch channels for DAPPM. Default: 128.
head_planes (int): Mid channels of segmentation head. Default: 128.
        enable_auxiliary_loss (bool, optional): Whether to use the auxiliary segmentation head on stage 3. Default: False.
pretrained (str, optional): The path or url of pretrained model. Default: None.
"""
def __init__(self,
num_classes,
in_channels=3,
block_layers=[2, 2, 2, 2],
planes=64,
spp_planes=128,
head_planes=128,
enable_auxiliary_loss=False,
pretrained=None):
super().__init__()
highres_planes = planes * 2
self.enable_auxiliary_loss = enable_auxiliary_loss
self.conv1 = nn.Sequential(
layers.ConvBNReLU(
in_channels, planes, kernel_size=3, stride=2, padding=1),
layers.ConvBNReLU(
planes, planes, kernel_size=3, stride=2, padding=1), )
self.relu = nn.ReLU()
self.layer1 = self._make_layers(BasicBlock, planes, planes,
block_layers[0])
self.layer2 = self._make_layers(
BasicBlock, planes, planes * 2, block_layers[1], stride=2)
self.layer3 = self._make_layers(
BasicBlock, planes * 2, planes * 4, block_layers[2], stride=2)
self.layer4 = self._make_layers(
BasicBlock, planes * 4, planes * 8, block_layers[3], stride=2)
self.compression3 = layers.ConvBN(
planes * 4, highres_planes, kernel_size=1, bias_attr=False)
self.compression4 = layers.ConvBN(
planes * 8, highres_planes, kernel_size=1, bias_attr=False)
self.down3 = layers.ConvBN(
highres_planes,
planes * 4,
kernel_size=3,
stride=2,
bias_attr=False)
self.down4 = nn.Sequential(
layers.ConvBNReLU(
highres_planes,
planes * 4,
kernel_size=3,
stride=2,
padding=1,
bias_attr=False),
layers.ConvBN(
planes * 4,
planes * 8,
kernel_size=3,
stride=2,
padding=1,
bias_attr=False))
self.layer3_ = self._make_layers(BasicBlock, planes * 2, highres_planes,
2)
self.layer4_ = self._make_layers(BasicBlock, highres_planes,
highres_planes, 2)
self.layer5_ = self._make_layers(Bottleneck, highres_planes,
highres_planes, 1)
self.layer5 = self._make_layers(
Bottleneck, planes * 8, planes * 8, 1, stride=2)
self.spp = DAPPM(planes * 16, spp_planes, planes * 4)
if self.enable_auxiliary_loss:
self.aux_head = DDRNetHead(highres_planes, head_planes, num_classes)
self.head = DDRNetHead(planes * 4, head_planes, num_classes)
self.pretrained = pretrained
self.init_weight()
def init_weight(self):
if self.pretrained is not None:
utils.load_entire_model(self, self.pretrained)
else:
for m in self.sublayers():
if isinstance(m, nn.Conv2D):
param_init.kaiming_normal_init(m.weight)
elif isinstance(m, nn.BatchNorm2D):
param_init.constant_init(m.weight, value=1)
param_init.constant_init(m.bias, value=0)
def _make_layers(self, block, inplanes, planes, blocks, stride=1):
downsample = None
if stride != 1 or inplanes != planes * block.expansion:
downsample = nn.Sequential(
nn.Conv2D(
inplanes,
planes * block.expansion,
kernel_size=1,
stride=stride,
bias_attr=False),
nn.BatchNorm2D(planes * block.expansion), )
layers = []
layers.append(block(inplanes, planes, stride, downsample))
inplanes = planes * block.expansion
for i in range(1, blocks):
if i == (blocks - 1):
layers.append(block(inplanes, planes, stride=1, no_relu=True))
else:
layers.append(block(inplanes, planes, stride=1, no_relu=False))
return nn.Sequential(*layers)
def forward(self, x):
n, c, h, w = paddle.shape(x)
width_output = w // 8
height_output = h // 8
x = self.conv1(x)
stage1_out = self.layer1(x)
stage2_out = self.layer2(self.relu(stage1_out))
stage3_out = self.layer3(self.relu(stage2_out))
stage3_out_dual = self.layer3_(self.relu(stage2_out))
x = stage3_out + self.down3(self.relu(stage3_out_dual))
stage3_merge = stage3_out_dual + F.interpolate(
self.compression3(self.relu(stage3_out)),
size=[height_output, width_output],
mode='bilinear')
stage4_out = self.layer4(self.relu(x))
stage4_out_dual = self.layer4_(self.relu(stage3_merge))
x = stage4_out + self.down4(self.relu(stage4_out_dual))
stage4_merge = stage4_out_dual + F.interpolate(
self.compression4(self.relu(stage4_out)),
size=[height_output, width_output],
mode='bilinear')
stage5_out_dual = self.layer5_(self.relu(stage4_merge))
x = F.interpolate(
self.spp(self.layer5(self.relu(x))),
size=[height_output, width_output],
mode='bilinear')
output = self.head(x + stage5_out_dual)
logit_list = []
logit_list.append(output)
if self.enable_auxiliary_loss:
aux_out = self.aux_head(stage3_merge)
logit_list.append(aux_out)
return [
F.interpolate(
logit, [h, w], mode='bilinear') for logit in logit_list
]
class BasicBlock(nn.Layer):
expansion = 1
def __init__(self,
inplanes,
planes,
stride=1,
downsample=None,
no_relu=False):
super().__init__()
self.conv_bn_relu = layers.ConvBNReLU(
inplanes,
planes,
kernel_size=3,
stride=stride,
padding=1,
bias_attr=False)
self.relu = nn.ReLU()
self.conv_bn = layers.ConvBN(
planes, planes, kernel_size=3, stride=1, padding=1, bias_attr=False)
self.downsample = downsample
self.stride = stride
self.no_relu = no_relu
def forward(self, x):
residual = x
out = self.conv_bn_relu(x)
out = self.conv_bn(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
if self.no_relu:
return out
else:
return self.relu(out)
class Bottleneck(nn.Layer):
expansion = 2
def __init__(self,
inplanes,
planes,
stride=1,
downsample=None,
no_relu=True):
super().__init__()
self.conv_bn_relu1 = layers.ConvBNReLU(
inplanes, planes, kernel_size=1, bias_attr=False)
self.conv_bn_relu2 = layers.ConvBNReLU(
planes,
planes,
kernel_size=3,
stride=stride,
padding=1,
bias_attr=False)
self.conv_bn = layers.ConvBN(
planes, planes * self.expansion, kernel_size=1, bias_attr=False)
self.relu = nn.ReLU()
self.downsample = downsample
self.stride = stride
self.no_relu = no_relu
def forward(self, x):
residual = x
out = self.conv_bn_relu1(x)
out = self.conv_bn_relu2(out)
out = self.conv_bn(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
if self.no_relu:
return out
else:
return self.relu(out)
class DAPPM(nn.Layer):
def __init__(self, inplanes, branch_planes, outplanes):
super().__init__()
self.scale1 = nn.Sequential(
nn.AvgPool2D(
kernel_size=5, stride=2, padding=2),
layers.SyncBatchNorm(inplanes),
nn.ReLU(),
nn.Conv2D(
inplanes, branch_planes, kernel_size=1, bias_attr=False), )
self.scale2 = nn.Sequential(
nn.AvgPool2D(
kernel_size=9, stride=4, padding=4),
layers.SyncBatchNorm(inplanes),
nn.ReLU(),
nn.Conv2D(
inplanes, branch_planes, kernel_size=1, bias_attr=False), )
self.scale3 = nn.Sequential(
nn.AvgPool2D(
kernel_size=17, stride=8, padding=8),
layers.SyncBatchNorm(inplanes),
nn.ReLU(),
nn.Conv2D(
inplanes, branch_planes, kernel_size=1, bias_attr=False), )
self.scale4 = nn.Sequential(
nn.AdaptiveAvgPool2D((1, 1)),
layers.SyncBatchNorm(inplanes),
nn.ReLU(),
nn.Conv2D(
inplanes, branch_planes, kernel_size=1, bias_attr=False), )
self.scale0 = nn.Sequential(
layers.SyncBatchNorm(inplanes),
nn.ReLU(),
nn.Conv2D(
inplanes, branch_planes, kernel_size=1, bias_attr=False), )
self.process1 = nn.Sequential(
layers.SyncBatchNorm(branch_planes),
nn.ReLU(),
nn.Conv2D(
branch_planes,
branch_planes,
kernel_size=3,
padding=1,
bias_attr=False), )
self.process2 = nn.Sequential(
layers.SyncBatchNorm(branch_planes),
nn.ReLU(),
nn.Conv2D(
branch_planes,
branch_planes,
kernel_size=3,
padding=1,
bias_attr=False), )
self.process3 = nn.Sequential(
layers.SyncBatchNorm(branch_planes),
nn.ReLU(),
nn.Conv2D(
branch_planes,
branch_planes,
kernel_size=3,
padding=1,
bias_attr=False), )
self.process4 = nn.Sequential(
layers.SyncBatchNorm(branch_planes),
nn.ReLU(),
nn.Conv2D(
branch_planes,
branch_planes,
kernel_size=3,
padding=1,
bias_attr=False), )
self.compression = nn.Sequential(
layers.SyncBatchNorm(branch_planes * 5),
nn.ReLU(),
nn.Conv2D(
branch_planes * 5, outplanes, kernel_size=1, bias_attr=False))
self.shortcut = nn.Sequential(
layers.SyncBatchNorm(inplanes),
nn.ReLU(),
nn.Conv2D(
inplanes, outplanes, kernel_size=1, bias_attr=False))
def forward(self, x):
n, c, h, w = paddle.shape(x)
x0 = self.scale0(x)
x1 = self.process1(
F.interpolate(
self.scale1(x), size=[h, w], mode='bilinear') + x0)
x2 = self.process2(
F.interpolate(
self.scale2(x), size=[h, w], mode='bilinear') + x1)
x3 = self.process3(
F.interpolate(
self.scale3(x), size=[h, w], mode='bilinear') + x2)
x4 = self.process4(
F.interpolate(
self.scale4(x), size=[h, w], mode='bilinear') + x3)
out = self.compression(paddle.concat([x0, x1, x2, x3, x4],
1)) + self.shortcut(x)
return out
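# DAPPM summary (sketch): scale1-4 pool the input with growing windows
# (kernel 5/9/17, then global average pooling); each pooled branch is
# upsampled back to the input size, added to the previous branch and refined
# by a 3x3 "process" conv, and the five branches are concatenated, compressed
# to `outplanes` channels, and summed with a 1x1 shortcut of the input.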
class DDRNetHead(nn.Layer):
def __init__(self, inplanes, interplanes, outplanes, scale_factor=None):
super().__init__()
self.bn1 = nn.BatchNorm2D(inplanes)
self.relu = nn.ReLU()
self.conv_bn_relu = layers.ConvBNReLU(
inplanes, interplanes, kernel_size=3, padding=1, bias_attr=False)
self.conv = nn.Conv2D(
interplanes, outplanes, kernel_size=1, padding=0, bias_attr=True)
self.scale_factor = scale_factor
def forward(self, x):
x = self.bn1(x)
x = self.relu(x)
x = self.conv_bn_relu(x)
out = self.conv(x)
if self.scale_factor is not None:
out = F.interpolate(
out, scale_factor=self.scale_factor, mode='bilinear')
return out
@manager.MODELS.add_component
def DDRNet_23(**kwargs):
return DualResNet(
block_layers=[2, 2, 2, 2],
planes=64,
spp_planes=128,
head_planes=128,
**kwargs)
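# Minimal usage sketch (not part of the original file), assuming the
# module-level paddle import; input sides should be divisible by 8.
if __name__ == "__main__":
    model = DDRNet_23(num_classes=19)
    logits = model(paddle.randn([1, 3, 512, 1024]))
    # A list with one logit map (two when enable_auxiliary_loss=True),
    # upsampled back to the input resolution.
    print([l.shape for l in logits])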

@ -209,7 +209,9 @@ class EMAU(nn.Layer):
mu = F.normalize(mu, axis=1, p=2)
mu = self.mu * (1 - self.momentum) + mu * self.momentum
if paddle.distributed.get_world_size() > 1:
mu = paddle.distributed.all_reduce(mu)
out = paddle.distributed.all_reduce(mu)
if out is not None:
mu = out
mu /= paddle.distributed.get_world_size()
self.mu = mu

@ -34,6 +34,7 @@ class ENet(nn.Layer):
Args:
num_classes (int): The unique number of target classes.
in_channels (int, optional): The channels of input image. Default: 3.
pretrained (str, optional): The path or url of pretrained model. Default: None.
encoder_relu (bool, optional): When ``True`` ReLU is used as the activation
function; otherwise, PReLU is used. Default: False.
@ -43,13 +44,14 @@ class ENet(nn.Layer):
def __init__(self,
num_classes,
in_channels=3,
pretrained=None,
encoder_relu=False,
decoder_relu=True):
super(ENet, self).__init__()
self.numclasses = num_classes
self.initial_block = InitialBlock(3, 16, relu=encoder_relu)
self.initial_block = InitialBlock(in_channels, 16, relu=encoder_relu)
self.downsample1_0 = DownsamplingBottleneck(
16, 64, return_indices=True, dropout_prob=0.01, relu=encoder_relu)

@ -34,6 +34,7 @@ class FastSCNN(nn.Layer):
(https://arxiv.org/pdf/1902.04502.pdf).
Args:
num_classes (int): The unique number of target classes.
in_channels (int, optional): The channels of input image. Default: 3.
enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss.
If true, auxiliary loss will be added after LearningToDownsample module. Default: False.
align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature
@ -43,13 +44,15 @@ class FastSCNN(nn.Layer):
def __init__(self,
num_classes,
in_channels=3,
enable_auxiliary_loss=True,
align_corners=False,
pretrained=None):
super().__init__()
self.learning_to_downsample = LearningToDownsample(32, 48, 64)
self.learning_to_downsample = LearningToDownsample(in_channels, 32, 48,
64)
self.global_feature_extractor = GlobalFeatureExtractor(
in_channels=64,
block_channels=[64, 96, 128],
@ -108,11 +111,18 @@ class LearningToDownsample(nn.Layer):
out_channels (int, optional): The output channels of LearningToDownsample module. Default: 64.
"""
def __init__(self, dw_channels1=32, dw_channels2=48, out_channels=64):
def __init__(self,
in_channels=3,
dw_channels1=32,
dw_channels2=48,
out_channels=64):
super(LearningToDownsample, self).__init__()
self.conv_bn_relu = layers.ConvBNReLU(
in_channels=3, out_channels=dw_channels1, kernel_size=3, stride=2)
in_channels=in_channels,
out_channels=dw_channels1,
kernel_size=3,
stride=2)
self.dsconv_bn_relu1 = layers.SeparableConvBNReLU(
in_channels=dw_channels1,
out_channels=dw_channels2,

@ -92,7 +92,7 @@ class GINet(nn.Layer):
return [
F.interpolate(
logit, (h, w),
logit, [h, w],
mode='bilinear',
align_corners=self.align_corners) for logit in logit_list
]

@ -0,0 +1,198 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.models import layers
from paddlers.models.ppseg.utils import utils
@manager.MODELS.add_component
class GloRe(nn.Layer):
"""
The GloRe implementation based on PaddlePaddle.
The original article refers to:
Chen, Yunpeng, et al. "Graph-Based Global Reasoning Networks"
(https://arxiv.org/pdf/1811.12814.pdf)
Args:
num_classes (int): The unique number of target classes.
        backbone (paddle.nn.Layer): Backbone network, currently support Resnet50/101.
backbone_indices (tuple, optional): Two values in the tuple indicate the indices of output of backbone.
gru_channels (int, optional): The number of input channels in GloRe Unit. Default: 512.
gru_num_state (int, optional): The number of states in GloRe Unit. Default: 128.
        gru_num_node (int, optional): The number of nodes in GloRe Unit. Default: 64.
enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss. Default: True.
align_corners (bool, optional): An argument of F.interpolate. It should be set to False when the feature size is even,
e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False.
pretrained (str, optional): The path or url of pretrained model. Default: None.
"""
def __init__(self,
num_classes,
backbone,
backbone_indices=(2, 3),
gru_channels=512,
gru_num_state=128,
gru_num_node=64,
enable_auxiliary_loss=True,
align_corners=False,
pretrained=None):
super().__init__()
self.backbone = backbone
backbone_channels = [
backbone.feat_channels[i] for i in backbone_indices
]
self.head = GloReHead(num_classes, backbone_indices, backbone_channels,
gru_channels, gru_num_state, gru_num_node,
enable_auxiliary_loss)
self.align_corners = align_corners
self.pretrained = pretrained
self.init_weight()
def forward(self, x):
feat_list = self.backbone(x)
logit_list = self.head(feat_list)
return [
F.interpolate(
logit,
paddle.shape(x)[2:],
mode='bilinear',
align_corners=self.align_corners) for logit in logit_list
]
def init_weight(self):
if self.pretrained is not None:
utils.load_entire_model(self, self.pretrained)
class GloReHead(nn.Layer):
def __init__(self,
num_classes,
backbone_indices,
backbone_channels,
gru_channels=512,
gru_num_state=128,
gru_num_node=64,
enable_auxiliary_loss=True):
super().__init__()
in_channels = backbone_channels[1]
self.conv_bn_relu = layers.ConvBNReLU(
in_channels, gru_channels, 1, bias_attr=False)
self.gru_module = GruModule(
num_input=gru_channels,
num_state=gru_num_state,
num_node=gru_num_node)
self.dropout = nn.Dropout(0.1)
        self.classifier = nn.Conv2D(gru_channels, num_classes, kernel_size=1)
self.auxlayer = layers.AuxLayer(
in_channels=backbone_channels[0],
inter_channels=backbone_channels[0] // 4,
out_channels=num_classes)
self.backbone_indices = backbone_indices
self.enable_auxiliary_loss = enable_auxiliary_loss
def forward(self, feat_list):
logit_list = []
x = feat_list[self.backbone_indices[1]]
feature = self.conv_bn_relu(x)
gru_output = self.gru_module(feature)
output = self.dropout(gru_output)
logit = self.classifier(output)
logit_list.append(logit)
if self.enable_auxiliary_loss:
low_level_feat = feat_list[self.backbone_indices[0]]
auxiliary_logit = self.auxlayer(low_level_feat)
logit_list.append(auxiliary_logit)
return logit_list
class GCN(nn.Layer):
def __init__(self, num_state, num_node, bias=False):
super(GCN, self).__init__()
self.conv1 = nn.Conv1D(num_node, num_node, kernel_size=1)
self.relu = nn.ReLU()
self.conv2 = nn.Conv1D(
num_state, num_state, kernel_size=1, bias_attr=bias)
def forward(self, x):
h = self.conv1(paddle.transpose(x, perm=(0, 2, 1)))
h = paddle.transpose(h, perm=(0, 2, 1))
h = h + x
h = self.relu(self.conv2(h))
return h
class GruModule(nn.Layer):
def __init__(self,
num_input=512,
num_state=128,
num_node=64,
normalize=False):
super(GruModule, self).__init__()
self.normalize = normalize
self.num_state = num_state
self.num_node = num_node
self.reduction_dim = nn.Conv2D(num_input, num_state, kernel_size=1)
self.projection_mat = nn.Conv2D(num_input, num_node, kernel_size=1)
self.gcn = GCN(num_state=self.num_state, num_node=self.num_node)
self.extend_dim = nn.Conv2D(
self.num_state, num_input, kernel_size=1, bias_attr=False)
self.extend_bn = layers.SyncBatchNorm(num_input, epsilon=1e-4)
def forward(self, input):
n, c, h, w = input.shape
# B, C, H, W
reduction_dim = self.reduction_dim(input)
# B, N, H, W
mat_B = self.projection_mat(input)
# B, C, H*W
reshaped_reduction = paddle.reshape(
reduction_dim, shape=[n, self.num_state, h * w])
# B, N, H*W
reshaped_B = paddle.reshape(mat_B, shape=[n, self.num_node, h * w])
# B, N, H*W
reproject = reshaped_B
# B, C, N
node_state_V = paddle.matmul(
reshaped_reduction, paddle.transpose(
reshaped_B, perm=[0, 2, 1]))
if self.normalize:
node_state_V = node_state_V * (1. / reshaped_reduction.shape[2])
# B, C, N
gcn_out = self.gcn(node_state_V)
# B, C, H*W
Y = paddle.matmul(gcn_out, reproject)
# B, C, H, W
Y = paddle.reshape(Y, shape=[n, self.num_state, h, w])
Y_extend = self.extend_dim(Y)
Y_extend = self.extend_bn(Y_extend)
out = input + Y_extend
return out
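# A minimal shape sketch of the global reasoning unit above; a sketch only,
# assuming paddle is installed and GruModule is in scope, with hypothetical sizes.
import paddle

feat = paddle.rand([2, 512, 16, 16])  # batch 2, 512 channels, 16x16 map
module = GruModule(num_input=512, num_state=128, num_node=64)
out = module(feat)
# The unit is residual (out = input + Y_extend), so the input shape is preserved.
print(out.shape)  # [2, 512, 16, 16]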

@ -31,6 +31,7 @@ class HarDNet(nn.Layer):
Args:
num_classes (int): The unique number of target classes.
in_channels (int, optional): The channels of input image. Default: 3.
stem_channels (tuple|list, optional): The number of channels before the encoder. Default: (16, 24, 32, 48).
ch_list (tuple|list, optional): The number of channels at each block in the encoder. Default: (64, 96, 160, 224, 320).
grmul (float, optional): The channel multiplying factor in HarDBlock, which is m in the paper. Default: 1.7.
@ -43,6 +44,7 @@ class HarDNet(nn.Layer):
def __init__(self,
num_classes,
in_channels=3,
stem_channels=(16, 24, 32, 48),
ch_list=(64, 96, 160, 224, 320),
grmul=1.7,
@ -60,7 +62,7 @@ class HarDNet(nn.Layer):
self.stem = nn.Sequential(
layers.ConvBNReLU(
3, stem_channels[0], kernel_size=3, bias_attr=False),
in_channels, stem_channels[0], kernel_size=3, bias_attr=False),
layers.ConvBNReLU(
stem_channels[0],
stem_channels[1],

@ -12,9 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from .layer_libs import ConvBNReLU, ConvBN, SeparableConvBNReLU, DepthwiseConvBN, AuxLayer, SyncBatchNorm, JPU, ConvBNPReLU
from .layer_libs import ConvBNReLU, ConvBN, SeparableConvBNReLU, DepthwiseConvBN, AuxLayer, SyncBatchNorm, JPU, ConvBNPReLU, ConvBNAct, ConvBNLeakyReLU
from .activation import Activation
from .pyramid_pool import ASPPModule, PPModule
from .attention import AttentionBlock
from .nonlocal2d import NonLocal2D
from .wrap_functions import *
from .tensor_fusion import UAFM_SpAtten, UAFM_SpAtten_S, UAFM_ChAtten, UAFM_ChAtten_S, UAFM, UAFMMobile, UAFMMobile_SpAtten

@ -144,3 +144,129 @@ class AttentionBlock(nn.Layer):
if self.out_project is not None:
context = self.out_project(context)
return context
class DualAttentionModule(nn.Layer):
"""
Dual attention module.
Args:
in_channels (int): The number of input channels.
out_channels (int): The number of output channels.
"""
def __init__(self, in_channels, out_channels):
super().__init__()
inter_channels = in_channels // 4
self.channel_conv = layers.ConvBNReLU(in_channels, inter_channels, 1)
self.position_conv = layers.ConvBNReLU(in_channels, inter_channels, 1)
self.pam = PAM(inter_channels)
self.cam = CAM(inter_channels)
self.conv1 = layers.ConvBNReLU(inter_channels, inter_channels, 3)
self.conv2 = layers.ConvBNReLU(inter_channels, inter_channels, 3)
self.conv3 = layers.ConvBNReLU(inter_channels, out_channels, 3)
def forward(self, feats):
channel_feats = self.channel_conv(feats)
channel_feats = self.cam(channel_feats)
channel_feats = self.conv1(channel_feats)
position_feats = self.position_conv(feats)
position_feats = self.pam(position_feats)
position_feats = self.conv2(position_feats)
feats_sum = position_feats + channel_feats
out = self.conv3(feats_sum)
return out
class PAM(nn.Layer):
"""
Position attention module.
Args:
in_channels (int): The number of input channels.
"""
def __init__(self, in_channels):
super().__init__()
mid_channels = in_channels // 8
self.mid_channels = mid_channels
self.in_channels = in_channels
self.query_conv = nn.Conv2D(in_channels, mid_channels, 1, 1)
self.key_conv = nn.Conv2D(in_channels, mid_channels, 1, 1)
self.value_conv = nn.Conv2D(in_channels, in_channels, 1, 1)
self.gamma = self.create_parameter(
shape=[1],
dtype='float32',
default_initializer=nn.initializer.Constant(0))
def forward(self, x):
x_shape = paddle.shape(x)
# query: n, h * w, c1
query = self.query_conv(x)
query = paddle.reshape(query, (0, self.mid_channels, -1))
query = paddle.transpose(query, (0, 2, 1))
# key: n, c1, h * w
key = self.key_conv(x)
key = paddle.reshape(key, (0, self.mid_channels, -1))
# sim: n, h * w, h * w
sim = paddle.bmm(query, key)
sim = F.softmax(sim, axis=-1)
value = self.value_conv(x)
value = paddle.reshape(value, (0, self.in_channels, -1))
sim = paddle.transpose(sim, (0, 2, 1))
# feat: from (n, c2, h * w) -> (n, c2, h, w)
feat = paddle.bmm(value, sim)
feat = paddle.reshape(feat,
(0, self.in_channels, x_shape[2], x_shape[3]))
out = self.gamma * feat + x
return out
class CAM(nn.Layer):
"""
Channel attention module.
Args:
in_channels (int): The number of input channels.
"""
def __init__(self, channels):
super().__init__()
self.channels = channels
self.gamma = self.create_parameter(
shape=[1],
dtype='float32',
default_initializer=nn.initializer.Constant(0))
def forward(self, x):
x_shape = paddle.shape(x)
# query: n, c, h * w
query = paddle.reshape(x, (0, self.channels, -1))
# key: n, h * w, c
key = paddle.reshape(x, (0, self.channels, -1))
key = paddle.transpose(key, (0, 2, 1))
# sim: n, c, c
sim = paddle.bmm(query, key)
# The DANet authors claim this trick avoids gradient divergence
sim = paddle.max(sim, axis=-1, keepdim=True).tile(
[1, 1, self.channels]) - sim
sim = F.softmax(sim, axis=-1)
# feat: from (n, c, h * w) to (n, c, h, w)
value = paddle.reshape(x, (0, self.channels, -1))
feat = paddle.bmm(sim, value)
feat = paddle.reshape(feat, (0, self.channels, x_shape[2], x_shape[3]))
out = self.gamma * feat + x
return out
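# Both attention blocks above are residual with gamma initialized to zero, so at
# initialization they act as the identity. A quick sanity check (a sketch,
# assuming PAM and CAM are in scope):
import paddle

x = paddle.rand([2, 64, 32, 32])
pam, cam = PAM(64), CAM(64)
# gamma starts at 0, hence out = 0 * feat + x == x right after construction.
assert pam(x).shape == x.shape
assert cam(x).shape == x.shape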

@ -56,6 +56,37 @@ class ConvBNReLU(nn.Layer):
return x
class ConvBNAct(nn.Layer):
def __init__(self,
in_channels,
out_channels,
kernel_size,
padding='same',
act_type=None,
**kwargs):
super().__init__()
self._conv = nn.Conv2D(
in_channels, out_channels, kernel_size, padding=padding, **kwargs)
if 'data_format' in kwargs:
data_format = kwargs['data_format']
else:
data_format = 'NCHW'
self._batch_norm = SyncBatchNorm(out_channels, data_format=data_format)
self._act_type = act_type
if act_type is not None:
self._act = layers.Activation(act_type)
def forward(self, x):
x = self._conv(x)
x = self._batch_norm(x)
if self._act_type is not None:
x = self._act(x)
return x
class ConvBN(nn.Layer):
def __init__(self,
in_channels,
@ -293,3 +324,29 @@ class ConvBNPReLU(nn.Layer):
x = self._batch_norm(x)
x = self._prelu(x)
return x
class ConvBNLeakyReLU(nn.Layer):
def __init__(self,
in_channels,
out_channels,
kernel_size,
padding='same',
**kwargs):
super().__init__()
self._conv = nn.Conv2D(
in_channels, out_channels, kernel_size, padding=padding, **kwargs)
if 'data_format' in kwargs:
data_format = kwargs['data_format']
else:
data_format = 'NCHW'
self._batch_norm = SyncBatchNorm(out_channels, data_format=data_format)
self._relu = layers.Activation("leakyrelu")
def forward(self, x):
x = self._conv(x)
x = self._batch_norm(x)
x = self._relu(x)
return x

@ -0,0 +1,285 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
from paddle.nn.initializer import Constant
from paddlers.models.ppseg.models import layers
from paddlers.models.ppseg.models.layers import tensor_fusion_helper as helper
class UAFM(nn.Layer):
"""
The base of Unified Attention Fusion Module.
Args:
x_ch (int): The channel of x tensor, which is the low level feature.
y_ch (int): The channel of y tensor, which is the high level feature.
out_ch (int): The channel of output tensor.
ksize (int, optional): The kernel size of the conv for x tensor. Default: 3.
resize_mode (str, optional): The resize mode for upsampling the y tensor. Default: bilinear.
"""
def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
super().__init__()
self.conv_x = layers.ConvBNReLU(
x_ch, y_ch, kernel_size=ksize, padding=ksize // 2, bias_attr=False)
self.conv_out = layers.ConvBNReLU(
y_ch, out_ch, kernel_size=3, padding=1, bias_attr=False)
self.resize_mode = resize_mode
def check(self, x, y):
assert x.ndim == 4 and y.ndim == 4
x_h, x_w = x.shape[2:]
y_h, y_w = y.shape[2:]
assert x_h >= y_h and x_w >= y_w
def prepare(self, x, y):
x = self.prepare_x(x, y)
y = self.prepare_y(x, y)
return x, y
def prepare_x(self, x, y):
x = self.conv_x(x)
return x
def prepare_y(self, x, y):
y_up = F.interpolate(y, paddle.shape(x)[2:], mode=self.resize_mode)
return y_up
def fuse(self, x, y):
out = x + y
out = self.conv_out(out)
return out
def forward(self, x, y):
"""
Args:
x (Tensor): The low level feature.
y (Tensor): The high level feature.
"""
self.check(x, y)
x, y = self.prepare(x, y)
out = self.fuse(x, y)
return out
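# The base class fixes the fusion template: check that x is the higher-resolution
# input, project x to y's channel count, upsample y to x's spatial size, then fuse.
# A minimal sketch with hypothetical sizes (assuming UAFM is in scope):
import paddle

x = paddle.rand([1, 32, 64, 64])  # low-level feature, higher resolution
y = paddle.rand([1, 64, 32, 32])  # high-level feature, lower resolution
fusion = UAFM(x_ch=32, y_ch=64, out_ch=64)
out = fusion(x, y)
print(out.shape)  # [1, 64, 64, 64]: fused at the low-level resolution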
class UAFM_ChAtten(UAFM):
"""
The UAFM with channel attention, which uses mean and max values.
Args:
x_ch (int): The channel of x tensor, which is the low level feature.
y_ch (int): The channel of y tensor, which is the high level feature.
out_ch (int): The channel of output tensor.
ksize (int, optional): The kernel size of the conv for x tensor. Default: 3.
resize_mode (str, optional): The resize mode for upsampling the y tensor. Default: bilinear.
"""
def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode)
self.conv_xy_atten = nn.Sequential(
layers.ConvBNAct(
4 * y_ch,
y_ch // 2,
kernel_size=1,
bias_attr=False,
act_type="leakyrelu"),
layers.ConvBN(
y_ch // 2, y_ch, kernel_size=1, bias_attr=False))
def fuse(self, x, y):
"""
Args:
x (Tensor): The low level feature.
y (Tensor): The high level feature.
"""
atten = helper.avg_max_reduce_hw([x, y], self.training)
atten = F.sigmoid(self.conv_xy_atten(atten))
out = x * atten + y * (1 - atten)
out = self.conv_out(out)
return out
class UAFM_ChAtten_S(UAFM):
"""
The UAFM with channel attention, which uses mean values.
Args:
x_ch (int): The channel of x tensor, which is the low level feature.
y_ch (int): The channel of y tensor, which is the high level feature.
out_ch (int): The channel of output tensor.
ksize (int, optional): The kernel size of the conv for x tensor. Default: 3.
resize_mode (str, optional): The resize mode for upsampling the y tensor. Default: bilinear.
"""
def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode)
self.conv_xy_atten = nn.Sequential(
layers.ConvBNAct(
2 * y_ch,
y_ch // 2,
kernel_size=1,
bias_attr=False,
act_type="leakyrelu"),
layers.ConvBN(
y_ch // 2, y_ch, kernel_size=1, bias_attr=False))
def fuse(self, x, y):
"""
Args:
x (Tensor): The low level feature.
y (Tensor): The high level feature.
"""
atten = helper.avg_reduce_hw([x, y])
atten = F.sigmoid(self.conv_xy_atten(atten))
out = x * atten + y * (1 - atten)
out = self.conv_out(out)
return out
class UAFM_SpAtten(UAFM):
"""
The UAFM with spatial attention, which uses mean and max values.
Args:
x_ch (int): The channel of x tensor, which is the low level feature.
y_ch (int): The channel of y tensor, which is the high level feature.
out_ch (int): The channel of output tensor.
ksize (int, optional): The kernel size of the conv for x tensor. Default: 3.
resize_mode (str, optional): The resize mode for upsampling the y tensor. Default: bilinear.
"""
def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode)
self.conv_xy_atten = nn.Sequential(
layers.ConvBNReLU(
4, 2, kernel_size=3, padding=1, bias_attr=False),
layers.ConvBN(
2, 1, kernel_size=3, padding=1, bias_attr=False))
self._scale = self.create_parameter(
shape=[1],
attr=ParamAttr(initializer=Constant(value=1.)),
dtype="float32")
self._scale.stop_gradient = True
def fuse(self, x, y):
"""
Args:
x (Tensor): The low level feature.
y (Tensor): The high level feature.
"""
atten = helper.avg_max_reduce_channel([x, y])
atten = F.sigmoid(self.conv_xy_atten(atten))
out = x * atten + y * (self._scale - atten)
out = self.conv_out(out)
return out
class UAFM_SpAtten_S(UAFM):
"""
The UAFM with spatial attention, which uses mean values.
Args:
x_ch (int): The channel of x tensor, which is the low level feature.
y_ch (int): The channel of y tensor, which is the high level feature.
out_ch (int): The channel of output tensor.
ksize (int, optional): The kernel size of the conv for x tensor. Default: 3.
resize_mode (str, optional): The resize mode for upsampling the y tensor. Default: bilinear.
"""
def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode)
self.conv_xy_atten = nn.Sequential(
layers.ConvBNReLU(
2, 2, kernel_size=3, padding=1, bias_attr=False),
layers.ConvBN(
2, 1, kernel_size=3, padding=1, bias_attr=False))
def fuse(self, x, y):
"""
Args:
x (Tensor): The low level feature.
y (Tensor): The high level feature.
"""
atten = helper.avg_reduce_channel([x, y])
atten = F.sigmoid(self.conv_xy_atten(atten))
out = x * atten + y * (1 - atten)
out = self.conv_out(out)
return out
class UAFMMobile(UAFM):
"""
Unified Attention Fusion Module for mobile.
Args:
x_ch (int): The channel of x tensor, which is the low level feature.
y_ch (int): The channel of y tensor, which is the high level feature.
out_ch (int): The channel of output tensor.
ksize (int, optional): The kernel size of the conv for x tensor. Default: 3.
resize_mode (str, optional): The resize mode for upsampling the y tensor. Default: bilinear.
"""
def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode)
self.conv_x = layers.SeparableConvBNReLU(
x_ch, y_ch, kernel_size=ksize, padding=ksize // 2, bias_attr=False)
self.conv_out = layers.SeparableConvBNReLU(
y_ch, out_ch, kernel_size=3, padding=1, bias_attr=False)
class UAFMMobile_SpAtten(UAFM):
"""
Unified Attention Fusion Module with spatial attention for mobile.
Args:
x_ch (int): The channel of x tensor, which is the low level feature.
y_ch (int): The channel of y tensor, which is the high level feature.
out_ch (int): The channel of output tensor.
ksize (int, optional): The kernel size of the conv for x tensor. Default: 3.
resize_mode (str, optional): The resize mode for upsampling the y tensor. Default: bilinear.
"""
def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode)
self.conv_x = layers.SeparableConvBNReLU(
x_ch, y_ch, kernel_size=ksize, padding=ksize // 2, bias_attr=False)
self.conv_out = layers.SeparableConvBNReLU(
y_ch, out_ch, kernel_size=3, padding=1, bias_attr=False)
self.conv_xy_atten = nn.Sequential(
layers.ConvBNReLU(
4, 2, kernel_size=3, padding=1, bias_attr=False),
layers.ConvBN(
2, 1, kernel_size=3, padding=1, bias_attr=False))
def fuse(self, x, y):
"""
Args:
x (Tensor): The low level feature.
y (Tensor): The high level feature.
"""
atten = helper.avg_max_reduce_channel([x, y])
atten = F.sigmoid(self.conv_xy_atten(atten))
out = x * atten + y * (1 - atten)
out = self.conv_out(out)
return out

@ -0,0 +1,133 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
def avg_reduce_hw(x):
# Reduce hw by avg
# Return cat([avg_pool_0, avg_pool_1, ...])
if not isinstance(x, (list, tuple)):
return F.adaptive_avg_pool2d(x, 1)
elif len(x) == 1:
return F.adaptive_avg_pool2d(x[0], 1)
else:
res = []
for xi in x:
res.append(F.adaptive_avg_pool2d(xi, 1))
return paddle.concat(res, axis=1)
def avg_max_reduce_hw_helper(x, is_training, use_concat=True):
assert not isinstance(x, (list, tuple))
avg_pool = F.adaptive_avg_pool2d(x, 1)
# TODO(pjc): when axis=[2, 3], the paddle.max API has a bug during training.
if is_training:
max_pool = F.adaptive_max_pool2d(x, 1)
else:
max_pool = paddle.max(x, axis=[2, 3], keepdim=True)
if use_concat:
res = paddle.concat([avg_pool, max_pool], axis=1)
else:
res = [avg_pool, max_pool]
return res
def avg_max_reduce_hw(x, is_training):
# Reduce hw by avg and max
# Return cat([avg_pool_0, avg_pool_1, ..., max_pool_0, max_pool_1, ...])
if not isinstance(x, (list, tuple)):
return avg_max_reduce_hw_helper(x, is_training)
elif len(x) == 1:
return avg_max_reduce_hw_helper(x[0], is_training)
else:
res_avg = []
res_max = []
for xi in x:
avg, max = avg_max_reduce_hw_helper(xi, is_training, False)
res_avg.append(avg)
res_max.append(max)
res = res_avg + res_max
return paddle.concat(res, axis=1)
def avg_reduce_channel(x):
# Reduce channel by avg
# Return cat([avg_ch_0, avg_ch_1, ...])
if not isinstance(x, (list, tuple)):
return paddle.mean(x, axis=1, keepdim=True)
elif len(x) == 1:
return paddle.mean(x[0], axis=1, keepdim=True)
else:
res = []
for xi in x:
res.append(paddle.mean(xi, axis=1, keepdim=True))
return paddle.concat(res, axis=1)
def max_reduce_channel(x):
# Reduce channel by max
# Return cat([max_ch_0, max_ch_1, ...])
if not isinstance(x, (list, tuple)):
return paddle.max(x, axis=1, keepdim=True)
elif len(x) == 1:
return paddle.max(x[0], axis=1, keepdim=True)
else:
res = []
for xi in x:
res.append(paddle.max(xi, axis=1, keepdim=True))
return paddle.concat(res, axis=1)
def avg_max_reduce_channel_helper(x, use_concat=True):
# Reduce channel by avg and max; only supports a single input
assert not isinstance(x, (list, tuple))
mean_value = paddle.mean(x, axis=1, keepdim=True)
max_value = paddle.max(x, axis=1, keepdim=True)
if use_concat:
res = paddle.concat([mean_value, max_value], axis=1)
else:
res = [mean_value, max_value]
return res
def avg_max_reduce_channel(x):
# Reduce channel by avg and max
# Return cat([avg_ch_0, max_ch_0, avg_ch_1, max_ch_1, ...])
if not isinstance(x, (list, tuple)):
return avg_max_reduce_channel_helper(x)
elif len(x) == 1:
return avg_max_reduce_channel_helper(x[0])
else:
res = []
for xi in x:
res.extend(avg_max_reduce_channel_helper(xi, False))
return paddle.concat(res, axis=1)
def cat_avg_max_reduce_channel(x):
# Reduce channel by cat + avg + max
assert isinstance(x, (list, tuple)) and len(x) > 1
x = paddle.concat(x, axis=1)
mean_value = paddle.mean(x, axis=1, keepdim=True)
max_value = paddle.max(x, axis=1, keepdim=True)
res = paddle.concat([mean_value, max_value], axis=1)
return res
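# A shape sketch of the reduce helpers above (hypothetical sizes). Note that
# avg_max_reduce_hw groups all averages before all maxima, while
# avg_max_reduce_channel interleaves avg/max per input.
import paddle

a = paddle.rand([2, 16, 32, 32])
b = paddle.rand([2, 16, 32, 32])
print(avg_reduce_hw([a, b]).shape)               # [2, 32, 1, 1]
print(avg_max_reduce_hw([a, b], True).shape)     # [2, 64, 1, 1]
print(avg_reduce_channel([a, b]).shape)          # [2, 2, 32, 32]
print(avg_max_reduce_channel([a, b]).shape)      # [2, 4, 32, 32]
print(cat_avg_max_reduce_channel([a, b]).shape)  # [2, 2, 32, 32]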

@ -99,7 +99,7 @@ class BCELoss(nn.Layer):
raise ValueError(
"if type of `weight` is str, it should equal to 'dynamic', but it is {}"
.format(self.weight))
elif isinstance(self.weight, paddle.VarBase):
elif not isinstance(self.weight, paddle.Tensor):
raise TypeError(
'The type of `weight` is wrong, it should be Tensor or str, but it is {}'
.format(type(self.weight)))

@ -78,8 +78,6 @@ class CrossEntropyLoss(nn.Layer):
logit = paddle.transpose(logit, [0, 2, 3, 1])
label = label.astype('int64')
# In F.cross_entropy, the ignore_index is invalid, which needs to be fixed.
# When there is 255 in the label and paddle version <= 2.1.3, the cross_entropy OP will report an error, which is fixed in paddle develop version.
loss = F.cross_entropy(
logit,
label,
@ -121,7 +119,7 @@ class CrossEntropyLoss(nn.Layer):
loss = loss * semantic_weights
if self.weight is not None:
_one_hot = F.one_hot(label, logit.shape[-1])
_one_hot = F.one_hot(label * mask, logit.shape[-1])
coef = paddle.sum(_one_hot * self.weight, axis=-1)
else:
coef = paddle.ones_like(label)

@ -16,7 +16,7 @@ import numpy as np
import paddle
from paddle import nn
import paddle.nn.functional as F
from scipy.ndimage.interpolation import shift
from scipy.ndimage import shift
from paddlers.models.ppseg.cvlibs import manager

@ -1,4 +1,4 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.

@ -19,38 +19,59 @@ from paddlers.models.ppseg.cvlibs import manager
@manager.LOSSES.add_component
class DiceLoss(nn.Layer):
"""
Implements the dice loss function.
The implementation of the dice loss.
Args:
ignore_index (int64): Specifies a target value that is ignored
and does not contribute to the input gradient. Default ``255``.
smooth (float32): laplace smoothing,
to smooth dice loss and accelerate convergence. following:
https://github.com/pytorch/pytorch/issues/1249#issuecomment-337999895
weight (list[float], optional): The weight for each class. Default: None.
ignore_index (int64, optional): Specifies a target value that
is ignored and does not contribute to the input gradient. Default ``255``.
smooth (float32, optional): Laplace smoothing to smooth the dice loss and accelerate convergence.
Default: 1.0.
"""
def __init__(self, ignore_index=255, smooth=0.):
super(DiceLoss, self).__init__()
def __init__(self, weight=None, ignore_index=255, smooth=1.0):
super().__init__()
self.weight = weight
self.ignore_index = ignore_index
self.eps = 1e-5
self.smooth = smooth
self.eps = 1e-8
def forward(self, logits, labels):
labels = paddle.cast(labels, dtype='int32')
labels_one_hot = F.one_hot(labels, num_classes=logits.shape[1])
labels_one_hot = paddle.transpose(labels_one_hot, [0, 3, 1, 2])
labels_one_hot = paddle.cast(labels_one_hot, dtype='float32')
num_class = logits.shape[1]
if self.weight is not None:
assert num_class == len(self.weight), \
"The length of weight should be equal to the number of classes"
mask = labels != self.ignore_index
mask = paddle.cast(paddle.unsqueeze(mask, 1), 'float32')
labels[labels == self.ignore_index] = 0
labels_one_hot = F.one_hot(labels, num_class)
labels_one_hot = paddle.transpose(labels_one_hot, [0, 3, 1, 2])
logits = F.softmax(logits, axis=1)
mask = (paddle.unsqueeze(labels, 1) != self.ignore_index)
logits = logits * mask
labels_one_hot = labels_one_hot * mask
dice_loss = 0.0
for i in range(num_class):
dice_loss_i = dice_loss_helper(logits[:, i], labels_one_hot[:, i],
mask, self.smooth, self.eps)
if self.weight is not None:
dice_loss_i *= self.weight[i]
dice_loss += dice_loss_i
dice_loss = dice_loss / num_class
return dice_loss
dims = (0, ) + tuple(range(2, labels.ndimension() + 1))
intersection = paddle.sum(logits * labels_one_hot, dims)
cardinality = paddle.sum(logits + labels_one_hot, dims)
dice_loss = ((2. * intersection + self.smooth) /
(cardinality + self.eps + self.smooth)).mean()
return 1 - dice_loss
def dice_loss_helper(logit, label, mask, smooth, eps):
assert logit.shape == label.shape, \
"The shape of logit and label should be the same"
logit = paddle.reshape(logit, [0, -1])
label = paddle.reshape(label, [0, -1])
mask = paddle.reshape(mask, [0, -1])
logit *= mask
label *= mask
intersection = paddle.sum(logit * label, axis=1)
cardinality = paddle.sum(logit + label, axis=1)
dice_loss = 1 - (2 * intersection + smooth) / (cardinality + smooth + eps)
dice_loss = dice_loss.mean()
return dice_loss
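# A worked toy case for dice_loss_helper (a sketch: one sample, four pixels,
# a perfect prediction, and no ignored pixels).
import paddle

logit = paddle.to_tensor([[1., 0., 1., 0.]])  # per-class probabilities
label = paddle.to_tensor([[1., 0., 1., 0.]])  # matching one-hot slice
mask = paddle.ones([1, 4])
# intersection = 2 and cardinality = 4, so with smooth = 1:
# loss = 1 - (2 * 2 + 1) / (4 + 1) = 0
print(float(dice_loss_helper(logit, label, mask, smooth=1.0, eps=1e-8)))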

@ -23,38 +23,110 @@ from paddlers.models.ppseg.cvlibs import manager
@manager.LOSSES.add_component
class FocalLoss(nn.Layer):
"""
Focal Loss.
The implementation of focal loss.
Code referenced from:
https://github.com/clcarwin/focal_loss_pytorch/blob/master/focalloss.py
For now, this focal loss requires the labels to be 0 or 1.
Args:
gamma (float): the coefficient of Focal Loss.
ignore_index (int64): Specifies a target value that is ignored
alpha (float, list, optional): The alpha of focal loss. alpha is the weight
of class 1, 1-alpha is the weight of class 0. Default: 0.25
gamma (float, optional): The gamma of Focal Loss. Default: 2.0
ignore_index (int64, optional): Specifies a target value that is ignored
and does not contribute to the input gradient. Default ``255``.
"""
def __init__(self, gamma=2.0, ignore_index=255, edge_label=False):
super(FocalLoss, self).__init__()
def __init__(self, alpha=0.25, gamma=2.0, ignore_index=255):
super().__init__()
self.alpha = alpha
self.gamma = gamma
self.ignore_index = ignore_index
self.edge_label = edge_label
self.EPS = 1e-10
def forward(self, logit, label):
logit = paddle.reshape(
logit, [logit.shape[0], logit.shape[1], -1]) # N,C,H,W => N,C,H*W
logit = paddle.transpose(logit, [0, 2, 1]) # N,C,H*W => N,H*W,C
logit = paddle.reshape(logit,
[-1, logit.shape[2]]) # N,H*W,C => N*H*W,C
label = paddle.reshape(label, [-1, 1])
range_ = paddle.arange(0, label.shape[0])
range_ = paddle.unsqueeze(range_, axis=-1)
label = paddle.cast(label, dtype='int64')
label = paddle.concat([range_, label], axis=-1)
logpt = F.log_softmax(logit)
logpt = paddle.gather_nd(logpt, label)
pt = paddle.exp(logpt.detach())
loss = -1 * (1 - pt)**self.gamma * logpt
loss = paddle.mean(loss)
return loss
"""
Forward computation.
Args:
logit (Tensor): Logit tensor, the data type is float32, float64. Shape is
(N, C, H, W), where C is number of classes.
label (Tensor): Label tensor, the data type is int64. Shape is (N, H, W),
where each value is 0 <= label[i] <= C-1.
Returns:
(Tensor): The average loss.
"""
assert logit.ndim == 4, "The ndim of logit should be 4."
assert logit.shape[1] == 2, "The channel of logit should be 2."
assert label.ndim == 3, "The ndim of label should be 3."
class_num = logit.shape[1] # class num is 2
logit = paddle.transpose(logit, [0, 2, 3, 1]) # N,C,H,W => N,H,W,C
mask = label != self.ignore_index # N,H,W
mask = paddle.unsqueeze(mask, 3)
mask = paddle.cast(mask, 'float32')
mask.stop_gradient = True
label = F.one_hot(label, class_num) # N,H,W,C
label = paddle.cast(label, logit.dtype)
label.stop_gradient = True
loss = F.sigmoid_focal_loss(
logit=logit,
label=label,
alpha=self.alpha,
gamma=self.gamma,
reduction='none')
loss = loss * mask
avg_loss = paddle.sum(loss) / (
paddle.sum(paddle.cast(mask != 0., 'int32')) * class_num + self.EPS)
return avg_loss
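# A minimal usage sketch for the binary focal loss above (random data,
# hypothetical sizes); the channel dimension of logit must be 2.
import paddle

logit = paddle.rand([2, 2, 8, 8])
label = paddle.randint(0, 2, [2, 8, 8], dtype='int64')
loss = FocalLoss(alpha=0.25, gamma=2.0)(logit, label)
print(float(loss))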
@manager.LOSSES.add_component
class MultiClassFocalLoss(nn.Layer):
"""
The implementation of focal loss for multiple classes.
Args:
num_class (int): The number of classes.
alpha (float, optional): The scaling factor of the focal loss. Default: 1.0.
gamma (float, optional): The gamma of Focal Loss. Default: 2.0.
ignore_index (int64, optional): Specifies a target value that is ignored
and does not contribute to the input gradient. Default ``255``.
"""
def __init__(self, num_class, alpha=1.0, gamma=2.0, ignore_index=255):
super().__init__()
self.num_class = num_class
self.alpha = alpha
self.gamma = gamma
self.ignore_index = ignore_index
self.EPS = 1e-10
def forward(self, logit, label):
"""
Forward computation.
Args:
logit (Tensor): Logit tensor, the data type is float32, float64. Shape is
(N, C, H, W), where C is number of classes.
label (Tensor): Label tensor, the data type is int64. Shape is (N, H, W),
where each value is 0 <= label[i] <= C-1.
Returns:
(Tensor): The average loss.
"""
assert logit.ndim == 4, "The ndim of logit should be 4."
assert label.ndim == 3, "The ndim of label should be 3."
logit = paddle.transpose(logit, [0, 2, 3, 1])
label = label.astype('int64')
ce_loss = F.cross_entropy(
logit, label, ignore_index=self.ignore_index, reduction='none')
pt = paddle.exp(-ce_loss)
focal_loss = self.alpha * ((1 - pt)**self.gamma) * ce_loss
mask = paddle.cast(label != self.ignore_index, 'float32')
focal_loss *= mask
avg_loss = paddle.mean(focal_loss) / (paddle.mean(mask) + self.EPS)
return avg_loss
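# And a sketch for the multi-class variant, here with 19 classes as in
# Cityscapes; num_class must match the channel dimension of logit.
import paddle

logit = paddle.rand([2, 19, 8, 8])
label = paddle.randint(0, 19, [2, 8, 8], dtype='int64')
loss = MultiClassFocalLoss(num_class=19)(logit, label)
print(float(loss))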

@ -74,3 +74,25 @@ class L1Loss(nn.L1Loss):
def __init__(self, reduction='mean', ignore_index=255):
super().__init__(reduction=reduction)
self.ignore_index = ignore_index
self.EPS = 1e-10
def forward(self, input, label):
mask = label != self.ignore_index
mask = paddle.cast(mask, "float32")
label.stop_gradient = True
mask.stop_gradient = True
output = paddle.nn.functional.l1_loss(
input, label, "none", name=self.name) * mask
if self.reduction == "mean":
return paddle.mean(output) / (paddle.mean(mask) + self.EPS)
elif self.reduction == "none":
return output
elif self.reduction == "sum":
return paddle.sum(output)
else:
raise ValueError(
"The value of 'reduction' in L1Loss should be 'sum', 'mean' or 'none', but "
"received %s, which is not allowed." % self.reduction)

@ -124,8 +124,12 @@ def lovasz_hinge_flat(logits, labels):
signs = 2. * labels - 1.
signs.stop_gradient = True
errors = 1. - logits * signs
errors_sorted, perm = paddle.fluid.core.ops.argsort(errors, 'axis', 0,
'descending', True)
if hasattr(paddle, "_legacy_C_ops"):
errors_sorted, perm = paddle._legacy_C_ops.argsort(errors, 'axis', 0,
'descending', True)
else:
errors_sorted, perm = paddle._C_ops.argsort(errors, 'axis', 0,
'descending', True)
errors_sorted.stop_gradient = False
gt_sorted = paddle.gather(labels, perm)
grad = lovasz_grad(gt_sorted)
@ -181,8 +185,12 @@ def lovasz_softmax_flat(probas, labels, classes='present'):
else:
class_pred = probas[:, c]
errors = paddle.abs(fg - class_pred)
errors_sorted, perm = paddle.fluid.core.ops.argsort(errors, 'axis', 0,
'descending', True)
if hasattr(paddle, "_legacy_C_ops"):
errors_sorted, perm = paddle._legacy_C_ops.argsort(
errors, 'axis', 0, 'descending', True)
else:
errors_sorted, perm = paddle._C_ops.argsort(errors, 'axis', 0,
'descending', True)
errors_sorted.stop_gradient = False
fg_sorted = paddle.gather(fg, perm)
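# The two hunks above pick between the legacy and the new C-ops argsort so the
# Lovász losses keep working across Paddle releases. On recent versions the same
# sorted errors and permutation are also available via the public API, e.g.:
import paddle

errors = paddle.to_tensor([0.2, 1.5, -0.3])
perm = paddle.argsort(errors, axis=0, descending=True)
errors_sorted = paddle.gather(errors, perm)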

@ -55,7 +55,7 @@ class OhemCrossEntropyLoss(nn.Layer):
# get the label after ohem
n, c, h, w = logit.shape
label = label.reshape((-1, ))
label = label.reshape((-1, )).astype('int64')
valid_mask = (label != self.ignore_index).astype('int64')
num_valid = valid_mask.sum()
label = label * valid_mask

@ -101,9 +101,12 @@ class PixelContrastCrossEntropyLoss(nn.Layer):
elif num_hard >= n_view / 2:
num_easy_keep = num_easy
num_hard_keep = n_view - num_easy_keep
else:
elif num_easy >= n_view / 2:
num_hard_keep = num_hard
num_easy_keep = n_view - num_hard_keep
else:
num_hard_keep = num_hard
num_easy_keep = num_easy
indices = None
if num_hard > 0:

@ -92,6 +92,7 @@ class SemanticConnectivityLoss(nn.Layer):
label_num_conn, label_conn = cv2.connectedComponents(
labels_np_class.astype(np.uint8))
origin_pred_num_conn = pred_num_conn
if pred_num_conn > 2 * label_num_conn:
pred_num_conn = min(pred_num_conn, self.max_pred_num_conn)
real_pred_num = pred_num_conn - 1
@ -100,8 +101,9 @@ class SemanticConnectivityLoss(nn.Layer):
# Connected Components Matching and SC Loss Calculation
if real_label_num > 0 and real_pred_num > 0:
img_connectivity = compute_class_connectiveity(
pred_conn, label_conn, pred_num_conn, label_num_conn,
pred_i, real_label_num, real_pred_num, zero)
pred_conn, label_conn, pred_num_conn,
origin_pred_num_conn, label_num_conn, pred_i,
real_label_num, real_pred_num, zero)
sc_loss += 1 - img_connectivity
elif real_label_num == 0 and real_pred_num == 0:
# if no connected component, SC Loss = 0, so pass
@ -122,12 +124,12 @@ class SemanticConnectivityLoss(nn.Layer):
def compute_class_connectiveity(pred_conn, label_conn, pred_num_conn,
label_num_conn, pred, real_label_num,
real_pred_num, zero):
origin_pred_num_conn, label_num_conn, pred,
real_label_num, real_pred_num, zero):
pred_conn = paddle.to_tensor(pred_conn)
label_conn = paddle.to_tensor(label_conn)
pred_conn = F.one_hot(pred_conn, pred_num_conn)
pred_conn = F.one_hot(pred_conn, origin_pred_num_conn)
label_conn = F.one_hot(label_conn, label_num_conn)
ious = paddle.zeros((real_label_num, real_pred_num))

@ -0,0 +1,162 @@
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from functools import partial
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg import utils
from paddlers.models.ppseg.models import layers
from paddlers.models.ppseg.cvlibs import manager
@manager.MODELS.add_component
class LRASPP(nn.Layer):
"""
Semantic segmentation model with a light R-ASPP head.
The original article refers to
Howard, Andrew, et al. "Searching for MobileNetV3."
(https://arxiv.org/pdf/1905.02244.pdf)
Args:
num_classes (int): The number of target classes.
backbone(nn.Layer): Backbone network, such as stdc1net and resnet18. The backbone must
have a feat_channels attribute of length 5.
backbone_indices (List(int), optional): The values indicate the indices of backbone output
used as the input of the LR-ASPP head.
Default: [0, 1, 3].
lraspp_head_inter_chs (List(int), optional): The intermediate channels of LR-ASPP head.
Default: [32, 64].
lraspp_head_out_ch (int, optional): The output channels of each ASPP branch in the LR-ASPP head.
Default: 128
resize_mode (str, optional): The resize mode for the upsampling operation in the LR-ASPP head.
Default: bilinear.
use_gap (bool, optional): If true, use global average pooling in the LR-ASPP head; otherwise, use
a 49x49 kernel for average pooling.
Default: True.
pretrained (str, optional): The path or url of pretrained model. Default: None.
"""
def __init__(self,
num_classes,
backbone,
backbone_indices=[0, 1, 3],
lraspp_head_inter_chs=[32, 64],
lraspp_head_out_ch=128,
resize_mode='bilinear',
use_gap=True,
pretrained=None):
super().__init__()
# backbone
assert hasattr(backbone, 'feat_channels'), \
"The backbone should have feat_channels."
assert len(backbone.feat_channels) >= len(backbone_indices), \
f"The length of input backbone_indices ({len(backbone_indices)}) should not be " \
f"greater than the length of feat_channels ({len(backbone.feat_channels)})."
assert len(backbone.feat_channels) > max(backbone_indices), \
f"The max value ({max(backbone_indices)}) of backbone_indices should be " \
f"less than the length of feat_channels ({len(backbone.feat_channels)})."
self.backbone = backbone
assert len(backbone_indices) >= 1, "The length of backbone_indices " \
"should not be less than 1"
# head
assert len(backbone_indices) == len(
lraspp_head_inter_chs
) + 1, "The length of backbone_indices should be one greater than the length of lraspp_head_inter_chs."
self.backbone_indices = backbone_indices
self.lraspp_head = LRASPPHead(backbone_indices, backbone.feat_channels,
lraspp_head_inter_chs, lraspp_head_out_ch,
num_classes, resize_mode, use_gap)
# pretrained
self.pretrained = pretrained
self.init_weight()
def forward(self, x):
x_hw = paddle.shape(x)[2:]
feats_backbone = self.backbone(x)
assert len(feats_backbone) >= len(self.backbone_indices), \
f"The number of backbone feats ({len(feats_backbone)}) should be greater than or " \
f"equal to the number of backbone_indices ({len(self.backbone_indices)})"
y = self.lraspp_head(feats_backbone)
y = F.interpolate(y, x_hw, mode='bilinear', align_corners=False)
logit_list = [y]
return logit_list
def init_weight(self):
if self.pretrained is not None:
utils.load_entire_model(self, self.pretrained)
class LRASPPHead(nn.Layer):
def __init__(self,
indices,
in_chs,
mid_chs,
out_ch,
n_classes,
resize_mode,
use_gap,
align_corners=False):
super().__init__()
self.indices = indices[-2::-1]
self.in_chs = [in_chs[i] for i in indices[::-1]]
self.mid_chs = mid_chs[::-1]
self.convs = nn.LayerList()
self.conv_ups = nn.LayerList()
for in_ch, mid_ch in zip(self.in_chs[1:], self.mid_chs):
self.convs.append(
nn.Conv2D(
in_ch, mid_ch, kernel_size=1, bias_attr=False))
self.conv_ups.append(layers.ConvBNReLU(out_ch + mid_ch, out_ch, 1))
self.conv_w = nn.Sequential(
nn.AvgPool2D(
kernel_size=(49, 49), stride=(16, 20))
if not use_gap else nn.AdaptiveAvgPool2D(1),
nn.Conv2D(
self.in_chs[0], out_ch, 1, bias_attr=False),
nn.Sigmoid())
self.conv_v = layers.ConvBNReLU(self.in_chs[0], out_ch, 1)
self.conv_t = nn.Conv2D(out_ch, out_ch, kernel_size=1, bias_attr=False)
self.conv_out = nn.Conv2D(
out_ch, n_classes, kernel_size=1, bias_attr=False)
self.interp = partial(
F.interpolate, mode=resize_mode, align_corners=align_corners)
def forward(self, in_feat_list):
x = in_feat_list[-1]
x = self.conv_v(x) * self.interp(self.conv_w(x), paddle.shape(x)[2:])
y = self.conv_t(x)
for idx, conv, conv_up in zip(self.indices, self.convs, self.conv_ups):
feat = in_feat_list[idx]
y = self.interp(y, paddle.shape(feat)[2:])
y = paddle.concat([y, conv(feat)], axis=1)
y = conv_up(y)
y = self.conv_out(y)
return y

@ -1,4 +1,4 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.

@ -0,0 +1,289 @@
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg import utils
from paddlers.models.ppseg.models import layers
from paddlers.models.ppseg.cvlibs import manager
@manager.MODELS.add_component
class MobileSeg(nn.Layer):
"""
The semantic segmentation models for mobile devices.
Args:
num_classes (int): The number of target classes.
backbone(nn.Layer): Backbone network, such as stdc1net and resnet18. The backbone must
have a feat_channels attribute of length 5.
backbone_indices (List(int), optional): The values indicate the indices of output of backbone.
Default: [1, 2, 3].
cm_bin_sizes (List(int), optional): The bin sizes of the context module. Default: [1, 2].
cm_out_ch (int, optional): The output channel of the last context module. Default: 64.
arm_type (str, optional): The type of attention refinement module. Default: 'UAFMMobile'.
arm_out_chs (List(int), optional): The out channels of each arm module. Default: [32, 48, 64].
seg_head_inter_chs (List(int), optional): The intermediate channels of segmentation head.
Default: [32, 32, 32].
resize_mode (str, optional): The resize mode for the upsampling operation in decoder.
Default: bilinear.
use_last_fuse (bool, optional): Whether use fusion in the last. Default: False.
pretrained (str, optional): The path or url of pretrained model. Default: None.
"""
def __init__(self,
num_classes,
backbone,
backbone_indices=[1, 2, 3],
cm_bin_sizes=[1, 2],
cm_out_ch=64,
arm_type='UAFMMobile',
arm_out_chs=[32, 48, 64],
seg_head_inter_chs=[32, 32, 32],
resize_mode='bilinear',
use_last_fuse=False,
pretrained=None):
super().__init__()
# backbone
assert hasattr(backbone, 'feat_channels'), \
"The backbone should have feat_channels."
assert len(backbone.feat_channels) >= len(backbone_indices), \
f"The length of input backbone_indices ({len(backbone_indices)}) should not be " \
f"greater than the length of feat_channels ({len(backbone.feat_channels)})."
assert len(backbone.feat_channels) > max(backbone_indices), \
f"The max value ({max(backbone_indices)}) of backbone_indices should be " \
f"less than the length of feat_channels ({len(backbone.feat_channels)})."
self.backbone = backbone
assert len(backbone_indices) >= 1, "The length of backbone_indices " \
"should not be less than 1"
self.backbone_indices = backbone_indices # [..., x16_id, x32_id]
backbone_out_chs = [backbone.feat_channels[i] for i in backbone_indices]
# head
if len(arm_out_chs) == 1:
arm_out_chs = arm_out_chs * len(backbone_indices)
assert len(arm_out_chs) == len(backbone_indices), "The length of " \
"arm_out_chs and backbone_indices should be equal"
self.ppseg_head = MobileSegHead(backbone_out_chs, arm_out_chs,
cm_bin_sizes, cm_out_ch, arm_type,
resize_mode, use_last_fuse)
if len(seg_head_inter_chs) == 1:
seg_head_inter_chs = seg_head_inter_chs * len(backbone_indices)
assert len(seg_head_inter_chs) == len(backbone_indices), "The length of " \
"seg_head_inter_chs and backbone_indices should be equal"
self.seg_heads = nn.LayerList() # [..., head_16, head32]
for in_ch, mid_ch in zip(arm_out_chs, seg_head_inter_chs):
self.seg_heads.append(SegHead(in_ch, mid_ch, num_classes))
# pretrained
self.pretrained = pretrained
self.init_weight()
def forward(self, x):
x_hw = paddle.shape(x)[2:]
feats_backbone = self.backbone(x) # [x4, x8, x16, x32]
assert len(feats_backbone) >= len(self.backbone_indices), \
f"The number of backbone feats ({len(feats_backbone)}) should be greater than or " \
f"equal to the number of backbone_indices ({len(self.backbone_indices)})"
feats_selected = [feats_backbone[i] for i in self.backbone_indices]
feats_head = self.ppseg_head(feats_selected) # [..., x8, x16, x32]
if self.training:
logit_list = []
for x, seg_head in zip(feats_head, self.seg_heads):
x = seg_head(x)
logit_list.append(x)
logit_list = [
F.interpolate(
x, x_hw, mode='bilinear', align_corners=False)
for x in logit_list
]
else:
x = self.seg_heads[0](feats_head[0])
x = F.interpolate(x, x_hw, mode='bilinear', align_corners=False)
logit_list = [x]
return logit_list
def init_weight(self):
if self.pretrained is not None:
utils.load_entire_model(self, self.pretrained)
class MobileSegHead(nn.Layer):
"""
The head of MobileSeg.
Args:
backbone_out_chs (List(int)): The channels of the output tensors in the backbone.
arm_out_chs (List(int)): The out channels of each arm module.
cm_bin_sizes (List(int)): The bin size of context module.
cm_out_ch (int): The output channel of the last context module.
arm_type (str): The type of attention refinement module.
resize_mode (str): The resize mode for the upsampling operation in decoder.
"""
def __init__(self, backbone_out_chs, arm_out_chs, cm_bin_sizes, cm_out_ch,
arm_type, resize_mode, use_last_fuse):
super().__init__()
self.cm = MobileContextModule(backbone_out_chs[-1], cm_out_ch,
cm_out_ch, cm_bin_sizes)
assert hasattr(layers, arm_type), \
"Unsupported arm_type ({})".format(arm_type)
arm_class = getattr(layers, arm_type)
self.arm_list = nn.LayerList() # [..., arm8, arm16, arm32]
for i in range(len(backbone_out_chs)):
low_chs = backbone_out_chs[i]
high_ch = cm_out_ch if i == len(
backbone_out_chs) - 1 else arm_out_chs[i + 1]
out_ch = arm_out_chs[i]
arm = arm_class(
low_chs, high_ch, out_ch, ksize=3, resize_mode=resize_mode)
self.arm_list.append(arm)
self.use_last_fuse = use_last_fuse
if self.use_last_fuse:
self.fuse_convs = nn.LayerList()
for i in range(1, len(arm_out_chs)):
conv = layers.SeparableConvBNReLU(
arm_out_chs[i],
arm_out_chs[0],
kernel_size=3,
bias_attr=False)
self.fuse_convs.append(conv)
self.last_conv = layers.SeparableConvBNReLU(
len(arm_out_chs) * arm_out_chs[0],
arm_out_chs[0],
kernel_size=3,
bias_attr=False)
def forward(self, in_feat_list):
"""
Args:
in_feat_list (List(Tensor)): Such as [x2, x4, x8, x16, x32].
x2, x4 and x8 are optional.
Returns:
out_feat_list (List(Tensor)): Such as [x2, x4, x8, x16, x32].
x2, x4 and x8 are optional.
The length of in_feat_list and out_feat_list are the same.
"""
high_feat = self.cm(in_feat_list[-1])
out_feat_list = []
for i in reversed(range(len(in_feat_list))):
low_feat = in_feat_list[i]
arm = self.arm_list[i]
high_feat = arm(low_feat, high_feat)
out_feat_list.insert(0, high_feat)
if self.use_last_fuse:
x_list = [out_feat_list[0]]
size = paddle.shape(out_feat_list[0])[2:]
for i, (x, conv
) in enumerate(zip(out_feat_list[1:], self.fuse_convs)):
x = conv(x)
x = F.interpolate(
x, size=size, mode='bilinear', align_corners=False)
x_list.append(x)
x = paddle.concat(x_list, axis=1)
x = self.last_conv(x)
out_feat_list[0] = x
return out_feat_list
class MobileContextModule(nn.Layer):
"""
Context Module for Mobile Model.
Args:
in_channels (int): The number of input channels to pyramid pooling module.
inter_channels (int): The number of inter channels to pyramid pooling module.
out_channels (int): The number of output channels after pyramid pooling module.
bin_sizes (tuple): The output sizes of the pooled feature maps, e.g. (1, 3).
align_corners (bool): An argument of F.interpolate. It should be set to False
when the output size of feature is even, e.g. 1024x512, otherwise it is True, e.g. 769x769.
"""
def __init__(self,
in_channels,
inter_channels,
out_channels,
bin_sizes,
align_corners=False):
super().__init__()
self.stages = nn.LayerList([
self._make_stage(in_channels, inter_channels, size)
for size in bin_sizes
])
self.conv_out = layers.SeparableConvBNReLU(
in_channels=inter_channels,
out_channels=out_channels,
kernel_size=3,
bias_attr=False)
self.align_corners = align_corners
def _make_stage(self, in_channels, out_channels, size):
prior = nn.AdaptiveAvgPool2D(output_size=size)
conv = layers.ConvBNReLU(
in_channels=in_channels, out_channels=out_channels, kernel_size=1)
return nn.Sequential(prior, conv)
def forward(self, input):
out = None
input_shape = paddle.shape(input)[2:]
for stage in self.stages:
x = stage(input)
x = F.interpolate(
x,
input_shape,
mode='bilinear',
align_corners=self.align_corners)
if out is None:
out = x
else:
out += x
out = self.conv_out(out)
return out
class SegHead(nn.Layer):
def __init__(self, in_chan, mid_chan, n_classes):
super().__init__()
self.conv = layers.SeparableConvBNReLU(
in_chan, mid_chan, kernel_size=3, bias_attr=False)
self.conv_out = nn.Conv2D(
mid_chan, n_classes, kernel_size=1, bias_attr=False)
def forward(self, x):
x = self.conv(x)
x = self.conv_out(x)
return x
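# A shape sketch for the context module above (hypothetical sizes): pooled
# context at each bin size is upsampled, summed, then projected back.
import paddle

cm = MobileContextModule(
    in_channels=64, inter_channels=32, out_channels=64, bin_sizes=[1, 2, 4])
x = paddle.rand([1, 64, 16, 16])
print(cm(x).shape)  # [1, 64, 16, 16]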

@ -1,4 +1,4 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.

@ -1,4 +1,4 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -50,19 +50,9 @@ class PortraitNet(nn.Layer):
self.init_weight()
def forward(self, x):
img = x[:, :3, :, :]
img_ori = x[:, 3:, :, :]
feat_list = self.backbone(img)
feat_list = self.backbone(x)
logits_list = self.head(feat_list)
feat_list = self.backbone(img_ori)
logits_ori_list = self.head(feat_list)
return [
logits_list[0], logits_ori_list[0], logits_list[1],
logits_ori_list[1]
]
return [logits_list]
def init_weight(self):
if self.pretrained is not None:

@ -0,0 +1,273 @@
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg import utils
from paddlers.models.ppseg.models import layers
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.utils import utils
@manager.MODELS.add_component
class PPLiteSeg(nn.Layer):
"""
The PP_LiteSeg implementation based on PaddlePaddle.
The original article refers to "Juncai Peng, Yi Liu, Shiyu Tang, Yuying Hao, Lutao Chu,
Guowei Chen, Zewu Wu, Zeyu Chen, Zhiliang Yu, Yuning Du, Qingqing Dang, Baohua Lai,
Qiwen Liu, Xiaoguang Hu, Dianhai Yu, Yanjun Ma. PP-LiteSeg: A Superior Real-Time Semantic
Segmentation Model. https://arxiv.org/abs/2204.02681".
Args:
num_classes (int): The number of target classes.
backbone(nn.Layer): Backbone network, such as stdc1net and resnet18. The backbone must
have a feat_channels attribute of length 5.
backbone_indices (List(int), optional): The values indicate the indices of output of backbone.
Default: [2, 3, 4].
arm_type (str, optional): The type of attention refinement module. Default: 'UAFM_SpAtten'.
cm_bin_sizes (List(int), optional): The bin size of context module. Default: [1,2,4].
cm_out_ch (int, optional): The output channel of the last context module. Default: 128.
arm_out_chs (List(int), optional): The out channels of each arm module. Default: [64, 96, 128].
seg_head_inter_chs (List(int), optional): The intermediate channels of segmentation head.
Default: [64, 64, 64].
resize_mode (str, optional): The resize mode for the upsampling operation in decoder.
Default: bilinear.
pretrained (str, optional): The path or url of pretrained model. Default: None.
"""
def __init__(self,
num_classes,
backbone,
backbone_indices=[2, 3, 4],
arm_type='UAFM_SpAtten',
cm_bin_sizes=[1, 2, 4],
cm_out_ch=128,
arm_out_chs=[64, 96, 128],
seg_head_inter_chs=[64, 64, 64],
resize_mode='bilinear',
pretrained=None):
super().__init__()
# backbone
assert hasattr(backbone, 'feat_channels'), \
"The backbone should have feat_channels."
assert len(backbone.feat_channels) >= len(backbone_indices), \
f"The length of input backbone_indices ({len(backbone_indices)}) should not be " \
f"greater than the length of feat_channels ({len(backbone.feat_channels)})."
assert len(backbone.feat_channels) > max(backbone_indices), \
f"The max value ({max(backbone_indices)}) of backbone_indices should be " \
f"less than the length of feat_channels ({len(backbone.feat_channels)})."
self.backbone = backbone
assert len(backbone_indices) > 1, "The length of backbone_indices " \
"should be greater than 1"
self.backbone_indices = backbone_indices # [..., x16_id, x32_id]
backbone_out_chs = [backbone.feat_channels[i] for i in backbone_indices]
# head
if len(arm_out_chs) == 1:
arm_out_chs = arm_out_chs * len(backbone_indices)
assert len(arm_out_chs) == len(backbone_indices), "The length of " \
"arm_out_chs and backbone_indices should be equal"
self.ppseg_head = PPLiteSegHead(backbone_out_chs, arm_out_chs,
cm_bin_sizes, cm_out_ch, arm_type,
resize_mode)
if len(seg_head_inter_chs) == 1:
seg_head_inter_chs = seg_head_inter_chs * len(backbone_indices)
assert len(seg_head_inter_chs) == len(backbone_indices), "The length of " \
"seg_head_inter_chs and backbone_indices should be equal"
self.seg_heads = nn.LayerList() # [..., head_16, head32]
for in_ch, mid_ch in zip(arm_out_chs, seg_head_inter_chs):
self.seg_heads.append(SegHead(in_ch, mid_ch, num_classes))
# pretrained
self.pretrained = pretrained
self.init_weight()
def forward(self, x):
x_hw = paddle.shape(x)[2:]
feats_backbone = self.backbone(x) # [x2, x4, x8, x16, x32]
assert len(feats_backbone) >= len(self.backbone_indices), \
f"The number of backbone feats ({len(feats_backbone)}) should be greater than or " \
f"equal to the number of backbone_indices ({len(self.backbone_indices)})"
feats_selected = [feats_backbone[i] for i in self.backbone_indices]
feats_head = self.ppseg_head(feats_selected) # [..., x8, x16, x32]
if self.training:
logit_list = []
for x, seg_head in zip(feats_head, self.seg_heads):
x = seg_head(x)
logit_list.append(x)
logit_list = [
F.interpolate(
x, x_hw, mode='bilinear', align_corners=False)
for x in logit_list
]
else:
x = self.seg_heads[0](feats_head[0])
x = F.interpolate(x, x_hw, mode='bilinear', align_corners=False)
logit_list = [x]
return logit_list
def init_weight(self):
if self.pretrained is not None:
utils.load_entire_model(self, self.pretrained)
class PPLiteSegHead(nn.Layer):
"""
The head of PPLiteSeg.
Args:
backbone_out_chs (List(int)): The channels of the output tensors in the backbone.
arm_out_chs (List(int)): The out channels of each arm module.
cm_bin_sizes (List(int)): The bin size of context module.
cm_out_ch (int): The output channel of the last context module.
arm_type (str): The type of attention refinement module.
resize_mode (str): The resize mode for the upsampling operation in decoder.
"""
def __init__(self, backbone_out_chs, arm_out_chs, cm_bin_sizes, cm_out_ch,
arm_type, resize_mode):
super().__init__()
self.cm = PPContextModule(backbone_out_chs[-1], cm_out_ch, cm_out_ch,
cm_bin_sizes)
assert hasattr(layers, arm_type), \
"Unsupported arm_type ({})".format(arm_type)
arm_class = getattr(layers, arm_type)
self.arm_list = nn.LayerList() # [..., arm8, arm16, arm32]
for i in range(len(backbone_out_chs)):
low_chs = backbone_out_chs[i]
high_ch = cm_out_ch if i == len(
backbone_out_chs) - 1 else arm_out_chs[i + 1]
out_ch = arm_out_chs[i]
arm = arm_class(
low_chs, high_ch, out_ch, ksize=3, resize_mode=resize_mode)
self.arm_list.append(arm)
def forward(self, in_feat_list):
"""
Args:
in_feat_list (List(Tensor)): Such as [x2, x4, x8, x16, x32].
x2, x4 and x8 are optional.
Returns:
out_feat_list (List(Tensor)): Such as [x2, x4, x8, x16, x32].
x2, x4 and x8 are optional.
The length of in_feat_list and out_feat_list are the same.
"""
high_feat = self.cm(in_feat_list[-1])
out_feat_list = []
for i in reversed(range(len(in_feat_list))):
low_feat = in_feat_list[i]
arm = self.arm_list[i]
high_feat = arm(low_feat, high_feat)
out_feat_list.insert(0, high_feat)
return out_feat_list
class PPContextModule(nn.Layer):
"""
Simple Context module.
Args:
in_channels (int): The number of input channels to pyramid pooling module.
inter_channels (int): The number of inter channels to pyramid pooling module.
out_channels (int): The number of output channels after pyramid pooling module.
bin_sizes (tuple): The output sizes of the pooled feature maps, e.g. (1, 3).
align_corners (bool): An argument of F.interpolate. It should be set to False
when the output size of feature is even, e.g. 1024x512, otherwise it is True, e.g. 769x769.
"""
def __init__(self,
in_channels,
inter_channels,
out_channels,
bin_sizes,
align_corners=False):
super().__init__()
self.stages = nn.LayerList([
self._make_stage(in_channels, inter_channels, size)
for size in bin_sizes
])
self.conv_out = layers.ConvBNReLU(
in_channels=inter_channels,
out_channels=out_channels,
kernel_size=3,
padding=1)
self.align_corners = align_corners
def _make_stage(self, in_channels, out_channels, size):
prior = nn.AdaptiveAvgPool2D(output_size=size)
conv = layers.ConvBNReLU(
in_channels=in_channels, out_channels=out_channels, kernel_size=1)
return nn.Sequential(prior, conv)
def forward(self, input):
out = None
input_shape = paddle.shape(input)[2:]
for stage in self.stages:
x = stage(input)
x = F.interpolate(
x,
input_shape,
mode='bilinear',
align_corners=self.align_corners)
if out is None:
out = x
else:
out += x
out = self.conv_out(out)
return out
class SegHead(nn.Layer):
def __init__(self, in_chan, mid_chan, n_classes):
super().__init__()
self.conv = layers.ConvBNReLU(
in_chan,
mid_chan,
kernel_size=3,
stride=1,
padding=1,
bias_attr=False)
self.conv_out = nn.Conv2D(
mid_chan, n_classes, kernel_size=1, bias_attr=False)
def forward(self, x):
x = self.conv(x)
x = self.conv_out(x)
return x
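As a side note, the PPContextModule above fuses multi-scale context by adaptively pooling the input to each bin size, projecting, upsampling back, and summing. A minimal standalone sketch of that idea (shapes and bin sizes hypothetical, not part of this diff):
import paddle
import paddle.nn.functional as F
x = paddle.rand([1, 8, 32, 32])  # NCHW feature map
out = None
for size in (1, 3):  # cm_bin_sizes
    pooled = F.adaptive_avg_pool2d(x, size)
    up = F.interpolate(pooled, x.shape[2:], mode='bilinear', align_corners=False)
    out = up if out is None else out + up  # element-wise fusion, as in forward()
print(out.shape)  # [1, 8, 32, 32]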

@ -27,13 +27,17 @@ __all__ = ['PPHumanSegLite']
class PPHumanSegLite(nn.Layer):
"A self-developed ultra lightweight model from paddlers.models.ppseg, is suitable for real-time scene segmentation on web or mobile terminals."
def __init__(self, num_classes, pretrained=None, align_corners=False):
def __init__(self,
num_classes,
in_channels=3,
pretrained=None,
align_corners=False):
super().__init__()
self.pretrained = pretrained
self.num_classes = num_classes
self.align_corners = align_corners
self.conv_bn0 = _ConvBNReLU(3, 36, 3, 2, 1)
self.conv_bn0 = _ConvBNReLU(in_channels, 36, 3, 2, 1)
self.conv_bn1 = _ConvBNReLU(36, 18, 1, 1, 0)
self.block1 = nn.Sequential(

@ -127,51 +127,3 @@ class SegFormer(nn.Layer):
mode='bilinear',
align_corners=self.align_corners)
]
@manager.MODELS.add_component
def SegFormer_B0(**kwargs):
return SegFormer(
backbone=manager.BACKBONES['MixVisionTransformer_B0'](),
embedding_dim=256,
**kwargs)
@manager.MODELS.add_component
def SegFormer_B1(**kwargs):
return SegFormer(
backbone=manager.BACKBONES['MixVisionTransformer_B1'](),
embedding_dim=256,
**kwargs)
@manager.MODELS.add_component
def SegFormer_B2(**kwargs):
return SegFormer(
backbone=manager.BACKBONES['MixVisionTransformer_B2'](),
embedding_dim=768,
**kwargs)
@manager.MODELS.add_component
def SegFormer_B3(**kwargs):
return SegFormer(
backbone=manager.BACKBONES['MixVisionTransformer_B3'](),
embedding_dim=768,
**kwargs)
@manager.MODELS.add_component
def SegFormer_B4(**kwargs):
return SegFormer(
backbone=manager.BACKBONES['MixVisionTransformer_B4'](),
embedding_dim=768,
**kwargs)
@manager.MODELS.add_component
def SegFormer_B5(**kwargs):
return SegFormer(
backbone=manager.BACKBONES['MixVisionTransformer_B5'](),
embedding_dim=768,
**kwargs)

@ -32,14 +32,14 @@ class SegNet(nn.Layer):
num_classes (int): The unique number of target classes.
"""
def __init__(self, num_classes, pretrained=None):
def __init__(self, num_classes, in_channels=3, pretrained=None):
super().__init__()
# Encoder Module
self.enco1 = nn.Sequential(
layers.ConvBNReLU(
3, 64, 3, padding=1),
in_channels, 64, 3, padding=1),
layers.ConvBNReLU(
64, 64, 3, padding=1))

@ -0,0 +1,449 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Refer to the origin implementation: https://github.com/clovaai/c3_sinet/blob/master/models/SINet.py
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg.models import layers
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.utils import utils
CFG = [[[3, 1], [5, 1]], [[3, 1], [3, 1]], [[3, 1], [5, 1]], [[3, 1], [3, 1]],
[[5, 1], [3, 2]], [[5, 2], [3, 4]], [[3, 1], [3, 1]], [[5, 1], [5, 1]],
[[3, 2], [3, 4]], [[3, 1], [5, 2]]]
@manager.MODELS.add_component
class SINet(nn.Layer):
"""
The SINet implementation based on PaddlePaddle.
The original article refers to
Hyojin Park, Lars Lowe Sjösund, YoungJoon Yoo, Nicolas Monet, Jihwan Bang, Nojun Kwak
"SINet: Extreme Lightweight Portrait Segmentation Networks with Spatial Squeeze Modules
and Information Blocking Decoder", (https://arxiv.org/abs/1911.09099).
Args:
num_classes (int): The unique number of target classes.
config (List, optional): The config for SINet. Default: CFG.
stage2_blocks (int, optional): The number of blocks in stage2. Default: 2.
stage3_blocks (int, optional): The number of blocks in stage3. Default: 8.
in_channels (int, optional): The channels of input image. Default: 3.
pretrained (str, optional): The path or url of pretrained model. Default: None.
"""
def __init__(self,
num_classes=2,
config=CFG,
stage2_blocks=2,
stage3_blocks=8,
in_channels=3,
pretrained=None):
super().__init__()
dim1 = 16
dim2 = 48
dim3 = 96
self.encoder = SINetEncoder(config, in_channels, num_classes,
stage2_blocks, stage3_blocks)
self.up = nn.UpsamplingBilinear2D(scale_factor=2)
self.bn_3 = nn.BatchNorm(num_classes)
self.level2_C = CBR(dim2, num_classes, 1, 1)
self.bn_2 = nn.BatchNorm(num_classes)
self.classifier = nn.Sequential(
nn.UpsamplingBilinear2D(scale_factor=2),
nn.Conv2D(
num_classes, num_classes, 3, 1, 1, bias_attr=False))
self.pretrained = pretrained
self.init_weight()
def init_weight(self):
if self.pretrained is not None:
utils.load_entire_model(self, self.pretrained)
def forward(self, input):
output1 = self.encoder.level1(input) # x2
output2_0 = self.encoder.level2_0(output1) # x4
for i, layer in enumerate(self.encoder.level2):
if i == 0:
output2 = layer(output2_0)
else:
output2 = layer(output2)
output2_cat = self.encoder.BR2(paddle.concat([output2_0, output2], 1))
output3_0 = self.encoder.level3_0(output2_cat) # x8
for i, layer in enumerate(self.encoder.level3):
if i == 0:
output3 = layer(output3_0)
else:
output3 = layer(output3)
output3_cat = self.encoder.BR3(paddle.concat([output3_0, output3], 1))
enc_final = self.encoder.classifier(output3_cat) # x8
dec_stage1 = self.bn_3(self.up(enc_final)) # x4
stage1_confidence = paddle.max(F.softmax(dec_stage1, axis=1), axis=1)
stage1_gate = (1 - stage1_confidence).unsqueeze(1)
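# Information blocking decoder: the gate (1 - confidence) passes fine low-level features only where stage 1 is uncertain.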
dec_stage2_0 = self.level2_C(output2) # x4
dec_stage2 = self.bn_2(
self.up(dec_stage2_0 * stage1_gate + dec_stage1)) # x2
out = self.classifier(dec_stage2) # x
return [out]
def channel_shuffle(x, groups):
x_shape = paddle.shape(x)
batch_size, height, width = x_shape[0], x_shape[2], x_shape[3]
num_channels = x.shape[1]
channels_per_group = num_channels // groups
# reshape
x = paddle.reshape(
x=x, shape=[batch_size, groups, channels_per_group, height, width])
# transpose
x = paddle.transpose(x=x, perm=[0, 2, 1, 3, 4])
# flatten
x = paddle.reshape(x=x, shape=[batch_size, num_channels, height, width])
return x
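channel_shuffle above interleaves channels across convolution groups so that grouped convolutions can exchange information. A tiny sanity check (a sketch, not part of the diff):
import paddle
x = paddle.arange(6, dtype='float32').reshape([1, 6, 1, 1])  # channels 0..5
y = channel_shuffle(x, groups=2)
print(y.flatten().tolist())  # [0.0, 3.0, 1.0, 4.0, 2.0, 5.0]: the two groups are interleaved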
class CBR(nn.Layer):
'''
This class defines the convolution layer with batch normalization and PReLU activation
'''
def __init__(self, nIn, nOut, kSize, stride=1):
super().__init__()
padding = int((kSize - 1) / 2)
self.conv = nn.Conv2D(
nIn,
nOut, (kSize, kSize),
stride=stride,
padding=(padding, padding),
bias_attr=False)
self.bn = nn.BatchNorm(nOut)
self.act = nn.PReLU(nOut)
def forward(self, input):
output = self.conv(input)
output = self.bn(output)
output = self.act(output)
return output
class SeparableCBR(nn.Layer):
'''
This class defines the depthwise separable convolution layer with batch normalization and PReLU activation
'''
def __init__(self, nIn, nOut, kSize, stride=1):
super().__init__()
padding = int((kSize - 1) / 2)
self.conv = nn.Sequential(
nn.Conv2D(
nIn,
nIn, (kSize, kSize),
stride=stride,
padding=(padding, padding),
groups=nIn,
bias_attr=False),
nn.Conv2D(
nIn, nOut, kernel_size=1, stride=1, bias_attr=False), )
self.bn = nn.BatchNorm(nOut)
self.act = nn.PReLU(nOut)
def forward(self, input):
output = self.conv(input)
output = self.bn(output)
output = self.act(output)
return output
class SqueezeBlock(nn.Layer):
def __init__(self, exp_size, divide=4.0):
super(SqueezeBlock, self).__init__()
if divide > 1:
self.dense = nn.Sequential(
nn.Linear(exp_size, int(exp_size / divide)),
nn.PReLU(int(exp_size / divide)),
nn.Linear(int(exp_size / divide), exp_size),
nn.PReLU(exp_size), )
else:
self.dense = nn.Sequential(
nn.Linear(exp_size, exp_size), nn.PReLU(exp_size))
def forward(self, x):
alpha = F.adaptive_avg_pool2d(x, [1, 1])
alpha = paddle.squeeze(alpha, axis=[2, 3])
alpha = self.dense(alpha)
alpha = paddle.unsqueeze(alpha, axis=[2, 3])
out = x * alpha
return out
class SESeparableCBR(nn.Layer):
'''
This class defines the squeeze-and-excitation depthwise separable convolution layer with batch normalization and PReLU activation
'''
def __init__(self, nIn, nOut, kSize, stride=1, divide=2.0):
super().__init__()
padding = int((kSize - 1) / 2)
self.conv = nn.Sequential(
nn.Conv2D(
nIn,
nIn, (kSize, kSize),
stride=stride,
padding=(padding, padding),
groups=nIn,
bias_attr=False),
SqueezeBlock(
nIn, divide=divide),
nn.Conv2D(
nIn, nOut, kernel_size=1, stride=1, bias_attr=False), )
self.bn = nn.BatchNorm(nOut)
self.act = nn.PReLU(nOut)
def forward(self, input):
output = self.conv(input)
output = self.bn(output)
output = self.act(output)
return output
class BR(nn.Layer):
'''
This class groups the batch normalization and PReLU activation
'''
def __init__(self, nOut):
super().__init__()
self.bn = nn.BatchNorm(nOut)
self.act = nn.PReLU(nOut)
def forward(self, input):
output = self.bn(input)
output = self.act(output)
return output
class CB(nn.Layer):
'''
This class groups the convolution and batch normalization
'''
def __init__(self, nIn, nOut, kSize, stride=1):
super().__init__()
padding = int((kSize - 1) / 2)
self.conv = nn.Conv2D(
nIn,
nOut, (kSize, kSize),
stride=stride,
padding=(padding, padding),
bias_attr=False)
self.bn = nn.BatchNorm(nOut)
def forward(self, input):
output = self.conv(input)
output = self.bn(output)
return output
class C(nn.Layer):
'''
This class is for a convolutional layer.
'''
def __init__(self, nIn, nOut, kSize, stride=1, group=1):
super().__init__()
padding = int((kSize - 1) / 2)
self.conv = nn.Conv2D(
nIn,
nOut, (kSize, kSize),
stride=stride,
padding=(padding, padding),
bias_attr=False,
groups=group)
def forward(self, input):
output = self.conv(input)
return output
class S2block(nn.Layer):
'''
This class defines the S2 block: optional average pooling followed by a depthwise convolution and a pointwise projection.
'''
def __init__(self, nIn, nOut, kSize, avgsize):
super().__init__()
self.resolution_down = False
if avgsize > 1:
self.resolution_down = True
self.down_res = nn.AvgPool2D(avgsize, avgsize)
self.up_res = nn.UpsamplingBilinear2D(scale_factor=avgsize)
self.avgsize = avgsize
padding = int((kSize - 1) / 2)
self.conv = nn.Sequential(
nn.Conv2D(
nIn,
nIn,
kernel_size=(kSize, kSize),
stride=1,
padding=(padding, padding),
groups=nIn,
bias_attr=False),
nn.BatchNorm(nIn))
self.act_conv1x1 = nn.Sequential(
nn.PReLU(nIn),
nn.Conv2D(
nIn, nOut, kernel_size=1, stride=1, bias_attr=False), )
self.bn = nn.BatchNorm(nOut)
def forward(self, input):
if self.resolution_down:
input = self.down_res(input)
output = self.conv(input)
output = self.act_conv1x1(output)
if self.resolution_down:
output = self.up_res(output)
return self.bn(output)
class S2module(nn.Layer):
'''
This class defines the ESP block, which is based on the following principle:
Reduce ---> Split ---> Transform ---> Merge
'''
def __init__(self, nIn, nOut, add=True, config=[[3, 1], [5, 1]]):
super().__init__()
group_n = len(config)
assert group_n == 2
n = int(nOut / group_n)
n1 = nOut - group_n * n
self.c1 = C(nIn, n, 1, 1, group=group_n)
# self.c1 = C(nIn, n, 1, 1)
for i in range(group_n):
if i == 0:
self.layer_0 = S2block(
n, n + n1, kSize=config[i][0], avgsize=config[i][1])
else:
self.layer_1 = S2block(
n, n, kSize=config[i][0], avgsize=config[i][1])
self.BR = BR(nOut)
self.add = add
self.group_n = group_n
def forward(self, input):
output1 = self.c1(input)
output1 = channel_shuffle(output1, self.group_n)
res_0 = self.layer_0(output1)
res_1 = self.layer_1(output1)
combine = paddle.concat([res_0, res_1], 1)
if self.add:
combine = input + combine
output = self.BR(combine)
return output
class SINetEncoder(nn.Layer):
def __init__(self,
config,
in_channels=3,
num_classes=2,
stage2_blocks=2,
stage3_blocks=8):
super().__init__()
assert stage2_blocks == 2
dim1 = 16
dim2 = 48
dim3 = 96
self.level1 = CBR(in_channels, 12, 3, 2)
self.level2_0 = SESeparableCBR(12, dim1, 3, 2, divide=1)
self.level2 = nn.LayerList()
for i in range(0, stage2_blocks):
if i == 0:
self.level2.append(
S2module(
dim1, dim2, config=config[i], add=False))
else:
self.level2.append(S2module(dim2, dim2, config=config[i]))
self.BR2 = BR(dim2 + dim1)
self.level3_0 = SESeparableCBR(dim2 + dim1, dim2, 3, 2, divide=2)
self.level3 = nn.LayerList()
for i in range(0, stage3_blocks):
if i == 0:
self.level3.append(
S2module(
dim2, dim3, config=config[2 + i], add=False))
else:
self.level3.append(S2module(dim3, dim3, config=config[2 + i]))
self.BR3 = BR(dim3 + dim2)
self.classifier = C(dim3 + dim2, num_classes, 1, 1)
def forward(self, input):
output1 = self.level1(input) # x2
output2_0 = self.level2_0(output1) # x4
for i, layer in enumerate(self.level2):
if i == 0:
output2 = layer(output2_0)
else:
output2 = layer(output2)
output3_0 = self.level3_0(
self.BR2(paddle.concat([output2_0, output2], 1))) # x8
for i, layer in enumerate(self.level3):
if i == 0:
output3 = layer(output3_0)
else:
output3 = layer(output3)
output3_cat = self.BR3(paddle.concat([output3_0, output3], 1))
classifier = self.classifier(output3_cat)
return classifier
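A quick smoke test for the model defined above (input size hypothetical; it only needs to be divisible by 8 so the three stride-2 stages and the decoder upsampling line up):
import paddle
model = SINet(num_classes=2)
out = model(paddle.rand([1, 3, 224, 224]))[0]
print(out.shape)  # [1, 2, 224, 224]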

@ -1,4 +1,4 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.

@ -0,0 +1,155 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import warnings
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.models import layers
from paddlers.models.ppseg.utils import utils
from paddlers.models.ppseg.models.backbones.top_transformer import ConvBNAct
@manager.MODELS.add_component
class TopFormer(nn.Layer):
"""
The Token Pyramid Transformer(TopFormer) implementation based on PaddlePaddle.
The original article refers to
Zhang, Wenqiang, Zilong Huang, Guozhong Luo, Tao Chen, Xinggang Wang, Wenyu Liu, Gang Yu,
and Chunhua Shen. "TopFormer: Token Pyramid Transformer for Mobile Semantic Segmentation."
In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition,
pp. 12083-12093. 2022.
This model refers to https://github.com/hustvl/TopFormer.
Args:
num_classes (int): The unique number of target classes.
backbone (nn.Layer): Backbone network.
head_use_dw (bool, optional): Whether the head uses depthwise convolutions. Default: False.
align_corners (bool, optional): Set the align_corners in resizing. Default: False.
pretrained (str, optional): The path or url of pretrained model. Default: None.
"""
def __init__(self,
num_classes,
backbone,
head_use_dw=False,
align_corners=False,
pretrained=None):
super().__init__()
self.backbone = backbone
head_in_channels = [
i for i in backbone.injection_out_channels if i is not None
]
self.decode_head = TopFormerHead(
num_classes=num_classes,
in_channels=head_in_channels,
use_dw=head_use_dw,
align_corners=align_corners)
self.align_corners = align_corners
self.pretrained = pretrained
self.init_weight()
def init_weight(self):
if self.pretrained is not None:
utils.load_entire_model(self, self.pretrained)
def forward(self, x):
x_hw = paddle.shape(x)[2:]
x = self.backbone(x) # len=3, 1/8,1/16,1/32
x = self.decode_head(x)
x = F.interpolate(
x, x_hw, mode='bilinear', align_corners=self.align_corners)
return [x]
class TopFormerHead(nn.Layer):
def __init__(self,
num_classes,
in_channels,
in_index=[0, 1, 2],
in_transform='multiple_select',
use_dw=False,
dropout_ratio=0.1,
align_corners=False):
super().__init__()
self.in_index = in_index
self.in_transform = in_transform
self.align_corners = align_corners
self._init_inputs(in_channels, in_index, in_transform)
self.linear_fuse = ConvBNAct(
in_channels=self.last_channels,
out_channels=self.last_channels,
kernel_size=1,
stride=1,
groups=self.last_channels if use_dw else 1,
act=nn.ReLU)
self.dropout = nn.Dropout2D(dropout_ratio)
self.conv_seg = nn.Conv2D(
self.last_channels, num_classes, kernel_size=1)
def _init_inputs(self, in_channels, in_index, in_transform):
assert in_transform in [None, 'resize_concat', 'multiple_select']
if in_transform is not None:
assert len(in_channels) == len(in_index)
if in_transform == 'resize_concat':
self.last_channels = sum(in_channels)
else:
self.last_channels = in_channels[0]
else:
assert isinstance(in_channels, int)
assert isinstance(in_index, int)
self.last_channels = in_channels
def _transform_inputs(self, inputs):
if self.in_transform == 'resize_concat':
inputs = [inputs[i] for i in self.in_index]
inputs = [
F.interpolate(
input_data=x,
size=paddle.shape(inputs[0])[2:],
mode='bilinear',
align_corners=self.align_corners) for x in inputs
]
inputs = paddle.concat(inputs, axis=1)
elif self.in_transform == 'multiple_select':
inputs_tmp = [inputs[i] for i in self.in_index]
inputs = inputs_tmp[0]
for x in inputs_tmp[1:]:
x = F.interpolate(
x,
size=paddle.shape(inputs)[2:],
mode='bilinear',
align_corners=self.align_corners)
inputs += x
else:
inputs = inputs[self.in_index]
return inputs
def forward(self, x):
x = self._transform_inputs(x)
x = self.linear_fuse(x)
x = self.dropout(x)
x = self.conv_seg(x)
return x
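For reference, a sketch of what the 'multiple_select' branch of _transform_inputs computes (shapes hypothetical): every selected feature is resized to the first one's resolution and the results are summed before the 1x1 fuse convolution.
import paddle
import paddle.nn.functional as F
feats = [paddle.rand([1, 4, 28, 28]), paddle.rand([1, 4, 14, 14]), paddle.rand([1, 4, 7, 7])]
fused = feats[0]
for f in feats[1:]:
    fused = fused + F.interpolate(f, size=feats[0].shape[2:], mode='bilinear')
print(fused.shape)  # [1, 4, 28, 28]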

@ -34,15 +34,15 @@ class U2Net(nn.Layer):
Args:
num_classes (int): The unique number of target classes.
in_ch (int, optional): Input channels. Default: 3.
in_channels (int, optional): Input channels. Default: 3.
pretrained (str, optional): The path or url of pretrained model for fine tuning. Default: None.
"""
def __init__(self, num_classes, in_ch=3, pretrained=None):
def __init__(self, num_classes, in_channels=3, pretrained=None):
super(U2Net, self).__init__()
self.stage1 = RSU7(in_ch, 32, 64)
self.stage1 = RSU7(in_channels, 32, 64)
self.pool12 = nn.MaxPool2D(2, stride=2, ceil_mode=True)
self.stage2 = RSU6(64, 32, 128)
@ -153,10 +153,10 @@ class U2Net(nn.Layer):
class U2Netp(nn.Layer):
"""Please Refer to U2Net above."""
def __init__(self, num_classes, in_ch=3, pretrained=None):
def __init__(self, num_classes, in_channels=3, pretrained=None):
super(U2Netp, self).__init__()
self.stage1 = RSU7(in_ch, 16, 64)
self.stage1 = RSU7(in_channels, 16, 64)
self.pool12 = nn.MaxPool2D(2, stride=2, ceil_mode=True)
self.stage2 = RSU6(64, 16, 64)

@ -36,18 +36,19 @@ class UNet(nn.Layer):
is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False.
use_deconv (bool, optional): A bool value indicates whether using deconvolution in upsampling.
If False, use resize_bilinear. Default: False.
in_channels (int, optional): The channels of input image. Default: 3.
pretrained (str, optional): The path or url of pretrained model for fine tuning. Default: None.
"""
def __init__(self,
num_classes,
input_channel=3,
align_corners=False,
use_deconv=False,
in_channels=3,
pretrained=None):
super().__init__()
self.encode = Encoder(input_channel)
self.encode = Encoder(in_channels)
self.decode = Decoder(align_corners, use_deconv=use_deconv)
self.cls = self.conv = nn.Conv2D(
in_channels=64,
@ -73,12 +74,11 @@ class UNet(nn.Layer):
class Encoder(nn.Layer):
def __init__(self, input_channel=3):
def __init__(self, in_channels=3):
super().__init__()
self.double_conv = nn.Sequential(
layers.ConvBNReLU(input_channel, 64, 3),
layers.ConvBNReLU(64, 64, 3))
layers.ConvBNReLU(in_channels, 64, 3), layers.ConvBNReLU(64, 64, 3))
down_channels = [[64, 128], [128, 256], [256, 512], [512, 512]]
self.down_sample_list = nn.LayerList([
self.down_sampling(channel[0], channel[1])

@ -31,8 +31,8 @@ class UNetPlusPlus(nn.Layer):
(https://arxiv.org/abs/1807.10165).
Args:
in_channels (int): The channel number of input image.
num_classes (int): The unique number of target classes.
in_channels (int, optional): The channel number of input image. Default: 3.
use_deconv (bool, optional): A bool value indicates whether using deconvolution in upsampling.
If False, use resize_bilinear. Default: False.
align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature
@ -42,8 +42,8 @@ class UNetPlusPlus(nn.Layer):
"""
def __init__(self,
in_channels,
num_classes,
in_channels=3,
use_deconv=False,
align_corners=False,
pretrained=None,

@ -0,0 +1,173 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddlers.models.ppseg import utils
from paddlers.models.ppseg.cvlibs import manager
from paddlers.models.ppseg.models import layers
@manager.MODELS.add_component
class UPerNet(nn.Layer):
"""
The UPerNet implementation based on PaddlePaddle.
The original article refers to
Tete Xiao, et, al. "Unified Perceptual Parsing for Scene Understanding"
(https://arxiv.org/abs/1807.10221).
Args:
num_classes (int): The unique number of target classes.
backbone (Paddle.nn.Layer): Backbone network, currently supporting ResNet50/101.
backbone_indices (tuple): Four values indicating the indices of the backbone outputs.
channels (int): The channels of intermediate layers. Default: 512.
enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss. Default: False.
align_corners (bool, optional): An argument of F.interpolate. It should be set to False when the feature size is even,
e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False.
dropout_prob (float): Dropout ratio for upernet head. Default: 0.1.
pretrained (str, optional): The path or url of pretrained model. Default: None.
"""
def __init__(self,
num_classes,
backbone,
backbone_indices,
channels=512,
enable_auxiliary_loss=False,
align_corners=False,
dropout_prob=0.1,
pretrained=None):
super().__init__()
self.backbone = backbone
self.backbone_indices = backbone_indices
self.in_channels = [
self.backbone.feat_channels[i] for i in backbone_indices
]
self.align_corners = align_corners
self.pretrained = pretrained
self.enable_auxiliary_loss = enable_auxiliary_loss
fpn_inplanes = [
self.backbone.feat_channels[i] for i in backbone_indices
]
self.head = UPerNetHead(
num_classes=num_classes,
fpn_inplanes=fpn_inplanes,
dropout_prob=dropout_prob,
channels=channels,
enable_auxiliary_loss=self.enable_auxiliary_loss)
self.init_weight()
def forward(self, x):
feats = self.backbone(x)
feats = [feats[i] for i in self.backbone_indices]
logit_list = self.head(feats)
logit_list = [
F.interpolate(
logit,
paddle.shape(x)[2:],
mode='bilinear',
align_corners=self.align_corners) for logit in logit_list
]
return logit_list
def init_weight(self):
if self.pretrained is not None:
utils.load_entire_model(self, self.pretrained)
class UPerNetHead(nn.Layer):
def __init__(self,
num_classes,
fpn_inplanes,
channels,
dropout_prob=0.1,
enable_auxiliary_loss=False,
align_corners=True):
super(UPerNetHead, self).__init__()
self.align_corners = align_corners
self.ppm = layers.PPModule(
in_channels=fpn_inplanes[-1],
out_channels=channels,
bin_sizes=(1, 2, 3, 6),
dim_reduction=True,
align_corners=True)
self.enable_auxiliary_loss = enable_auxiliary_loss
self.lateral_convs = nn.LayerList()
self.fpn_convs = nn.LayerList()
for fpn_inplane in fpn_inplanes[:-1]:
self.lateral_convs.append(
layers.ConvBNReLU(fpn_inplane, channels, 1))
self.fpn_convs.append(
layers.ConvBNReLU(
channels, channels, 3, bias_attr=False))
if self.enable_auxiliary_loss:
self.aux_head = layers.AuxLayer(
fpn_inplanes[2],
fpn_inplanes[2],
num_classes,
dropout_prob=dropout_prob)
self.fpn_bottleneck = layers.ConvBNReLU(
len(fpn_inplanes) * channels, channels, 3, padding=1)
self.conv_last = nn.Sequential(
layers.ConvBNReLU(
len(fpn_inplanes) * channels, channels, 3, bias_attr=False),
nn.Conv2D(
channels, num_classes, kernel_size=1))
self.conv_seg = nn.Conv2D(channels, num_classes, kernel_size=1)
def forward(self, inputs):
laterals = []
for i, lateral_conv in enumerate(self.lateral_convs):
laterals.append(lateral_conv(inputs[i]))
laterals.append(self.ppm(inputs[-1]))
fpn_levels = len(laterals)
for i in range(fpn_levels - 1, 0, -1):
prev_shape = paddle.shape(laterals[i - 1])
laterals[i - 1] = laterals[i - 1] + F.interpolate(
laterals[i],
size=prev_shape[2:],
mode='bilinear',
align_corners=self.align_corners)
fpn_outs = []
for i in range(fpn_levels - 1):
fpn_outs.append(self.fpn_convs[i](laterals[i]))
fpn_outs.append(laterals[-1])
for i in range(fpn_levels - 1, 0, -1):
fpn_outs[i] = F.interpolate(
fpn_outs[i],
size=paddle.shape(fpn_outs[0])[2:],
mode='bilinear',
align_corners=self.align_corners)
fuse_out = paddle.concat(fpn_outs, axis=1)
x = self.fpn_bottleneck(fuse_out)
x = self.conv_seg(x)
logits_list = [x]
if self.enable_auxiliary_loss:
aux_out = self.aux_head(inputs[2])
logits_list.append(aux_out)
return logits_list
else:
return logits_list

@ -15,7 +15,14 @@
import cv2
import numpy as np
from PIL import Image, ImageEnhance
from scipy.ndimage.morphology import distance_transform_edt
from scipy.ndimage import distance_transform_edt
def rescale_size(img_size, target_size):
scale = min(
max(target_size) / max(img_size), min(target_size) / min(img_size))
rescaled_size = [round(i * scale) for i in img_size]
return rescaled_size, scale
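rescale_size picks a single scale so that the longer side fits max(target_size) and the shorter side fits min(target_size), whichever is tighter. For example (values hypothetical):
print(rescale_size((500, 600), (800, 1200)))  # ([800, 960], 1.6): limited by 800/500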
def normalize(im, mean, std):

File diff suppressed because it is too large

@ -19,5 +19,4 @@ from .env import seg_env, get_sys_env
from .utils import *
from .timer import TimeAverager, calculate_eta
from . import visualize
from .config_check import config_check
from .ema import EMA

@ -1,59 +0,0 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
def config_check(cfg, train_dataset=None, val_dataset=None):
"""
To check config
Args:
cfg (paddleseg.cvlibs.Config): An object of paddleseg.cvlibs.Config.
train_dataset (paddle.io.Dataset): Used to read and process training datasets.
val_dataset (paddle.io.Dataset, optional): Used to read and process validation datasets.
"""
num_classes_check(cfg, train_dataset, val_dataset)
def num_classes_check(cfg, train_dataset, val_dataset):
""""
Check that the num_classes in model, train_dataset and val_dataset is consistent.
"""
num_classes_set = set()
if train_dataset and hasattr(train_dataset, 'num_classes'):
num_classes_set.add(train_dataset.num_classes)
if val_dataset and hasattr(val_dataset, 'num_classes'):
num_classes_set.add(val_dataset.num_classes)
if cfg.dic.get('model', None) and cfg.dic['model'].get('num_classes', None):
num_classes_set.add(cfg.dic['model'].get('num_classes'))
if (not cfg.train_dataset) and (not cfg.val_dataset):
raise ValueError(
'One of `train_dataset` or `val_dataset` should be given, but both are None.'
)
if len(num_classes_set) == 0:
raise ValueError(
'`num_classes` is not found. Please set it in model, train_dataset or val_dataset'
)
elif len(num_classes_set) > 1:
raise ValueError(
'`num_classes` is not consistent: {}. Please set it consistently in model or train_dataset or val_dataset'
.format(num_classes_set))
else:
num_classes = num_classes_set.pop()
if train_dataset:
train_dataset.num_classes = num_classes
if val_dataset:
val_dataset.num_classes = num_classes

@ -1,4 +1,4 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.

@ -1,4 +1,4 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.

@ -1,4 +1,4 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -20,6 +20,7 @@ import sys
import cv2
import paddle
import paddlers.models.ppseg as ppseg
IS_WINDOWS = sys.platform == 'win32'
@ -57,8 +58,12 @@ def _get_nvcc_info(cuda_home):
if cuda_home is not None and os.path.isdir(cuda_home):
try:
nvcc = os.path.join(cuda_home, 'bin/nvcc')
nvcc = subprocess.check_output(
"{} -V".format(nvcc), shell=True).decode()
if not IS_WINDOWS:
nvcc = subprocess.check_output(
"{} -V".format(nvcc), shell=True).decode()
else:
nvcc = subprocess.check_output(
"\"{}\" -V".format(nvcc), shell=True).decode()
nvcc = nvcc.strip().split('\n')[-1]
except subprocess.SubprocessError:
nvcc = "Not Available"
@ -116,6 +121,7 @@ def get_sys_env():
except:
pass
env_info['PaddleSeg'] = ppseg.__version__
env_info['PaddlePaddle'] = paddle.__version__
env_info['OpenCV'] = cv2.__version__

@ -135,37 +135,6 @@ def mean_iou(intersect_area, pred_area, label_area):
return np.array(class_iou), miou
def fwiou(intersect_area, pred_area, label_area):
"""
Calculate frequency weighted IoU (FWIoU).
Args:
intersect_area (Tensor): The intersection area of prediction and ground truth on all classes.
pred_area (Tensor): The prediction area on all classes.
label_area (Tensor): The ground truth area on all classes.
Returns:
np.ndarray: iou on all classes.
float: Frequency Weighted iou of all classes.
np.ndarray: Frequency of all classes.
"""
intersect_area = intersect_area.numpy()
pred_area = pred_area.numpy()
label_area = label_area.numpy()
union = pred_area + label_area - intersect_area
class_iou = []
for i in range(len(intersect_area)):
if union[i] == 0:
iou = 0
else:
iou = intersect_area[i] / union[i]
class_iou.append(iou)
fw = label_area / np.sum(label_area)
fwious = np.array(fw) * np.array(class_iou)
fwiou = np.sum(fwious)
return np.array(class_iou), fwiou, fw
def dice(intersect_area, pred_area, label_area):
"""
Calculate DICE.
@ -194,6 +163,7 @@ def dice(intersect_area, pred_area, label_area):
return np.array(class_dice), mdice
# This is a deprecated function, please use class_measurement function.
def accuracy(intersect_area, pred_area):
"""
Calculate accuracy
@ -219,6 +189,38 @@ def accuracy(intersect_area, pred_area):
return np.array(class_acc), macc
def class_measurement(intersect_area, pred_area, label_area):
"""
Calculate accuracy, class precision and class recall.
Args:
intersect_area (Tensor): The intersection area of prediction and ground truth on all classes.
pred_area (Tensor): The prediction area on all classes.
label_area (Tensor): The ground truth area on all classes.
Returns:
float: The mean accuracy.
np.ndarray: The precision of all classes.
np.ndarray: The recall of all classes.
"""
intersect_area = intersect_area.numpy()
pred_area = pred_area.numpy()
label_area = label_area.numpy()
mean_acc = np.sum(intersect_area) / np.sum(pred_area)
class_precision = []
class_recall = []
for i in range(len(intersect_area)):
precision = 0 if pred_area[i] == 0 \
else intersect_area[i] / pred_area[i]
recall = 0 if label_area[i] == 0 \
else intersect_area[i] / label_area[i]
class_precision.append(precision)
class_recall.append(recall)
return mean_acc, np.array(class_precision), np.array(class_recall)
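A small worked example for class_measurement (numbers hypothetical):
import paddle
inter = paddle.to_tensor([8., 5.])
pred = paddle.to_tensor([10., 6.])
label = paddle.to_tensor([9., 7.])
acc, precision, recall = class_measurement(inter, pred, label)
# acc = 13/16 = 0.8125, precision = [0.8, 0.833], recall = [0.889, 0.714]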
def kappa(intersect_area, pred_area, label_area):
"""
Calculate kappa coefficient
@ -231,9 +233,9 @@ def kappa(intersect_area, pred_area, label_area):
Returns:
float: kappa coefficient.
"""
intersect_area = intersect_area.numpy()
pred_area = pred_area.numpy()
label_area = label_area.numpy()
intersect_area = intersect_area.numpy().astype(np.float64)
pred_area = pred_area.numpy().astype(np.float64)
label_area = label_area.numpy().astype(np.float64)
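# The float64 casts above keep the pred_area * label_area product below from overflowing or losing precision on large images.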
total_area = np.sum(label_area)
po = np.sum(intersect_area) / total_area
pe = np.sum(pred_area * label_area) / (total_area * total_area)

@ -1,4 +1,4 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.

@ -160,6 +160,8 @@ def get_image_list(image_path):
for f in files:
if '.ipynb_checkpoints' in root:
continue
if f.startswith('.'):
continue
if os.path.splitext(f)[-1] in valid_suffix:
image_list.append(os.path.join(root, f))
else:

@ -63,7 +63,7 @@ def get_pseudo_color_map(pred, color_map=None):
pred (numpy.ndarray): the original predicted image.
color_map (list, optional): the palette color map. Default: None,
use paddleseg's default color map.
Returns:
(numpy.ndarray): the pseudo color image.
"""
@ -103,3 +103,41 @@ def get_color_map_list(num_classes, custom_color=None):
if custom_color:
color_map[:len(custom_color)] = custom_color
return color_map
def paste_images(image_list):
"""
Paste all images into one image.
Args:
image_list (List or Tuple): The images to be pasted; their sizes must be the same.
Returns:
result_img (PIL.Image): The pasted image.
"""
assert isinstance(image_list,
(list, tuple)), "image_list should be a list or tuple"
assert len(
image_list) > 1, "The length of image_list should be greater than 1"
pil_img_list = []
for img in image_list:
if isinstance(img, str):
assert os.path.exists(img), "The image does not exist: {}".format(
img)
img = PILImage.open(img)
img = np.array(img)
elif isinstance(img, np.ndarray):
img = PILImage.fromarray(img)
pil_img_list.append(img)
sample_img = pil_img_list[0]
size = sample_img.size
for img in pil_img_list:
assert size == img.size, "The image sizes in image_list should be the same"
width, height = sample_img.size
result_img = PILImage.new(sample_img.mode,
(width * len(pil_img_list), height))
for i, img in enumerate(pil_img_list):
result_img.paste(img, box=(width * i, 0))
return result_img
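Hypothetical usage of paste_images, placing two same-sized predictions side by side:
combined = paste_images(['pred_a.png', 'pred_b.png'])  # paths are placeholders
combined.save('side_by_side.png')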

@ -43,42 +43,13 @@ class DiceLoss(nn.Layer):
return self.soft_dice_loss(y_pred.astype(paddle.float32), y_true)
class MultiClassDiceLoss(nn.Layer):
def __init__(
self,
weight,
batch=True,
ignore_index=-1,
do_softmax=False,
**kwargs, ):
super(MultiClassDiceLoss, self).__init__()
self.ignore_index = ignore_index
self.weight = weight
self.do_softmax = do_softmax
self.binary_diceloss = DiceLoss(batch)
def forward(self, y_pred, y_true):
if self.do_softmax:
y_pred = paddle.nn.functional.softmax(y_pred, axis=1)
y_true = F.one_hot(y_true.long(), y_pred.shape[1]).permute(0, 3, 1, 2)
total_loss = 0.0
tmp_i = 0.0
for i in range(y_pred.shape[1]):
if i != self.ignore_index:
diceloss = self.binary_diceloss(y_pred[:, i, :, :],
y_true[:, i, :, :])
total_loss += paddle.multiply(diceloss, self.weight[i])
tmp_i += 1.0
return total_loss / tmp_i
class DiceBCELoss(nn.Layer):
"""Binary change detection task loss"""
def __init__(self):
super(DiceBCELoss, self).__init__()
self.bce_loss = nn.BCELoss()
self.binnary_dice = DiceLoss()
self.binary_dice = DiceLoss()
def forward(self, scores, labels, do_sigmoid=True):
if len(scores.shape) > 3:
@ -87,29 +58,11 @@ class DiceBCELoss(nn.Layer):
labels = labels.squeeze(1)
if do_sigmoid:
scores = paddle.nn.functional.sigmoid(scores.clone())
diceloss = self.binnary_dice(scores, labels)
diceloss = self.binary_dice(scores, labels)
bceloss = self.bce_loss(scores, labels)
return diceloss + bceloss
class McDiceBCELoss(nn.Layer):
"""Multi-class change detection task loss"""
def __init__(self, weight, do_sigmoid=True):
super(McDiceBCELoss, self).__init__()
self.ce_loss = nn.CrossEntropyLoss(weight)
self.dice = MultiClassDiceLoss(weight, do_sigmoid)
def forward(self, scores, labels):
if len(scores.shape) < 4:
scores = scores.unsqueeze(1)
if len(labels.shape) < 4:
labels = labels.unsqueeze(1)
diceloss = self.dice(scores, labels)
bceloss = self.ce_loss(scores, labels)
return diceloss + bceloss
def fccdn_ssl_loss(logits_list, labels):
"""
Self-supervised learning loss for change detection.
@ -160,11 +113,11 @@ def fccdn_ssl_loss(logits_list, labels):
# Seg loss
labels_downsample = labels_downsample.astype(paddle.float32)
loss_aux = 0.2 * criterion_ssl(out1, pred_seg_post_tmp1, False)
loss_aux += 0.2 * criterion_ssl(out2, pred_seg_pre_tmp1, False)
loss_aux += 0.2 * criterion_ssl(
out3, labels_downsample - pred_seg_post_tmp2, False)
loss_aux += 0.2 * criterion_ssl(out4, labels_downsample - pred_seg_pre_tmp2,
False)
loss_aux = criterion_ssl(out1, pred_seg_post_tmp1, False)
loss_aux += criterion_ssl(out2, pred_seg_pre_tmp1, False)
loss_aux += criterion_ssl(out3, labels_downsample - pred_seg_post_tmp2,
False)
loss_aux += criterion_ssl(out4, labels_downsample - pred_seg_pre_tmp2,
False)
return loss_aux

@ -12,4 +12,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from .condensenet_v2 import CondenseNetV2_a, CondenseNetV2_b, CondenseNetV2_c
from .condensenetv2 import CondenseNetV2_A, CondenseNetV2_B, CondenseNetV2_C

@ -1,442 +1,442 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on https://github.com/AgentMaker/Paddle-Image-Models
The copyright of AgentMaker/Paddle-Image-Models is as follows:
Apache License [see LICENSE for details]
"""
import paddle
import paddle.nn as nn
__all__ = ["CondenseNetV2_a", "CondenseNetV2_b", "CondenseNetV2_c"]
class SELayer(nn.Layer):
def __init__(self, inplanes, reduction=16):
super(SELayer, self).__init__()
self.avg_pool = nn.AdaptiveAvgPool2D(1)
self.fc = nn.Sequential(
nn.Linear(
inplanes, inplanes // reduction, bias_attr=False),
nn.ReLU(),
nn.Linear(
inplanes // reduction, inplanes, bias_attr=False),
nn.Sigmoid(), )
def forward(self, x):
b, c, _, _ = x.shape
y = self.avg_pool(x).reshape((b, c))
y = self.fc(y).reshape((b, c, 1, 1))
return x * paddle.expand(y, shape=x.shape)
class HS(nn.Layer):
def __init__(self):
super(HS, self).__init__()
self.relu6 = nn.ReLU6()
def forward(self, inputs):
return inputs * self.relu6(inputs + 3) / 6
class Conv(nn.Sequential):
def __init__(
self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
groups=1,
activation="ReLU",
bn_momentum=0.9, ):
super(Conv, self).__init__()
self.add_sublayer(
"norm", nn.BatchNorm2D(
in_channels, momentum=bn_momentum))
if activation == "ReLU":
self.add_sublayer("activation", nn.ReLU())
elif activation == "HS":
self.add_sublayer("activation", HS())
else:
raise NotImplementedError
self.add_sublayer(
"conv",
nn.Conv2D(
in_channels,
out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
bias_attr=False,
groups=groups, ), )
def ShuffleLayer(x, groups):
batchsize, num_channels, height, width = x.shape
channels_per_group = num_channels // groups
# Reshape
x = x.reshape((batchsize, groups, channels_per_group, height, width))
# Transpose
x = x.transpose((0, 2, 1, 3, 4))
# Reshape
x = x.reshape((batchsize, groups * channels_per_group, height, width))
return x
def ShuffleLayerTrans(x, groups):
batchsize, num_channels, height, width = x.shape
channels_per_group = num_channels // groups
# Reshape
x = x.reshape((batchsize, channels_per_group, groups, height, width))
# Transpose
x = x.transpose((0, 2, 1, 3, 4))
# Reshape
x = x.reshape((batchsize, channels_per_group * groups, height, width))
return x
class CondenseLGC(nn.Layer):
def __init__(
self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
groups=1,
activation="ReLU", ):
super(CondenseLGC, self).__init__()
self.in_channels = in_channels
self.out_channels = out_channels
self.groups = groups
self.norm = nn.BatchNorm2D(self.in_channels)
if activation == "ReLU":
self.activation = nn.ReLU()
elif activation == "HS":
self.activation = HS()
else:
raise NotImplementedError
self.conv = nn.Conv2D(
self.in_channels,
self.out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
groups=self.groups,
bias_attr=False, )
self.register_buffer(
"index", paddle.zeros(
(self.in_channels, ), dtype="int64"))
def forward(self, x):
x = paddle.index_select(x, self.index, axis=1)
x = self.norm(x)
x = self.activation(x)
x = self.conv(x)
x = ShuffleLayer(x, self.groups)
return x
class CondenseSFR(nn.Layer):
def __init__(
self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
groups=1,
activation="ReLU", ):
super(CondenseSFR, self).__init__()
self.in_channels = in_channels
self.out_channels = out_channels
self.groups = groups
self.norm = nn.BatchNorm2D(self.in_channels)
if activation == "ReLU":
self.activation = nn.ReLU()
elif activation == "HS":
self.activation = HS()
else:
raise NotImplementedError
self.conv = nn.Conv2D(
self.in_channels,
self.out_channels,
kernel_size=kernel_size,
padding=padding,
groups=self.groups,
bias_attr=False,
stride=stride, )
self.register_buffer("index",
paddle.zeros(
(self.out_channels, self.out_channels)))
def forward(self, x):
x = self.norm(x)
x = self.activation(x)
x = ShuffleLayerTrans(x, self.groups)
x = self.conv(x) # SIZE: N, C, H, W
N, C, H, W = x.shape
x = x.reshape((N, C, H * W))
x = x.transpose((0, 2, 1)) # SIZE: N, HW, C
# x SIZE: N, HW, C; self.index SIZE: C, C; OUTPUT SIZE: N, HW, C
x = paddle.matmul(x, self.index)
x = x.transpose((0, 2, 1)) # SIZE: N, C, HW
x = x.reshape((N, C, H, W))  # SIZE: N, C, H, W
return x
class _SFR_DenseLayer(nn.Layer):
def __init__(
self,
in_channels,
growth_rate,
group_1x1,
group_3x3,
group_trans,
bottleneck,
activation,
use_se=False, ):
super(_SFR_DenseLayer, self).__init__()
self.group_1x1 = group_1x1
self.group_3x3 = group_3x3
self.group_trans = group_trans
self.use_se = use_se
# 1x1 conv i --> b*k
self.conv_1 = CondenseLGC(
in_channels,
bottleneck * growth_rate,
kernel_size=1,
groups=self.group_1x1,
activation=activation, )
# 3x3 conv b*k --> k
self.conv_2 = Conv(
bottleneck * growth_rate,
growth_rate,
kernel_size=3,
padding=1,
groups=self.group_3x3,
activation=activation, )
# 1x1 res conv k(8-16-32)--> i (k*l)
self.sfr = CondenseSFR(
growth_rate,
in_channels,
kernel_size=1,
groups=self.group_trans,
activation=activation, )
if self.use_se:
self.se = SELayer(inplanes=growth_rate, reduction=1)
def forward(self, x):
x_ = x
x = self.conv_1(x)
x = self.conv_2(x)
if self.use_se:
x = self.se(x)
sfr_feature = self.sfr(x)
y = x_ + sfr_feature
return paddle.concat([y, x], 1)
class _SFR_DenseBlock(nn.Sequential):
def __init__(
self,
num_layers,
in_channels,
growth_rate,
group_1x1,
group_3x3,
group_trans,
bottleneck,
activation,
use_se, ):
super(_SFR_DenseBlock, self).__init__()
for i in range(num_layers):
layer = _SFR_DenseLayer(
in_channels + i * growth_rate,
growth_rate,
group_1x1,
group_3x3,
group_trans,
bottleneck,
activation,
use_se, )
self.add_sublayer("denselayer_%d" % (i + 1), layer)
class _Transition(nn.Layer):
def __init__(self):
super(_Transition, self).__init__()
self.pool = nn.AvgPool2D(kernel_size=2, stride=2)
def forward(self, x):
x = self.pool(x)
return x
class CondenseNetV2(nn.Layer):
def __init__(
self,
stages,
growth,
HS_start_block,
SE_start_block,
fc_channel,
group_1x1,
group_3x3,
group_trans,
bottleneck,
last_se_reduction,
in_channels=3,
class_num=1000, ):
super(CondenseNetV2, self).__init__()
self.stages = stages
self.growth = growth
self.in_channels = in_channels
self.class_num = class_num
self.last_se_reduction = last_se_reduction
assert len(self.stages) == len(self.growth)
self.progress = 0.0
self.init_stride = 2
self.pool_size = 7
self.features = nn.Sequential()
# Initial nChannels should be 3
self.num_features = 2 * self.growth[0]
# Dense-block 1 (224x224)
self.features.add_sublayer(
"init_conv",
nn.Conv2D(
in_channels,
self.num_features,
kernel_size=3,
stride=self.init_stride,
padding=1,
bias_attr=False, ), )
for i in range(len(self.stages)):
activation = "HS" if i >= HS_start_block else "ReLU"
use_se = True if i >= SE_start_block else False
# Dense-block i
self.add_block(i, group_1x1, group_3x3, group_trans, bottleneck,
activation, use_se)
self.fc = nn.Linear(self.num_features, fc_channel)
self.fc_act = HS()
# Classifier layer
if class_num > 0:
self.classifier = nn.Linear(fc_channel, class_num)
self._initialize()
def add_block(self, i, group_1x1, group_3x3, group_trans, bottleneck,
activation, use_se):
# Check if ith is the last one
last = i == len(self.stages) - 1
block = _SFR_DenseBlock(
num_layers=self.stages[i],
in_channels=self.num_features,
growth_rate=self.growth[i],
group_1x1=group_1x1,
group_3x3=group_3x3,
group_trans=group_trans,
bottleneck=bottleneck,
activation=activation,
use_se=use_se, )
self.features.add_sublayer("denseblock_%d" % (i + 1), block)
self.num_features += self.stages[i] * self.growth[i]
if not last:
trans = _Transition()
self.features.add_sublayer("transition_%d" % (i + 1), trans)
else:
self.features.add_sublayer("norm_last",
nn.BatchNorm2D(self.num_features))
self.features.add_sublayer("relu_last", nn.ReLU())
self.features.add_sublayer("pool_last",
nn.AvgPool2D(self.pool_size))
# if useSE:
self.features.add_sublayer(
"se_last",
SELayer(
self.num_features, reduction=self.last_se_reduction))
def forward(self, x):
features = self.features(x)
out = features.reshape((features.shape[0], features.shape[1] *
features.shape[2] * features.shape[3]))
out = self.fc(out)
out = self.fc_act(out)
if self.class_num > 0:
out = self.classifier(out)
return out
def _initialize(self):
# Initialize
for m in self.sublayers():
if isinstance(m, nn.Conv2D):
nn.initializer.KaimingNormal()(m.weight)
elif isinstance(m, nn.BatchNorm2D):
nn.initializer.Constant(value=1.0)(m.weight)
nn.initializer.Constant(value=0.0)(m.bias)
def CondenseNetV2_a(**kwargs):
model = CondenseNetV2(
stages=[1, 1, 4, 6, 8],
growth=[8, 8, 16, 32, 64],
HS_start_block=2,
SE_start_block=3,
fc_channel=828,
group_1x1=8,
group_3x3=8,
group_trans=8,
bottleneck=4,
last_se_reduction=16,
**kwargs)
return model
def CondenseNetV2_b(**kwargs):
model = CondenseNetV2(
stages=[2, 4, 6, 8, 6],
growth=[6, 12, 24, 48, 96],
HS_start_block=2,
SE_start_block=3,
fc_channel=1024,
group_1x1=6,
group_3x3=6,
group_trans=6,
bottleneck=4,
last_se_reduction=16,
**kwargs)
return model
def CondenseNetV2_c(**kwargs):
model = CondenseNetV2(
stages=[4, 6, 8, 10, 8],
growth=[8, 16, 32, 64, 128],
HS_start_block=2,
SE_start_block=3,
fc_channel=1024,
group_1x1=8,
group_3x3=8,
group_trans=8,
bottleneck=4,
last_se_reduction=16,
**kwargs)
return model
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on https://github.com/AgentMaker/Paddle-Image-Models
The copyright of AgentMaker/Paddle-Image-Models is as follows:
Apache License [see LICENSE for details]
"""
import paddle
import paddle.nn as nn
__all__ = ["CondenseNetV2_A", "CondenseNetV2_B", "CondenseNetV2_C"]
class SELayer(nn.Layer):
def __init__(self, inplanes, reduction=16):
super(SELayer, self).__init__()
self.avg_pool = nn.AdaptiveAvgPool2D(1)
self.fc = nn.Sequential(
nn.Linear(
inplanes, inplanes // reduction, bias_attr=False),
nn.ReLU(),
nn.Linear(
inplanes // reduction, inplanes, bias_attr=False),
nn.Sigmoid(), )
def forward(self, x):
b, c, _, _ = x.shape
y = self.avg_pool(x).reshape((b, c))
y = self.fc(y).reshape((b, c, 1, 1))
return x * paddle.expand(y, shape=x.shape)
class HS(nn.Layer):
def __init__(self):
super(HS, self).__init__()
self.relu6 = nn.ReLU6()
def forward(self, inputs):
return inputs * self.relu6(inputs + 3) / 6
class Conv(nn.Sequential):
def __init__(
self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
groups=1,
activation="ReLU",
bn_momentum=0.9, ):
super(Conv, self).__init__()
self.add_sublayer(
"norm", nn.BatchNorm2D(
in_channels, momentum=bn_momentum))
if activation == "ReLU":
self.add_sublayer("activation", nn.ReLU())
elif activation == "HS":
self.add_sublayer("activation", HS())
else:
raise NotImplementedError
self.add_sublayer(
"conv",
nn.Conv2D(
in_channels,
out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
bias_attr=False,
groups=groups, ), )
def ShuffleLayer(x, groups):
batchsize, num_channels, height, width = x.shape
channels_per_group = num_channels // groups
# Reshape
x = x.reshape((batchsize, groups, channels_per_group, height, width))
# Transpose
x = x.transpose((0, 2, 1, 3, 4))
# Reshape
x = x.reshape((batchsize, groups * channels_per_group, height, width))
return x
def ShuffleLayerTrans(x, groups):
batchsize, num_channels, height, width = x.shape
channels_per_group = num_channels // groups
# Reshape
x = x.reshape((batchsize, channels_per_group, groups, height, width))
# Transpose
x = x.transpose((0, 2, 1, 3, 4))
# Reshape
x = x.reshape((batchsize, channels_per_group * groups, height, width))
return x
class CondenseLGC(nn.Layer):
def __init__(
self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
groups=1,
activation="ReLU", ):
super(CondenseLGC, self).__init__()
self.in_channels = in_channels
self.out_channels = out_channels
self.groups = groups
self.norm = nn.BatchNorm2D(self.in_channels)
if activation == "ReLU":
self.activation = nn.ReLU()
elif activation == "HS":
self.activation = HS()
else:
raise NotImplementedError
self.conv = nn.Conv2D(
self.in_channels,
self.out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
groups=self.groups,
bias_attr=False, )
self.register_buffer(
"index", paddle.zeros(
(self.in_channels, ), dtype="int64"))
def forward(self, x):
x = paddle.index_select(x, self.index, axis=1)
x = self.norm(x)
x = self.activation(x)
x = self.conv(x)
x = ShuffleLayer(x, self.groups)
return x
class CondenseSFR(nn.Layer):
def __init__(
self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
groups=1,
activation="ReLU", ):
super(CondenseSFR, self).__init__()
self.in_channels = in_channels
self.out_channels = out_channels
self.groups = groups
self.norm = nn.BatchNorm2D(self.in_channels)
if activation == "ReLU":
self.activation = nn.ReLU()
elif activation == "HS":
self.activation = HS()
else:
raise NotImplementedError
self.conv = nn.Conv2D(
self.in_channels,
self.out_channels,
kernel_size=kernel_size,
padding=padding,
groups=self.groups,
bias_attr=False,
stride=stride, )
self.register_buffer("index",
paddle.zeros(
(self.out_channels, self.out_channels)))
def forward(self, x):
x = self.norm(x)
x = self.activation(x)
x = ShuffleLayerTrans(x, self.groups)
x = self.conv(x) # SIZE: N, C, H, W
N, C, H, W = x.shape
x = x.reshape((N, C, H * W))
x = x.transpose((0, 2, 1)) # SIZE: N, HW, C
# x SIZE: N, HW, C; self.index SIZE: C, C; OUTPUT SIZE: N, HW, C
x = paddle.matmul(x, self.index)
x = x.transpose((0, 2, 1)) # SIZE: N, C, HW
x = x.reshape((N, C, H, W))  # SIZE: N, C, H, W
return x
class _SFR_DenseLayer(nn.Layer):
def __init__(
self,
in_channels,
growth_rate,
group_1x1,
group_3x3,
group_trans,
bottleneck,
activation,
use_se=False, ):
super(_SFR_DenseLayer, self).__init__()
self.group_1x1 = group_1x1
self.group_3x3 = group_3x3
self.group_trans = group_trans
self.use_se = use_se
# 1x1 conv i --> b*k
self.conv_1 = CondenseLGC(
in_channels,
bottleneck * growth_rate,
kernel_size=1,
groups=self.group_1x1,
activation=activation, )
# 3x3 conv b*k --> k
self.conv_2 = Conv(
bottleneck * growth_rate,
growth_rate,
kernel_size=3,
padding=1,
groups=self.group_3x3,
activation=activation, )
# 1x1 res conv k(8-16-32)--> i (k*l)
self.sfr = CondenseSFR(
growth_rate,
in_channels,
kernel_size=1,
groups=self.group_trans,
activation=activation, )
if self.use_se:
self.se = SELayer(inplanes=growth_rate, reduction=1)
def forward(self, x):
x_ = x
x = self.conv_1(x)
        x = self.conv_2(x)
        if self.use_se:
            x = self.se(x)
        # Sparse feature reactivation: refresh the reused input features
        # with the newly computed ones, then concatenate both paths.
        sfr_feature = self.sfr(x)
        y = x_ + sfr_feature
        return paddle.concat([y, x], 1)


class _SFR_DenseBlock(nn.Sequential):
    def __init__(
            self,
            num_layers,
            in_channels,
            growth_rate,
            group_1x1,
            group_3x3,
            group_trans,
            bottleneck,
            activation,
            use_se, ):
        super(_SFR_DenseBlock, self).__init__()
        for i in range(num_layers):
            layer = _SFR_DenseLayer(
                in_channels + i * growth_rate,
                growth_rate,
                group_1x1,
                group_3x3,
                group_trans,
                bottleneck,
                activation,
                use_se, )
            self.add_sublayer("denselayer_%d" % (i + 1), layer)


class _Transition(nn.Layer):
    def __init__(self):
        super(_Transition, self).__init__()
        self.pool = nn.AvgPool2D(kernel_size=2, stride=2)

    def forward(self, x):
        x = self.pool(x)
        return x


class CondenseNetV2(nn.Layer):
    def __init__(
            self,
            stages,
            growth,
            HS_start_block,
            SE_start_block,
            fc_channel,
            group_1x1,
            group_3x3,
            group_trans,
            bottleneck,
            last_se_reduction,
            in_channels=3,
            class_num=1000, ):
        super(CondenseNetV2, self).__init__()
        self.stages = stages
        self.growth = growth
        self.in_channels = in_channels
        self.class_num = class_num
        self.last_se_reduction = last_se_reduction
        assert len(self.stages) == len(self.growth)
        self.progress = 0.0
        self.init_stride = 2
        self.pool_size = 7

        self.features = nn.Sequential()
        # Initial nChannels should be 3
        self.num_features = 2 * self.growth[0]
        # Dense-block 1 (224x224)
        self.features.add_sublayer(
            "init_conv",
            nn.Conv2D(
                in_channels,
                self.num_features,
                kernel_size=3,
                stride=self.init_stride,
                padding=1,
                bias_attr=False, ), )
        for i in range(len(self.stages)):
            activation = "HS" if i >= HS_start_block else "ReLU"
            use_se = True if i >= SE_start_block else False
            # Dense-block i
            self.add_block(i, group_1x1, group_3x3, group_trans, bottleneck,
                           activation, use_se)

        self.fc = nn.Linear(self.num_features, fc_channel)
        self.fc_act = HS()

        # Classifier layer
        if class_num > 0:
            self.classifier = nn.Linear(fc_channel, class_num)
        self._initialize()

    def add_block(self, i, group_1x1, group_3x3, group_trans, bottleneck,
                  activation, use_se):
        # Check if the i-th block is the last one
        last = i == len(self.stages) - 1
        block = _SFR_DenseBlock(
            num_layers=self.stages[i],
            in_channels=self.num_features,
            growth_rate=self.growth[i],
            group_1x1=group_1x1,
            group_3x3=group_3x3,
            group_trans=group_trans,
            bottleneck=bottleneck,
            activation=activation,
            use_se=use_se, )
        self.features.add_sublayer("denseblock_%d" % (i + 1), block)
        self.num_features += self.stages[i] * self.growth[i]
        if not last:
            trans = _Transition()
            self.features.add_sublayer("transition_%d" % (i + 1), trans)
        else:
            self.features.add_sublayer("norm_last",
                                       nn.BatchNorm2D(self.num_features))
            self.features.add_sublayer("relu_last", nn.ReLU())
            self.features.add_sublayer("pool_last",
                                       nn.AvgPool2D(self.pool_size))
            # if useSE:
            self.features.add_sublayer(
                "se_last",
                SELayer(
                    self.num_features, reduction=self.last_se_reduction))

    def forward(self, x):
        features = self.features(x)
        # Flatten NCHW features to (N, C*H*W) before the fully connected head.
        out = features.reshape((features.shape[0], features.shape[1] *
                                features.shape[2] * features.shape[3]))
        out = self.fc(out)
        out = self.fc_act(out)
        if self.class_num > 0:
            out = self.classifier(out)
        return out

    def _initialize(self):
        # Kaiming-normal init for conv kernels; constant init for BN affine params.
        for m in self.sublayers():
            if isinstance(m, nn.Conv2D):
                nn.initializer.KaimingNormal()(m.weight)
            elif isinstance(m, nn.BatchNorm2D):
                nn.initializer.Constant(value=1.0)(m.weight)
                nn.initializer.Constant(value=0.0)(m.bias)


def CondenseNetV2_A(**kwargs):
    model = CondenseNetV2(
        stages=[1, 1, 4, 6, 8],
        growth=[8, 8, 16, 32, 64],
        HS_start_block=2,
        SE_start_block=3,
        fc_channel=828,
        group_1x1=8,
        group_3x3=8,
        group_trans=8,
        bottleneck=4,
        last_se_reduction=16,
        **kwargs)
    return model


def CondenseNetV2_B(**kwargs):
    model = CondenseNetV2(
        stages=[2, 4, 6, 8, 6],
        growth=[6, 12, 24, 48, 96],
        HS_start_block=2,
        SE_start_block=3,
        fc_channel=1024,
        group_1x1=6,
        group_3x3=6,
        group_trans=6,
        bottleneck=4,
        last_se_reduction=16,
        **kwargs)
    return model


def CondenseNetV2_C(**kwargs):
    model = CondenseNetV2(
        stages=[4, 6, 8, 10, 8],
        growth=[8, 16, 32, 64, 128],
        HS_start_block=2,
        SE_start_block=3,
        fc_channel=1024,
        group_1x1=8,
        group_3x3=8,
        group_trans=8,
        bottleneck=4,
        last_se_reduction=16,
        **kwargs)
    return model
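A quick smoke test for these presets can be sketched as follows (assumes PaddlePaddle is installed and the definitions above, plus HS and SELayer from earlier in this file, are importable; the 10-class head is arbitrary):

import paddle

model = CondenseNetV2_A(class_num=10)  # arbitrary toy head via **kwargs
model.eval()
x = paddle.randn([1, 3, 224, 224])  # NCHW; 224x224 matches the stride/pool math above
with paddle.no_grad():
    logits = model(x)
print(logits.shape)  # expected: [1, 10]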

@@ -1067,7 +1067,7 @@ class FCCDN(BaseChangeDetector):
             return {
                 'types':
                 [seg_losses.CrossEntropyLoss(), cmcd.losses.fccdn_ssl_loss],
-                'coef': [1.0, 1.0]
+                'coef': [1.0, 0.2]
             }
         else:
             raise ValueError(
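For readers unfamiliar with the losses spec: a {'types': [...], 'coef': [...]} pair is reduced to a weighted sum, so this hunk down-weights the FCCDN self-supervised term to 0.2. A minimal sketch of that reduction (illustrative names, not the PaddleRS internals):

def weighted_total_loss(spec, logits, labels):
    # Evaluate each loss and sum the results scaled by their coefficients.
    total = 0.0
    for loss_fn, coef in zip(spec['types'], spec['coef']):
        total = total + coef * loss_fn(logits, labels)
    return total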

@@ -34,9 +34,7 @@ from paddlers.utils.checkpoint import cls_pretrain_weights_dict
 from paddlers.transforms import Resize, decode_image
 from .base import BaseModel

-__all__ = [
-    "ResNet50_vd", "MobileNetV3_small_x1_0", "HRNet_W18_C", "CondenseNetV2_b"
-]
+__all__ = ["ResNet50_vd", "MobileNetV3", "HRNet", "CondenseNetV2"]


 class BaseClassifier(BaseModel):
@@ -600,13 +598,13 @@ class ResNet50_vd(BaseClassifier):
             **params)


-class MobileNetV3_small_x1_0(BaseClassifier):
+class MobileNetV3(BaseClassifier):
     def __init__(self,
                  num_classes=2,
                  use_mixed_loss=False,
                  losses=None,
                  **params):
-        super(MobileNetV3_small_x1_0, self).__init__(
+        super(MobileNetV3, self).__init__(
             model_name='MobileNetV3_small_x1_0',
             num_classes=num_classes,
             use_mixed_loss=use_mixed_loss,
@@ -614,13 +612,13 @@ class MobileNetV3_small_x1_0(BaseClassifier):
             **params)


-class HRNet_W18_C(BaseClassifier):
+class HRNet(BaseClassifier):
     def __init__(self,
                  num_classes=2,
                  use_mixed_loss=False,
                  losses=None,
                  **params):
-        super(HRNet_W18_C, self).__init__(
+        super(HRNet, self).__init__(
             model_name='HRNet_W18_C',
             num_classes=num_classes,
             use_mixed_loss=use_mixed_loss,
@@ -628,15 +626,21 @@ class HRNet_W18_C(BaseClassifier):
             **params)


-class CondenseNetV2_b(BaseClassifier):
+class CondenseNetV2(BaseClassifier):
     def __init__(self,
                  num_classes=2,
                  use_mixed_loss=False,
                  losses=None,
+                 in_channels=3,
+                 arch='A',
                  **params):
-        super(CondenseNetV2_b, self).__init__(
-            model_name='CondenseNetV2_b',
+        if arch not in ('A', 'B', 'C'):
+            raise ValueError("{} is not a supported architecture.".format(arch))
+        model_name = 'CondenseNetV2_' + arch
+        super(CondenseNetV2, self).__init__(
+            model_name=model_name,
             num_classes=num_classes,
             use_mixed_loss=use_mixed_loss,
             losses=losses,
+            in_channels=in_channels,
             **params)
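With this change a single task class covers all three variants; usage might look like the following (import path assumed, not confirmed by this diff):

from paddlers.tasks.classifier import CondenseNetV2  # assumed import path

clf = CondenseNetV2(num_classes=21, arch='B')  # resolves to model_name 'CondenseNetV2_B'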

@@ -773,7 +773,7 @@ class LESRCNN(BaseRestorer):
                  group=1,
                  **params):
         params.update({
-            'scale': sr_factor,
+            'scale': sr_factor if sr_factor is not None else 1,
             'multi_scale': multi_scale,
             'group': group
         })
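The guard above makes a None sr_factor fall back to a scale of 1; a quick pure-Python check of the expression:

for sr_factor in (None, 2, 4):
    scale = sr_factor if sr_factor is not None else 1
    print(sr_factor, '->', scale)  # None -> 1, 2 -> 2, 4 -> 4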

@@ -185,14 +185,7 @@ class BaseSegmenter(BaseModel):
                 )
             losses = [getattr(seg_losses, loss)() for loss in losses]
             loss_type = [seg_losses.MixedLoss(losses=losses, coef=list(coef))]
-            if self.model_name == 'FastSCNN':
-                loss_type *= 2
-                loss_coef = [1.0, 0.4]
-            elif self.model_name == 'BiSeNetV2':
-                loss_type *= 5
-                loss_coef = [1.0] * 5
-            else:
-                loss_coef = [1.0]
+            loss_coef = [1.0]
             losses = {'types': loss_type, 'coef': loss_coef}
         return losses
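The per-model branches removed here reappear below as default_loss overrides on the models themselves. The pattern, as an illustrative subclass (ToySegmenter is hypothetical):

class ToySegmenter(BaseSegmenter):  # hypothetical example
    def default_loss(self):
        losses = super(ToySegmenter, self).default_loss()
        losses['types'] *= 2  # one copy of the mixed loss per output head
        losses['coef'] = [1.0, 0.4]  # main head weighted 1.0, auxiliary head 0.4
        return losses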
@@ -761,7 +754,7 @@ class UNet(BaseSegmenter):
             })
         super(UNet, self).__init__(
             model_name='UNet',
-            input_channel=in_channels,
+            in_channels=in_channels,
             num_classes=num_classes,
             use_mixed_loss=use_mixed_loss,
             losses=losses,
@@ -789,7 +782,7 @@ class DeepLabV3P(BaseSegmenter):
         if params.get('with_net', True):
             with DisablePrint():
                 backbone = getattr(ppseg.models, backbone)(
-                    input_channel=in_channels, output_stride=output_stride)
+                    in_channels=in_channels, output_stride=output_stride)
         else:
             backbone = None
         params.update({
@@ -809,6 +802,7 @@ class DeepLabV3P(BaseSegmenter):
 class FastSCNN(BaseSegmenter):
     def __init__(self,
+                 in_channels=3,
                  num_classes=2,
                  use_mixed_loss=False,
                  losses=None,
@@ -817,14 +811,22 @@ class FastSCNN(BaseSegmenter):
         params.update({'align_corners': align_corners})
         super(FastSCNN, self).__init__(
             model_name='FastSCNN',
+            in_channels=in_channels,
             num_classes=num_classes,
             use_mixed_loss=use_mixed_loss,
             losses=losses,
             **params)

+    def default_loss(self):
+        losses = super(FastSCNN, self).default_loss()
+        losses['types'] *= 2
+        losses['coef'] = [1.0, 0.4]
+        return losses
+

 class HRNet(BaseSegmenter):
     def __init__(self,
+                 in_channels=3,
                  num_classes=2,
                  width=48,
                  use_mixed_loss=False,
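Note that losses['types'] *= 2 relies on Python list repetition, so both entries reference the same MixedLoss object; that is harmless here because the loss holds no per-head state. A toy illustration of the idiom:

spec = {'types': ['mixed'], 'coef': [1.0]}
spec['types'] *= 2
spec['coef'] = [1.0, 0.4]
print(spec)  # {'types': ['mixed', 'mixed'], 'coef': [1.0, 0.4]}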
@@ -839,7 +841,7 @@ class HRNet(BaseSegmenter):
         if params.get('with_net', True):
             with DisablePrint():
                 backbone = getattr(ppseg.models, self.backbone_name)(
-                    align_corners=align_corners)
+                    in_channels=in_channels, align_corners=align_corners)
         else:
             backbone = None
@@ -855,6 +857,7 @@ class HRNet(BaseSegmenter):
 class BiSeNetV2(BaseSegmenter):
     def __init__(self,
+                 in_channels=3,
                  num_classes=2,
                  use_mixed_loss=False,
                  losses=None,
@@ -863,11 +866,18 @@ class BiSeNetV2(BaseSegmenter):
         params.update({'align_corners': align_corners})
         super(BiSeNetV2, self).__init__(
             model_name='BiSeNetV2',
+            in_channels=in_channels,
             num_classes=num_classes,
             use_mixed_loss=use_mixed_loss,
             losses=losses,
             **params)

+    def default_loss(self):
+        losses = super(BiSeNetV2, self).default_loss()
+        losses['types'] *= 5
+        losses['coef'] = [1.0] * 5
+        return losses
+

 class FarSeg(BaseSegmenter):
     def __init__(self,

@@ -493,11 +493,12 @@ def load_pretrain_weights(model, pretrain_weights=None, model_name=None):
             num_params_loaded = 0
             for k in model_state_dict:
                 if k not in param_state_dict:
-                    logging.warning("{} is not in pretrained model".format(k))
+                    logging.warning("{} is not in the pretrained model.".format(
+                        k))
                 elif list(param_state_dict[k].shape) != list(model_state_dict[k]
                                                              .shape):
                     logging.warning(
-                        "[SKIP] Shape of pretrained params {} doesn't match.(Pretrained: {}, Actual: {})"
+                        "[SKIP] Shape of parameters {} do not match. (pretrained: {} vs actual: {})"
                         .format(k, param_state_dict[k].shape, model_state_dict[
                             k].shape))
                 else:
@@ -507,11 +508,11 @@ def load_pretrain_weights(model, pretrain_weights=None, model_name=None):
             logging.info("There are {}/{} variables loaded into {}.".format(
                 num_params_loaded, len(model_state_dict), model_name))
         else:
-            raise ValueError('The pretrained model directory is not Found: {}'.
+            raise ValueError('The pretrained model directory is not found: {}'.
                              format(pretrain_weights))
     else:
         logging.info(
-            'No pretrained model to load, {} will be trained from scratch.'.
+            'No pretrained model to load. {} will be trained from scratch.'.
             format(model_name))
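The loading policy these messages describe, keeping only same-name, same-shape tensors, can be sketched as follows (illustrative helper, not the actual PaddleRS function):

def filter_matching_params(model_state_dict, param_state_dict):
    # Keep a pretrained tensor only if the model has a parameter with the
    # same name and shape; everything else is skipped with a warning upstream.
    matched = {}
    for k, v in model_state_dict.items():
        if k in param_state_dict and list(param_state_dict[k].shape) == list(v.shape):
            matched[k] = param_state_dict[k]
    return matched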

@@ -32,6 +32,7 @@
 | Change Detection | FC-Siam-conc | Supported | - | - | - |
 | Change Detection | FC-Siam-diff | Supported | - | - | - |
 | Change Detection | ChangeFormer | Supported | - | - | - |
+| Scene Classification | CondenseNet V2 | Supported | - | - | - |
 | Scene Classification | HRNet | Supported | - | - | - |
 | Scene Classification | MobileNetV3 | Supported | - | - | - |
 | Scene Classification | ResNet50-vd | Supported | - | - | - |
@@ -43,8 +44,11 @@
 | Object Detection | PP-YOLO Tiny | Supported | - | - | - |
 | Object Detection | PP-YOLOv2 | Supported | - | - | - |
 | Object Detection | YOLOv3 | Supported | - | - | - |
+| Image Segmentation | BiSeNet V2 | Supported | - | - | - |
 | Image Segmentation | DeepLab V3+ | Supported | - | - | - |
+| Image Segmentation | FarSeg | Supported | - | - | - |
 | Image Segmentation | Fast-SCNN | Supported | - | - | - |
+| Image Segmentation | HRNet | Supported | - | - | - |
 | Image Segmentation | UNet | Supported | - | - | - |

 ## 3 Introduction to the Testing Tools

@@ -119,6 +119,7 @@ def parse_args(*args, **kwargs):
     # Global settings
     parser.add_argument('cmd', choices=['train', 'eval'])
     parser.add_argument('task', choices=['cd', 'clas', 'det', 'res', 'seg'])
+    parser.add_argument('--seed', type=int, default=None)

     # Data
     parser.add_argument('--datasets', type=dict, default={})
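The new --seed flag presumably feeds a seeding routine along these lines (illustrative; the actual wiring lives elsewhere in the script):

import random

import numpy as np
import paddle

def seed_everything(seed):
    # Seed the three RNG sources commonly used in a Paddle training run.
    random.seed(seed)
    np.random.seed(seed)
    paddle.seed(seed)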

@@ -1,5 +1,7 @@
 # Basic configurations of AirChange dataset

+seed: 1024
+
 datasets:
     train: !Node
         type: CDDataset

@@ -1,5 +1,7 @@
 # Basic configurations of LEVIR-CD dataset

+seed: 1024
+
 datasets:
     train: !Node
         type: CDDataset

@@ -1,8 +0,0 @@
-# Basic configurations of BIT
-
-_base_: ../_base_/airchange.yaml
-
-save_dir: ./test_tipc/output/cd/bit/
-
-model: !Node
-    type: BIT

Some files were not shown because too many files have changed in this diff.