From 19cf02c1c27ab0730e087e288ecc9e469731a1de Mon Sep 17 00:00:00 2001 From: Bobholamovic Date: Mon, 12 Sep 2022 14:16:05 +0800 Subject: [PATCH 01/15] Update FarSeg doc --- docs/apis/train.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/apis/train.md b/docs/apis/train.md index d28aa94..db9528f 100644 --- a/docs/apis/train.md +++ b/docs/apis/train.md @@ -34,7 +34,7 @@ ### 初始化`BaseSegmenter`子类对象 -- 一般支持设置`in_channels`、`num_classes`以及`use_mixed_loss`参数,分别表示输入通道数、输出类别数以及是否使用预置的混合损失。部分模型如`FarSeg`暂不支持对`in_channels`参数的设置。 +- 一般支持设置`in_channels`、`num_classes`以及`use_mixed_loss`参数,分别表示输入通道数、输出类别数以及是否使用预置的混合损失。 - `use_mixed_loss`参将在未来被弃用,因此不建议使用。 - 可通过`losses`参数指定模型训练时使用的损失函数。`losses`需为一个字典,其中`'types'`键和`'coef'`键对应的值为两个等长的列表,分别表示损失函数对象(一个可调用对象)和损失函数的权重。例如:`losses={'types': [LossType1(), LossType2()], 'coef': [1.0, 0.5]}`在训练过程中将等价于计算如下损失函数:`1.0*LossType1()(logits, labels)+0.5*LossType2()(logits, labels)`,其中`logits`和`labels`分别是模型输出和真值标签。 - 不同的子类支持与模型相关的输入参数,详情请参考[模型定义](https://github.com/PaddlePaddle/PaddleRS/blob/develop/paddlers/rs_models/seg)和[训练器定义](https://github.com/PaddlePaddle/PaddleRS/blob/develop/paddlers/tasks/segmentor.py)。 From 5834df2fada3a8553c1797a6fc0e9718804b9144 Mon Sep 17 00:00:00 2001 From: Bobholamovic Date: Mon, 12 Sep 2022 14:16:58 +0800 Subject: [PATCH 02/15] Update ppseg --- paddlers/models/ppseg/__init__.py | 2 +- paddlers/models/ppseg/core/infer.py | 107 +- paddlers/models/ppseg/core/predict.py | 36 +- paddlers/models/ppseg/core/train.py | 78 +- paddlers/models/ppseg/core/val.py | 106 +- paddlers/models/ppseg/cvlibs/callbacks.py | 4 +- paddlers/models/ppseg/cvlibs/config.py | 211 +++- paddlers/models/ppseg/cvlibs/manager.py | 4 +- paddlers/models/ppseg/cvlibs/param_init.py | 34 +- paddlers/models/ppseg/datasets/__init__.py | 1 + paddlers/models/ppseg/datasets/ade.py | 36 +- paddlers/models/ppseg/datasets/chase_db1.py | 10 +- paddlers/models/ppseg/datasets/cityscapes.py | 6 +- paddlers/models/ppseg/datasets/cocostuff.py | 6 
+- paddlers/models/ppseg/datasets/dataset.py | 62 +- paddlers/models/ppseg/datasets/drive.py | 10 +- paddlers/models/ppseg/datasets/eg1800.py | 12 +- paddlers/models/ppseg/datasets/hrf.py | 10 +- .../mini_deep_globe_road_extraction.py | 8 +- .../models/ppseg/datasets/optic_disc_seg.py | 8 +- .../models/ppseg/datasets/pascal_context.py | 6 +- .../models/ppseg/datasets/pp_humanseg14k.py | 4 +- paddlers/models/ppseg/datasets/pssl.py | 135 +++ paddlers/models/ppseg/datasets/stare.py | 10 +- paddlers/models/ppseg/datasets/supervisely.py | 12 +- paddlers/models/ppseg/datasets/voc.py | 10 +- paddlers/models/ppseg/models/__init__.py | 9 + paddlers/models/ppseg/models/ann.py | 6 +- .../models/ppseg/models/attention_unet.py | 12 +- .../models/ppseg/models/backbones/__init__.py | 4 + .../models/ppseg/models/backbones/ghostnet.py | 318 ++++++ .../models/ppseg/models/backbones/hrnet.py | 10 +- .../ppseg/models/backbones/lite_hrnet.py | 974 ++++++++++++++++++ .../ppseg/models/backbones/mix_transformer.py | 12 +- .../ppseg/models/backbones/mobilenetv2.py | 333 +++--- .../ppseg/models/backbones/mobilenetv3.py | 500 +++++---- .../ppseg/models/backbones/resnet_vd.py | 13 +- .../ppseg/models/backbones/shufflenetv2.py | 315 ++++++ .../models/ppseg/models/backbones/stdcnet.py | 186 ++-- .../models/backbones/swin_transformer.py | 14 +- .../ppseg/models/backbones/top_transformer.py | 716 +++++++++++++ .../models/backbones/transformer_utils.py | 4 +- .../models/backbones/vision_transformer.py | 12 +- .../models/backbones/xception_deeplab.py | 17 +- paddlers/models/ppseg/models/bisenet.py | 24 +- paddlers/models/ppseg/models/bisenetv1.py | 6 +- paddlers/models/ppseg/models/ccnet.py | 174 ++++ paddlers/models/ppseg/models/danet.py | 6 +- paddlers/models/ppseg/models/ddrnet.py | 403 ++++++++ .../models/ppseg/models/decoupled_segnet.py | 10 +- paddlers/models/ppseg/models/deeplab.py | 6 +- paddlers/models/ppseg/models/dmnet.py | 6 +- paddlers/models/ppseg/models/dnlnet.py | 6 +- 
paddlers/models/ppseg/models/emanet.py | 10 +- paddlers/models/ppseg/models/encnet.py | 6 +- paddlers/models/ppseg/models/enet.py | 10 +- paddlers/models/ppseg/models/espnet.py | 6 +- paddlers/models/ppseg/models/espnetv1.py | 6 +- paddlers/models/ppseg/models/fast_scnn.py | 22 +- paddlers/models/ppseg/models/fastfcn.py | 6 +- paddlers/models/ppseg/models/fcn.py | 6 +- paddlers/models/ppseg/models/gcnet.py | 6 +- paddlers/models/ppseg/models/ginet.py | 8 +- paddlers/models/ppseg/models/glore.py | 198 ++++ paddlers/models/ppseg/models/gscnn.py | 10 +- paddlers/models/ppseg/models/hardnet.py | 10 +- .../models/ppseg/models/hrnet_contrast.py | 6 +- paddlers/models/ppseg/models/isanet.py | 6 +- .../models/ppseg/models/layers/__init__.py | 3 +- .../models/ppseg/models/layers/activation.py | 2 +- .../models/ppseg/models/layers/attention.py | 128 ++- .../models/ppseg/models/layers/layer_libs.py | 59 +- .../models/ppseg/models/layers/nonlocal2d.py | 2 +- .../ppseg/models/layers/pyramid_pool.py | 2 +- .../ppseg/models/layers/tensor_fusion.py | 285 +++++ .../models/layers/tensor_fusion_helper.py | 133 +++ .../losses/binary_cross_entropy_loss.py | 4 +- .../losses/bootstrapped_cross_entropy.py | 2 +- .../ppseg/models/losses/cross_entropy_loss.py | 6 +- .../decoupledsegnet_relax_boundary_loss.py | 4 +- .../models/losses/detail_aggregate_loss.py | 4 +- .../models/ppseg/models/losses/dice_loss.py | 67 +- .../models/losses/edge_attention_loss.py | 4 +- .../models/ppseg/models/losses/focal_loss.py | 124 ++- .../models/losses/gscnn_dual_task_loss.py | 2 +- .../models/ppseg/models/losses/kl_loss.py | 2 +- .../models/ppseg/models/losses/l1_loss.py | 24 +- .../models/ppseg/models/losses/lovasz_loss.py | 18 +- .../models/losses/mean_square_error_loss.py | 2 +- .../models/ppseg/models/losses/mixed_loss.py | 2 +- .../models/losses/ohem_cross_entropy_loss.py | 4 +- .../models/losses/ohem_edge_attention_loss.py | 4 +- .../pixel_contrast_cross_entropy_loss.py | 7 +- 
.../models/losses/point_cross_entropy_loss.py | 2 +- .../models/ppseg/models/losses/rmi_loss.py | 2 +- .../losses/semantic_connectivity_loss.py | 14 +- .../semantic_encode_cross_entropy_loss.py | 2 +- paddlers/models/ppseg/models/lraspp.py | 162 +++ .../models/ppseg/models/mla_transformer.py | 8 +- paddlers/models/ppseg/models/mobileseg.py | 289 ++++++ paddlers/models/ppseg/models/ocrnet.py | 6 +- paddlers/models/ppseg/models/pfpnnet.py | 6 +- paddlers/models/ppseg/models/pointrend.py | 8 +- paddlers/models/ppseg/models/portraitnet.py | 20 +- paddlers/models/ppseg/models/pp_liteseg.py | 273 +++++ .../models/ppseg/models/pphumanseg_lite.py | 16 +- paddlers/models/ppseg/models/pspnet.py | 6 +- paddlers/models/ppseg/models/segformer.py | 54 +- paddlers/models/ppseg/models/segmenter.py | 6 +- paddlers/models/ppseg/models/segnet.py | 10 +- paddlers/models/ppseg/models/setr.py | 6 +- paddlers/models/ppseg/models/sfnet.py | 6 +- paddlers/models/ppseg/models/sinet.py | 449 ++++++++ paddlers/models/ppseg/models/stdcseg.py | 10 +- paddlers/models/ppseg/models/topformer.py | 155 +++ paddlers/models/ppseg/models/u2net.py | 16 +- paddlers/models/ppseg/models/unet.py | 16 +- paddlers/models/ppseg/models/unet_3plus.py | 6 +- paddlers/models/ppseg/models/unet_plusplus.py | 12 +- paddlers/models/ppseg/models/upernet.py | 173 ++++ .../models/ppseg/transforms/functional.py | 9 +- .../models/ppseg/transforms/transforms.py | 760 ++++++-------- paddlers/models/ppseg/utils/__init__.py | 1 - paddlers/models/ppseg/utils/config_check.py | 59 -- paddlers/models/ppseg/utils/env/__init__.py | 2 +- paddlers/models/ppseg/utils/env/seg_env.py | 4 +- paddlers/models/ppseg/utils/env/sys_env.py | 12 +- paddlers/models/ppseg/utils/metrics.py | 70 +- paddlers/models/ppseg/utils/train_profiler.py | 2 +- paddlers/models/ppseg/utils/utils.py | 6 +- paddlers/models/ppseg/utils/visualize.py | 40 +- 131 files changed, 7340 insertions(+), 1592 deletions(-) create mode 100644 
paddlers/models/ppseg/datasets/pssl.py create mode 100644 paddlers/models/ppseg/models/backbones/ghostnet.py create mode 100644 paddlers/models/ppseg/models/backbones/lite_hrnet.py create mode 100644 paddlers/models/ppseg/models/backbones/shufflenetv2.py create mode 100644 paddlers/models/ppseg/models/backbones/top_transformer.py create mode 100644 paddlers/models/ppseg/models/ccnet.py create mode 100644 paddlers/models/ppseg/models/ddrnet.py create mode 100644 paddlers/models/ppseg/models/glore.py create mode 100644 paddlers/models/ppseg/models/layers/tensor_fusion.py create mode 100644 paddlers/models/ppseg/models/layers/tensor_fusion_helper.py create mode 100644 paddlers/models/ppseg/models/lraspp.py create mode 100644 paddlers/models/ppseg/models/mobileseg.py create mode 100644 paddlers/models/ppseg/models/pp_liteseg.py create mode 100644 paddlers/models/ppseg/models/sinet.py create mode 100644 paddlers/models/ppseg/models/topformer.py create mode 100644 paddlers/models/ppseg/models/upernet.py delete mode 100644 paddlers/models/ppseg/utils/config_check.py diff --git a/paddlers/models/ppseg/__init__.py b/paddlers/models/ppseg/__init__.py index 39b3b80..f5d3451 100644 --- a/paddlers/models/ppseg/__init__.py +++ b/paddlers/models/ppseg/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/paddlers/models/ppseg/core/infer.py b/paddlers/models/ppseg/core/infer.py index cacdb9d..aa6eb42 100644 --- a/paddlers/models/ppseg/core/infer.py +++ b/paddlers/models/ppseg/core/infer.py @@ -21,88 +21,16 @@ import paddle import paddle.nn.functional as F -def get_reverse_list(ori_shape, transforms): - """ - get reverse list of transform. - - Args: - ori_shape (list): Origin shape of image. - transforms (list): List of transform. 
- - Returns: - list: List of tuple, there are two format: - ('resize', (h, w)) The image shape before resize, - ('padding', (h, w)) The image shape before padding. - """ - reverse_list = [] - h, w = ori_shape[0], ori_shape[1] - for op in transforms: - if op.__class__.__name__ in ['Resize']: - reverse_list.append(('resize', (h, w))) - h, w = op.target_size[0], op.target_size[1] - if op.__class__.__name__ in ['ResizeByLong']: - reverse_list.append(('resize', (h, w))) - long_edge = max(h, w) - short_edge = min(h, w) - short_edge = int(round(short_edge * op.long_size / long_edge)) - long_edge = op.long_size - if h > w: - h = long_edge - w = short_edge - else: - w = long_edge - h = short_edge - if op.__class__.__name__ in ['ResizeByShort']: - reverse_list.append(('resize', (h, w))) - long_edge = max(h, w) - short_edge = min(h, w) - long_edge = int(round(long_edge * op.short_size / short_edge)) - short_edge = op.short_size - if h > w: - h = long_edge - w = short_edge - else: - w = long_edge - h = short_edge - if op.__class__.__name__ in ['Pad']: - reverse_list.append(('padding', (h, w))) - w, h = op.target_size[0], op.target_size[1] - if op.__class__.__name__ in ['PadByAspectRatio']: - reverse_list.append(('padding', (h, w))) - ratio = w / h - if ratio == op.aspect_ratio: - pass - elif ratio > op.aspect_ratio: - h = int(w / op.aspect_ratio) - else: - w = int(h * op.aspect_ratio) - if op.__class__.__name__ in ['LimitLong']: - long_edge = max(h, w) - short_edge = min(h, w) - if ((op.max_long is not None) and (long_edge > op.max_long)): - reverse_list.append(('resize', (h, w))) - long_edge = op.max_long - short_edge = int(round(short_edge * op.max_long / long_edge)) - elif ((op.min_long is not None) and (long_edge < op.min_long)): - reverse_list.append(('resize', (h, w))) - long_edge = op.min_long - short_edge = int(round(short_edge * op.min_long / long_edge)) - if h > w: - h = long_edge - w = short_edge - else: - w = long_edge - h = short_edge - return reverse_list - - 
-def reverse_transform(pred, ori_shape, transforms, mode='nearest'): +def reverse_transform(pred, trans_info, mode='nearest'): """recover pred to origin shape""" - reverse_list = get_reverse_list(ori_shape, transforms) intTypeList = [paddle.int8, paddle.int16, paddle.int32, paddle.int64] dtype = pred.dtype - for item in reverse_list[::-1]: - if item[0] == 'resize': + for item in trans_info[::-1]: + if isinstance(item[0], list): + trans_mode = item[0][0] + else: + trans_mode = item[0] + if trans_mode == 'resize': h, w = item[1][0], item[1][1] if paddle.get_device() == 'cpu' and dtype in intTypeList: pred = paddle.cast(pred, 'float32') @@ -110,7 +38,7 @@ def reverse_transform(pred, ori_shape, transforms, mode='nearest'): pred = paddle.cast(pred, dtype) else: pred = F.interpolate(pred, (h, w), mode=mode) - elif item[0] == 'padding': + elif trans_mode == 'padding': h, w = item[1][0], item[1][1] pred = pred[:, :, 0:h, 0:w] else: @@ -205,8 +133,7 @@ def slide_inference(model, im, crop_size, stride): def inference(model, im, - ori_shape=None, - transforms=None, + trans_info=None, is_slide=False, stride=None, crop_size=None): @@ -216,8 +143,7 @@ def inference(model, Args: model (paddle.nn.Layer): model to get logits of image. im (Tensor): the input image. - ori_shape (list): Origin shape of image. - transforms (list): Transforms for image. + trans_info (list): Image shape informating changed process. Default: None. is_slide (bool): Whether to infer by sliding window. Default: False. crop_size (tuple|list). The size of sliding window, (w, h). It should be probided if is_slide is True. stride (tuple|list). The size of stride, (w, h). It should be probided if is_slide is True. 
@@ -239,8 +165,8 @@ def inference(model, logit = slide_inference(model, im, crop_size=crop_size, stride=stride) if hasattr(model, 'data_format') and model.data_format == 'NHWC': logit = logit.transpose((0, 3, 1, 2)) - if ori_shape is not None: - logit = reverse_transform(logit, ori_shape, transforms, mode='bilinear') + if trans_info is not None: + logit = reverse_transform(logit, trans_info, mode='bilinear') pred = paddle.argmax(logit, axis=1, keepdim=True, dtype='int32') return pred, logit else: @@ -249,8 +175,7 @@ def inference(model, def aug_inference(model, im, - ori_shape, - transforms, + trans_info, scales=1.0, flip_horizontal=False, flip_vertical=False, @@ -263,8 +188,7 @@ def aug_inference(model, Args: model (paddle.nn.Layer): model to get logits of image. im (Tensor): the input image. - ori_shape (list): Origin shape of image. - transforms (list): Transforms for image. + trans_info (list): Transforms for image. scales (float|tuple|list): Scales for resize. Default: 1. flip_horizontal (bool): Whether to flip horizontally. Default: False. flip_vertical (bool): Whether to flip vertically. Default: False. 
@@ -302,8 +226,7 @@ def aug_inference(model, logit = F.softmax(logit, axis=1) final_logit = final_logit + logit - final_logit = reverse_transform( - final_logit, ori_shape, transforms, mode='bilinear') + final_logit = reverse_transform(final_logit, trans_info, mode='bilinear') pred = paddle.argmax(final_logit, axis=1, keepdim=True, dtype='int32') return pred, final_logit diff --git a/paddlers/models/ppseg/core/predict.py b/paddlers/models/ppseg/core/predict.py index 9880a3b..98097c7 100644 --- a/paddlers/models/ppseg/core/predict.py +++ b/paddlers/models/ppseg/core/predict.py @@ -19,9 +19,9 @@ import cv2 import numpy as np import paddle -from paddlers.models.ppseg import utils -from paddlers.models.ppseg.core import infer -from paddlers.models.ppseg.utils import logger, progbar, visualize +from paddleseg import utils +from paddleseg.core import infer +from paddleseg.utils import logger, progbar, visualize def mkdir(path): @@ -36,6 +36,15 @@ def partition_list(arr, m): return [arr[i:i + n] for i in range(0, len(arr), n)] +def preprocess(im_path, transforms): + data = {} + data['img'] = im_path + data = transforms(data) + data['img'] = data['img'][np.newaxis, ...] + data['img'] = paddle.to_tensor(data['img']) + return data + + def predict(model, model_path, transforms, @@ -89,18 +98,13 @@ def predict(model, color_map = visualize.get_color_map_list(256, custom_color=custom_color) with paddle.no_grad(): for i, im_path in enumerate(img_lists[local_rank]): - im = cv2.imread(im_path) - ori_shape = im.shape[:2] - im, _ = transforms(im) - im = im[np.newaxis, ...] 
- im = paddle.to_tensor(im) + data = preprocess(im_path, transforms) if aug_pred: pred, _ = infer.aug_inference( model, - im, - ori_shape=ori_shape, - transforms=transforms.transforms, + data['img'], + trans_info=data['trans_info'], scales=scales, flip_horizontal=flip_horizontal, flip_vertical=flip_vertical, @@ -110,9 +114,8 @@ def predict(model, else: pred, _ = infer.inference( model, - im, - ori_shape=ori_shape, - transforms=transforms.transforms, + data['img'], + trans_info=data['trans_info'], is_slide=is_slide, stride=stride, crop_size=crop_size) @@ -141,9 +144,4 @@ def predict(model, mkdir(pred_saved_path) pred_mask.save(pred_saved_path) - # pred_im = utils.visualize(im_path, pred, weight=0.0) - # pred_saved_path = os.path.join(pred_saved_dir, im_file) - # mkdir(pred_saved_path) - # cv2.imwrite(pred_saved_path, pred_im) - progbar_pred.update(i + 1) diff --git a/paddlers/models/ppseg/core/train.py b/paddlers/models/ppseg/core/train.py index 09c3810..fae72d2 100644 --- a/paddlers/models/ppseg/core/train.py +++ b/paddlers/models/ppseg/core/train.py @@ -20,10 +20,9 @@ import shutil import paddle import paddle.nn.functional as F -from paddlers.models.ppseg.utils import (TimeAverager, calculate_eta, resume, - logger, worker_init_fn, train_profiler, - op_flops_funs) -from paddlers.models.ppseg.core.val import evaluate +from paddleseg.utils import (TimeAverager, calculate_eta, resume, logger, + worker_init_fn, train_profiler, op_flops_funs) +from paddleseg.core.val import evaluate def check_logits_losses(logits_list, losses): @@ -35,17 +34,15 @@ def check_logits_losses(logits_list, losses): .format(len_logits, len_losses)) -def loss_computation(logits_list, labels, losses, edges=None): +def loss_computation(logits_list, labels, edges, losses): check_logits_losses(logits_list, losses) loss_list = [] for i in range(len(logits_list)): logits = logits_list[i] loss_i = losses['types'][i] coef_i = losses['coef'][i] - - if loss_i.__class__.__name__ in ('BCELoss', 'FocalLoss' 
- ) and loss_i.edge_label: - # If use edges as labels According to loss type. + if loss_i.__class__.__name__ in ('BCELoss', ) and loss_i.edge_label: + # Use edges as labels According to loss type. loss_list.append(coef_i * loss_i(logits, edges)) elif loss_i.__class__.__name__ == 'MixedLoss': mixed_loss_list = loss_i(logits, labels) @@ -75,13 +72,14 @@ def train(model, keep_checkpoint_max=5, test_config=None, precision='fp32', + amp_level='O1', profiler_options=None, to_static_training=False): """ Launch training. Args: - model(nn.Layer): A sementic segmentation model. + model(nn.Layer): A semantic segmentation model. train_dataset (paddle.io.Dataset): Used to read and process training datasets. val_dataset (paddle.io.Dataset, optional): Used to read and process validation datasets. optimizer (paddle.optimizer.Optimizer): The optimizer. @@ -98,6 +96,9 @@ def train(model, keep_checkpoint_max (int, optional): Maximum number of checkpoints to save. Default: 5. test_config(dict, optional): Evaluation config. precision (str, optional): Use AMP if precision='fp16'. If precision='fp32', the training is normal. + amp_level (str, optional): Auto mixed precision level. Accepted values are “O1” and “O2”: O1 represent mixed precision, + the input data type of each operator will be casted by white_list and black_list; O2 represent Pure fp16, all operators + parameters and input data will be casted to fp16, except operators in black_list, don’t support fp16 kernel and batchnorm. Default is O1(amp) profiler_options (str, optional): The option of train profiler. to_static_training (bool, optional): Whether to use @to_static for training. """ @@ -112,7 +113,18 @@ def train(model, if not os.path.isdir(save_dir): if os.path.exists(save_dir): os.remove(save_dir) - os.makedirs(save_dir) + os.makedirs(save_dir, exist_ok=True) + + # use amp + if precision == 'fp16': + logger.info('use AMP to train. 
AMP level = {}'.format(amp_level)) + scaler = paddle.amp.GradScaler(init_loss_scaling=1024) + if amp_level == 'O2': + model, optimizer = paddle.amp.decorate( + models=model, + optimizers=optimizer, + level='O2', + save_dtype='float32') if nranks > 1: paddle.distributed.fleet.init(is_collective=True) @@ -130,18 +142,13 @@ def train(model, return_list=True, worker_init_fn=worker_init_fn, ) - # use amp - if precision == 'fp16': - logger.info('use amp to train') - scaler = paddle.amp.GradScaler(init_loss_scaling=1024) - if use_vdl: from visualdl import LogWriter log_writer = LogWriter(save_dir) if to_static_training: model = paddle.jit.to_static(model) - logger.info("Successfully to apply @to_static") + logger.info("Successfully applied @to_static") avg_loss = 0.0 avg_loss_list = [] @@ -164,30 +171,29 @@ def train(model, else: break reader_cost_averager.record(time.time() - batch_start) - images = data[0] - labels = data[1].astype('int64') + images = data['img'] + labels = data['label'].astype('int64') edges = None - if len(data) == 3: - edges = data[2].astype('int64') + if 'edge' in data.keys(): + edges = data['edge'].astype('int64') if hasattr(model, 'data_format') and model.data_format == 'NHWC': images = images.transpose((0, 2, 3, 1)) if precision == 'fp16': with paddle.amp.auto_cast( + level=amp_level, enable=True, custom_white_list={ "elementwise_add", "batch_norm", "sync_batch_norm" }, custom_black_list={'bilinear_interp_v2'}): - if nranks > 1: - logits_list = ddp_model(images) - else: - logits_list = model(images) + logits_list = ddp_model(images) if nranks > 1 else model( + images) loss_list = loss_computation( logits_list=logits_list, labels=labels, - losses=losses, - edges=edges) + edges=edges, + losses=losses) loss = sum(loss_list) scaled = scaler.scale(loss) # scale the loss @@ -197,15 +203,12 @@ def train(model, else: scaler.minimize(optimizer, scaled) # update parameters else: - if nranks > 1: - logits_list = ddp_model(images) - else: - logits_list = 
model(images) + logits_list = ddp_model(images) if nranks > 1 else model(images) loss_list = loss_computation( logits_list=logits_list, labels=labels, - losses=losses, - edges=edges) + edges=edges, + losses=losses) loss = sum(loss_list) loss.backward() # if the optimizer is ReduceOnPlateau, the loss is the one which has been pass into step. @@ -278,7 +281,12 @@ def train(model, test_config = {} mean_iou, acc, _, _, _ = evaluate( - model, val_dataset, num_workers=num_workers, **test_config) + model, + val_dataset, + num_workers=num_workers, + precision=precision, + amp_level=amp_level, + **test_config) model.train() @@ -314,7 +322,7 @@ def train(model, batch_start = time.time() # Calculate flops. - if local_rank == 0: + if local_rank == 0 and not (precision == 'fp16' and amp_level == 'O2'): _, c, h, w = images.shape _ = paddle.flops( model, [1, c, h, w], diff --git a/paddlers/models/ppseg/core/val.py b/paddlers/models/ppseg/core/val.py index 6f40d69..80a820b 100644 --- a/paddlers/models/ppseg/core/val.py +++ b/paddlers/models/ppseg/core/val.py @@ -19,8 +19,8 @@ import time import paddle import paddle.nn.functional as F -from paddlers.models.ppseg.utils import metrics, TimeAverager, calculate_eta, logger, progbar -from paddlers.models.ppseg.core import infer +from paddleseg.utils import metrics, TimeAverager, calculate_eta, logger, progbar +from paddleseg.core import infer np.set_printoptions(suppress=True) @@ -34,6 +34,8 @@ def evaluate(model, is_slide=False, stride=None, crop_size=None, + precision='fp32', + amp_level='O1', num_workers=0, print_detail=True, auc_roc=False): @@ -41,7 +43,7 @@ def evaluate(model, Launch evalution. Args: - model(nn.Layer): A sementic segmentation model. + model(nn.Layer): A semantic segmentation model. eval_dataset (paddle.io.Dataset): Used to read and process validation datasets. aug_eval (bool, optional): Whether to use mulit-scales and flip augment for evaluation. Default: False. scales (list|float, optional): Scales for augment. 
It is valid when `aug_eval` is True. Default: 1.0. @@ -52,6 +54,8 @@ def evaluate(model, It should be provided when `is_slide` is True. crop_size (tuple|list, optional): The crop size of sliding window, the first is width and the second is height. It should be provided when `is_slide` is True. + precision (str, optional): Use AMP if precision='fp16'. If precision='fp32', the evaluation is normal. + amp_level (str, optional): Auto mixed precision level. Accepted values are “O1” and “O2”: O1 represent mixed precision, the input data type of each operator will be casted by white_list and black_list; O2 represent Pure fp16, all operators parameters and input data will be casted to fp16, except operators in black_list, don’t support fp16 kernel and batchnorm. Default is O1(amp) num_workers (int, optional): Num workers for data loader. Default: 0. print_detail (bool, optional): Whether to print detailed information about the evaluation process. Default: True. auc_roc(bool, optional): whether add auc_roc metric @@ -93,32 +97,66 @@ def evaluate(model, batch_cost_averager = TimeAverager() batch_start = time.time() with paddle.no_grad(): - for iter, (im, label) in enumerate(loader): + for iter, data in enumerate(loader): reader_cost_averager.record(time.time() - batch_start) - label = label.astype('int64') + label = data['label'].astype('int64') - ori_shape = label.shape[-2:] if aug_eval: - pred, logits = infer.aug_inference( - model, - im, - ori_shape=ori_shape, - transforms=eval_dataset.transforms.transforms, - scales=scales, - flip_horizontal=flip_horizontal, - flip_vertical=flip_vertical, - is_slide=is_slide, - stride=stride, - crop_size=crop_size) + if precision == 'fp16': + with paddle.amp.auto_cast( + level=amp_level, + enable=True, + custom_white_list={ + "elementwise_add", "batch_norm", + "sync_batch_norm" + }, + custom_black_list={'bilinear_interp_v2'}): + pred, logits = infer.aug_inference( + model, + data['img'], + trans_info=data['trans_info'], + scales=scales, 
+ flip_horizontal=flip_horizontal, + flip_vertical=flip_vertical, + is_slide=is_slide, + stride=stride, + crop_size=crop_size) + else: + pred, logits = infer.aug_inference( + model, + data['img'], + trans_info=data['trans_info'], + scales=scales, + flip_horizontal=flip_horizontal, + flip_vertical=flip_vertical, + is_slide=is_slide, + stride=stride, + crop_size=crop_size) else: - pred, logits = infer.inference( - model, - im, - ori_shape=ori_shape, - transforms=eval_dataset.transforms.transforms, - is_slide=is_slide, - stride=stride, - crop_size=crop_size) + if precision == 'fp16': + with paddle.amp.auto_cast( + level=amp_level, + enable=True, + custom_white_list={ + "elementwise_add", "batch_norm", + "sync_batch_norm" + }, + custom_black_list={'bilinear_interp_v2'}): + pred, logits = infer.inference( + model, + data['img'], + trans_info=data['trans_info'], + is_slide=is_slide, + stride=stride, + crop_size=crop_size) + else: + pred, logits = infer.inference( + model, + data['img'], + trans_info=data['trans_info'], + is_slide=is_slide, + stride=stride, + crop_size=crop_size) intersect_area, pred_area, label_area = metrics.calculate_area( pred, @@ -175,12 +213,12 @@ def evaluate(model, batch_cost_averager.reset() batch_start = time.time() - class_iou, miou = metrics.mean_iou(intersect_area_all, pred_area_all, - label_area_all) - class_acc, acc = metrics.accuracy(intersect_area_all, pred_area_all) - kappa = metrics.kappa(intersect_area_all, pred_area_all, label_area_all) - class_dice, mdice = metrics.dice(intersect_area_all, pred_area_all, - label_area_all) + metrics_input = (intersect_area_all, pred_area_all, label_area_all) + class_iou, miou = metrics.mean_iou(*metrics_input) + acc, class_precision, class_recall = metrics.class_measurement( + *metrics_input) + kappa = metrics.kappa(*metrics_input) + class_dice, mdice = metrics.dice(*metrics_input) if auc_roc: auc_roc = metrics.auc_roc( @@ -193,5 +231,7 @@ def evaluate(model, infor = infor + auc_infor if auc_roc else 
infor logger.info(infor) logger.info("[EVAL] Class IoU: \n" + str(np.round(class_iou, 4))) - logger.info("[EVAL] Class Acc: \n" + str(np.round(class_acc, 4))) - return miou, acc, class_iou, class_acc, kappa + logger.info("[EVAL] Class Precision: \n" + str( + np.round(class_precision, 4))) + logger.info("[EVAL] Class Recall: \n" + str(np.round(class_recall, 4))) + return miou, acc, class_iou, class_precision, kappa diff --git a/paddlers/models/ppseg/cvlibs/callbacks.py b/paddlers/models/ppseg/cvlibs/callbacks.py index 075e1eb..1188b2c 100644 --- a/paddlers/models/ppseg/cvlibs/callbacks.py +++ b/paddlers/models/ppseg/cvlibs/callbacks.py @@ -19,8 +19,8 @@ import numpy as np import paddle from paddle.distributed.parallel import ParallelEnv from visualdl import LogWriter -from paddlers.models.ppseg.utils.progbar import Progbar -import paddlers.models.ppseg.utils.logger as logger +from paddleseg.utils.progbar import Progbar +import paddleseg.utils.logger as logger class CallbackList(object): diff --git a/paddlers/models/ppseg/cvlibs/config.py b/paddlers/models/ppseg/cvlibs/config.py index 7d66c60..af9c76c 100644 --- a/paddlers/models/ppseg/cvlibs/config.py +++ b/paddlers/models/ppseg/cvlibs/config.py @@ -15,12 +15,15 @@ import codecs import os from typing import Any, Dict, Generic +import warnings +from ast import literal_eval import paddle import yaml +import six -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.utils import logger +from paddleseg.cvlibs import manager +from paddleseg.utils import logger class Config(object): @@ -51,7 +54,7 @@ class Config(object): Examples: - from paddlers.models.ppseg.cvlibs.config import Config + from paddleseg.cvlibs.config import Config # Create a cfg object with yaml file path. 
cfg = Config(yaml_cfg_path) @@ -69,7 +72,8 @@ class Config(object): path: str, learning_rate: float=None, batch_size: int=None, - iters: int=None): + iters: int=None, + opts: list=None): if not path: raise ValueError('Please specify the configuration file path.') @@ -84,7 +88,18 @@ class Config(object): raise RuntimeError('Config file should in yaml format!') self.update( - learning_rate=learning_rate, batch_size=batch_size, iters=iters) + learning_rate=learning_rate, + batch_size=batch_size, + iters=iters, + opts=opts) + + model_cfg = self.dic.get('model', None) + if model_cfg is None: + raise RuntimeError('No model specified in the configuration file.') + if (not self.train_dataset_config) and (not self.val_dataset_config): + raise ValueError( + 'One of `train_dataset` or `val_dataset should be given, but there are none.' + ) def _update_dic(self, dic, base_dic): """ @@ -121,7 +136,8 @@ class Config(object): def update(self, learning_rate: float=None, batch_size: int=None, - iters: int=None): + iters: int=None, + opts: list=None): '''Update config''' if learning_rate: if 'lr_scheduler' in self.dic: @@ -135,6 +151,27 @@ class Config(object): if iters: self.dic['iters'] = iters + # fix parameters by --opts of command + if opts is not None: + if len(opts) % 2 != 0 or len(opts) == 0: + raise ValueError( + "Command line options config `--opts` format error! It should be even length like: k1 v1 k2 v2 ... Please check it: {}". 
+ format(opts)) + for key, value in zip(opts[0::2], opts[1::2]): + if isinstance(value, six.string_types): + try: + value = literal_eval(value) + except ValueError: + pass + except SyntaxError: + pass + key_list = key.split('.') + dic = self.dic + for subkey in key_list[:-1]: + dic.setdefault(subkey, dict()) + dic = dic[subkey] + dic[key_list[-1]] = value + @property def batch_size(self) -> int: return self.dic.get('batch_size', 1) @@ -153,13 +190,32 @@ class Config(object): 'No `lr_scheduler` specified in the configuration file.') params = self.dic.get('lr_scheduler') + use_warmup = False + if 'warmup_iters' in params: + use_warmup = True + warmup_iters = params.pop('warmup_iters') + assert 'warmup_start_lr' in params, \ + "When use warmup, please set warmup_start_lr and warmup_iters in lr_scheduler" + warmup_start_lr = params.pop('warmup_start_lr') + end_lr = params['learning_rate'] + lr_type = params.pop('type') if lr_type == 'PolynomialDecay': - params.setdefault('decay_steps', self.iters) + iters = self.iters - warmup_iters if use_warmup else self.iters + iters = max(iters, 1) + params.setdefault('decay_steps', iters) params.setdefault('end_lr', 0) params.setdefault('power', 0.9) + lr_sche = getattr(paddle.optimizer.lr, lr_type)(**params) + + if use_warmup: + lr_sche = paddle.optimizer.lr.LinearWarmup( + learning_rate=lr_sche, + warmup_steps=warmup_iters, + start_lr=warmup_start_lr, + end_lr=end_lr) - return getattr(paddle.optimizer.lr, lr_type)(**params) + return lr_sche @property def learning_rate(self) -> paddle.optimizer.lr.LRScheduler: @@ -202,15 +258,33 @@ class Config(object): args = self.optimizer_args optimizer_type = args.pop('type') + params = self.model.parameters() + if 'backbone_lr_mult' in args: + if not hasattr(self.model, 'backbone'): + logger.warning('The backbone_lr_mult is not effective because' + ' the model does not have backbone') + else: + backbone_lr_mult = args.pop('backbone_lr_mult') + backbone_params = 
self.model.backbone.parameters() + backbone_params_id = [id(x) for x in backbone_params] + other_params = [ + x for x in params if id(x) not in backbone_params_id + ] + params = [{ + 'params': backbone_params, + 'learning_rate': backbone_lr_mult + }, { + 'params': other_params + }] + if optimizer_type == 'sgd': - return paddle.optimizer.Momentum( - lr, parameters=self.model.parameters(), **args) + return paddle.optimizer.Momentum(lr, parameters=params, **args) elif optimizer_type == 'adam': - return paddle.optimizer.Adam( - lr, parameters=self.model.parameters(), **args) + return paddle.optimizer.Adam(lr, parameters=params, **args) elif optimizer_type in paddle.optimizer.__all__: - return getattr(paddle.optimizer, optimizer_type)( - lr, parameters=self.model.parameters(), **args) + return getattr(paddle.optimizer, optimizer_type)(lr, + parameters=params, + **args) raise RuntimeError('Unknown optimizer type {}.'.format(optimizer_type)) @@ -295,24 +369,6 @@ class Config(object): @property def model(self) -> paddle.nn.Layer: model_cfg = self.dic.get('model').copy() - if not model_cfg: - raise RuntimeError('No model specified in the configuration file.') - if not 'num_classes' in model_cfg: - num_classes = None - if self.train_dataset_config: - if hasattr(self.train_dataset_class, 'NUM_CLASSES'): - num_classes = self.train_dataset_class.NUM_CLASSES - elif hasattr(self.train_dataset, 'num_classes'): - num_classes = self.train_dataset.num_classes - elif self.val_dataset_config: - if hasattr(self.val_dataset_class, 'NUM_CLASSES'): - num_classes = self.val_dataset_class.NUM_CLASSES - elif hasattr(self.val_dataset, 'num_classes'): - num_classes = self.val_dataset.num_classes - - if num_classes is not None: - model_cfg['num_classes'] = num_classes - if not self._model: self._model = self._load_object(model_cfg) return self._model @@ -401,3 +457,94 @@ class Config(object): def __str__(self) -> str: return yaml.dump(self.dic) + + @property + def val_transforms(self) -> list: + 
"""Get val_transform from val_dataset""" + _val_dataset = self.val_dataset_config + if not _val_dataset: + return [] + _transforms = _val_dataset.get('transforms', []) + transforms = [] + for i in _transforms: + transforms.append(self._load_object(i)) + return transforms + + def check_sync_info(self) -> None: + """ + Check and sync the info, such as num_classes and img_channels, + between the config of model, train_dataset and val_dataset. + """ + self._check_sync_num_classes() + self._check_sync_img_channels() + + def _check_sync_num_classes(self): + num_classes_set = set() + + if self.dic['model'].get('num_classes', None) is not None: + num_classes_set.add(self.dic['model'].get('num_classes')) + if self.train_dataset_config: + if hasattr(self.train_dataset_class, 'NUM_CLASSES'): + num_classes_set.add(self.train_dataset_class.NUM_CLASSES) + elif 'num_classes' in self.train_dataset_config: + num_classes_set.add(self.train_dataset_config['num_classes']) + if self.val_dataset_config: + if hasattr(self.val_dataset_class, 'NUM_CLASSES'): + num_classes_set.add(self.val_dataset_class.NUM_CLASSES) + elif 'num_classes' in self.val_dataset_config: + num_classes_set.add(self.val_dataset_config['num_classes']) + + if len(num_classes_set) == 0: + raise ValueError( + '`num_classes` is not found. Please set it in model, train_dataset or val_dataset' + ) + elif len(num_classes_set) > 1: + raise ValueError( + '`num_classes` is not consistent: {}. 
Please set it consistently in model or train_dataset or val_dataset' + .format(num_classes_set)) + + num_classes = num_classes_set.pop() + self.dic['model']['num_classes'] = num_classes + if self.train_dataset_config and \ + (not hasattr(self.train_dataset_class, 'NUM_CLASSES')): + self.dic['train_dataset']['num_classes'] = num_classes + if self.val_dataset_config and \ + (not hasattr(self.val_dataset_class, 'NUM_CLASSES')): + self.dic['val_dataset']['num_classes'] = num_classes + + def _check_sync_img_channels(self): + img_channels_set = set() + model_cfg = self.dic['model'] + + # If the model has backbone, in_channels is the input params of backbone. + # Otherwise, in_channels is the input params of the model. + if 'backbone' in model_cfg: + x = model_cfg['backbone'].get('in_channels', None) + if x is not None: + img_channels_set.add(x) + elif model_cfg.get('in_channels', None) is not None: + img_channels_set.add(model_cfg.get('in_channels')) + if self.train_dataset_config and \ + ('img_channels' in self.train_dataset_config): + img_channels_set.add(self.train_dataset_config['img_channels']) + if self.val_dataset_config and \ + ('img_channels' in self.val_dataset_config): + img_channels_set.add(self.val_dataset_config['img_channels']) + + if len(img_channels_set) > 1: + raise ValueError( + '`img_channels` is not consistent: {}. 
Please set it consistently in model or train_dataset or val_dataset' + .format(img_channels_set)) + + img_channels = 3 if len(img_channels_set) == 0 \ + else img_channels_set.pop() + if 'backbone' in model_cfg: + self.dic['model']['backbone']['in_channels'] = img_channels + else: + self.dic['model']['in_channels'] = img_channels + if self.train_dataset_config and \ + self.train_dataset_config['type'] == "Dataset": + self.dic['train_dataset']['img_channels'] = img_channels + if self.val_dataset_config and \ + self.val_dataset_config['type'] == "Dataset": + self.dic['val_dataset']['img_channels'] = img_channels diff --git a/paddlers/models/ppseg/cvlibs/manager.py b/paddlers/models/ppseg/cvlibs/manager.py index 18c8e21..8437445 100644 --- a/paddlers/models/ppseg/cvlibs/manager.py +++ b/paddlers/models/ppseg/cvlibs/manager.py @@ -31,7 +31,7 @@ class ComponentManager: Examples 1: - from paddlers.models.ppseg.cvlibs.manager import ComponentManager + from paddleseg.cvlibs.manager import ComponentManager model_manager = ComponentManager() @@ -49,7 +49,7 @@ class ComponentManager: Examples 2: # Or an easier way, using it as a Python decorator, while just add it above the class declaration. 
- from paddlers.models.ppseg.cvlibs.manager import ComponentManager + from paddleseg.cvlibs.manager import ComponentManager model_manager = ComponentManager() diff --git a/paddlers/models/ppseg/cvlibs/param_init.py b/paddlers/models/ppseg/cvlibs/param_init.py index 2213a1a..59dce4d 100644 --- a/paddlers/models/ppseg/cvlibs/param_init.py +++ b/paddlers/models/ppseg/cvlibs/param_init.py @@ -24,7 +24,7 @@ def constant_init(param, **kwargs): Examples: - from paddlers.models.ppseg.cvlibs import param_init + from paddleseg.cvlibs import param_init import paddle.nn as nn linear = nn.Linear(2, 4) @@ -46,7 +46,7 @@ def normal_init(param, **kwargs): Examples: - from paddlers.models.ppseg.cvlibs import param_init + from paddleseg.cvlibs import param_init import paddle.nn as nn linear = nn.Linear(2, 4) @@ -79,7 +79,7 @@ def kaiming_normal_init(param, **kwargs): Examples: - from paddlers.models.ppseg.cvlibs import param_init + from paddleseg.cvlibs import param_init import paddle.nn as nn linear = nn.Linear(2, 4) @@ -109,7 +109,7 @@ def kaiming_uniform(param, **kwargs): Examples: - from paddlers.models.ppseg.cvlibs import param_init + from paddleseg.cvlibs import param_init import paddle.nn as nn linear = nn.Linear(2, 4) @@ -118,3 +118,29 @@ def kaiming_uniform(param, **kwargs): initializer = nn.initializer.KaimingUniform(**kwargs) initializer(param, param.block) + + +def xavier_uniform(param, **kwargs): + r""" + This implements the Xavier weight initializer from the paper + `Understanding the difficulty of training deep feedforward neural + networks `_ + by Xavier Glorot and Yoshua Bengio. + This initializer is designed to keep the scale of the gradients + approximately same in all the layers. In case of Uniform distribution, + the range is [-x, x], where + .. math:: + x = \sqrt{\frac{6.0}{fan\_in + fan\_out}} + Args: + param (Tensor): Tensor that needs to be initialized. 
+ + Examples: + + from paddleseg.cvlibs import param_init + import paddle.nn as nn + + linear = nn.Linear(2, 4) + param_init.xavier_uniform(linear.weight) + """ + initializer = nn.initializer.XavierUniform(**kwargs) + initializer(param, param.block) diff --git a/paddlers/models/ppseg/datasets/__init__.py b/paddlers/models/ppseg/datasets/__init__.py index ee79d0f..ad52685 100644 --- a/paddlers/models/ppseg/datasets/__init__.py +++ b/paddlers/models/ppseg/datasets/__init__.py @@ -27,3 +27,4 @@ from .drive import DRIVE from .hrf import HRF from .chase_db1 import CHASEDB1 from .pp_humanseg14k import PPHumanSeg14K +from .pssl import PSSLDataset diff --git a/paddlers/models/ppseg/datasets/ade.py b/paddlers/models/ppseg/datasets/ade.py index 9a9682d..8a9a2e9 100644 --- a/paddlers/models/ppseg/datasets/ade.py +++ b/paddlers/models/ppseg/datasets/ade.py @@ -17,12 +17,12 @@ import os import numpy as np from PIL import Image -from paddlers.models.ppseg.datasets import Dataset -from paddlers.models.ppseg.utils.download import download_file_and_uncompress -from paddlers.models.ppseg.utils import seg_env -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.transforms import Compose -import paddlers.models.ppseg.transforms.functional as F +from paddleseg.datasets import Dataset +from paddleseg.utils.download import download_file_and_uncompress +from paddleseg.utils import seg_env +from paddleseg.cvlibs import manager +from paddleseg.transforms import Compose +import paddleseg.transforms.functional as F URL = "http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip" @@ -89,23 +89,31 @@ class ADE20K(Dataset): self.file_list.append([img_path, label_path]) def __getitem__(self, idx): + data = {} + data['trans_info'] = [] image_path, label_path = self.file_list[idx] + data['img'] = image_path + data['gt_fields'] = [ + ] # If key in gt_fields, the data[key] have transforms synchronous. 
+ if self.mode == 'val': - im, _ = self.transforms(im=image_path) + data = self.transforms(data) label = np.asarray(Image.open(label_path)) # The class 0 is ignored. And it will equal to 255 after # subtracted 1, because the dtype of label is uint8. label = label - 1 label = label[np.newaxis, :, :] - return im, label + data['label'] = label + return data else: - im, label = self.transforms(im=image_path, label=label_path) - label = label - 1 + data['label'] = label_path + data['gt_fields'].append('label') + data = self.transforms(data) + data['label'] = data['label'] - 1 # Recover the ignore pixels adding by transform - label[label == 254] = 255 + data['label'][data['label'] == 254] = 255 if self.edge: edge_mask = F.mask_to_binary_edge( label, radius=2, num_classes=self.num_classes) - return im, label, edge_mask - else: - return im, label + data['edge'] = edge_mask + return data diff --git a/paddlers/models/ppseg/datasets/chase_db1.py b/paddlers/models/ppseg/datasets/chase_db1.py index 9ddec59..1b25380 100644 --- a/paddlers/models/ppseg/datasets/chase_db1.py +++ b/paddlers/models/ppseg/datasets/chase_db1.py @@ -14,11 +14,11 @@ import os -from paddlers.models.ppseg.utils.download import download_file_and_uncompress -from paddlers.models.ppseg.utils import seg_env -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.transforms import Compose -from paddlers.models.ppseg.datasets import Dataset +from paddleseg.utils.download import download_file_and_uncompress +from paddleseg.utils import seg_env +from paddleseg.cvlibs import manager +from paddleseg.transforms import Compose +from paddleseg.datasets import Dataset URL = 'https://bj.bcebos.com/paddleseg/dataset/chase_db1/chase_db1.zip' diff --git a/paddlers/models/ppseg/datasets/cityscapes.py b/paddlers/models/ppseg/datasets/cityscapes.py index c750bb2..564926e 100644 --- a/paddlers/models/ppseg/datasets/cityscapes.py +++ b/paddlers/models/ppseg/datasets/cityscapes.py @@ -15,9 +15,9 @@ import os 
import glob -from paddlers.models.ppseg.datasets import Dataset -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.transforms import Compose +from paddleseg.datasets import Dataset +from paddleseg.cvlibs import manager +from paddleseg.transforms import Compose @manager.DATASETS.add_component diff --git a/paddlers/models/ppseg/datasets/cocostuff.py b/paddlers/models/ppseg/datasets/cocostuff.py index 2b37176..ae66461 100644 --- a/paddlers/models/ppseg/datasets/cocostuff.py +++ b/paddlers/models/ppseg/datasets/cocostuff.py @@ -15,9 +15,9 @@ import os import glob -from paddlers.models.ppseg.datasets import Dataset -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.transforms import Compose +from paddleseg.datasets import Dataset +from paddleseg.cvlibs import manager +from paddleseg.transforms import Compose @manager.DATASETS.add_component diff --git a/paddlers/models/ppseg/datasets/dataset.py b/paddlers/models/ppseg/datasets/dataset.py index b4faf6a..d518f5b 100644 --- a/paddlers/models/ppseg/datasets/dataset.py +++ b/paddlers/models/ppseg/datasets/dataset.py @@ -18,9 +18,9 @@ import paddle import numpy as np from PIL import Image -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.transforms import Compose -import paddlers.models.ppseg.transforms.functional as F +from paddleseg.cvlibs import manager +from paddleseg.transforms import Compose +import paddleseg.transforms.functional as F @manager.DATASETS.add_component @@ -46,10 +46,10 @@ class Dataset(paddle.io.Dataset): Examples: - import paddlers.models.ppseg.transforms as T - from paddlers.models.ppseg.datasets import Dataset + import paddleseg.transforms as T + from paddleseg.datasets import Dataset - transforms = [T.RandomPadCrop(crop_size=(512,512)), T.Normalize()] + transforms = [T.RandomPaddingCrop(crop_size=(512,512)), T.Normalize()] dataset_root = 'dataset_root_path' train_path = 'train_path' num_classes = 2 @@ -62,10 +62,11 @@ 
class Dataset(paddle.io.Dataset): """ def __init__(self, - transforms, + mode, dataset_root, + transforms, num_classes, - mode='train', + img_channels=3, train_path=None, val_path=None, test_path=None, @@ -73,10 +74,11 @@ class Dataset(paddle.io.Dataset): ignore_index=255, edge=False): self.dataset_root = dataset_root - self.transforms = Compose(transforms) + self.transforms = Compose(transforms, img_channels=img_channels) self.file_list = list() self.mode = mode.lower() self.num_classes = num_classes + self.img_channels = img_channels self.ignore_index = ignore_index self.edge = edge @@ -84,13 +86,18 @@ class Dataset(paddle.io.Dataset): raise ValueError( "mode should be 'train', 'val' or 'test', but got {}.".format( self.mode)) - - if self.transforms is None: - raise ValueError("`transforms` is necessary, but it is None.") - if not os.path.exists(self.dataset_root): raise FileNotFoundError('there is not `dataset_root`: {}.'.format( self.dataset_root)) + if self.transforms is None: + raise ValueError("`transforms` is necessary, but it is None.") + if num_classes < 1: + raise ValueError( + "`num_classes` should be greater than 1, but got {}".format( + num_classes)) + if img_channels not in [1, 3]: + raise ValueError("`img_channels` should in [1, 3], but got {}". + format(img_channels)) if self.mode == 'train': if train_path is None: @@ -139,24 +146,25 @@ class Dataset(paddle.io.Dataset): self.file_list.append([image_path, label_path]) def __getitem__(self, idx): + data = {} + data['trans_info'] = [] image_path, label_path = self.file_list[idx] - if self.mode == 'test': - im, _ = self.transforms(im=image_path) - im = im[np.newaxis, ...] - return im, image_path - elif self.mode == 'val': - im, _ = self.transforms(im=image_path) - label = np.asarray(Image.open(label_path)) - label = label[np.newaxis, :, :] - return im, label + data['img'] = image_path + data['label'] = label_path + # If key in gt_fields, the data[key] have transforms synchronous. 
+ data['gt_fields'] = [] + if self.mode == 'val': + data = self.transforms(data) + data['label'] = data['label'][np.newaxis, :, :] + else: - im, label = self.transforms(im=image_path, label=label_path) + data['gt_fields'].append('label') + data = self.transforms(data) if self.edge: edge_mask = F.mask_to_binary_edge( - label, radius=2, num_classes=self.num_classes) - return im, label, edge_mask - else: - return im, label + data['label'], radius=2, num_classes=self.num_classes) + data['edge'] = edge_mask + return data def __len__(self): return len(self.file_list) diff --git a/paddlers/models/ppseg/datasets/drive.py b/paddlers/models/ppseg/datasets/drive.py index f4180fc..8984aa0 100644 --- a/paddlers/models/ppseg/datasets/drive.py +++ b/paddlers/models/ppseg/datasets/drive.py @@ -14,11 +14,11 @@ import os -from paddlers.models.ppseg.utils.download import download_file_and_uncompress -from paddlers.models.ppseg.utils import seg_env -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.transforms import Compose -from paddlers.models.ppseg.datasets import Dataset +from paddleseg.utils.download import download_file_and_uncompress +from paddleseg.utils import seg_env +from paddleseg.cvlibs import manager +from paddleseg.transforms import Compose +from paddleseg.datasets import Dataset URL = 'https://bj.bcebos.com/paddleseg/dataset/drive/drive.zip' diff --git a/paddlers/models/ppseg/datasets/eg1800.py b/paddlers/models/ppseg/datasets/eg1800.py index b43850a..9005083 100644 --- a/paddlers/models/ppseg/datasets/eg1800.py +++ b/paddlers/models/ppseg/datasets/eg1800.py @@ -18,12 +18,12 @@ import copy import cv2 import numpy as np -from paddlers.models.ppseg.datasets import Dataset -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.transforms import Compose -from paddlers.models.ppseg.utils.download import download_file_and_uncompress -from paddlers.models.ppseg.utils import seg_env -import 
paddlers.models.ppseg.transforms.functional as F +from paddleseg.datasets import Dataset +from paddleseg.cvlibs import manager +from paddleseg.transforms import Compose +from paddleseg.utils.download import download_file_and_uncompress +from paddleseg.utils import seg_env +import paddleseg.transforms.functional as F URL = "https://paddleseg.bj.bcebos.com/dataset/EG1800.zip" diff --git a/paddlers/models/ppseg/datasets/hrf.py b/paddlers/models/ppseg/datasets/hrf.py index eadd8b2..fb378a7 100644 --- a/paddlers/models/ppseg/datasets/hrf.py +++ b/paddlers/models/ppseg/datasets/hrf.py @@ -14,11 +14,11 @@ import os -from paddlers.models.ppseg.utils.download import download_file_and_uncompress -from paddlers.models.ppseg.utils import seg_env -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.transforms import Compose -from paddlers.models.ppseg.datasets import Dataset +from paddleseg.utils.download import download_file_and_uncompress +from paddleseg.utils import seg_env +from paddleseg.cvlibs import manager +from paddleseg.transforms import Compose +from paddleseg.datasets import Dataset URL = 'https://bj.bcebos.com/paddleseg/dataset/hrf/hrf.zip' diff --git a/paddlers/models/ppseg/datasets/mini_deep_globe_road_extraction.py b/paddlers/models/ppseg/datasets/mini_deep_globe_road_extraction.py index 253967a..7180a9d 100644 --- a/paddlers/models/ppseg/datasets/mini_deep_globe_road_extraction.py +++ b/paddlers/models/ppseg/datasets/mini_deep_globe_road_extraction.py @@ -15,10 +15,10 @@ import os from .dataset import Dataset -from paddlers.models.ppseg.utils.download import download_file_and_uncompress -from paddlers.models.ppseg.utils import seg_env -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.transforms import Compose +from paddleseg.utils.download import download_file_and_uncompress +from paddleseg.utils import seg_env +from paddleseg.cvlibs import manager +from paddleseg.transforms import Compose URL = 
"https://paddleseg.bj.bcebos.com/dataset/MiniDeepGlobeRoadExtraction.zip" diff --git a/paddlers/models/ppseg/datasets/optic_disc_seg.py b/paddlers/models/ppseg/datasets/optic_disc_seg.py index 805c80d..36332b0 100644 --- a/paddlers/models/ppseg/datasets/optic_disc_seg.py +++ b/paddlers/models/ppseg/datasets/optic_disc_seg.py @@ -15,10 +15,10 @@ import os from .dataset import Dataset -from paddlers.models.ppseg.utils.download import download_file_and_uncompress -from paddlers.models.ppseg.utils import seg_env -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.transforms import Compose +from paddleseg.utils.download import download_file_and_uncompress +from paddleseg.utils import seg_env +from paddleseg.cvlibs import manager +from paddleseg.transforms import Compose URL = "https://paddleseg.bj.bcebos.com/dataset/optic_disc_seg.zip" diff --git a/paddlers/models/ppseg/datasets/pascal_context.py b/paddlers/models/ppseg/datasets/pascal_context.py index b7156a3..d76ce31 100644 --- a/paddlers/models/ppseg/datasets/pascal_context.py +++ b/paddlers/models/ppseg/datasets/pascal_context.py @@ -15,9 +15,9 @@ import os from PIL import Image -from paddlers.models.ppseg.datasets import Dataset -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.transforms import Compose +from paddleseg.datasets import Dataset +from paddleseg.cvlibs import manager +from paddleseg.transforms import Compose @manager.DATASETS.add_component diff --git a/paddlers/models/ppseg/datasets/pp_humanseg14k.py b/paddlers/models/ppseg/datasets/pp_humanseg14k.py index ba124a3..e809611 100644 --- a/paddlers/models/ppseg/datasets/pp_humanseg14k.py +++ b/paddlers/models/ppseg/datasets/pp_humanseg14k.py @@ -15,8 +15,8 @@ import os from .dataset import Dataset -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.transforms import Compose +from paddleseg.cvlibs import manager +from paddleseg.transforms import Compose 
@manager.DATASETS.add_component diff --git a/paddlers/models/ppseg/datasets/pssl.py b/paddlers/models/ppseg/datasets/pssl.py new file mode 100644 index 0000000..6ebe7fb --- /dev/null +++ b/paddlers/models/ppseg/datasets/pssl.py @@ -0,0 +1,135 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import numpy as np + +from paddleseg.datasets import Dataset +from paddleseg.cvlibs import manager +from paddleseg.transforms import Compose + + +@manager.DATASETS.add_component +class PSSLDataset(Dataset): + """ + The PSSL dataset for segmentation. PSSL is short for Pseudo Semantic Segmentation Labels, where the pseudo label + is computed by the Consensus explanation algorithm. + + The PSSL refers to "Distilling Ensemble of Explanations for Weakly-Supervised Pre-Training of Image Segmentation + Models" (https://arxiv.org/abs/2207.03335). + + The Consensus explanation refers to "Cross-Model Consensus of Explanations and Beyond for Image Classification + Models: An Empirical Study" (https://arxiv.org/abs/2109.00707). + + To use this dataset, we need to additionally prepare the orignal ImageNet dataset, which has the folder structure + as follows: + + imagenet_root + | + |--train + | |--n01440764 + | | |--n01440764_10026.JPEG + | | |--... + | |--nxxxxxxxx + | |--... + + where only the "train" set is needed. 
+ + The PSSL dataset has the folder structure as follows: + + pssl_root + | + |--train + | |--n01440764 + | | |--n01440764_10026.JPEG_eiseg.npz + | | |--... + | |--nxxxxxxxx + | |--... + | + |--imagenet_lsvrc_2015_synsets.txt + |--train.txt + + where "train.txt" and "imagenet_lsvrc_2015_synsets.txt" are included in the PSSL dataset. + + Args: + transforms (list): Transforms for image. + imagenet_root (str): The path to the original ImageNet dataset. + pssl_root (str): The path to the PSSL dataset. + mode (str, optional): Which part of dataset to use. it is one of ('train', 'val', 'test'). Default: 'train'. + edge (bool, optional): Whether to compute edge while training. Default: False. + """ + ignore_index = 1001 # 0~999 is target class, 1000 is bg + NUM_CLASSES = 1001 # consider target class and bg + + def __init__(self, + transforms, + imagenet_root, + pssl_root, + mode='train', + edge=False): + mode = mode.lower() + if mode not in ['train']: + raise ValueError("mode should be 'train', but got {}.".format(mode)) + if transforms is None: + raise ValueError("`transforms` is necessary, but it is None.") + + self.transforms = Compose(transforms) + self.mode = mode + self.edge = edge + + self.num_classes = self.NUM_CLASSES + self.ignore_index = self.num_classes # 1001 + self.file_list = [] + self.class_id_dict = {} + + if imagenet_root is None or not os.path.isdir(pssl_root): + raise ValueError( + "The dataset is not Found or the folder structure is nonconfoumance." 
+ ) + + train_list_file = os.path.join(pssl_root, "train.txt") + if not os.path.exists(train_list_file): + raise ValueError("Train list file isn't exists.") + for idx, line in enumerate(open(train_list_file)): + # line: train/n04118776/n04118776_45912.JPEG_eiseg.npz + label_path = line.strip() + img_path = label_path.split('.JPEG')[0] + '.JPEG' + label_path = os.path.join(pssl_root, label_path) + img_path = os.path.join(imagenet_root, img_path) + self.file_list.append([img_path, label_path]) + + # mapping class name to class id. + class_id_file = os.path.join(pssl_root, + "imagenet_lsvrc_2015_synsets.txt") + if not os.path.exists(class_id_file): + raise ValueError("Class id file isn't exists.") + for idx, line in enumerate(open(class_id_file)): + class_name = line.strip() + self.class_id_dict[class_name] = idx + + def __getitem__(self, idx): + image_path, label_path = self.file_list[idx] + + # transform label + class_name = (image_path.split('/')[-1]).split('_')[0] + class_id = self.class_id_dict[class_name] + + pssl_seg = np.load(label_path)['arr_0'] + gt_semantic_seg = np.zeros_like(pssl_seg, dtype=np.int64) + 1000 + # [0, 999] for imagenet classes, 1000 for background, others(-1) will be ignored during training. 
+ gt_semantic_seg[pssl_seg == 1] = class_id + + im, label = self.transforms(im=image_path, label=gt_semantic_seg) + + return im, label diff --git a/paddlers/models/ppseg/datasets/stare.py b/paddlers/models/ppseg/datasets/stare.py index 1acf64e..5de8be5 100644 --- a/paddlers/models/ppseg/datasets/stare.py +++ b/paddlers/models/ppseg/datasets/stare.py @@ -14,11 +14,11 @@ import os -from paddlers.models.ppseg.utils.download import download_file_and_uncompress -from paddlers.models.ppseg.utils import seg_env -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.transforms import Compose -from paddlers.models.ppseg.datasets import Dataset +from paddleseg.utils.download import download_file_and_uncompress +from paddleseg.utils import seg_env +from paddleseg.cvlibs import manager +from paddleseg.transforms import Compose +from paddleseg.datasets import Dataset URL = 'https://bj.bcebos.com/paddleseg/dataset/stare/stare.zip' diff --git a/paddlers/models/ppseg/datasets/supervisely.py b/paddlers/models/ppseg/datasets/supervisely.py index 3508cdd..accfa46 100644 --- a/paddlers/models/ppseg/datasets/supervisely.py +++ b/paddlers/models/ppseg/datasets/supervisely.py @@ -18,12 +18,12 @@ import copy import cv2 import numpy as np -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.transforms import Compose -from paddlers.models.ppseg.datasets import Dataset -from paddlers.models.ppseg.utils.download import download_file_and_uncompress -from paddlers.models.ppseg.utils import seg_env -import paddlers.models.ppseg.transforms.functional as F +from paddleseg.cvlibs import manager +from paddleseg.transforms import Compose +from paddleseg.datasets import Dataset +from paddleseg.utils.download import download_file_and_uncompress +from paddleseg.utils import seg_env +import paddleseg.transforms.functional as F URL = "https://paddleseg.bj.bcebos.com/dataset/Supervisely_face.zip" diff --git a/paddlers/models/ppseg/datasets/voc.py 
b/paddlers/models/ppseg/datasets/voc.py index ffaf5d3..f48ad50 100644 --- a/paddlers/models/ppseg/datasets/voc.py +++ b/paddlers/models/ppseg/datasets/voc.py @@ -14,11 +14,11 @@ import os -from paddlers.models.ppseg.datasets import Dataset -from paddlers.models.ppseg.utils.download import download_file_and_uncompress -from paddlers.models.ppseg.utils import seg_env -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.transforms import Compose +from paddleseg.datasets import Dataset +from paddleseg.utils.download import download_file_and_uncompress +from paddleseg.utils import seg_env +from paddleseg.cvlibs import manager +from paddleseg.transforms import Compose URL = "http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar" diff --git a/paddlers/models/ppseg/models/__init__.py b/paddlers/models/ppseg/models/__init__.py index 4a7d535..a43505d 100644 --- a/paddlers/models/ppseg/models/__init__.py +++ b/paddlers/models/ppseg/models/__init__.py @@ -49,9 +49,18 @@ from .segnet import SegNet from .encnet import ENCNet from .hrnet_contrast import HRNetW48Contrast from .espnet import ESPNetV2 +from .pp_liteseg import PPLiteSeg from .dmnet import DMNet from .espnetv1 import ESPNetV1 from .enet import ENet from .bisenetv1 import BiseNetV1 from .fastfcn import FastFCN from .pfpnnet import PFPNNet +from .glore import GloRe +from .ddrnet import DDRNet_23 +from .ccnet import CCNet +from .mobileseg import MobileSeg +from .upernet import UPerNet +from .sinet import SINet +from .lraspp import LRASPP +from .topformer import TopFormer diff --git a/paddlers/models/ppseg/models/ann.py b/paddlers/models/ppseg/models/ann.py index 20b81b3..aa2af1e 100644 --- a/paddlers/models/ppseg/models/ann.py +++ b/paddlers/models/ppseg/models/ann.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.models import layers -from 
paddlers.models.ppseg.utils import utils +from paddleseg.cvlibs import manager +from paddleseg.models import layers +from paddleseg.utils import utils @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/attention_unet.py b/paddlers/models/ppseg/models/attention_unet.py index 8d9fd2d..9b05c87 100644 --- a/paddlers/models/ppseg/models/attention_unet.py +++ b/paddlers/models/ppseg/models/attention_unet.py @@ -14,9 +14,9 @@ import paddle import paddle.nn as nn -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.models import layers -from paddlers.models.ppseg import utils +from paddleseg.cvlibs import manager +from paddleseg.models import layers +from paddleseg import utils import numpy as np @@ -35,13 +35,13 @@ class AttentionUNet(nn.Layer): Args: num_classes (int): The unique number of target classes. + in_channels (int, optional): The channels of input image. Default: 3. pretrained (str, optional): The path or url of pretrained model. Default: None. 
""" - def __init__(self, num_classes, pretrained=None): + def __init__(self, num_classes, in_channels=3, pretrained=None): super().__init__() - n_channels = 3 - self.encoder = Encoder(n_channels, [64, 128, 256, 512]) + self.encoder = Encoder(in_channels, [64, 128, 256, 512]) filters = np.array([64, 128, 256, 512, 1024]) self.up5 = UpConv(ch_in=filters[4], ch_out=filters[3]) self.att5 = AttentionBlock( diff --git a/paddlers/models/ppseg/models/backbones/__init__.py b/paddlers/models/ppseg/models/backbones/__init__.py index 108f87d..a3ba15e 100644 --- a/paddlers/models/ppseg/models/backbones/__init__.py +++ b/paddlers/models/ppseg/models/backbones/__init__.py @@ -21,3 +21,7 @@ from .swin_transformer import * from .mobilenetv2 import * from .mix_transformer import * from .stdcnet import * +from .lite_hrnet import * +from .shufflenetv2 import * +from .ghostnet import * +from .top_transformer import * diff --git a/paddlers/models/ppseg/models/backbones/ghostnet.py b/paddlers/models/ppseg/models/backbones/ghostnet.py new file mode 100644 index 0000000..c545515 --- /dev/null +++ b/paddlers/models/ppseg/models/backbones/ghostnet.py @@ -0,0 +1,318 @@ +# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
# Code was based on https://github.com/huawei-noah/CV-Backbones/tree/master/ghostnet_pytorch

import math
import paddle
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn import Conv2D, BatchNorm, AdaptiveAvgPool2D, Linear
from paddle.regularizer import L2Decay
from paddle.nn.initializer import Uniform, KaimingNormal

from paddleseg.cvlibs import manager
from paddleseg.utils import utils, logger

__all__ = ["GhostNet_x0_5", "GhostNet_x1_0", "GhostNet_x1_3"]


def _join_name(prefix, suffix):
    """Return ``prefix + suffix``, or None when no prefix was supplied.

    Every layer in this module accepts ``name=None``; passing None through
    lets Paddle pick parameter names automatically instead of failing on
    ``None + str``.
    """
    return None if prefix is None else prefix + suffix


class ConvBNLayer(nn.Layer):
    """A bias-free Conv2D followed by BatchNorm (with optional activation).

    Args:
        in_channels (int): Number of input channels.
        out_channels (int): Number of output channels.
        kernel_size (int): Convolution kernel size; padding is
            ``(kernel_size - 1) // 2``.
        stride (int, optional): Convolution stride. Default: 1.
        groups (int, optional): Convolution groups. Default: 1.
        act (str|None, optional): Activation applied by the BatchNorm layer.
            Default: "relu".
        name (str|None, optional): Prefix for the parameter names. When None,
            the parameters are left unnamed. Default: None.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 groups=1,
                 act="relu",
                 name=None):
        super(ConvBNLayer, self).__init__()
        self._conv = Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=(kernel_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(
                initializer=KaimingNormal(),
                name=_join_name(name, "_weights")),
            bias_attr=False)
        bn_name = _join_name(name, "_bn")

        self._batch_norm = BatchNorm(
            num_channels=out_channels,
            act=act,
            param_attr=ParamAttr(
                name=_join_name(bn_name, "_scale"),
                regularizer=L2Decay(0.0)),
            bias_attr=ParamAttr(
                name=_join_name(bn_name, "_offset"),
                regularizer=L2Decay(0.0)),
            moving_mean_name=_join_name(bn_name, "_mean"),
            moving_variance_name=_join_name(bn_name, "_variance"))

    def forward(self, inputs):
        y = self._conv(inputs)
        y = self._batch_norm(y)
        return y


class SEBlock(nn.Layer):
    """Squeeze-and-excitation block that rescales the input channel-wise.

    The gate is computed as global average pooling -> Linear -> ReLU ->
    Linear -> clip to [0, 1], and multiplied onto the input.

    Args:
        num_channels (int): Number of input (and output) channels.
        reduction_ratio (int, optional): Channel reduction of the hidden
            Linear layer. Default: 4.
        name (str|None, optional): Prefix for the parameter names.
            Default: None.
    """

    def __init__(self, num_channels, reduction_ratio=4, name=None):
        super(SEBlock, self).__init__()
        self.pool2d_gap = AdaptiveAvgPool2D(1)
        self._num_channels = num_channels
        stdv = 1.0 / math.sqrt(num_channels * 1.0)
        med_ch = num_channels // reduction_ratio
        self.squeeze = Linear(
            num_channels,
            med_ch,
            weight_attr=ParamAttr(
                initializer=Uniform(-stdv, stdv),
                name=_join_name(name, "_1_weights")),
            bias_attr=ParamAttr(name=_join_name(name, "_1_offset")))
        stdv = 1.0 / math.sqrt(med_ch * 1.0)
        self.excitation = Linear(
            med_ch,
            num_channels,
            weight_attr=ParamAttr(
                initializer=Uniform(-stdv, stdv),
                name=_join_name(name, "_2_weights")),
            bias_attr=ParamAttr(name=_join_name(name, "_2_offset")))

    def forward(self, inputs):
        pool = self.pool2d_gap(inputs)
        pool = paddle.squeeze(pool, axis=[2, 3])
        squeeze = self.squeeze(pool)
        squeeze = F.relu(squeeze)
        excitation = self.excitation(squeeze)
        # The gate is a hard clip to [0, 1] rather than a sigmoid.
        excitation = paddle.clip(x=excitation, min=0, max=1)
        excitation = paddle.unsqueeze(excitation, axis=[2, 3])
        out = paddle.multiply(inputs, excitation)
        return out


class GhostModule(nn.Layer):
    """Ghost module: a primary conv plus a cheap depthwise conv, concatenated.

    Args:
        in_channels (int): Number of input channels.
        output_channels (int): Number of output channels.
        kernel_size (int, optional): Kernel size of the primary conv.
            Default: 1.
        ratio (int, optional): ``ceil(output_channels / ratio)`` channels come
            from the primary conv; the rest from the cheap operation.
            Default: 2.
        dw_size (int, optional): Kernel size of the cheap depthwise conv.
            Default: 3.
        stride (int, optional): Stride of the primary conv. Default: 1.
        relu (bool, optional): Whether both convs use ReLU. Default: True.
        name (str|None, optional): Prefix for the parameter names.
            Default: None.
    """

    def __init__(self,
                 in_channels,
                 output_channels,
                 kernel_size=1,
                 ratio=2,
                 dw_size=3,
                 stride=1,
                 relu=True,
                 name=None):
        super(GhostModule, self).__init__()
        init_channels = int(math.ceil(output_channels / ratio))
        new_channels = int(init_channels * (ratio - 1))
        self._output_channels = output_channels
        # When `ratio` does not divide `output_channels`, the concatenation
        # holds more channels than requested and must be truncated, as the
        # reference implementation does.
        self._needs_trim = (init_channels + new_channels) != output_channels
        self.primary_conv = ConvBNLayer(
            in_channels=in_channels,
            out_channels=init_channels,
            kernel_size=kernel_size,
            stride=stride,
            groups=1,
            act="relu" if relu else None,
            name=_join_name(name, "_primary_conv"))
        self.cheap_operation = ConvBNLayer(
            in_channels=init_channels,
            out_channels=new_channels,
            kernel_size=dw_size,
            stride=1,
            groups=init_channels,
            act="relu" if relu else None,
            name=_join_name(name, "_cheap_operation"))

    def forward(self, inputs):
        x = self.primary_conv(inputs)
        y = self.cheap_operation(x)
        out = paddle.concat([x, y], axis=1)
        if self._needs_trim:
            out = out[:, :self._output_channels]
        return out


class GhostBottleneck(nn.Layer):
    """Ghost bottleneck: GhostModule -> [depthwise conv] -> [SE] -> GhostModule.

    The result is added to a shortcut, which is the identity when
    ``stride == 1`` and ``in_channels == output_channels`` and a depthwise +
    pointwise conv pair otherwise.

    Args:
        in_channels (int): Number of input channels.
        hidden_dim (int): Number of channels of the expanded representation.
        output_channels (int): Number of output channels.
        kernel_size (int): Kernel size of the depthwise convs.
        stride (int): Stride of the block; the depthwise conv on the main
            path is only built when it equals 2.
        use_se (bool|int): Whether to insert an SEBlock.
        name (str|None, optional): Prefix for the parameter names.
            Default: None.
    """

    def __init__(self,
                 in_channels,
                 hidden_dim,
                 output_channels,
                 kernel_size,
                 stride,
                 use_se,
                 name=None):
        super(GhostBottleneck, self).__init__()
        self._stride = stride
        self._use_se = use_se
        self._num_channels = in_channels
        self._output_channels = output_channels
        self.ghost_module_1 = GhostModule(
            in_channels=in_channels,
            output_channels=hidden_dim,
            kernel_size=1,
            stride=1,
            relu=True,
            name=_join_name(name, "_ghost_module_1"))
        if stride == 2:
            self.depthwise_conv = ConvBNLayer(
                in_channels=hidden_dim,
                out_channels=hidden_dim,
                kernel_size=kernel_size,
                stride=stride,
                groups=hidden_dim,
                act=None,
                # looks strange due to an old typo, will be fixed later.
                name=_join_name(name, "_depthwise_depthwise"))
        if use_se:
            self.se_block = SEBlock(
                num_channels=hidden_dim, name=_join_name(name, "_se"))
        self.ghost_module_2 = GhostModule(
            in_channels=hidden_dim,
            output_channels=output_channels,
            kernel_size=1,
            relu=False,
            name=_join_name(name, "_ghost_module_2"))
        if stride != 1 or in_channels != output_channels:
            self.shortcut_depthwise = ConvBNLayer(
                in_channels=in_channels,
                out_channels=in_channels,
                kernel_size=kernel_size,
                stride=stride,
                groups=in_channels,
                act=None,
                # looks strange due to an old typo, will be fixed later.
                name=_join_name(name, "_shortcut_depthwise_depthwise"))
            self.shortcut_conv = ConvBNLayer(
                in_channels=in_channels,
                out_channels=output_channels,
                kernel_size=1,
                stride=1,
                groups=1,
                act=None,
                name=_join_name(name, "_shortcut_conv"))

    def forward(self, inputs):
        x = self.ghost_module_1(inputs)
        if self._stride == 2:
            x = self.depthwise_conv(x)
        if self._use_se:
            x = self.se_block(x)
        x = self.ghost_module_2(x)
        if self._stride == 1 and self._num_channels == self._output_channels:
            shortcut = inputs
        else:
            shortcut = self.shortcut_depthwise(inputs)
            shortcut = self.shortcut_conv(shortcut)
        return paddle.add(x=x, y=shortcut)


class GhostNet(nn.Layer):
    """GhostNet backbone returning four intermediate feature maps.

    Args:
        scale (float): Width multiplier applied to every channel count in the
            block configuration.
        in_channels (int, optional): The channels of input image. Default: 3.
        pretrained (str, optional): The path or url of pretrained model.
            Default: None.

    Attributes set by the constructor:
        feat_channels (list[int]): Channel counts of the returned features,
            in the order they are returned by ``forward``.
    """

    def __init__(self, scale, in_channels=3, pretrained=None):
        super(GhostNet, self).__init__()
        self.cfgs = [
            # k, t, c, SE, s
            [3, 16, 16, 0, 1],
            [3, 48, 24, 0, 2],
            [3, 72, 24, 0, 1],  # x4
            [5, 72, 40, 1, 2],
            [5, 120, 40, 1, 1],  # x8
            [3, 240, 80, 0, 2],
            [3, 200, 80, 0, 1],
            [3, 184, 80, 0, 1],
            [3, 184, 80, 0, 1],
            [3, 480, 112, 1, 1],
            [3, 672, 112, 1, 1],  # x16
            [5, 672, 160, 1, 2],
            [5, 960, 160, 0, 1],
            [5, 960, 160, 1, 1],
            [5, 960, 160, 0, 1],
            [5, 960, 160, 1, 1]  # x32
        ]
        self.scale = scale
        self.pretrained = pretrained

        output_channels = int(self._make_divisible(16 * self.scale, 4))
        self.conv1 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=output_channels,
            kernel_size=3,
            stride=2,
            groups=1,
            act="relu",
            name="conv1")

        # build inverted residual blocks
        self.out_index = [2, 4, 10, 15]
        self.feat_channels = []
        self.ghost_bottleneck_list = []
        for idx, (k, exp_size, c, use_se, s) in enumerate(self.cfgs):
            # Use a separate local so the `in_channels` argument is not
            # silently rebound inside the loop.
            block_in_channels = output_channels
            output_channels = int(self._make_divisible(c * self.scale, 4))
            hidden_dim = int(self._make_divisible(exp_size * self.scale, 4))
            ghost_bottleneck = self.add_sublayer(
                name="_ghostbottleneck_" + str(idx),
                sublayer=GhostBottleneck(
                    in_channels=block_in_channels,
                    hidden_dim=hidden_dim,
                    output_channels=output_channels,
                    kernel_size=k,
                    stride=s,
                    use_se=use_se,
                    name="_ghostbottleneck_" + str(idx)))
            self.ghost_bottleneck_list.append(ghost_bottleneck)
            if idx in self.out_index:
                self.feat_channels.append(output_channels)

        self.init_weight()

    def init_weight(self):
        """Load the pretrained weights when a path/url was given."""
        if self.pretrained is not None:
            utils.load_entire_model(self, self.pretrained)

    def forward(self, inputs):
        """Return the outputs of the bottlenecks listed in ``out_index``."""
        feat_list = []
        x = self.conv1(inputs)
        for idx, ghost_bottleneck in enumerate(self.ghost_bottleneck_list):
            x = ghost_bottleneck(x)
            if idx in self.out_index:
                feat_list.append(x)
        return feat_list

    def _make_divisible(self, v, divisor, min_value=None):
        """
        This function is taken from the original tf repo.
        It rounds `v` to the nearest multiple of `divisor` (never below
        `min_value`, which defaults to `divisor`), so that every layer has a
        channel number divisible by `divisor`.
        It can be seen here:
        https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
        """
        if min_value is None:
            min_value = divisor
        new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
        # Make sure that round down does not go down by more than 10%.
        if new_v < 0.9 * v:
            new_v += divisor
        return new_v


@manager.BACKBONES.add_component
def GhostNet_x0_5(**kwargs):
    model = GhostNet(scale=0.5, **kwargs)
    return model


@manager.BACKBONES.add_component
def GhostNet_x1_0(**kwargs):
    model = GhostNet(scale=1.0, **kwargs)
    return model


@manager.BACKBONES.add_component
def GhostNet_x1_3(**kwargs):
    model = GhostNet(scale=1.3, **kwargs)
    return model
@@ -56,6 +57,7 @@ class HRNet(nn.Layer): """ def __init__(self, + in_channels=3, pretrained=None, stage1_num_modules=1, stage1_num_blocks=(4, ), @@ -91,7 +93,7 @@ class HRNet(nn.Layer): self.feat_channels = [sum(stage4_num_channels)] self.conv_layer1_1 = layers.ConvBNReLU( - in_channels=3, + in_channels=in_channels, out_channels=64, kernel_size=3, stride=2, diff --git a/paddlers/models/ppseg/models/backbones/lite_hrnet.py b/paddlers/models/ppseg/models/backbones/lite_hrnet.py new file mode 100644 index 0000000..36e575b --- /dev/null +++ b/paddlers/models/ppseg/models/backbones/lite_hrnet.py @@ -0,0 +1,974 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
"""
This code is based on
https://github.com/HRNet/Lite-HRNet/blob/hrnet/models/backbones/litehrnet.py
"""

import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from numbers import Integral
from paddle import ParamAttr
from paddle.regularizer import L2Decay
from paddle.nn.initializer import Normal, Constant

from paddleseg.cvlibs import manager
from paddleseg import utils

__all__ = [
    "Lite_HRNet_18", "Lite_HRNet_30", "Lite_HRNet_naive",
    "Lite_HRNet_wider_naive", "LiteHRNet"
]


def Conv2d(in_channels,
           out_channels,
           kernel_size,
           stride=1,
           padding=0,
           dilation=1,
           groups=1,
           bias=True,
           weight_init=Normal(std=0.001),
           bias_init=Constant(0.)):
    """Build an ``nn.Conv2D`` with explicit weight/bias initializers.

    Args:
        in_channels, out_channels, kernel_size, stride, padding, dilation,
            groups: Forwarded positionally to ``nn.Conv2D``.
        bias (bool, optional): When False the conv has no bias. Default: True.
        weight_init: Initializer of the conv weight. Default: Normal(std=0.001).
        bias_init: Initializer of the conv bias. Default: Constant(0.).

    Returns:
        nn.Conv2D: The configured convolution layer.
    """
    weight_attr = paddle.framework.ParamAttr(initializer=weight_init)
    if bias:
        bias_attr = paddle.framework.ParamAttr(initializer=bias_init)
    else:
        bias_attr = False
    conv = nn.Conv2D(
        in_channels,
        out_channels,
        kernel_size,
        stride,
        padding,
        dilation,
        groups,
        weight_attr=weight_attr,
        bias_attr=bias_attr)
    return conv


def channel_shuffle(x, groups):
    """Interleave the channels of ``x`` across ``groups`` groups.

    The channel axis is reshaped to (groups, channels_per_group), the two
    axes are swapped, and the result is flattened back.
    """
    x_shape = paddle.shape(x)
    batch_size, height, width = x_shape[0], x_shape[2], x_shape[3]
    # The channel count is read from the static shape, not paddle.shape.
    num_channels = x.shape[1]
    channels_per_group = num_channels // groups

    x = paddle.reshape(
        x=x, shape=[batch_size, groups, channels_per_group, height, width])
    x = paddle.transpose(x=x, perm=[0, 2, 1, 3, 4])
    x = paddle.reshape(x=x, shape=[batch_size, num_channels, height, width])

    return x


class ConvNormLayer(nn.Layer):
    """Conv2D, optionally followed by a norm layer and a relu/sigmoid.

    Args:
        ch_in (int): Number of input channels.
        ch_out (int): Number of output channels.
        filter_size (int): Kernel size; padding is ``(filter_size - 1) // 2``.
        stride (int, optional): Convolution stride. Default: 1.
        groups (int, optional): Convolution groups. Default: 1.
        norm_type (str|None, optional): One of 'bn', 'sync_bn', 'gn', or None
            for no normalization (the conv then has a bias). Default: None.
        norm_groups (int, optional): Groups for GroupNorm. Default: 32.
        norm_decay (float, optional): L2 decay of the norm parameters.
            Default: 0.
        freeze_norm (bool, optional): When True the norm parameters get a
            learning rate of 0, stop_gradient is set on them and BatchNorm
            uses global statistics. Default: False.
        act (str|None, optional): 'relu', 'sigmoid' or None. Default: None.
    """

    def __init__(self,
                 ch_in,
                 ch_out,
                 filter_size,
                 stride=1,
                 groups=1,
                 norm_type=None,
                 norm_groups=32,
                 norm_decay=0.,
                 freeze_norm=False,
                 act=None):
        super(ConvNormLayer, self).__init__()
        self.act = act
        norm_lr = 0. if freeze_norm else 1.
        if norm_type is not None:
            assert norm_type in ['bn', 'sync_bn', 'gn'], \
                "norm_type should be one of ['bn', 'sync_bn', 'gn'], but got {}".format(norm_type)
            param_attr = ParamAttr(
                initializer=Constant(1.0),
                learning_rate=norm_lr,
                regularizer=L2Decay(norm_decay), )
            bias_attr = ParamAttr(
                learning_rate=norm_lr, regularizer=L2Decay(norm_decay))
            global_stats = True if freeze_norm else None
            if norm_type in ['bn', 'sync_bn']:
                self.norm = nn.BatchNorm2D(
                    ch_out,
                    weight_attr=param_attr,
                    bias_attr=bias_attr,
                    use_global_stats=global_stats, )
            elif norm_type == 'gn':
                self.norm = nn.GroupNorm(
                    num_groups=norm_groups,
                    num_channels=ch_out,
                    weight_attr=param_attr,
                    bias_attr=bias_attr)
            norm_params = self.norm.parameters()
            if freeze_norm:
                for param in norm_params:
                    param.stop_gradient = True
            # The norm layer provides the offset, so the conv needs no bias.
            conv_bias_attr = False
        else:
            conv_bias_attr = True
            self.norm = None

        self.conv = nn.Conv2D(
            in_channels=ch_in,
            out_channels=ch_out,
            kernel_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(initializer=Normal(
                mean=0., std=0.001)),
            bias_attr=conv_bias_attr)

    def forward(self, inputs):
        out = self.conv(inputs)
        if self.norm is not None:
            out = self.norm(out)

        if self.act == 'relu':
            out = F.relu(out)
        elif self.act == 'sigmoid':
            out = F.sigmoid(out)
        return out


class DepthWiseSeparableConvNormLayer(nn.Layer):
    """Depthwise ConvNormLayer followed by a 1x1 pointwise ConvNormLayer."""

    def __init__(self,
                 ch_in,
                 ch_out,
                 filter_size,
                 stride=1,
                 dw_norm_type=None,
                 pw_norm_type=None,
                 norm_decay=0.,
                 freeze_norm=False,
                 dw_act=None,
                 pw_act=None):
        super(DepthWiseSeparableConvNormLayer, self).__init__()
        self.depthwise_conv = ConvNormLayer(
            ch_in=ch_in,
            ch_out=ch_in,
            filter_size=filter_size,
            stride=stride,
            groups=ch_in,
            norm_type=dw_norm_type,
            act=dw_act,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm, )
        self.pointwise_conv = ConvNormLayer(
            ch_in=ch_in,
            ch_out=ch_out,
            filter_size=1,
            stride=1,
            norm_type=pw_norm_type,
            act=pw_act,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm, )

    def forward(self, x):
        x = self.depthwise_conv(x)
        x = self.pointwise_conv(x)
        return x


class CrossResolutionWeightingModule(nn.Layer):
    """Compute per-branch weights from all branches and apply them.

    ``forward`` takes a list of feature maps (one per branch), pools all but
    the last to the size implied by a stride of ``2 ** (len(x) - idx - 1)``,
    concatenates them, runs two 1x1 convs (relu then sigmoid), splits the
    result per branch and multiplies each input by its upsampled weight map.

    Args:
        channels (list[int]): Channel count of each branch.
        ratio (int, optional): Channel reduction of the hidden conv.
            Default: 16.
        norm_type (str, optional): Norm type of both convs. Default: 'bn'.
        freeze_norm (bool, optional): See ConvNormLayer. Default: False.
        norm_decay (float, optional): See ConvNormLayer. Default: 0.
    """

    def __init__(self,
                 channels,
                 ratio=16,
                 norm_type='bn',
                 freeze_norm=False,
                 norm_decay=0.):
        super(CrossResolutionWeightingModule, self).__init__()
        self.channels = channels
        total_channel = sum(channels)
        self.conv1 = ConvNormLayer(
            ch_in=total_channel,
            ch_out=total_channel // ratio,
            filter_size=1,
            stride=1,
            norm_type=norm_type,
            act='relu',
            freeze_norm=freeze_norm,
            norm_decay=norm_decay)
        self.conv2 = ConvNormLayer(
            ch_in=total_channel // ratio,
            ch_out=total_channel,
            filter_size=1,
            stride=1,
            norm_type=norm_type,
            act='sigmoid',
            freeze_norm=freeze_norm,
            norm_decay=norm_decay)

    def forward(self, x):
        out = []
        for idx, xi in enumerate(x[:-1]):
            kernel_size = stride = pow(2, len(x) - idx - 1)
            xi = F.avg_pool2d(xi, kernel_size=kernel_size, stride=stride)
            out.append(xi)
        out.append(x[-1])

        out = paddle.concat(out, 1)
        out = self.conv1(out)
        out = self.conv2(out)
        out = paddle.split(out, self.channels, 1)
        out = [
            s * F.interpolate(
                a, paddle.shape(s)[-2:], mode='nearest')
            for s, a in zip(x, out)
        ]
        return out


class SpatialWeightingModule(nn.Layer):
    """Channel gate: global average pool -> 1x1 conv (relu) -> 1x1 conv
    (sigmoid), multiplied onto the input. The convs use no norm layer."""

    def __init__(self, in_channel, ratio=16, freeze_norm=False, norm_decay=0.):
        super(SpatialWeightingModule, self).__init__()
        self.global_avgpooling = nn.AdaptiveAvgPool2D(1)
        self.conv1 = ConvNormLayer(
            ch_in=in_channel,
            ch_out=in_channel // ratio,
            filter_size=1,
            stride=1,
            act='relu',
            freeze_norm=freeze_norm,
            norm_decay=norm_decay)
        self.conv2 = ConvNormLayer(
            ch_in=in_channel // ratio,
            ch_out=in_channel,
            filter_size=1,
            stride=1,
            act='sigmoid',
            freeze_norm=freeze_norm,
            norm_decay=norm_decay)

    def forward(self, x):
        out = self.global_avgpooling(x)
        out = self.conv1(out)
        out = self.conv2(out)
        return x * out


class ConditionalChannelWeightingBlock(nn.Layer):
    """Conditional channel weighting block operating on a list of branches.

    Each branch is split in two along the channel axis. The second halves go
    through cross-resolution weighting, a depthwise conv and spatial
    weighting; they are then concatenated with the untouched first halves
    and channel-shuffled.

    Args:
        in_channels (list[int]): Channel count of each branch.
        stride (int): Stride of the depthwise convs; must be 1 or 2.
        reduce_ratio (int): Reduction ratio of the cross-resolution weighting.
        norm_type (str, optional): Norm type. Default: 'bn'.
        freeze_norm (bool, optional): See ConvNormLayer. Default: False.
        norm_decay (float, optional): See ConvNormLayer. Default: 0.
    """

    def __init__(self,
                 in_channels,
                 stride,
                 reduce_ratio,
                 norm_type='bn',
                 freeze_norm=False,
                 norm_decay=0.):
        super(ConditionalChannelWeightingBlock, self).__init__()
        assert stride in [1, 2]
        branch_channels = [channel // 2 for channel in in_channels]

        self.cross_resolution_weighting = CrossResolutionWeightingModule(
            branch_channels,
            ratio=reduce_ratio,
            norm_type=norm_type,
            freeze_norm=freeze_norm,
            norm_decay=norm_decay)
        self.depthwise_convs = nn.LayerList([
            ConvNormLayer(
                channel,
                channel,
                filter_size=3,
                stride=stride,
                groups=channel,
                norm_type=norm_type,
                freeze_norm=freeze_norm,
                norm_decay=norm_decay) for channel in branch_channels
        ])

        self.spatial_weighting = nn.LayerList([
            SpatialWeightingModule(
                channel,
                ratio=4,
                freeze_norm=freeze_norm,
                norm_decay=norm_decay) for channel in branch_channels
        ])

    def forward(self, x):
        x = [s.chunk(2, axis=1) for s in x]
        x1 = [s[0] for s in x]
        x2 = [s[1] for s in x]

        x2 = self.cross_resolution_weighting(x2)
        x2 = [dw(s) for s, dw in zip(x2, self.depthwise_convs)]
        x2 = [sw(s) for s, sw in zip(x2, self.spatial_weighting)]

        out = [paddle.concat([s1, s2], axis=1) for s1, s2 in zip(x1, x2)]
        out = [channel_shuffle(s, groups=2) for s in out]
        return out


class ShuffleUnit(nn.Layer):
    """ShuffleNet-style unit.

    With ``stride == 1`` the input is split in two, only the second half is
    transformed. With ``stride > 1`` both branches process the full input.
    The two halves are concatenated and channel-shuffled.

    Args:
        in_channel (int): Number of input channels.
        out_channel (int): Number of output channels.
        stride (int): Stride of the depthwise convs.
        norm_type (str, optional): Norm type. Default: 'bn'.
        freeze_norm (bool, optional): See ConvNormLayer. Default: False.
        norm_decay (float, optional): See ConvNormLayer. Default: 0.
    """

    def __init__(self,
                 in_channel,
                 out_channel,
                 stride,
                 norm_type='bn',
                 freeze_norm=False,
                 norm_decay=0.):
        super(ShuffleUnit, self).__init__()
        branch_channel = out_channel // 2
        self.stride = stride
        if self.stride == 1:
            assert in_channel == branch_channel * 2, \
                "when stride=1, in_channel {} should equal to branch_channel*2 {}".format(in_channel, branch_channel * 2)
        if stride > 1:
            self.branch1 = nn.Sequential(
                ConvNormLayer(
                    ch_in=in_channel,
                    ch_out=in_channel,
                    filter_size=3,
                    stride=self.stride,
                    groups=in_channel,
                    norm_type=norm_type,
                    freeze_norm=freeze_norm,
                    norm_decay=norm_decay),
                ConvNormLayer(
                    ch_in=in_channel,
                    ch_out=branch_channel,
                    filter_size=1,
                    stride=1,
                    norm_type=norm_type,
                    act='relu',
                    freeze_norm=freeze_norm,
                    norm_decay=norm_decay), )
        self.branch2 = nn.Sequential(
            ConvNormLayer(
                ch_in=branch_channel if stride == 1 else in_channel,
                ch_out=branch_channel,
                filter_size=1,
                stride=1,
                norm_type=norm_type,
                act='relu',
                freeze_norm=freeze_norm,
                norm_decay=norm_decay),
            ConvNormLayer(
                ch_in=branch_channel,
                ch_out=branch_channel,
                filter_size=3,
                stride=self.stride,
                groups=branch_channel,
                norm_type=norm_type,
                freeze_norm=freeze_norm,
                norm_decay=norm_decay),
            ConvNormLayer(
                ch_in=branch_channel,
                ch_out=branch_channel,
                filter_size=1,
                stride=1,
                norm_type=norm_type,
                act='relu',
                freeze_norm=freeze_norm,
                norm_decay=norm_decay), )

    def forward(self, x):
        if self.stride > 1:
            x1 = self.branch1(x)
            x2 = self.branch2(x)
        else:
            x1, x2 = x.chunk(2, axis=1)
            x2 = self.branch2(x2)
        out = paddle.concat([x1, x2], axis=1)
        out = channel_shuffle(out, groups=2)
        return out


class IterativeHead(nn.Layer):
    """Fuse the branches from the last one to the first one.

    The branch list is reversed, each branch is added to the bilinearly
    upsampled result of the previous one and projected with a depthwise
    separable conv. Every projection but the last maps to the channel count
    of the next branch; the last one keeps its channel count. The outputs
    are returned in the original branch order.

    Args:
        in_channels (list[int]): Channel count of each branch.
        norm_type (str, optional): Norm type. Default: 'bn'.
        freeze_norm (bool, optional): See ConvNormLayer. Default: False.
        norm_decay (float, optional): See ConvNormLayer. Default: 0.
    """

    def __init__(self,
                 in_channels,
                 norm_type='bn',
                 freeze_norm=False,
                 norm_decay=0.):
        super(IterativeHead, self).__init__()
        num_branches = len(in_channels)
        self.in_channels = in_channels[::-1]

        projects = []
        for i in range(num_branches):
            if i != num_branches - 1:
                projects.append(
                    DepthWiseSeparableConvNormLayer(
                        ch_in=self.in_channels[i],
                        ch_out=self.in_channels[i + 1],
                        filter_size=3,
                        stride=1,
                        dw_act=None,
                        pw_act='relu',
                        dw_norm_type=norm_type,
                        pw_norm_type=norm_type,
                        freeze_norm=freeze_norm,
                        norm_decay=norm_decay))
            else:
                projects.append(
                    DepthWiseSeparableConvNormLayer(
                        ch_in=self.in_channels[i],
                        ch_out=self.in_channels[i],
                        filter_size=3,
                        stride=1,
                        dw_act=None,
                        pw_act='relu',
                        dw_norm_type=norm_type,
                        pw_norm_type=norm_type,
                        freeze_norm=freeze_norm,
                        norm_decay=norm_decay))
        self.projects = nn.LayerList(projects)

    def forward(self, x):
        x = x[::-1]
        y = []
        last_x = None
        for i, s in enumerate(x):
            if last_x is not None:
                last_x = F.interpolate(
                    last_x,
                    size=paddle.shape(s)[-2:],
                    mode='bilinear',
                    align_corners=True)
                s = s + last_x
            s = self.projects[i](s)
            y.append(s)
            last_x = s

        return y[::-1]


class Stem(nn.Layer):
    """Network stem: a stride-2 conv followed by a two-branch stride-2 unit.

    Args:
        in_channel (int): Number of input image channels.
        stem_channel (int): Output channels of the first conv.
        out_channel (int): Output channels of the stem.
        expand_ratio (float): Expansion of the second branch's hidden width
            relative to ``stem_channel``.
        norm_type (str, optional): Norm type. Default: 'bn'.
        freeze_norm (bool, optional): See ConvNormLayer. Default: False.
        norm_decay (float, optional): See ConvNormLayer. Default: 0.
    """

    def __init__(self,
                 in_channel,
                 stem_channel,
                 out_channel,
                 expand_ratio,
                 norm_type='bn',
                 freeze_norm=False,
                 norm_decay=0.):
        super(Stem, self).__init__()
        self.conv1 = ConvNormLayer(
            in_channel,
            stem_channel,
            filter_size=3,
            stride=2,
            norm_type=norm_type,
            act='relu',
            freeze_norm=freeze_norm,
            norm_decay=norm_decay)
        mid_channel = int(round(stem_channel * expand_ratio))
        branch_channel = stem_channel // 2
        if stem_channel == out_channel:
            inc_channel = out_channel - branch_channel
        else:
            inc_channel = out_channel - stem_channel
        self.branch1 = nn.Sequential(
            ConvNormLayer(
                ch_in=branch_channel,
                ch_out=branch_channel,
                filter_size=3,
                stride=2,
                groups=branch_channel,
                norm_type=norm_type,
                freeze_norm=freeze_norm,
                norm_decay=norm_decay),
            ConvNormLayer(
                ch_in=branch_channel,
                ch_out=inc_channel,
                filter_size=1,
                stride=1,
                norm_type=norm_type,
                act='relu',
                freeze_norm=freeze_norm,
                norm_decay=norm_decay), )
        self.expand_conv = ConvNormLayer(
            ch_in=branch_channel,
            ch_out=mid_channel,
            filter_size=1,
            stride=1,
            norm_type=norm_type,
            act='relu',
            freeze_norm=freeze_norm,
            norm_decay=norm_decay)
        self.depthwise_conv = ConvNormLayer(
            ch_in=mid_channel,
            ch_out=mid_channel,
            filter_size=3,
            stride=2,
            groups=mid_channel,
            norm_type=norm_type,
            freeze_norm=freeze_norm,
            norm_decay=norm_decay)
        self.linear_conv = ConvNormLayer(
            ch_in=mid_channel,
            ch_out=branch_channel
            if stem_channel == out_channel else stem_channel,
            filter_size=1,
            stride=1,
            norm_type=norm_type,
            act='relu',
            freeze_norm=freeze_norm,
            norm_decay=norm_decay)

    def forward(self, x):
        x = self.conv1(x)
        x1, x2 = x.chunk(2, axis=1)
        x1 = self.branch1(x1)
        x2 = self.expand_conv(x2)
        x2 = self.depthwise_conv(x2)
        x2 = self.linear_conv(x2)
        out = paddle.concat([x1, x2], axis=1)
        out = channel_shuffle(out, groups=2)

        return out


class LiteHRNetModule(nn.Layer):
    """One multi-branch module of Lite-HRNet, with optional branch fusion.

    Args:
        num_branches (int): Number of parallel branches; must equal
            ``len(in_channels)``.
        num_blocks (int): Number of blocks per branch.
        in_channels (list[int]): Channel count of each branch.
        reduce_ratio (int): Reduction ratio used by 'LITE' blocks.
        module_type (str): 'LITE' (conditional channel weighting blocks) or
            'NAIVE' (shuffle units).
        multiscale_output (bool, optional): When False only the first branch
            is produced. Default: False.
        with_fuse (bool, optional): Whether to fuse the branches.
            Default: True.
        norm_type (str, optional): Norm type of the blocks. The fuse layers
            always use BatchNorm2D. Default: 'bn'.
        freeze_norm (bool, optional): See ConvNormLayer. Default: False.
        norm_decay (float, optional): See ConvNormLayer. Default: 0.
    """

    def __init__(self,
                 num_branches,
                 num_blocks,
                 in_channels,
                 reduce_ratio,
                 module_type,
                 multiscale_output=False,
                 with_fuse=True,
                 norm_type='bn',
                 freeze_norm=False,
                 norm_decay=0.):
        super(LiteHRNetModule, self).__init__()
        assert num_branches == len(in_channels),\
            "num_branches {} should equal to num_in_channels {}".format(num_branches, len(in_channels))
        assert module_type in [
            'LITE', 'NAIVE'
        ], "module_type should be one of ['LITE', 'NAIVE']"
        self.num_branches = num_branches
        self.in_channels = in_channels
        self.multiscale_output = multiscale_output
        self.with_fuse = with_fuse
        # Honor the argument instead of silently forcing 'bn'; the default
        # keeps the previous behavior for existing callers.
        self.norm_type = norm_type
        self.module_type = module_type

        if self.module_type == 'LITE':
            self.layers = self._make_weighting_blocks(
                num_blocks,
                reduce_ratio,
                freeze_norm=freeze_norm,
                norm_decay=norm_decay)
        elif self.module_type == 'NAIVE':
            self.layers = self._make_naive_branches(
                num_branches,
                num_blocks,
                freeze_norm=freeze_norm,
                norm_decay=norm_decay)

        if self.with_fuse:
            self.fuse_layers = self._make_fuse_layers(
                freeze_norm=freeze_norm, norm_decay=norm_decay)
            self.relu = nn.ReLU()

    def _make_weighting_blocks(self,
                               num_blocks,
                               reduce_ratio,
                               stride=1,
                               freeze_norm=False,
                               norm_decay=0.):
        """Stack ``num_blocks`` conditional channel weighting blocks."""
        layers = []
        for i in range(num_blocks):
            layers.append(
                ConditionalChannelWeightingBlock(
                    self.in_channels,
                    stride=stride,
                    reduce_ratio=reduce_ratio,
                    norm_type=self.norm_type,
                    freeze_norm=freeze_norm,
                    norm_decay=norm_decay))
        return nn.Sequential(*layers)

    def _make_naive_branches(self,
                             num_branches,
                             num_blocks,
                             freeze_norm=False,
                             norm_decay=0.):
        """Build one Sequential of ``num_blocks`` shuffle units per branch."""
        branches = []
        for branch_idx in range(num_branches):
            layers = []
            for i in range(num_blocks):
                layers.append(
                    ShuffleUnit(
                        self.in_channels[branch_idx],
                        self.in_channels[branch_idx],
                        stride=1,
                        norm_type=self.norm_type,
                        freeze_norm=freeze_norm,
                        norm_decay=norm_decay))
            branches.append(nn.Sequential(*layers))
        return nn.LayerList(branches)

    def _make_fuse_layers(self, freeze_norm=False, norm_decay=0.):
        """Build the layers mapping every branch j to every output branch i.

        Higher-index branches (j > i) are projected with a 1x1 conv and
        upsampled; lower-index branches (j < i) are downsampled with
        ``i - j`` stride-2 depthwise-separable convs; j == i is None.
        Returns None for a single-branch module.
        """
        if self.num_branches == 1:
            return None
        fuse_layers = []
        num_out_branches = self.num_branches if self.multiscale_output else 1
        for i in range(num_out_branches):
            fuse_layer = []
            for j in range(self.num_branches):
                if j > i:
                    fuse_layer.append(
                        nn.Sequential(
                            Conv2d(
                                self.in_channels[j],
                                self.in_channels[i],
                                kernel_size=1,
                                stride=1,
                                padding=0,
                                bias=False, ),
                            nn.BatchNorm2D(self.in_channels[i]),
                            nn.Upsample(
                                scale_factor=2**(j - i), mode='nearest')))
                elif j == i:
                    fuse_layer.append(None)
                else:
                    conv_downsamples = []
                    for k in range(i - j):
                        if k == i - j - 1:
                            # Last step: switch to the target channel count,
                            # no activation.
                            conv_downsamples.append(
                                nn.Sequential(
                                    Conv2d(
                                        self.in_channels[j],
                                        self.in_channels[j],
                                        kernel_size=3,
                                        stride=2,
                                        padding=1,
                                        groups=self.in_channels[j],
                                        bias=False, ),
                                    nn.BatchNorm2D(self.in_channels[j]),
                                    Conv2d(
                                        self.in_channels[j],
                                        self.in_channels[i],
                                        kernel_size=1,
                                        stride=1,
                                        padding=0,
                                        bias=False, ),
                                    nn.BatchNorm2D(self.in_channels[i])))
                        else:
                            conv_downsamples.append(
                                nn.Sequential(
                                    Conv2d(
                                        self.in_channels[j],
                                        self.in_channels[j],
                                        kernel_size=3,
                                        stride=2,
                                        padding=1,
                                        groups=self.in_channels[j],
                                        bias=False, ),
                                    nn.BatchNorm2D(self.in_channels[j]),
                                    Conv2d(
                                        self.in_channels[j],
                                        self.in_channels[j],
                                        kernel_size=1,
                                        stride=1,
                                        padding=0,
                                        bias=False, ),
                                    nn.BatchNorm2D(self.in_channels[j]),
                                    nn.ReLU()))

                    fuse_layer.append(nn.Sequential(*conv_downsamples))
            fuse_layers.append(nn.LayerList(fuse_layer))

        return nn.LayerList(fuse_layers)

    def forward(self, x):
        """Run the branches on the list ``x`` and return a list of outputs."""
        if self.num_branches == 1:
            return [self.layers[0](x[0])]
        if self.module_type == 'LITE':
            out = self.layers(x)
        elif self.module_type == 'NAIVE':
            # Build a new list rather than overwriting the caller's list.
            out = [self.layers[i](x[i]) for i in range(self.num_branches)]
        if self.with_fuse:
            out_fuse = []
            for i in range(len(self.fuse_layers)):
                y = out[0] if i == 0 else self.fuse_layers[i][0](out[0])
                for j in range(self.num_branches):
                    if j == 0:
                        # NOTE(review): branch 0 is accumulated onto itself
                        # here. Kept unchanged because altering it would
                        # change the network's outputs; confirm against the
                        # reference implementation before touching it.
                        y += y
                    elif i == j:
                        y += out[j]
                    else:
                        y += self.fuse_layers[i][j](out[j])
                if i == 0:
                    out[i] = y
                out_fuse.append(self.relu(y))
            out = out_fuse
        elif not self.multiscale_output:
            out = [out[0]]
        return out


class LiteHRNet(nn.Layer):
    """
    @inproceedings{Yulitehrnet21,
    title={Lite-HRNet: A Lightweight High-Resolution Network},
        author={Yu, Changqian and Xiao, Bin and Gao, Changxin and Yuan, Lu and Zhang, Lei and Sang, Nong and Wang, Jingdong},
        booktitle={CVPR},year={2021}
    }

    Args:
        network_type (str): the network_type should be one of ["lite_18", "lite_30", "naive", "wider_naive"],
            "naive": Simply combining the shuffle block in ShuffleNet and the high-resolution design pattern in HRNet.
            "wider_naive": Naive network with wider channels in each block.
            "lite_18": Lite-HRNet-18, which replaces the pointwise convolution in a shuffle block by conditional channel weighting.
            "lite_30": Lite-HRNet-30, with more blocks compared with Lite-HRNet-18.
        in_channels (int, optional): The channels of input image. Default: 3.
        freeze_at (int): the stage to freeze
        freeze_norm (bool): whether to freeze norm in HRNet
        norm_decay (float): weight decay for normalization layer weights
        return_idx (int|Sequence[int]): the stage(s) to return. Default: (0, 1, 2, 3).
        use_head (bool, optional): whether to append an IterativeHead after the last stage. Default: False.
        pretrained (str, optional): The path or url of pretrained model. Default: None.
    """

    def __init__(self,
                 network_type,
                 in_channels=3,
                 freeze_at=0,
                 freeze_norm=True,
                 norm_decay=0.,
                 return_idx=(0, 1, 2, 3),
                 use_head=False,
                 pretrained=None):
        super(LiteHRNet, self).__init__()
        if isinstance(return_idx, Integral):
            return_idx = [return_idx]
        assert network_type in ["lite_18", "lite_30", "naive", "wider_naive"], \
            "the network_type should be one of [lite_18, lite_30, naive, wider_naive]"
        assert len(return_idx) > 0, "need one or more return index"
        self.freeze_at = freeze_at
        self.freeze_norm = freeze_norm
        self.norm_decay = norm_decay
        # Copy so the instance never shares a list with the caller (or with
        # the default argument).
        self.return_idx = list(return_idx)
        self.norm_type = 'bn'
        self.use_head = use_head
        self.pretrained = pretrained

        self.module_configs = {
            "lite_18": {
                "num_modules": [2, 4, 2],
                "num_branches": [2, 3, 4],
                "num_blocks": [2, 2, 2],
                "module_type": ["LITE", "LITE", "LITE"],
                "reduce_ratios": [8, 8, 8],
                "num_channels": [[40, 80], [40, 80, 160], [40, 80, 160, 320]],
            },
            "lite_30": {
                "num_modules": [3, 8, 3],
                "num_branches": [2, 3, 4],
                "num_blocks": [2, 2, 2],
                "module_type": ["LITE", "LITE", "LITE"],
                "reduce_ratios": [8, 8, 8],
                "num_channels": [[40, 80], [40, 80, 160], [40, 80, 160, 320]],
            },
            "naive": {
                "num_modules": [2, 4, 2],
                "num_branches": [2, 3, 4],
                "num_blocks": [2, 2, 2],
                "module_type": ["NAIVE", "NAIVE", "NAIVE"],
                "reduce_ratios": [1, 1, 1],
                "num_channels": [[30, 60], [30, 60, 120], [30, 60, 120, 240]],
            },
            "wider_naive": {
                "num_modules": [2, 4, 2],
                "num_branches": [2, 3, 4],
                "num_blocks": [2, 2, 2],
                "module_type": ["NAIVE", "NAIVE", "NAIVE"],
                "reduce_ratios": [1, 1, 1],
                "num_channels": [[40, 80], [40, 80, 160], [40, 80, 160, 320]],
            },
        }

        self.stages_config = self.module_configs[network_type]

        self.stem = Stem(in_channels, 32, 32, 1)
        num_channels_pre_layer = [32]
        for stage_idx in range(3):
            num_channels = self.stages_config["num_channels"][stage_idx]
            setattr(self, 'transition{}'.format(stage_idx),
                    self._make_transition_layer(num_channels_pre_layer,
                                                num_channels, self.freeze_norm,
                                                self.norm_decay))
            stage, num_channels_pre_layer = self._make_stage(
                self.stages_config, stage_idx, num_channels, True,
                self.freeze_norm, self.norm_decay)
            setattr(self, 'stage{}'.format(stage_idx), stage)

        num_channels = self.stages_config["num_channels"][-1]
        self.feat_channels = num_channels

        if self.use_head:
            self.head_layer = IterativeHead(num_channels_pre_layer, 'bn',
                                            self.freeze_norm, self.norm_decay)

            # The head projects every branch but the first to the channel
            # count of the next-finer branch, i.e. half of its own.
            self.feat_channels = [num_channels[0]]
            for i in range(1, len(num_channels)):
                self.feat_channels.append(num_channels[i] // 2)

        self.init_weight()

    def init_weight(self):
        """Load the pretrained weights when a path/url was given."""
        if self.pretrained is not None:
            utils.load_entire_model(self, self.pretrained)

    def _make_transition_layer(self,
                               num_channels_pre_layer,
                               num_channels_cur_layer,
                               freeze_norm=False,
                               norm_decay=0.):
        """Build the layers adapting the previous stage's branches to this one.

        Existing branches get a depthwise + pointwise conv when the channel
        count changes (None otherwise); each new branch is derived from the
        last previous branch through stride-2 depthwise-separable convs.
        """
        num_branches_pre = len(num_channels_pre_layer)
        num_branches_cur = len(num_channels_cur_layer)
        transition_layers = []
        for i in range(num_branches_cur):
            if i < num_branches_pre:
                if num_channels_cur_layer[i] != num_channels_pre_layer[i]:
                    transition_layers.append(
                        nn.Sequential(
                            Conv2d(
                                num_channels_pre_layer[i],
                                num_channels_pre_layer[i],
                                kernel_size=3,
                                stride=1,
                                padding=1,
                                groups=num_channels_pre_layer[i],
                                bias=False),
                            nn.BatchNorm2D(num_channels_pre_layer[i]),
                            Conv2d(
                                num_channels_pre_layer[i],
                                num_channels_cur_layer[i],
                                kernel_size=1,
                                stride=1,
                                padding=0,
                                bias=False, ),
                            nn.BatchNorm2D(num_channels_cur_layer[i]),
                            nn.ReLU()))
                else:
                    transition_layers.append(None)
            else:
                conv_downsamples = []
                for j in range(i + 1 - num_branches_pre):
                    conv_downsamples.append(
                        nn.Sequential(
                            Conv2d(
                                num_channels_pre_layer[-1],
                                num_channels_pre_layer[-1],
                                groups=num_channels_pre_layer[-1],
                                kernel_size=3,
                                stride=2,
                                padding=1,
                                bias=False, ),
                            nn.BatchNorm2D(num_channels_pre_layer[-1]),
                            Conv2d(
                                num_channels_pre_layer[-1],
                                num_channels_cur_layer[i]
                                if j == i - num_branches_pre else
                                num_channels_pre_layer[-1],
                                kernel_size=1,
                                stride=1,
                                padding=0,
                                bias=False, ),
                            nn.BatchNorm2D(num_channels_cur_layer[i]
                                           if j == i - num_branches_pre else
                                           num_channels_pre_layer[-1]),
                            nn.ReLU()))
                transition_layers.append(nn.Sequential(*conv_downsamples))
        return nn.LayerList(transition_layers)

    def _make_stage(self,
                    stages_config,
                    stage_idx,
                    in_channels,
                    multiscale_output,
                    freeze_norm=False,
                    norm_decay=0.):
        """Build stage ``stage_idx`` and return ``(stage, out_channels)``."""
        num_modules = stages_config["num_modules"][stage_idx]
        num_branches = stages_config["num_branches"][stage_idx]
        num_blocks = stages_config["num_blocks"][stage_idx]
        reduce_ratio = stages_config['reduce_ratios'][stage_idx]
        module_type = stages_config['module_type'][stage_idx]

        modules = []
        for i in range(num_modules):
            # Only the last module of a stage may drop to a single output.
            if not multiscale_output and i == num_modules - 1:
                reset_multiscale_output = False
            else:
                reset_multiscale_output = True
            modules.append(
                LiteHRNetModule(
                    num_branches,
                    num_blocks,
                    in_channels,
                    reduce_ratio,
                    module_type,
                    multiscale_output=reset_multiscale_output,
                    with_fuse=True,
                    freeze_norm=freeze_norm,
                    norm_decay=norm_decay))
            in_channels = modules[-1].in_channels
        return nn.Sequential(*modules), in_channels

    def forward(self, x):
        """Return the feature maps of the stages listed in ``return_idx``."""
        x = self.stem(x)

        y_list = [x]
        for stage_idx in range(3):
            x_list = []
            transition = getattr(self, 'transition{}'.format(stage_idx))
            for j in range(self.stages_config["num_branches"][stage_idx]):
                if transition[j] is not None:
                    if j >= len(y_list):
                        # A new branch is derived from the last existing one.
                        x_list.append(transition[j](y_list[-1]))
                    else:
                        x_list.append(transition[j](y_list[j]))
                else:
                    x_list.append(y_list[j])
            y_list = getattr(self, 'stage{}'.format(stage_idx))(x_list)

        if self.use_head:
            y_list = self.head_layer(y_list)

        res = []
        for i, layer in enumerate(y_list):
            # NOTE(review): with the default freeze_at=0 this sets
            # stop_gradient on the first output feature map. Kept unchanged;
            # confirm this is intended for segmentation training.
            if i == self.freeze_at:
                layer.stop_gradient = True
            if i in self.return_idx:
                res.append(layer)
        return res


@manager.BACKBONES.add_component
def Lite_HRNet_18(**kwargs):
    model = LiteHRNet(network_type="lite_18", **kwargs)
    return model


@manager.BACKBONES.add_component
def Lite_HRNet_30(**kwargs):
    model = LiteHRNet(network_type="lite_30", **kwargs)
    return model


@manager.BACKBONES.add_component
def Lite_HRNet_naive(**kwargs):
    model = LiteHRNet(network_type="naive", **kwargs)
    return model


@manager.BACKBONES.add_component
def Lite_HRNet_wider_naive(**kwargs):
    model = LiteHRNet(network_type="wider_naive", **kwargs)
    return model
@@ -20,9 +20,9 @@ import paddle.nn as nn import paddle.nn.functional as F import paddle.nn.initializer as paddle_init -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.utils import utils -from paddlers.models.ppseg.models.backbones.transformer_utils import * +from paddleseg.cvlibs import manager +from paddleseg.utils import utils +from paddleseg.models.backbones.transformer_utils import * class Mlp(nn.Layer): @@ -260,7 +260,7 @@ class MixVisionTransformer(nn.Layer): def __init__(self, img_size=224, patch_size=16, - in_chans=3, + in_channels=3, num_classes=1000, embed_dims=[64, 128, 256, 512], num_heads=[1, 2, 4, 8], @@ -284,7 +284,7 @@ class MixVisionTransformer(nn.Layer): img_size=img_size, patch_size=7, stride=4, - in_chans=in_chans, + in_chans=in_channels, embed_dim=embed_dims[0]) self.patch_embed2 = OverlapPatchEmbed( img_size=img_size // 4, diff --git a/paddlers/models/ppseg/models/backbones/mobilenetv2.py b/paddlers/models/ppseg/models/backbones/mobilenetv2.py index c155a05..16cf4e0 100644 --- a/paddlers/models/ppseg/models/backbones/mobilenetv2.py +++ b/paddlers/models/ppseg/models/backbones/mobilenetv2.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,13 +12,26 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import paddle +from paddle import ParamAttr import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2D, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg import utils +from paddleseg.cvlibs import manager +from paddleseg import utils + +__all__ = [ + "MobileNetV2_x0_25", + "MobileNetV2_x0_5", + "MobileNetV2_x0_75", + "MobileNetV2_x1_0", + "MobileNetV2_x1_5", + "MobileNetV2_x2_0", +] -@manager.BACKBONES.add_component class MobileNetV2(nn.Layer): """ The MobileNetV2 implementation based on PaddlePaddle. @@ -29,69 +42,70 @@ class MobileNetV2(nn.Layer): (https://arxiv.org/abs/1801.04381). Args: - channel_ratio (float, optional): The ratio of channel. Default: 1.0 - min_channel (int, optional): The minimum of channel. Default: 16 + scale (float, optional): The scale of channel. Default: 1.0 + in_channels (int, optional): The channels of input image. Default: 3. pretrained (str, optional): The path or url of pretrained model. 
Default: None """ - def __init__(self, channel_ratio=1.0, min_channel=16, pretrained=None): - super(MobileNetV2, self).__init__() - self.channel_ratio = channel_ratio - self.min_channel = min_channel + def __init__(self, scale=1.0, in_channels=3, pretrained=None): + super().__init__() + self.scale = scale self.pretrained = pretrained + prefix_name = "" - self.stage0 = conv_bn(3, self.depth(32), 3, 2) - - self.stage1 = InvertedResidual(self.depth(32), self.depth(16), 1, 1) - - self.stage2 = nn.Sequential( - InvertedResidual(self.depth(16), self.depth(24), 2, 6), - InvertedResidual(self.depth(24), self.depth(24), 1, 6), ) - - self.stage3 = nn.Sequential( - InvertedResidual(self.depth(24), self.depth(32), 2, 6), - InvertedResidual(self.depth(32), self.depth(32), 1, 6), - InvertedResidual(self.depth(32), self.depth(32), 1, 6), ) + bottleneck_params_list = [ + (1, 16, 1, 1), + (6, 24, 2, 2), # x4 + (6, 32, 3, 2), # x8 + (6, 64, 4, 2), + (6, 96, 3, 1), # x16 + (6, 160, 3, 2), + (6, 320, 1, 1), # x32 + ] + self.out_index = [1, 2, 4, 6] - self.stage4 = nn.Sequential( - InvertedResidual(self.depth(32), self.depth(64), 2, 6), - InvertedResidual(self.depth(64), self.depth(64), 1, 6), - InvertedResidual(self.depth(64), self.depth(64), 1, 6), - InvertedResidual(self.depth(64), self.depth(64), 1, 6), ) + self.conv1 = ConvBNLayer( + num_channels=in_channels, + num_filters=int(32 * scale), + filter_size=3, + stride=2, + padding=1, + name=prefix_name + "conv1_1") - self.stage5 = nn.Sequential( - InvertedResidual(self.depth(64), self.depth(96), 1, 6), - InvertedResidual(self.depth(96), self.depth(96), 1, 6), - InvertedResidual(self.depth(96), self.depth(96), 1, 6), ) + self.block_list = [] + i = 1 + in_c = int(32 * scale) + for layer_setting in bottleneck_params_list: + t, c, n, s = layer_setting + i += 1 + block = self.add_sublayer( + prefix_name + "conv" + str(i), + sublayer=InvresiBlocks( + in_c=in_c, + t=t, + c=int(c * scale), + n=n, + s=s, + name=prefix_name + "conv" + str(i))) 
+ self.block_list.append(block) + in_c = int(c * scale) - self.stage6 = nn.Sequential( - InvertedResidual(self.depth(96), self.depth(160), 2, 6), - InvertedResidual(self.depth(160), self.depth(160), 1, 6), - InvertedResidual(self.depth(160), self.depth(160), 1, 6), ) - - self.stage7 = InvertedResidual(self.depth(160), self.depth(320), 1, 6) + out_channels = [ + bottleneck_params_list[idx][1] for idx in self.out_index + ] + self.feat_channels = [int(c * scale) for c in out_channels] self.init_weight() - def depth(self, channels): - min_channel = min(channels, self.min_channel) - return max(min_channel, int(channels * self.channel_ratio)) - - def forward(self, x): + def forward(self, inputs): feat_list = [] - feature_1_2 = self.stage0(x) - feature_1_2 = self.stage1(feature_1_2) - feature_1_4 = self.stage2(feature_1_2) - feature_1_8 = self.stage3(feature_1_4) - feature_1_16 = self.stage4(feature_1_8) - feature_1_16 = self.stage5(feature_1_16) - feature_1_32 = self.stage6(feature_1_16) - feature_1_32 = self.stage7(feature_1_32) - feat_list.append(feature_1_4) - feat_list.append(feature_1_8) - feat_list.append(feature_1_16) - feat_list.append(feature_1_32) + y = self.conv1(inputs, if_act=True) + for idx, block in enumerate(self.block_list): + y = block(y) + if idx in self.out_index: + feat_list.append(y) + return feat_list def init_weight(self): @@ -99,66 +113,153 @@ class MobileNetV2(nn.Layer): utils.load_entire_model(self, self.pretrained) -def conv_bn(inp, oup, kernel, stride): - return nn.Sequential( - nn.Conv2D( - in_channels=inp, - out_channels=oup, - kernel_size=kernel, +class ConvBNLayer(nn.Layer): + def __init__(self, + num_channels, + filter_size, + num_filters, + stride, + padding, + channels=None, + num_groups=1, + name=None, + use_cudnn=True): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2D( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=padding, + groups=num_groups, + 
weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + + self._batch_norm = BatchNorm( + num_filters, + param_attr=ParamAttr(name=name + "_bn_scale"), + bias_attr=ParamAttr(name=name + "_bn_offset"), + moving_mean_name=name + "_bn_mean", + moving_variance_name=name + "_bn_variance") + + def forward(self, inputs, if_act=True): + y = self._conv(inputs) + y = self._batch_norm(y) + if if_act: + y = F.relu6(y) + return y + + +class InvertedResidualUnit(nn.Layer): + def __init__(self, num_channels, num_in_filter, num_filters, stride, + filter_size, padding, expansion_factor, name): + super(InvertedResidualUnit, self).__init__() + num_expfilter = int(round(num_in_filter * expansion_factor)) + self._expand_conv = ConvBNLayer( + num_channels=num_channels, + num_filters=num_expfilter, + filter_size=1, + stride=1, + padding=0, + num_groups=1, + name=name + "_expand") + + self._bottleneck_conv = ConvBNLayer( + num_channels=num_expfilter, + num_filters=num_expfilter, + filter_size=filter_size, stride=stride, - padding=(kernel - 1) // 2, - bias_attr=False), - nn.BatchNorm2D( - num_features=oup, epsilon=1e-05, momentum=0.1), - nn.ReLU()) - - -class InvertedResidual(nn.Layer): - def __init__(self, inp, oup, stride, expand_ratio, dilation=1): - super(InvertedResidual, self).__init__() - self.stride = stride - assert stride in [1, 2] - self.use_res_connect = self.stride == 1 and inp == oup - - self.conv = nn.Sequential( - nn.Conv2D( - inp, - inp * expand_ratio, - kernel_size=1, - stride=1, - padding=0, - dilation=1, - groups=1, - bias_attr=False), - nn.BatchNorm2D( - num_features=inp * expand_ratio, epsilon=1e-05, momentum=0.1), - nn.ReLU(), - nn.Conv2D( - inp * expand_ratio, - inp * expand_ratio, - kernel_size=3, - stride=stride, - padding=dilation, - dilation=dilation, - groups=inp * expand_ratio, - bias_attr=False), - nn.BatchNorm2D( - num_features=inp * expand_ratio, epsilon=1e-05, momentum=0.1), - nn.ReLU(), - nn.Conv2D( - inp * expand_ratio, - oup, - 
kernel_size=1, - stride=1, - padding=0, - dilation=1, - groups=1, - bias_attr=False), - nn.BatchNorm2D( - num_features=oup, epsilon=1e-05, momentum=0.1), ) - - def forward(self, x): - if self.use_res_connect: - return x + self.conv(x) - else: - return self.conv(x) + padding=padding, + num_groups=num_expfilter, + use_cudnn=False, + name=name + "_dwise") + + self._linear_conv = ConvBNLayer( + num_channels=num_expfilter, + num_filters=num_filters, + filter_size=1, + stride=1, + padding=0, + num_groups=1, + name=name + "_linear") + + def forward(self, inputs, ifshortcut): + y = self._expand_conv(inputs, if_act=True) + y = self._bottleneck_conv(y, if_act=True) + y = self._linear_conv(y, if_act=False) + if ifshortcut: + y = paddle.add(inputs, y) + return y + + +class InvresiBlocks(nn.Layer): + def __init__(self, in_c, t, c, n, s, name): + super(InvresiBlocks, self).__init__() + + self._first_block = InvertedResidualUnit( + num_channels=in_c, + num_in_filter=in_c, + num_filters=c, + stride=s, + filter_size=3, + padding=1, + expansion_factor=t, + name=name + "_1") + + self._block_list = [] + for i in range(1, n): + block = self.add_sublayer( + name + "_" + str(i + 1), + sublayer=InvertedResidualUnit( + num_channels=c, + num_in_filter=c, + num_filters=c, + stride=1, + filter_size=3, + padding=1, + expansion_factor=t, + name=name + "_" + str(i + 1))) + self._block_list.append(block) + + def forward(self, inputs): + y = self._first_block(inputs, ifshortcut=False) + for block in self._block_list: + y = block(y, ifshortcut=True) + return y + + +@manager.BACKBONES.add_component +def MobileNetV2_x0_25(**kwargs): + model = MobileNetV2(scale=0.25, **kwargs) + return model + + +@manager.BACKBONES.add_component +def MobileNetV2_x0_5(**kwargs): + model = MobileNetV2(scale=0.5, **kwargs) + return model + + +@manager.BACKBONES.add_component +def MobileNetV2_x0_75(**kwargs): + model = MobileNetV2(scale=0.75, **kwargs) + return model + + +@manager.BACKBONES.add_component +def 
MobileNetV2_x1_0(**kwargs): + model = MobileNetV2(scale=1.0, **kwargs) + return model + + +@manager.BACKBONES.add_component +def MobileNetV2_x1_5(**kwargs): + model = MobileNetV2(scale=1.5, **kwargs) + return model + + +@manager.BACKBONES.add_component +def MobileNetV2_x2_0(**kwargs): + model = MobileNetV2(scale=2.0, **kwargs) + return model diff --git a/paddlers/models/ppseg/models/backbones/mobilenetv3.py b/paddlers/models/ppseg/models/backbones/mobilenetv3.py index 6436f3e..a8d37db 100644 --- a/paddlers/models/ppseg/models/backbones/mobilenetv3.py +++ b/paddlers/models/ppseg/models/backbones/mobilenetv3.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,11 +14,13 @@ import paddle import paddle.nn as nn -import paddle.nn.functional as F +from paddle import ParamAttr +from paddle.regularizer import L2Decay +from paddle.nn import AdaptiveAvgPool2D, BatchNorm, Conv2D, Dropout, Linear -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.utils import utils -from paddlers.models.ppseg.models import layers +from paddleseg.cvlibs import manager +from paddleseg.utils import utils, logger +from paddleseg.models import layers __all__ = [ "MobileNetV3_small_x0_35", "MobileNetV3_small_x0_5", @@ -28,8 +30,92 @@ __all__ = [ "MobileNetV3_large_x1_0", "MobileNetV3_large_x1_25" ] - -def make_divisible(v, divisor=8, min_value=None): +MODEL_STAGES_PATTERN = { + "MobileNetV3_small": ["blocks[0]", "blocks[2]", "blocks[7]", "blocks[10]"], + "MobileNetV3_large": + ["blocks[0]", "blocks[2]", "blocks[5]", "blocks[11]", "blocks[14]"] +} + +# "large", "small" is just for MobileNetV3_large, MobileNetV3_small respectively. +# The type of "large" or "small" config is a list.
Each element(list) represents a depthwise block, which is composed of k, exp, se, act, s. +# k: kernel_size +# exp: middle channel number in depthwise block +# c: output channel number in depthwise block +# se: whether to use SE block +# act: which activation to use +# s: stride in depthwise block +# d: dilation rate in depthwise block +NET_CONFIG = { + "large": [ + # k, exp, c, se, act, s + [3, 16, 16, False, "relu", 1], + [3, 64, 24, False, "relu", 2], + [3, 72, 24, False, "relu", 1], # x4 + [5, 72, 40, True, "relu", 2], + [5, 120, 40, True, "relu", 1], + [5, 120, 40, True, "relu", 1], # x8 + [3, 240, 80, False, "hardswish", 2], + [3, 200, 80, False, "hardswish", 1], + [3, 184, 80, False, "hardswish", 1], + [3, 184, 80, False, "hardswish", 1], + [3, 480, 112, True, "hardswish", 1], + [3, 672, 112, True, "hardswish", 1], # x16 + [5, 672, 160, True, "hardswish", 2], + [5, 960, 160, True, "hardswish", 1], + [5, 960, 160, True, "hardswish", 1], # x32 + ], + "small": [ + # k, exp, c, se, act, s + [3, 16, 16, True, "relu", 2], + [3, 72, 24, False, "relu", 2], + [3, 88, 24, False, "relu", 1], + [5, 96, 40, True, "hardswish", 2], + [5, 240, 40, True, "hardswish", 1], + [5, 240, 40, True, "hardswish", 1], + [5, 120, 48, True, "hardswish", 1], + [5, 144, 48, True, "hardswish", 1], + [5, 288, 96, True, "hardswish", 2], + [5, 576, 96, True, "hardswish", 1], + [5, 576, 96, True, "hardswish", 1], + ], + "large_os8": [ + # k, exp, c, se, act, s, {d} + [3, 16, 16, False, "relu", 1], + [3, 64, 24, False, "relu", 2], + [3, 72, 24, False, "relu", 1], # x4 + [5, 72, 40, True, "relu", 2], + [5, 120, 40, True, "relu", 1], + [5, 120, 40, True, "relu", 1], # x8 + [3, 240, 80, False, "hardswish", 1], + [3, 200, 80, False, "hardswish", 1, 2], + [3, 184, 80, False, "hardswish", 1, 2], + [3, 184, 80, False, "hardswish", 1, 2], + [3, 480, 112, True, "hardswish", 1, 2], + [3, 672, 112, True, "hardswish", 1, 2], + [5, 672, 160, True, "hardswish", 1, 2], + [5, 960, 160, True, "hardswish", 1, 
4], + [5, 960, 160, True, "hardswish", 1, 4], + ], + "small_os8": [ + # k, exp, c, se, act, s, {d} + [3, 16, 16, True, "relu", 2], + [3, 72, 24, False, "relu", 2], + [3, 88, 24, False, "relu", 1], + [5, 96, 40, True, "hardswish", 1], + [5, 240, 40, True, "hardswish", 1, 2], + [5, 240, 40, True, "hardswish", 1, 2], + [5, 120, 48, True, "hardswish", 1, 2], + [5, 144, 48, True, "hardswish", 1, 2], + [5, 288, 96, True, "hardswish", 1, 2], + [5, 576, 96, True, "hardswish", 1, 4], + [5, 576, 96, True, "hardswish", 1, 4], + ] +} + +OUT_INDEX = {"large": [2, 5, 11, 14], "small": [0, 2, 7, 10]} + + +def _make_divisible(v, divisor=8, min_value=None): if min_value is None: min_value = divisor new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) @@ -38,156 +124,113 @@ def make_divisible(v, divisor=8, min_value=None): return new_v -class MobileNetV3(nn.Layer): - """ - The MobileNetV3 implementation based on PaddlePaddle. +def _create_act(act): + if act == "hardswish": + return nn.Hardswish() + elif act == "relu": + return nn.ReLU() + elif act is None: + return None + else: + raise RuntimeError( + "The activation function is not supported: {}".format(act)) - The original article refers to Jingdong - Andrew Howard, et, al. "Searching for MobileNetV3" - (https://arxiv.org/pdf/1905.02244.pdf). +class MobileNetV3(nn.Layer): + """ + MobileNetV3 Args: - pretrained (str, optional): The path of pretrained model. - scale (float, optional): The scale of channels . Default: 1.0. - model_name (str, optional): Model name. It determines the type of MobileNetV3. The value is 'small' or 'large'. Defualt: 'small'. - output_stride (int, optional): The stride of output features compared to input images. The value should be one of (2, 4, 8, 16, 32). Default: None. - + config: list. MobileNetV3 depthwise blocks config. + in_channels (int, optional): The channels of input image. Default: 3. + scale: float=1.0. The coefficient that controls the size of network parameters. 
+ Returns: + model: nn.Layer. Specific MobileNetV3 model depends on args. """ def __init__(self, - pretrained=None, + config, + stages_pattern, + out_index, + in_channels=3, scale=1.0, - model_name="small", - output_stride=None): - super(MobileNetV3, self).__init__() + pretrained=None): + super().__init__() + self.cfg = config + self.out_index = out_index + self.scale = scale + self.pretrained = pretrained inplanes = 16 - if model_name == "large": - self.cfg = [ - # k, exp, c, se, nl, s, - [3, 16, 16, False, "relu", 1], - [3, 64, 24, False, "relu", 2], - [3, 72, 24, False, "relu", 1], # output 1 -> out_index=2 - [5, 72, 40, True, "relu", 2], - [5, 120, 40, True, "relu", 1], - [5, 120, 40, True, "relu", 1], # output 2 -> out_index=5 - [3, 240, 80, False, "hard_swish", 2], - [3, 200, 80, False, "hard_swish", 1], - [3, 184, 80, False, "hard_swish", 1], - [3, 184, 80, False, "hard_swish", 1], - [3, 480, 112, True, "hard_swish", 1], - [3, 672, 112, True, "hard_swish", - 1], # output 3 -> out_index=11 - [5, 672, 160, True, "hard_swish", 2], - [5, 960, 160, True, "hard_swish", 1], - [5, 960, 160, True, "hard_swish", - 1], # output 3 -> out_index=14 - ] - self.out_indices = [2, 5, 11, 14] - self.feat_channels = [ - make_divisible(i * scale) for i in [24, 40, 112, 160] - ] - - self.cls_ch_squeeze = 960 - self.cls_ch_expand = 1280 - elif model_name == "small": - self.cfg = [ - # k, exp, c, se, nl, s, - [3, 16, 16, True, "relu", 2], # output 1 -> out_index=0 - [3, 72, 24, False, "relu", 2], - [3, 88, 24, False, "relu", 1], # output 2 -> out_index=3 - [5, 96, 40, True, "hard_swish", 2], - [5, 240, 40, True, "hard_swish", 1], - [5, 240, 40, True, "hard_swish", 1], - [5, 120, 48, True, "hard_swish", 1], - [5, 144, 48, True, "hard_swish", 1], # output 3 -> out_index=7 - [5, 288, 96, True, "hard_swish", 2], - [5, 576, 96, True, "hard_swish", 1], - [5, 576, 96, True, "hard_swish", 1], # output 4 -> out_index=10 - ] - self.out_indices = [0, 3, 7, 10] - self.feat_channels = [ - 
make_divisible(i * scale) for i in [16, 24, 48, 96] - ] - - self.cls_ch_squeeze = 576 - self.cls_ch_expand = 1280 - else: - raise NotImplementedError( - "mode[{}_model] is not implemented!".format(model_name)) - - ################################################### - # modify stride and dilation based on output_stride - self.dilation_cfg = [1] * len(self.cfg) - self.modify_bottle_params(output_stride=output_stride) - ################################################### - - self.conv1 = ConvBNLayer( - in_c=3, - out_c=make_divisible(inplanes * scale), + + self.conv = ConvBNLayer( + in_c=in_channels, + out_c=_make_divisible(inplanes * self.scale), filter_size=3, stride=2, padding=1, num_groups=1, if_act=True, - act="hard_swish") - - self.block_list = [] - - inplanes = make_divisible(inplanes * scale) - for i, (k, exp, c, se, nl, s) in enumerate(self.cfg): - ###################################### - # add dilation rate - dilation_rate = self.dilation_cfg[i] - ###################################### - self.block_list.append( - ResidualUnit( - in_c=inplanes, - mid_c=make_divisible(scale * exp), - out_c=make_divisible(scale * c), - filter_size=k, - stride=s, - dilation=dilation_rate, - use_se=se, - act=nl, - name="conv" + str(i + 2))) - self.add_sublayer( - sublayer=self.block_list[-1], name="conv" + str(i + 2)) - inplanes = make_divisible(scale * c) - - self.pretrained = pretrained + act="hardswish") + self.blocks = nn.Sequential(*[ + ResidualUnit( + in_c=_make_divisible(inplanes * self.scale if i == 0 else + self.cfg[i - 1][2] * self.scale), + mid_c=_make_divisible(self.scale * exp), + out_c=_make_divisible(self.scale * c), + filter_size=k, + stride=s, + use_se=se, + act=act, + dilation=td[0] if td else 1) + for i, (k, exp, c, se, act, s, *td) in enumerate(self.cfg) + ]) + + out_channels = [config[idx][2] for idx in self.out_index] + self.feat_channels = [ + _make_divisible(self.scale * c) for c in out_channels + ] + + self.init_res(stages_pattern) self.init_weight() - def 
modify_bottle_params(self, output_stride=None): - - if output_stride is not None and output_stride % 2 != 0: - raise ValueError("output stride must to be even number") - if output_stride is not None: - stride = 2 - rate = 1 - for i, _cfg in enumerate(self.cfg): - stride = stride * _cfg[-1] - if stride > output_stride: - rate = rate * _cfg[-1] - self.cfg[i][-1] = 1 + def init_weight(self): + if self.pretrained is not None: + utils.load_entire_model(self, self.pretrained) + + def init_res(self, stages_pattern, return_patterns=None, + return_stages=None): + if return_patterns and return_stages: + msg = f"The 'return_patterns' would be ignored when 'return_stages' is set." + logger.warning(msg) + return_stages = None + + if return_stages is True: + return_patterns = stages_pattern + # return_stages is int or bool + if type(return_stages) is int: + return_stages = [return_stages] + if isinstance(return_stages, list): + if max(return_stages) >= len(stages_pattern) or min( + return_stages) < 0: + msg = f"The 'return_stages' set error. Illegal value(s) have been ignored. The stages' pattern list is {stages_pattern}." + logger.warning(msg) + return_stages = [ + val for val in return_stages + if val >= 0 and val < len(stages_pattern) + ] + return_patterns = [stages_pattern[i] for i in return_stages] - self.dilation_cfg[i] = rate + def forward(self, x): + x = self.conv(x) - def forward(self, inputs, label=None): - x = self.conv1(inputs) - # A feature list saves each downsampling feature.
feat_list = [] - for i, block in enumerate(self.block_list): + for idx, block in enumerate(self.blocks): x = block(x) - if i in self.out_indices: + if idx in self.out_index: feat_list.append(x) return feat_list - def init_weight(self): - if self.pretrained is not None: - utils.load_pretrained_model(self, self.pretrained) - class ConvBNLayer(nn.Layer): def __init__(self, @@ -196,36 +239,34 @@ class ConvBNLayer(nn.Layer): filter_size, stride, padding, - dilation=1, num_groups=1, if_act=True, - act=None): - super(ConvBNLayer, self).__init__() - self.if_act = if_act - self.act = act + act=None, + dilation=1): + super().__init__() - self.conv = nn.Conv2D( + self.conv = Conv2D( in_channels=in_c, out_channels=out_c, kernel_size=filter_size, stride=stride, padding=padding, - dilation=dilation, groups=num_groups, - bias_attr=False) - self.bn = layers.SyncBatchNorm( - num_features=out_c, - weight_attr=paddle.ParamAttr( - regularizer=paddle.regularizer.L2Decay(0.0)), - bias_attr=paddle.ParamAttr( - regularizer=paddle.regularizer.L2Decay(0.0))) - self._act_op = layers.Activation(act='hardswish') + bias_attr=False, + dilation=dilation) + self.bn = BatchNorm( + num_channels=out_c, + act=None, + param_attr=ParamAttr(regularizer=L2Decay(0.0)), + bias_attr=ParamAttr(regularizer=L2Decay(0.0))) + self.if_act = if_act + self.act = _create_act(act) def forward(self, x): x = self.conv(x) x = self.bn(x) if self.if_act: - x = self._act_op(x) + x = self.act(x) return x @@ -237,10 +278,9 @@ class ResidualUnit(nn.Layer): filter_size, stride, use_se, - dilation=1, act=None, - name=''): - super(ResidualUnit, self).__init__() + dilation=1): + super().__init__() self.if_shortcut = stride == 1 and in_c == out_c self.if_se = use_se @@ -252,19 +292,18 @@ class ResidualUnit(nn.Layer): padding=0, if_act=True, act=act) - self.bottleneck_conv = ConvBNLayer( in_c=mid_c, out_c=mid_c, filter_size=filter_size, stride=stride, - padding='same', - dilation=dilation, + padding=int((filter_size - 1) // 2) * 
dilation, num_groups=mid_c, if_act=True, - act=act) + act=act, + dilation=dilation) if self.if_se: - self.mid_se = SEModule(mid_c, name=name + "_se") + self.mid_se = SEModule(mid_c) self.linear_conv = ConvBNLayer( in_c=mid_c, out_c=out_c, @@ -273,92 +312,187 @@ class ResidualUnit(nn.Layer): padding=0, if_act=False, act=None) - self.dilation = dilation - def forward(self, inputs): - x = self.expand_conv(inputs) + def forward(self, x): + identity = x + x = self.expand_conv(x) x = self.bottleneck_conv(x) if self.if_se: x = self.mid_se(x) x = self.linear_conv(x) if self.if_shortcut: - x = inputs + x + x = paddle.add(identity, x) return x +# nn.Hardsigmoid can't transfer "slope" and "offset" in nn.functional.hardsigmoid +class Hardsigmoid(nn.Layer): + def __init__(self, slope=0.2, offset=0.5): + super().__init__() + self.slope = slope + self.offset = offset + + def forward(self, x): + return nn.functional.hardsigmoid( + x, slope=self.slope, offset=self.offset) + + class SEModule(nn.Layer): - def __init__(self, channel, reduction=4, name=""): - super(SEModule, self).__init__() - self.avg_pool = nn.AdaptiveAvgPool2D(1) - self.conv1 = nn.Conv2D( + def __init__(self, channel, reduction=4): + super().__init__() + self.avg_pool = AdaptiveAvgPool2D(1) + self.conv1 = Conv2D( in_channels=channel, out_channels=channel // reduction, kernel_size=1, stride=1, padding=0) - self.conv2 = nn.Conv2D( + self.relu = nn.ReLU() + self.conv2 = Conv2D( in_channels=channel // reduction, out_channels=channel, kernel_size=1, stride=1, padding=0) + self.hardsigmoid = Hardsigmoid(slope=0.2, offset=0.5) - def forward(self, inputs): - outputs = self.avg_pool(inputs) - outputs = self.conv1(outputs) - outputs = F.relu(outputs) - outputs = self.conv2(outputs) - outputs = F.hardsigmoid(outputs) - return paddle.multiply(x=inputs, y=outputs) + def forward(self, x): + identity = x + x = self.avg_pool(x) + x = self.conv1(x) + x = self.relu(x) + x = self.conv2(x) + x = self.hardsigmoid(x) + return 
paddle.multiply(x=identity, y=x) +@manager.BACKBONES.add_component def MobileNetV3_small_x0_35(**kwargs): - model = MobileNetV3(model_name="small", scale=0.35, **kwargs) + model = MobileNetV3( + config=NET_CONFIG["small"], + scale=0.35, + stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"], + out_index=OUT_INDEX["small"], + **kwargs) return model +@manager.BACKBONES.add_component def MobileNetV3_small_x0_5(**kwargs): - model = MobileNetV3(model_name="small", scale=0.5, **kwargs) + model = MobileNetV3( + config=NET_CONFIG["small"], + scale=0.5, + stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"], + out_index=OUT_INDEX["small"], + **kwargs) return model +@manager.BACKBONES.add_component def MobileNetV3_small_x0_75(**kwargs): - model = MobileNetV3(model_name="small", scale=0.75, **kwargs) + model = MobileNetV3( + config=NET_CONFIG["small"], + scale=0.75, + stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"], + out_index=OUT_INDEX["small"], + **kwargs) return model @manager.BACKBONES.add_component def MobileNetV3_small_x1_0(**kwargs): - model = MobileNetV3(model_name="small", scale=1.0, **kwargs) + model = MobileNetV3( + config=NET_CONFIG["small"], + scale=1.0, + stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"], + out_index=OUT_INDEX["small"], + **kwargs) return model +@manager.BACKBONES.add_component def MobileNetV3_small_x1_25(**kwargs): - model = MobileNetV3(model_name="small", scale=1.25, **kwargs) + model = MobileNetV3( + config=NET_CONFIG["small"], + scale=1.25, + stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"], + out_index=OUT_INDEX["small"], + **kwargs) return model +@manager.BACKBONES.add_component def MobileNetV3_large_x0_35(**kwargs): - model = MobileNetV3(model_name="large", scale=0.35, **kwargs) + model = MobileNetV3( + config=NET_CONFIG["large"], + scale=0.35, + stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"], + out_index=OUT_INDEX["large"], + **kwargs) return model +@manager.BACKBONES.add_component def
MobileNetV3_large_x0_5(**kwargs): - model = MobileNetV3(model_name="large", scale=0.5, **kwargs) + model = MobileNetV3( + config=NET_CONFIG["large"], + scale=0.5, + stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"], + out_index=OUT_INDEX["large"], + **kwargs) return model +@manager.BACKBONES.add_component def MobileNetV3_large_x0_75(**kwargs): - model = MobileNetV3(model_name="large", scale=0.75, **kwargs) + model = MobileNetV3( + config=NET_CONFIG["large"], + scale=0.75, + stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"], + out_index=OUT_INDEX["large"], + **kwargs) return model @manager.BACKBONES.add_component def MobileNetV3_large_x1_0(**kwargs): - model = MobileNetV3(model_name="large", scale=1.0, **kwargs) + model = MobileNetV3( + config=NET_CONFIG["large"], + scale=1.0, + stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"], + out_index=OUT_INDEX["large"], + **kwargs) return model +@manager.BACKBONES.add_component def MobileNetV3_large_x1_25(**kwargs): - model = MobileNetV3(model_name="large", scale=1.25, **kwargs) + model = MobileNetV3( + config=NET_CONFIG["large"], + scale=1.25, + stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"], + out_index=OUT_INDEX["large"], + **kwargs) + return model + + +@manager.BACKBONES.add_component +def MobileNetV3_large_x1_0_os8(**kwargs): + model = MobileNetV3( + config=NET_CONFIG["large_os8"], + scale=1.0, + stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"], + out_index=OUT_INDEX["large"], + **kwargs) + return model + + +@manager.BACKBONES.add_component +def MobileNetV3_small_x1_0_os8(**kwargs): + model = MobileNetV3( + config=NET_CONFIG["small_os8"], + scale=1.0, + stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"], + out_index=OUT_INDEX["small"], + **kwargs) return model diff --git a/paddlers/models/ppseg/models/backbones/resnet_vd.py b/paddlers/models/ppseg/models/backbones/resnet_vd.py index 2122bf5..0820e61 100644 --- a/paddlers/models/ppseg/models/backbones/resnet_vd.py +++ 
b/paddlers/models/ppseg/models/backbones/resnet_vd.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.models import layers -from paddlers.models.ppseg.utils import utils +from paddleseg.cvlibs import manager +from paddleseg.models import layers +from paddleseg.utils import utils __all__ = [ "ResNet18_vd", "ResNet34_vd", "ResNet50_vd", "ResNet101_vd", "ResNet152_vd" @@ -206,15 +206,16 @@ class ResNet_vd(nn.Layer): layers (int, optional): The layers of ResNet_vd. The supported layers are (18, 34, 50, 101, 152, 200). Default: 50. output_stride (int, optional): The stride of output features compared to input images. It is 8 or 16. Default: 8. multi_grid (tuple|list, optional): The grid of stage4. Defult: (1, 1, 1). + in_channels (int, optional): The channels of input image. Default: 3. pretrained (str, optional): The path of pretrained model. """ def __init__(self, - input_channel=3, layers=50, output_stride=8, multi_grid=(1, 1, 1), + in_channels=3, pretrained=None, data_format='NCHW'): super(ResNet_vd, self).__init__() @@ -252,7 +253,7 @@ class ResNet_vd(nn.Layer): dilation_dict = {3: 2} self.conv1_1 = ConvBNLayer( - in_channels=input_channel, + in_channels=in_channels, out_channels=32, kernel_size=3, stride=2, diff --git a/paddlers/models/ppseg/models/backbones/shufflenetv2.py b/paddlers/models/ppseg/models/backbones/shufflenetv2.py new file mode 100644 index 0000000..0921e73 --- /dev/null +++ b/paddlers/models/ppseg/models/backbones/shufflenetv2.py @@ -0,0 +1,315 @@ +# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle +from paddle import ParamAttr, reshape, transpose, concat, split +from paddle.nn import Layer, Conv2D, MaxPool2D, AdaptiveAvgPool2D, BatchNorm, Linear +from paddle.nn.initializer import KaimingNormal +from paddle.nn.functional import swish + +from paddleseg.cvlibs import manager +from paddleseg.utils import utils, logger + +__all__ = [ + 'ShuffleNetV2_x0_25', 'ShuffleNetV2_x0_33', 'ShuffleNetV2_x0_5', + 'ShuffleNetV2_x1_0', 'ShuffleNetV2_x1_5', 'ShuffleNetV2_x2_0', + 'ShuffleNetV2_swish' +] + + +def channel_shuffle(x, groups): + x_shape = paddle.shape(x) + batch_size, height, width = x_shape[0], x_shape[2], x_shape[3] + num_channels = x.shape[1] + channels_per_group = num_channels // groups + + # reshape + x = reshape( + x=x, shape=[batch_size, groups, channels_per_group, height, width]) + + # transpose + x = transpose(x=x, perm=[0, 2, 1, 3, 4]) + + # flatten + x = reshape(x=x, shape=[batch_size, num_channels, height, width]) + + return x + + +class ConvBNLayer(Layer): + def __init__( + self, + in_channels, + out_channels, + kernel_size, + stride, + padding, + groups=1, + act=None, + name=None, ): + super(ConvBNLayer, self).__init__() + self._conv = Conv2D( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + groups=groups, + weight_attr=ParamAttr( + initializer=KaimingNormal(), name=name + "_weights"), + bias_attr=False) + + 
self._batch_norm = BatchNorm( + out_channels, + param_attr=ParamAttr(name=name + "_bn_scale"), + bias_attr=ParamAttr(name=name + "_bn_offset"), + act=act, + moving_mean_name=name + "_bn_mean", + moving_variance_name=name + "_bn_variance") + + def forward(self, inputs): + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class InvertedResidual(Layer): + def __init__(self, in_channels, out_channels, stride, act="relu", + name=None): + super(InvertedResidual, self).__init__() + self._conv_pw = ConvBNLayer( + in_channels=in_channels // 2, + out_channels=out_channels // 2, + kernel_size=1, + stride=1, + padding=0, + groups=1, + act=act, + name='stage_' + name + '_conv1') + self._conv_dw = ConvBNLayer( + in_channels=out_channels // 2, + out_channels=out_channels // 2, + kernel_size=3, + stride=stride, + padding=1, + groups=out_channels // 2, + act=None, + name='stage_' + name + '_conv2') + self._conv_linear = ConvBNLayer( + in_channels=out_channels // 2, + out_channels=out_channels // 2, + kernel_size=1, + stride=1, + padding=0, + groups=1, + act=act, + name='stage_' + name + '_conv3') + + def forward(self, inputs): + x1, x2 = split( + inputs, + num_or_sections=[inputs.shape[1] // 2, inputs.shape[1] // 2], + axis=1) + x2 = self._conv_pw(x2) + x2 = self._conv_dw(x2) + x2 = self._conv_linear(x2) + out = concat([x1, x2], axis=1) + return channel_shuffle(out, 2) + + +class InvertedResidualDS(Layer): + def __init__(self, in_channels, out_channels, stride, act="relu", + name=None): + super(InvertedResidualDS, self).__init__() + + # branch1 + self._conv_dw_1 = ConvBNLayer( + in_channels=in_channels, + out_channels=in_channels, + kernel_size=3, + stride=stride, + padding=1, + groups=in_channels, + act=None, + name='stage_' + name + '_conv4') + self._conv_linear_1 = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels // 2, + kernel_size=1, + stride=1, + padding=0, + groups=1, + act=act, + name='stage_' + name + '_conv5') + # branch2 + 
self._conv_pw_2 = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels // 2, + kernel_size=1, + stride=1, + padding=0, + groups=1, + act=act, + name='stage_' + name + '_conv1') + self._conv_dw_2 = ConvBNLayer( + in_channels=out_channels // 2, + out_channels=out_channels // 2, + kernel_size=3, + stride=stride, + padding=1, + groups=out_channels // 2, + act=None, + name='stage_' + name + '_conv2') + self._conv_linear_2 = ConvBNLayer( + in_channels=out_channels // 2, + out_channels=out_channels // 2, + kernel_size=1, + stride=1, + padding=0, + groups=1, + act=act, + name='stage_' + name + '_conv3') + + def forward(self, inputs): + x1 = self._conv_dw_1(inputs) + x1 = self._conv_linear_1(x1) + x2 = self._conv_pw_2(inputs) + x2 = self._conv_dw_2(x2) + x2 = self._conv_linear_2(x2) + out = concat([x1, x2], axis=1) + + return channel_shuffle(out, 2) + + +class ShuffleNet(Layer): + def __init__(self, scale=1.0, act="relu", in_channels=3, pretrained=None): + super(ShuffleNet, self).__init__() + self.scale = scale + self.pretrained = pretrained + stage_repeats = [4, 8, 4] + + if scale == 0.25: + stage_out_channels = [-1, 24, 24, 48, 96, 512] + elif scale == 0.33: + stage_out_channels = [-1, 24, 32, 64, 128, 512] + elif scale == 0.5: + stage_out_channels = [-1, 24, 48, 96, 192, 1024] + elif scale == 1.0: + stage_out_channels = [-1, 24, 116, 232, 464, 1024] + elif scale == 1.5: + stage_out_channels = [-1, 24, 176, 352, 704, 1024] + elif scale == 2.0: + stage_out_channels = [-1, 24, 224, 488, 976, 2048] + else: + raise NotImplementedError("This scale size:[" + str(scale) + + "] is not implemented!") + + self.out_index = [3, 11, 15] + self.feat_channels = stage_out_channels[1:5] + + # 1. conv1 + self._conv1 = ConvBNLayer( + in_channels=in_channels, + out_channels=stage_out_channels[1], + kernel_size=3, + stride=2, + padding=1, + act=act, + name='stage1_conv') + self._max_pool = MaxPool2D(kernel_size=3, stride=2, padding=1) + + # 2. 
bottleneck sequences + self._block_list = [] + for stage_id, num_repeat in enumerate(stage_repeats): + for i in range(num_repeat): + if i == 0: + block = self.add_sublayer( + name=str(stage_id + 2) + '_' + str(i + 1), + sublayer=InvertedResidualDS( + in_channels=stage_out_channels[stage_id + 1], + out_channels=stage_out_channels[stage_id + 2], + stride=2, + act=act, + name=str(stage_id + 2) + '_' + str(i + 1))) + else: + block = self.add_sublayer( + name=str(stage_id + 2) + '_' + str(i + 1), + sublayer=InvertedResidual( + in_channels=stage_out_channels[stage_id + 2], + out_channels=stage_out_channels[stage_id + 2], + stride=1, + act=act, + name=str(stage_id + 2) + '_' + str(i + 1))) + self._block_list.append(block) + + self.init_weight() + + def init_weight(self): + if self.pretrained is not None: + utils.load_entire_model(self, self.pretrained) + + def forward(self, inputs): + feat_list = [] + + y = self._conv1(inputs) + y = self._max_pool(y) + feat_list.append(y) + + for idx, inv in enumerate(self._block_list): + y = inv(y) + if idx in self.out_index: + feat_list.append(y) + return feat_list + + +@manager.BACKBONES.add_component +def ShuffleNetV2_x0_25(**kwargs): + model = ShuffleNet(scale=0.25, **kwargs) + return model + + +@manager.BACKBONES.add_component +def ShuffleNetV2_x0_33(**kwargs): + model = ShuffleNet(scale=0.33, **kwargs) + return model + + +@manager.BACKBONES.add_component +def ShuffleNetV2_x0_5(**kwargs): + model = ShuffleNet(scale=0.5, **kwargs) + return model + + +@manager.BACKBONES.add_component +def ShuffleNetV2_x1_0(**kwargs): + model = ShuffleNet(scale=1.0, **kwargs) + return model + + +@manager.BACKBONES.add_component +def ShuffleNetV2_x1_5(**kwargs): + model = ShuffleNet(scale=1.5, **kwargs) + return model + + +@manager.BACKBONES.add_component +def ShuffleNetV2_x2_0(**kwargs): + model = ShuffleNet(scale=2.0, **kwargs) + return model + + +@manager.BACKBONES.add_component +def ShuffleNetV2_swish(**kwargs): + model = ShuffleNet(scale=1.0, 
act="swish", **kwargs) + return model diff --git a/paddlers/models/ppseg/models/backbones/stdcnet.py b/paddlers/models/ppseg/models/backbones/stdcnet.py index 4135722..7dc89cf 100644 --- a/paddlers/models/ppseg/models/backbones/stdcnet.py +++ b/paddlers/models/ppseg/models/backbones/stdcnet.py @@ -17,9 +17,9 @@ import math import paddle import paddle.nn as nn -from paddlers.models.ppseg.utils import utils -from paddlers.models.ppseg.cvlibs import manager, param_init -from paddlers.models.ppseg.models.layers.layer_libs import SyncBatchNorm +from paddleseg.utils import utils +from paddleseg.cvlibs import manager, param_init +from paddleseg.models.layers.layer_libs import SyncBatchNorm __all__ = ["STDC1", "STDC2"] @@ -37,9 +37,9 @@ class STDCNet(nn.Layer): layers(list, optional): layers numbers list. It determines STDC block numbers of STDCNet's stage3\4\5. Defualt: [4, 5, 3]. block_num(int,optional): block_num of features block. Default: 4. type(str,optional): feature fusion method "cat"/"add". Default: "cat". - num_classes(int, optional): class number for image classification. Default: 1000. - dropout(float,optional): dropout ratio. if >0,use dropout ratio. Default: 0.20. - use_conv_last(bool,optional): whether to use the last ConvBNReLU layer . Default: False. + relative_lr(float,optional): parameters here receive a different learning rate when updating. The effective + learning rate is the prodcut of relative_lr and the global learning rate. Default: 1.0. + in_channels (int, optional): The channels of input image. Default: 3. pretrained(str, optional): the path of pretrained model. 
""" @@ -48,34 +48,18 @@ class STDCNet(nn.Layer): layers=[4, 5, 3], block_num=4, type="cat", - num_classes=1000, - dropout=0.20, - use_conv_last=False, + relative_lr=1.0, + in_channels=3, pretrained=None): super(STDCNet, self).__init__() if type == "cat": block = CatBottleneck elif type == "add": block = AddBottleneck - self.use_conv_last = use_conv_last - self.features = self._make_layers(base, layers, block_num, block) - self.conv_last = ConvBNRelu(base * 16, max(1024, base * 16), 1, 1) - - if (layers == [4, 5, 3]): #stdc1446 - self.x2 = nn.Sequential(self.features[:1]) - self.x4 = nn.Sequential(self.features[1:2]) - self.x8 = nn.Sequential(self.features[2:6]) - self.x16 = nn.Sequential(self.features[6:11]) - self.x32 = nn.Sequential(self.features[11:]) - elif (layers == [2, 2, 2]): #stdc813 - self.x2 = nn.Sequential(self.features[:1]) - self.x4 = nn.Sequential(self.features[1:2]) - self.x8 = nn.Sequential(self.features[2:4]) - self.x16 = nn.Sequential(self.features[4:6]) - self.x32 = nn.Sequential(self.features[6:]) - else: - raise NotImplementedError( - "model with layers:{} is not implemented!".format(layers)) + self.layers = layers + self.feat_channels = [base // 2, base, base * 4, base * 8, base * 16] + self.features = self._make_layers(in_channels, base, layers, block_num, + block, relative_lr) self.pretrained = pretrained self.init_weight() @@ -84,32 +68,42 @@ class STDCNet(nn.Layer): """ forward function for feature extract. 
""" - feat2 = self.x2(x) - feat4 = self.x4(feat2) - feat8 = self.x8(feat4) - feat16 = self.x16(feat8) - feat32 = self.x32(feat16) - if self.use_conv_last: - feat32 = self.conv_last(feat32) - return feat2, feat4, feat8, feat16, feat32 - - def _make_layers(self, base, layers, block_num, block): + out_feats = [] + + x = self.features[0](x) + out_feats.append(x) + x = self.features[1](x) + out_feats.append(x) + + idx = [[2, 2 + self.layers[0]], + [2 + self.layers[0], 2 + sum(self.layers[0:2])], + [2 + sum(self.layers[0:2]), 2 + sum(self.layers)]] + for start_idx, end_idx in idx: + for i in range(start_idx, end_idx): + x = self.features[i](x) + out_feats.append(x) + + return out_feats + + def _make_layers(self, in_channels, base, layers, block_num, block, + relative_lr): features = [] - features += [ConvBNRelu(3, base // 2, 3, 2)] - features += [ConvBNRelu(base // 2, base, 3, 2)] + features += [ConvBNRelu(in_channels, base // 2, 3, 2, relative_lr)] + features += [ConvBNRelu(base // 2, base, 3, 2, relative_lr)] for i, layer in enumerate(layers): for j in range(layer): if i == 0 and j == 0: - features.append(block(base, base * 4, block_num, 2)) + features.append( + block(base, base * 4, block_num, 2, relative_lr)) elif j == 0: features.append( block(base * int(math.pow(2, i + 1)), base * int( - math.pow(2, i + 2)), block_num, 2)) + math.pow(2, i + 2)), block_num, 2, relative_lr)) else: features.append( block(base * int(math.pow(2, i + 2)), base * int( - math.pow(2, i + 2)), block_num, 1)) + math.pow(2, i + 2)), block_num, 1, relative_lr)) return nn.Sequential(*features) @@ -125,16 +119,24 @@ class STDCNet(nn.Layer): class ConvBNRelu(nn.Layer): - def __init__(self, in_planes, out_planes, kernel=3, stride=1): + def __init__(self, + in_planes, + out_planes, + kernel=3, + stride=1, + relative_lr=1.0): super(ConvBNRelu, self).__init__() + param_attr = paddle.ParamAttr(learning_rate=relative_lr) self.conv = nn.Conv2D( in_planes, out_planes, kernel_size=kernel, stride=stride, 
padding=kernel // 2, + weight_attr=param_attr, bias_attr=False) - self.bn = SyncBatchNorm(out_planes, data_format='NCHW') + self.bn = nn.BatchNorm2D( + out_planes, weight_attr=param_attr, bias_attr=param_attr) self.relu = nn.ReLU() def forward(self, x): @@ -143,11 +145,17 @@ class ConvBNRelu(nn.Layer): class AddBottleneck(nn.Layer): - def __init__(self, in_planes, out_planes, block_num=3, stride=1): + def __init__(self, + in_planes, + out_planes, + block_num=3, + stride=1, + relative_lr=1.0): super(AddBottleneck, self).__init__() assert block_num > 1, "block number should be larger than 1." self.conv_list = nn.LayerList() self.stride = stride + param_attr = paddle.ParamAttr(learning_rate=relative_lr) if stride == 2: self.avd_layer = nn.Sequential( nn.Conv2D( @@ -157,8 +165,12 @@ class AddBottleneck(nn.Layer): stride=2, padding=1, groups=out_planes // 2, + weight_attr=param_attr, bias_attr=False), - nn.BatchNorm2D(out_planes // 2), ) + nn.BatchNorm2D( + out_planes // 2, + weight_attr=param_attr, + bias_attr=param_attr), ) self.skip = nn.Sequential( nn.Conv2D( in_planes, @@ -167,34 +179,53 @@ class AddBottleneck(nn.Layer): stride=2, padding=1, groups=in_planes, + weight_attr=param_attr, bias_attr=False), - nn.BatchNorm2D(in_planes), + nn.BatchNorm2D( + in_planes, weight_attr=param_attr, bias_attr=param_attr), nn.Conv2D( - in_planes, out_planes, kernel_size=1, bias_attr=False), - nn.BatchNorm2D(out_planes), ) + in_planes, + out_planes, + kernel_size=1, + bias_attr=False, + weight_attr=param_attr), + nn.BatchNorm2D( + out_planes, weight_attr=param_attr, bias_attr=param_attr), ) stride = 1 for idx in range(block_num): if idx == 0: self.conv_list.append( ConvBNRelu( - in_planes, out_planes // 2, kernel=1)) + in_planes, + out_planes // 2, + kernel=1, + relative_lr=relative_lr)) elif idx == 1 and block_num == 2: self.conv_list.append( ConvBNRelu( - out_planes // 2, out_planes // 2, stride=stride)) + out_planes // 2, + out_planes // 2, + stride=stride, + 
relative_lr=relative_lr)) elif idx == 1 and block_num > 2: self.conv_list.append( ConvBNRelu( - out_planes // 2, out_planes // 4, stride=stride)) + out_planes // 2, + out_planes // 4, + stride=stride, + relative_lr=relative_lr)) elif idx < block_num - 1: self.conv_list.append( - ConvBNRelu(out_planes // int(math.pow(2, idx)), out_planes - // int(math.pow(2, idx + 1)))) + ConvBNRelu( + out_planes // int(math.pow(2, idx)), + out_planes // int(math.pow(2, idx + 1)), + relative_lr=relative_lr)) else: self.conv_list.append( - ConvBNRelu(out_planes // int(math.pow(2, idx)), out_planes - // int(math.pow(2, idx)))) + ConvBNRelu(out_planes // int(math.pow(2, idx)), + out_planes // int(math.pow(2, idx))), + relative_lr=relative_lr) def forward(self, x): out_list = [] @@ -211,11 +242,17 @@ class AddBottleneck(nn.Layer): class CatBottleneck(nn.Layer): - def __init__(self, in_planes, out_planes, block_num=3, stride=1): + def __init__(self, + in_planes, + out_planes, + block_num=3, + stride=1, + relative_lr=1.0): super(CatBottleneck, self).__init__() assert block_num > 1, "block number should be larger than 1." 
self.conv_list = nn.LayerList() self.stride = stride + param_attr = paddle.ParamAttr(learning_rate=relative_lr) if stride == 2: self.avd_layer = nn.Sequential( nn.Conv2D( @@ -225,8 +262,12 @@ class CatBottleneck(nn.Layer): stride=2, padding=1, groups=out_planes // 2, + weight_attr=param_attr, bias_attr=False), - nn.BatchNorm2D(out_planes // 2), ) + nn.BatchNorm2D( + out_planes // 2, + weight_attr=param_attr, + bias_attr=param_attr), ) self.skip = nn.AvgPool2D(kernel_size=3, stride=2, padding=1) stride = 1 @@ -234,23 +275,36 @@ class CatBottleneck(nn.Layer): if idx == 0: self.conv_list.append( ConvBNRelu( - in_planes, out_planes // 2, kernel=1)) + in_planes, + out_planes // 2, + kernel=1, + relative_lr=relative_lr)) elif idx == 1 and block_num == 2: self.conv_list.append( ConvBNRelu( - out_planes // 2, out_planes // 2, stride=stride)) + out_planes // 2, + out_planes // 2, + stride=stride, + relative_lr=relative_lr)) elif idx == 1 and block_num > 2: self.conv_list.append( ConvBNRelu( - out_planes // 2, out_planes // 4, stride=stride)) + out_planes // 2, + out_planes // 4, + stride=stride, + relative_lr=relative_lr)) elif idx < block_num - 1: self.conv_list.append( - ConvBNRelu(out_planes // int(math.pow(2, idx)), out_planes - // int(math.pow(2, idx + 1)))) + ConvBNRelu( + out_planes // int(math.pow(2, idx)), + out_planes // int(math.pow(2, idx + 1)), + relative_lr=relative_lr)) else: self.conv_list.append( - ConvBNRelu(out_planes // int(math.pow(2, idx)), out_planes - // int(math.pow(2, idx)))) + ConvBNRelu( + out_planes // int(math.pow(2, idx)), + out_planes // int(math.pow(2, idx)), + relative_lr=relative_lr)) def forward(self, x): out_list = [] diff --git a/paddlers/models/ppseg/models/backbones/swin_transformer.py b/paddlers/models/ppseg/models/backbones/swin_transformer.py index eae23d3..ab5cb53 100644 --- a/paddlers/models/ppseg/models/backbones/swin_transformer.py +++ b/paddlers/models/ppseg/models/backbones/swin_transformer.py @@ -1,4 +1,4 @@ -# Copyright (c) 
2021 PaddlePaddle Authors. All Rights Reserve. +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -17,9 +17,9 @@ import paddle.nn as nn import paddle.nn.functional as F import numpy as np -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.utils import utils -from paddlers.models.ppseg.models.backbones.transformer_utils import * +from paddleseg.cvlibs import manager +from paddleseg.utils import utils +from paddleseg.models.backbones.transformer_utils import * class Mlp(nn.Layer): @@ -531,7 +531,7 @@ class SwinTransformer(nn.Layer): Args: pretrain_img_size (int): Input image size for training the pretrained model, used in absolute postion embedding. Default: 224. patch_size (int | tuple(int)): Patch size. Default: 4. - in_chans (int): Number of input image channels. Default: 3. + in_channels (int): Number of input image channels. Default: 3. embed_dim (int): Number of linear projection output channels. Default: 96. depths (tuple[int]): Depths of each Swin Transformer stage. num_heads (tuple[int]): Number of attention head of each stage. 
@@ -553,7 +553,7 @@ class SwinTransformer(nn.Layer): def __init__(self, pretrain_img_size=224, patch_size=4, - in_chans=3, + in_channels=3, embed_dim=96, depths=[2, 2, 6, 2], num_heads=[3, 6, 12, 24], @@ -583,7 +583,7 @@ class SwinTransformer(nn.Layer): # split image into non-overlapping patches self.patch_embed = PatchEmbed( patch_size=patch_size, - in_chans=in_chans, + in_chans=in_channels, embed_dim=embed_dim, norm_layer=norm_layer if self.patch_norm else None) diff --git a/paddlers/models/ppseg/models/backbones/top_transformer.py b/paddlers/models/ppseg/models/backbones/top_transformer.py new file mode 100644 index 0000000..7f13ae5 --- /dev/null +++ b/paddlers/models/ppseg/models/backbones/top_transformer.py @@ -0,0 +1,716 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +This file refers to https://github.com/hustvl/TopFormer and https://github.com/BR-IDL/PaddleViT +""" + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + +from paddleseg.cvlibs import manager +from paddleseg import utils +from paddleseg.models.backbones.transformer_utils import Identity, DropPath + +__all__ = ["TopTransformer_Base", "TopTransformer_Small", "TopTransformer_Tiny"] + + +def make_divisible(val, divisor, min_value=None): + """ + This function is taken from the original tf repo. 
+ It ensures that all layers have a channel number that is divisible by 8 + It can be seen here: + https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py + """ + if min_value is None: + min_value = divisor + new_v = max(min_value, int(val + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. + if new_v < 0.9 * val: + new_v += divisor + return new_v + + +class HSigmoid(nn.Layer): + def __init__(self, inplace=True): + super().__init__() + self.relu = nn.ReLU6() + + def forward(self, x): + return self.relu(x + 3) / 6 + + +class Conv2DBN(nn.Layer): + def __init__(self, + in_channels, + out_channels, + ks=1, + stride=1, + pad=0, + dilation=1, + groups=1, + bn_weight_init=1, + lr_mult=1.0): + super().__init__() + conv_weight_attr = paddle.ParamAttr(learning_rate=lr_mult) + self.c = nn.Conv2D( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=ks, + stride=stride, + padding=pad, + dilation=dilation, + groups=groups, + weight_attr=conv_weight_attr, + bias_attr=False) + bn_weight_attr = paddle.ParamAttr( + initializer=nn.initializer.Constant(bn_weight_init), + learning_rate=lr_mult) + bn_bias_attr = paddle.ParamAttr( + initializer=nn.initializer.Constant(0), learning_rate=lr_mult) + self.bn = nn.BatchNorm2D( + out_channels, weight_attr=bn_weight_attr, bias_attr=bn_bias_attr) + + def forward(self, inputs): + out = self.c(inputs) + out = self.bn(out) + return out + + +class ConvBNAct(nn.Layer): + def __init__(self, + in_channels, + out_channels, + kernel_size=1, + stride=1, + padding=0, + groups=1, + norm=nn.BatchNorm2D, + act=None, + bias_attr=False, + lr_mult=1.0): + super(ConvBNAct, self).__init__() + param_attr = paddle.ParamAttr(learning_rate=lr_mult) + self.conv = nn.Conv2D( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + groups=groups, + weight_attr=param_attr, + bias_attr=param_attr if bias_attr 
else False) + self.act = act() if act is not None else Identity() + self.bn = norm(out_channels, weight_attr=param_attr, bias_attr=param_attr) \ + if norm is not None else Identity() + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + x = self.act(x) + return x + + +class MLP(nn.Layer): + def __init__(self, + in_features, + hidden_features=None, + out_features=None, + act_layer=nn.ReLU, + drop=0., + lr_mult=1.0): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = Conv2DBN(in_features, hidden_features, lr_mult=lr_mult) + param_attr = paddle.ParamAttr(learning_rate=lr_mult) + self.dwconv = nn.Conv2D( + hidden_features, + hidden_features, + 3, + 1, + 1, + groups=hidden_features, + weight_attr=param_attr, + bias_attr=param_attr) + self.act = act_layer() + self.fc2 = Conv2DBN(hidden_features, out_features, lr_mult=lr_mult) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.dwconv(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +class InvertedResidual(nn.Layer): + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride, + expand_ratio, + activations=None, + lr_mult=1.0): + super(InvertedResidual, self).__init__() + assert stride in [1, 2], "The stride should be 1 or 2." 
+ + if activations is None: + activations = nn.ReLU + + hidden_dim = int(round(in_channels * expand_ratio)) + self.use_res_connect = stride == 1 and in_channels == out_channels + + layers = [] + if expand_ratio != 1: + layers.append( + Conv2DBN( + in_channels, hidden_dim, ks=1, lr_mult=lr_mult)) + layers.append(activations()) + layers.extend([ + Conv2DBN( + hidden_dim, + hidden_dim, + ks=kernel_size, + stride=stride, + pad=kernel_size // 2, + groups=hidden_dim, + lr_mult=lr_mult), activations(), Conv2DBN( + hidden_dim, out_channels, ks=1, lr_mult=lr_mult) + ]) + self.conv = nn.Sequential(*layers) + self.out_channels = out_channels + + def forward(self, x): + if self.use_res_connect: + return x + self.conv(x) + else: + return self.conv(x) + + +class TokenPyramidModule(nn.Layer): + def __init__(self, + cfgs, + out_indices, + in_channels=3, + inp_channel=16, + activation=nn.ReLU, + width_mult=1., + lr_mult=1.): + super().__init__() + self.out_indices = out_indices + + self.stem = nn.Sequential( + Conv2DBN( + in_channels, inp_channel, 3, 2, 1, lr_mult=lr_mult), + activation()) + + self.layers = [] + for i, (k, t, c, s) in enumerate(cfgs): + output_channel = make_divisible(c * width_mult, 8) + exp_size = t * inp_channel + exp_size = make_divisible(exp_size * width_mult, 8) + layer_name = 'layer{}'.format(i + 1) + layer = InvertedResidual( + inp_channel, + output_channel, + kernel_size=k, + stride=s, + expand_ratio=t, + activations=activation, + lr_mult=lr_mult) + self.add_sublayer(layer_name, layer) + self.layers.append(layer_name) + inp_channel = output_channel + + def forward(self, x): + outs = [] + x = self.stem(x) + for i, layer_name in enumerate(self.layers): + layer = getattr(self, layer_name) + x = layer(x) + if i in self.out_indices: + outs.append(x) + return outs + + +class Attention(nn.Layer): + def __init__(self, + dim, + key_dim, + num_heads, + attn_ratio=4, + activation=None, + lr_mult=1.0): + super().__init__() + self.num_heads = num_heads + self.scale = 
key_dim**-0.5 + self.key_dim = key_dim + self.nh_kd = nh_kd = key_dim * num_heads + self.d = int(attn_ratio * key_dim) + self.dh = int(attn_ratio * key_dim) * num_heads + self.attn_ratio = attn_ratio + + self.to_q = Conv2DBN(dim, nh_kd, 1, lr_mult=lr_mult) + self.to_k = Conv2DBN(dim, nh_kd, 1, lr_mult=lr_mult) + self.to_v = Conv2DBN(dim, self.dh, 1, lr_mult=lr_mult) + + self.proj = nn.Sequential( + activation(), + Conv2DBN( + self.dh, dim, bn_weight_init=0, lr_mult=lr_mult)) + + def forward(self, x): + x_shape = paddle.shape(x) + H, W = x_shape[2], x_shape[3] + + qq = self.to_q(x).reshape( + [0, self.num_heads, self.key_dim, -1]).transpose([0, 1, 3, 2]) + kk = self.to_k(x).reshape([0, self.num_heads, self.key_dim, -1]) + vv = self.to_v(x).reshape([0, self.num_heads, self.d, -1]).transpose( + [0, 1, 3, 2]) + + attn = paddle.matmul(qq, kk) + attn = F.softmax(attn, axis=-1) + + xx = paddle.matmul(attn, vv) + + xx = xx.transpose([0, 1, 3, 2]).reshape([0, self.dh, H, W]) + xx = self.proj(xx) + return xx + + +class Block(nn.Layer): + def __init__(self, + dim, + key_dim, + num_heads, + mlp_ratios=4., + attn_ratio=2., + drop=0., + drop_path=0., + act_layer=nn.ReLU, + lr_mult=1.0): + super().__init__() + self.dim = dim + self.num_heads = num_heads + self.mlp_ratios = mlp_ratios + + self.attn = Attention( + dim, + key_dim=key_dim, + num_heads=num_heads, + attn_ratio=attn_ratio, + activation=act_layer, + lr_mult=lr_mult) + + # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here + self.drop_path = DropPath(drop_path) if drop_path > 0. 
else Identity() + mlp_hidden_dim = int(dim * mlp_ratios) + self.mlp = MLP(in_features=dim, + hidden_features=mlp_hidden_dim, + act_layer=act_layer, + drop=drop, + lr_mult=lr_mult) + + def forward(self, x): + h = x + x = self.attn(x) + x = self.drop_path(x) + x = h + x + + h = x + x = self.mlp(x) + x = self.drop_path(x) + x = x + h + return x + + +class BasicLayer(nn.Layer): + def __init__(self, + block_num, + embedding_dim, + key_dim, + num_heads, + mlp_ratios=4., + attn_ratio=2., + drop=0., + attn_drop=0., + drop_path=0., + act_layer=None, + lr_mult=1.0): + super().__init__() + self.block_num = block_num + + self.transformer_blocks = nn.LayerList() + for i in range(self.block_num): + self.transformer_blocks.append( + Block( + embedding_dim, + key_dim=key_dim, + num_heads=num_heads, + mlp_ratios=mlp_ratios, + attn_ratio=attn_ratio, + drop=drop, + drop_path=drop_path[i] + if isinstance(drop_path, list) else drop_path, + act_layer=act_layer, + lr_mult=lr_mult)) + + def forward(self, x): + # token * N + for i in range(self.block_num): + x = self.transformer_blocks[i](x) + return x + + +class PyramidPoolAgg(nn.Layer): + def __init__(self, stride): + super().__init__() + self.stride = stride + self.tmp = Identity() # avoid the error of paddle.flops + + def forward(self, inputs): + ''' + # The F.adaptive_avg_pool2d does not support the (H, W) be Tensor, + # so exporting the inference model will raise error. 
+ _, _, H, W = inputs[-1].shape + H = (H - 1) // self.stride + 1 + W = (W - 1) // self.stride + 1 + return paddle.concat( + [F.adaptive_avg_pool2d(inp, (H, W)) for inp in inputs], axis=1) + ''' + out = [] + ks = 2**len(inputs) + stride = self.stride**len(inputs) + for x in inputs: + x = F.avg_pool2d(x, int(ks), int(stride)) + ks /= 2 + stride /= 2 + out.append(x) + out = paddle.concat(out, axis=1) + return out + + +class InjectionMultiSum(nn.Layer): + def __init__(self, in_channels, out_channels, activations=None, + lr_mult=1.0): + super(InjectionMultiSum, self).__init__() + + self.local_embedding = ConvBNAct( + in_channels, out_channels, kernel_size=1, lr_mult=lr_mult) + self.global_embedding = ConvBNAct( + in_channels, out_channels, kernel_size=1, lr_mult=lr_mult) + self.global_act = ConvBNAct( + in_channels, out_channels, kernel_size=1, lr_mult=lr_mult) + self.act = HSigmoid() + + def forward(self, x_low, x_global): + xl_hw = paddle.shape(x_low)[2:] + local_feat = self.local_embedding(x_low) + + global_act = self.global_act(x_global) + sig_act = F.interpolate( + self.act(global_act), xl_hw, mode='bilinear', align_corners=False) + + global_feat = self.global_embedding(x_global) + global_feat = F.interpolate( + global_feat, xl_hw, mode='bilinear', align_corners=False) + + out = local_feat * sig_act + global_feat + return out + + +class InjectionMultiSumCBR(nn.Layer): + def __init__(self, in_channels, out_channels, activations=None): + ''' + local_embedding: conv-bn-relu + global_embedding: conv-bn-relu + global_act: conv + ''' + super(InjectionMultiSumCBR, self).__init__() + + self.local_embedding = ConvBNAct( + in_channels, out_channels, kernel_size=1) + self.global_embedding = ConvBNAct( + in_channels, out_channels, kernel_size=1) + self.global_act = ConvBNAct( + in_channels, out_channels, kernel_size=1, norm=None, act=None) + self.act = HSigmoid() + + def forward(self, x_low, x_global): + xl_hw = paddle.shape(x)[2:] + local_feat = self.local_embedding(x_low) + 
class FuseBlockSum(nn.Layer):
    """Fuse a low-level and a high-level feature map by addition.

    Both inputs are projected with a 1x1 ``ConvBNAct`` (no activation). The
    projected high-level map is bilinearly resized to the spatial size of
    ``x_low`` and added to the projected low-level map.

    Args:
        in_channels (int): Channels of both input feature maps.
        out_channels (int): Channels of the fused output.
        activations: Unused by this block.
        lr_mult (float, optional): Accepted so that ``TopTransformer`` can
            construct this block with the same keywords it passes to the
            other injection blocks. Default: 1.0.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 activations=None,
                 lr_mult=1.0):
        super(FuseBlockSum, self).__init__()

        # NOTE(review): lr_mult is intentionally not forwarded to ConvBNAct
        # here; confirm whether ConvBNAct supports it before wiring it in.
        self.fuse1 = ConvBNAct(
            in_channels, out_channels, kernel_size=1, act=None)
        self.fuse2 = ConvBNAct(
            in_channels, out_channels, kernel_size=1, act=None)

    def forward(self, x_low, x_high):
        # The target size comes from the low-level input. The previous code
        # read an undefined name `x` here and raised NameError.
        xl_hw = paddle.shape(x_low)[2:]
        inp = self.fuse1(x_low)
        kernel = self.fuse2(x_high)
        feat_h = F.interpolate(
            kernel, xl_hw, mode='bilinear', align_corners=False)
        return inp + feat_h


class FuseBlockMulti(nn.Layer):
    """Fuse a low-level and a high-level feature map by gating.

    Both inputs are projected with a 1x1 ``ConvBNAct`` (no activation). The
    projected high-level map goes through ``HSigmoid``, is bilinearly resized
    to the spatial size of ``x_low`` and multiplies the projected low-level
    map element-wise.

    Args:
        in_channels (int): Channels of both input feature maps.
        out_channels (int): Channels of the fused output.
        stride (int, optional): Must be 1 or 2; only validated. Default: 1.
        activations: Unused by this block.
        lr_mult (float, optional): Accepted so that ``TopTransformer`` can
            construct this block with the same keywords it passes to the
            other injection blocks. Default: 1.0.
    """

    def __init__(
            self,
            in_channels,
            out_channels,
            stride=1,
            activations=None,
            lr_mult=1.0, ):
        super(FuseBlockMulti, self).__init__()
        assert stride in [1, 2], "The stride should be 1 or 2."

        # NOTE(review): lr_mult is intentionally not forwarded to ConvBNAct
        # here; confirm whether ConvBNAct supports it before wiring it in.
        self.fuse1 = ConvBNAct(
            in_channels, out_channels, kernel_size=1, act=None)
        self.fuse2 = ConvBNAct(
            in_channels, out_channels, kernel_size=1, act=None)
        self.act = HSigmoid()

    def forward(self, x_low, x_high):
        # The target size comes from the low-level input. The previous code
        # read an undefined name `x` here and raised NameError.
        xl_hw = paddle.shape(x_low)[2:]
        inp = self.fuse1(x_low)
        sig_act = self.fuse2(x_high)
        sig_act = F.interpolate(
            self.act(sig_act), xl_hw, mode='bilinear', align_corners=False)
        return inp * sig_act


# Maps the `injection_type` string accepted by TopTransformer to the layer
# class used to merge local tokens with global semantics.
SIM_BLOCK = {
    "fuse_sum": FuseBlockSum,
    "fuse_multi": FuseBlockMulti,
    "multi_sum": InjectionMultiSum,
    "multi_sum_cbr": InjectionMultiSumCBR,
}


class TopTransformer(nn.Layer):
    """TopFormer-style backbone: token pyramid + transformer + injection.

    The input runs through a ``TokenPyramidModule``; the selected pyramid
    outputs are aggregated by ``PyramidPoolAgg`` and refined by a
    ``BasicLayer`` transformer stack. With ``injection`` enabled, the
    transformer output is split back per pyramid level and merged with the
    matching pyramid feature by the block selected via ``injection_type``.

    Args:
        cfgs (list): Per-layer configs; element 2 of each entry is the
            channel count of that layer.
        injection_out_channels (list): Output channels of the injection
            block per pyramid level (entries for unused levels are ignored).
        encoder_out_indices (list): Indices into ``cfgs`` whose outputs are
            returned by the token pyramid.
        trans_out_indices (list|tuple, optional): Pyramid levels that get an
            injection block and are returned. Default: (1, 2, 3).
        depths (int, optional): Number of transformer blocks. Default: 4.
        key_dim (int, optional): Passed to ``BasicLayer``. Default: 16.
        num_heads (int, optional): Passed to ``BasicLayer``. Default: 8.
        attn_ratios (int, optional): Passed to ``BasicLayer`` as
            ``attn_ratio``. Default: 2.
        mlp_ratios (int, optional): Passed to ``BasicLayer``. Default: 2.
        c2t_stride (int, optional): Stride of ``PyramidPoolAgg``. Default: 2.
        drop_path_rate (float, optional): Upper end of the linearly spaced
            per-block drop-path rates. Default: 0.
        act_layer (nn.Layer, optional): Activation class. Default: nn.ReLU6.
        injection_type (str, optional): A key of ``SIM_BLOCK``.
            Default: "multi_sum".
        injection (bool, optional): Whether to build and apply the injection
            blocks. Default: True.
        lr_mult (float, optional): Learning-rate multiplier forwarded to the
            sub-modules. Default: 1.0.
        in_channels (int, optional): Channels of the input image. Default: 3.
        pretrained (str, optional): Path or url of pretrained weights.
            Default: None.
    """

    def __init__(self,
                 cfgs,
                 injection_out_channels,
                 encoder_out_indices,
                 trans_out_indices=(1, 2, 3),
                 depths=4,
                 key_dim=16,
                 num_heads=8,
                 attn_ratios=2,
                 mlp_ratios=2,
                 c2t_stride=2,
                 drop_path_rate=0.,
                 act_layer=nn.ReLU6,
                 injection_type="multi_sum",
                 injection=True,
                 lr_mult=1.0,
                 in_channels=3,
                 pretrained=None):
        super().__init__()
        self.feat_channels = [
            c[2] for i, c in enumerate(cfgs) if i in encoder_out_indices
        ]
        self.injection_out_channels = injection_out_channels
        self.injection = injection
        self.embed_dim = sum(self.feat_channels)
        self.trans_out_indices = trans_out_indices

        self.tpm = TokenPyramidModule(
            cfgs=cfgs,
            out_indices=encoder_out_indices,
            in_channels=in_channels,
            lr_mult=lr_mult)
        self.ppa = PyramidPoolAgg(stride=c2t_stride)

        # One drop-path rate per transformer block, growing linearly.
        dpr = [x.item() for x in \
               paddle.linspace(0, drop_path_rate, depths)]
        self.trans = BasicLayer(
            block_num=depths,
            embedding_dim=self.embed_dim,
            key_dim=key_dim,
            num_heads=num_heads,
            mlp_ratios=mlp_ratios,
            attn_ratio=attn_ratios,
            drop=0,
            attn_drop=0,
            drop_path=dpr,
            act_layer=act_layer,
            lr_mult=lr_mult)

        self.SIM = nn.LayerList()
        inj_module = SIM_BLOCK[injection_type]
        if self.injection:
            for i in range(len(self.feat_channels)):
                if i in trans_out_indices:
                    self.SIM.append(
                        inj_module(
                            self.feat_channels[i],
                            injection_out_channels[i],
                            activations=act_layer,
                            lr_mult=lr_mult))
                else:
                    # Placeholder keeps SIM indexable by pyramid level.
                    self.SIM.append(Identity())

        self.pretrained = pretrained
        self.init_weight()

    def init_weight(self):
        """Load pretrained weights when a path or url was given."""
        if self.pretrained is not None:
            utils.load_entire_model(self, self.pretrained)

    def forward(self, x):
        outputs = self.tpm(x)
        out = self.ppa(outputs)
        out = self.trans(out)

        if not self.injection:
            outputs.append(out)
            return outputs

        # Split the global tokens back into one chunk per pyramid level.
        xx = out.split(self.feat_channels, axis=1)
        results = []
        for i in range(len(self.feat_channels)):
            if i in self.trans_out_indices:
                local_tokens = outputs[i]
                global_semantics = xx[i]
                results.append(self.SIM[i](local_tokens, global_semantics))
        return results


def _build_top_transformer(cfgs, injection_channels, encoder_out_indices,
                           num_heads, kwargs):
    """Build a TopTransformer with the settings shared by all variants."""
    return TopTransformer(
        cfgs=cfgs,
        injection_out_channels=[None] + [injection_channels] * 3,
        encoder_out_indices=encoder_out_indices,
        trans_out_indices=[1, 2, 3],
        depths=4,
        key_dim=16,
        num_heads=num_heads,
        attn_ratios=2,
        mlp_ratios=2,
        c2t_stride=2,
        drop_path_rate=0.,
        act_layer=nn.ReLU6,
        injection_type="multi_sum",
        injection=True,
        **kwargs)


@manager.BACKBONES.add_component
def TopTransformer_Base(**kwargs):
    """Create the Base variant (256 injection channels, 8 heads)."""
    cfgs = [
        # k,  t,  c, s
        [3, 1, 16, 1],  # 1/2
        [3, 4, 32, 2],  # 1/4 1
        [3, 3, 32, 1],  #
        [5, 3, 64, 2],  # 1/8 3
        [5, 3, 64, 1],  #
        [3, 3, 128, 2],  # 1/16 5
        [3, 3, 128, 1],  #
        [5, 6, 160, 2],  # 1/32 7
        [5, 6, 160, 1],  #
        [3, 6, 160, 1],  #
    ]
    return _build_top_transformer(cfgs, 256, [2, 4, 6, 9], 8, kwargs)


@manager.BACKBONES.add_component
def TopTransformer_Small(**kwargs):
    """Create the Small variant (192 injection channels, 6 heads)."""
    cfgs = [
        # k,  t,  c, s
        [3, 1, 16, 1],  # 1/2
        [3, 4, 24, 2],  # 1/4 1
        [3, 3, 24, 1],  #
        [5, 3, 48, 2],  # 1/8 3
        [5, 3, 48, 1],  #
        [3, 3, 96, 2],  # 1/16 5
        [3, 3, 96, 1],  #
        [5, 6, 128, 2],  # 1/32 7
        [5, 6, 128, 1],  #
        [3, 6, 128, 1],  #
    ]
    return _build_top_transformer(cfgs, 192, [2, 4, 6, 9], 6, kwargs)


@manager.BACKBONES.add_component
def TopTransformer_Tiny(**kwargs):
    """Create the Tiny variant (128 injection channels, 4 heads)."""
    cfgs = [
        # k,  t,  c, s
        [3, 1, 16, 1],  # 1/2
        [3, 4, 16, 2],  # 1/4 1
        [3, 3, 16, 1],  #
        [5, 3, 32, 2],  # 1/8 3
        [5, 3, 32, 1],  #
        [3, 3, 64, 2],  # 1/16 5
        [3, 3, 64, 1],  #
        [5, 6, 96, 2],  # 1/32 7
        [5, 6, 96, 1],  #
    ]
    return _build_top_transformer(cfgs, 128, [2, 4, 6, 8], 4, kwargs)
@@ -35,7 +35,7 @@ def drop_path(x, drop_prob=0., training=False): return x keep_prob = paddle.to_tensor(1 - drop_prob) shape = (paddle.shape(x)[0], ) + (1, ) * (x.ndim - 1) - random_tensor = keep_prob + paddle.rand(shape, dtype=x.dtype) + random_tensor = keep_prob + paddle.rand(shape).astype(x.dtype) random_tensor = paddle.floor(random_tensor) # binarize output = x.divide(keep_prob) * random_tensor return output diff --git a/paddlers/models/ppseg/models/backbones/vision_transformer.py b/paddlers/models/ppseg/models/backbones/vision_transformer.py index 350f1fd..ecc25d7 100644 --- a/paddlers/models/ppseg/models/backbones/vision_transformer.py +++ b/paddlers/models/ppseg/models/backbones/vision_transformer.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -20,9 +20,9 @@ import paddle.nn as nn import paddle.nn.functional as F import numpy as np -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.utils import utils, logger -from paddlers.models.ppseg.models.backbones.transformer_utils import to_2tuple, DropPath, Identity +from paddleseg.cvlibs import manager +from paddleseg.utils import utils, logger +from paddleseg.models.backbones.transformer_utils import to_2tuple, DropPath, Identity class Mlp(nn.Layer): @@ -154,7 +154,7 @@ class VisionTransformer(nn.Layer): def __init__(self, img_size=224, patch_size=16, - in_chans=3, + in_channels=3, embed_dim=768, depth=12, num_heads=12, @@ -176,7 +176,7 @@ class VisionTransformer(nn.Layer): self.patch_embed = PatchEmbed( img_size=img_size, patch_size=patch_size, - in_chans=in_chans, + in_chans=in_channels, embed_dim=embed_dim) self.pos_w = self.patch_embed.num_patches_in_w self.pos_h = self.patch_embed.num_patches_in_h diff --git 
a/paddlers/models/ppseg/models/backbones/xception_deeplab.py b/paddlers/models/ppseg/models/backbones/xception_deeplab.py index 216849e..4480096 100644 --- a/paddlers/models/ppseg/models/backbones/xception_deeplab.py +++ b/paddlers/models/ppseg/models/backbones/xception_deeplab.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,9 +15,9 @@ import paddle.nn as nn import paddle.nn.functional as F -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.utils import utils -from paddlers.models.ppseg.models import layers +from paddleseg.cvlibs import manager +from paddleseg.utils import utils +from paddleseg.models import layers __all__ = ["Xception41_deeplab", "Xception65_deeplab", "Xception71_deeplab"] @@ -255,12 +255,17 @@ class XceptionDeeplab(nn.Layer): Args: backbone (str): Which type of Xception_DeepLab to select. It should be one of ('xception_41', 'xception_65', 'xception_71'). + in_channels (int, optional): The channels of input image. Default: 3. pretrained (str, optional): The path of pretrained model. output_stride (int, optional): The stride of output features compared to input images. It is 8 or 16. Default: 16. 
""" - def __init__(self, backbone, pretrained=None, output_stride=16): + def __init__(self, + backbone, + in_channels=3, + pretrained=None, + output_stride=16): super(XceptionDeeplab, self).__init__() @@ -269,7 +274,7 @@ class XceptionDeeplab(nn.Layer): self.feat_channels = [128, 2048] self._conv1 = ConvBNLayer( - 3, + in_channels, 32, 3, stride=2, diff --git a/paddlers/models/ppseg/models/bisenet.py b/paddlers/models/ppseg/models/bisenet.py index f09f874..a22d617 100644 --- a/paddlers/models/ppseg/models/bisenet.py +++ b/paddlers/models/ppseg/models/bisenet.py @@ -18,9 +18,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddlers.models.ppseg import utils -from paddlers.models.ppseg.cvlibs import manager, param_init -from paddlers.models.ppseg.models import layers +from paddleseg import utils +from paddleseg.cvlibs import manager, param_init +from paddleseg.models import layers @manager.MODELS.add_component @@ -35,6 +35,7 @@ class BiSeNetV2(nn.Layer): Args: num_classes (int): The unique number of target classes. lambd (float, optional): A factor for controlling the size of semantic branch channels. Default: 0.25. + in_channels (int, optional): The channels of input image. Default: 3. pretrained (str, optional): The path or url of pretrained model. Default: None. 
""" @@ -42,6 +43,7 @@ class BiSeNetV2(nn.Layer): num_classes, lambd=0.25, align_corners=False, + in_channels=3, pretrained=None): super().__init__() @@ -51,8 +53,8 @@ class BiSeNetV2(nn.Layer): sb_channels = (C1, C3, C4, C5) mid_channels = 128 - self.db = DetailBranch(db_channels) - self.sb = SemanticBranch(sb_channels) + self.db = DetailBranch(in_channels, db_channels) + self.sb = SemanticBranch(in_channels, sb_channels) self.bga = BGA(mid_channels, align_corners) self.aux_head1 = SegHead(C1, C1, num_classes) @@ -189,15 +191,15 @@ class GatherAndExpansionLayer2(nn.Layer): class DetailBranch(nn.Layer): """The detail branch of BiSeNet, which has wide channels but shallow layers.""" - def __init__(self, in_channels): + def __init__(self, in_channels, feature_channels): super().__init__() - C1, C2, C3 = in_channels + C1, C2, C3 = feature_channels self.convs = nn.Sequential( # stage 1 layers.ConvBNReLU( - 3, C1, 3, stride=2), + in_channels, C1, 3, stride=2), layers.ConvBNReLU(C1, C1, 3), # stage 2 layers.ConvBNReLU( @@ -217,11 +219,11 @@ class DetailBranch(nn.Layer): class SemanticBranch(nn.Layer): """The semantic branch of BiSeNet, which has narrow channels but deep layers.""" - def __init__(self, in_channels): + def __init__(self, in_channels, feature_channels): super().__init__() - C1, C3, C4, C5 = in_channels + C1, C3, C4, C5 = feature_channels - self.stem = StemBlock(3, C1) + self.stem = StemBlock(in_channels, C1) self.stage3 = nn.Sequential( GatherAndExpansionLayer2(C1, C3, 6), diff --git a/paddlers/models/ppseg/models/bisenetv1.py b/paddlers/models/ppseg/models/bisenetv1.py index 15c39ef..1e7b897 100644 --- a/paddlers/models/ppseg/models/bisenetv1.py +++ b/paddlers/models/ppseg/models/bisenetv1.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.models import layers -from paddlers.models.ppseg.utils import utils +from paddleseg.cvlibs import 
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle
import paddle.nn as nn
import paddle.nn.functional as F

from paddleseg.cvlibs import manager
from paddleseg.models import layers
from paddleseg.utils import utils


@manager.MODELS.add_component
class CCNet(nn.Layer):
    """
    The CCNet implementation based on PaddlePaddle.

    The original article refers to
    Zilong Huang, et al. "CCNet: Criss-Cross Attention for Semantic Segmentation"
    (https://arxiv.org/abs/1811.11721)

    Args:
        num_classes (int): The unique number of target classes.
        backbone (paddle.nn.Layer): Backbone network, currently support Resnet18_vd/Resnet34_vd/Resnet50_vd/Resnet101_vd.
        backbone_indices (tuple, list, optional): Two values in the tuple indicate the indices of output of backbone. Default: (2, 3).
        enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss. Default: True.
        dropout_prob (float, optional): The probability of dropout. Default: 0.0.
        recurrence (int, optional): The number of recurrent operations. Default: 1.
        align_corners (bool, optional): An argument of F.interpolate. It should be set to False when the feature size is even,
            e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False.
        pretrained (str, optional): The path or url of pretrained model. Default: None.
    """

    def __init__(self,
                 num_classes,
                 backbone,
                 backbone_indices=(2, 3),
                 enable_auxiliary_loss=True,
                 dropout_prob=0.0,
                 recurrence=1,
                 align_corners=False,
                 pretrained=None):
        super().__init__()
        self.enable_auxiliary_loss = enable_auxiliary_loss
        self.recurrence = recurrence
        self.align_corners = align_corners

        self.backbone = backbone
        self.backbone_indices = backbone_indices
        backbone_channels = [
            backbone.feat_channels[i] for i in backbone_indices
        ]

        if enable_auxiliary_loss:
            self.aux_head = layers.AuxLayer(
                backbone_channels[0],
                512,
                num_classes,
                dropout_prob=dropout_prob)
        self.head = RCCAModule(
            backbone_channels[1],
            512,
            num_classes,
            dropout_prob=dropout_prob,
            recurrence=recurrence)
        self.pretrained = pretrained
        # Without this call the `pretrained` argument was stored but the
        # weights were never loaded.
        self.init_weight()

    def init_weight(self):
        """Load pretrained weights when a path or url was given."""
        if self.pretrained is not None:
            utils.load_entire_model(self, self.pretrained)

    def forward(self, x):
        feat_list = self.backbone(x)
        logit_list = []
        output = self.head(feat_list[self.backbone_indices[-1]])
        logit_list.append(output)
        # The auxiliary branch only contributes logits while training.
        if self.training and self.enable_auxiliary_loss:
            aux_out = self.aux_head(feat_list[self.backbone_indices[-2]])
            logit_list.append(aux_out)
        # Resize every logit map back to the input's spatial size.
        return [
            F.interpolate(
                logit,
                paddle.shape(x)[2:],
                mode='bilinear',
                align_corners=self.align_corners) for logit in logit_list
        ]


class RCCAModule(nn.Layer):
    """Recurrent criss-cross attention head.

    Reduces the input to ``in_channels // 4`` channels, applies
    ``CrissCrossAttention`` ``recurrence`` times, refines the result with a
    3x3 conv and classifies the concatenation of the input and the refined
    features with an ``AuxLayer``.

    Args:
        in_channels (int): Channels of the input feature map.
        out_channels (int): Intermediate channels of the classifier.
        num_classes (int): Number of output classes.
        dropout_prob (float, optional): Dropout of the classifier. Default: 0.1.
        recurrence (int, optional): Number of attention passes. Default: 1.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 num_classes,
                 dropout_prob=0.1,
                 recurrence=1):
        super().__init__()
        inter_channels = in_channels // 4
        self.recurrence = recurrence
        self.conva = layers.ConvBNLeakyReLU(
            in_channels, inter_channels, 3, padding=1, bias_attr=False)
        self.cca = CrissCrossAttention(inter_channels)
        self.convb = layers.ConvBNLeakyReLU(
            inter_channels, inter_channels, 3, padding=1, bias_attr=False)
        self.out = layers.AuxLayer(
            in_channels + inter_channels,
            out_channels,
            num_classes,
            dropout_prob=dropout_prob)

    def forward(self, x):
        feat = self.conva(x)
        for _ in range(self.recurrence):
            feat = self.cca(feat)
        feat = self.convb(feat)
        output = self.out(paddle.concat([x, feat], axis=1))
        return output


class CrissCrossAttention(nn.Layer):
    """Criss-cross attention: each position attends to its row and column.

    Args:
        in_channels (int): Channels of the input (and output) feature map.
    """

    def __init__(self, in_channels):
        super().__init__()
        self.q_conv = nn.Conv2D(in_channels, in_channels // 8, kernel_size=1)
        self.k_conv = nn.Conv2D(in_channels, in_channels // 8, kernel_size=1)
        self.v_conv = nn.Conv2D(in_channels, in_channels, kernel_size=1)
        self.softmax = nn.Softmax(axis=3)
        # gamma starts at 0, so the layer initially returns its input.
        self.gamma = self.create_parameter(
            shape=(1, ), default_initializer=nn.initializer.Constant(0))
        self.inf_tensor = paddle.full(shape=(1, ), fill_value=float('inf'))

    def forward(self, x):
        b, c, h, w = paddle.shape(x)
        # Queries, regrouped so that columns (…_h) and rows (…_w) become
        # independent batches for bmm.
        proj_q = self.q_conv(x)
        proj_q_h = proj_q.transpose([0, 3, 1, 2]).reshape(
            [b * w, -1, h]).transpose([0, 2, 1])
        proj_q_w = proj_q.transpose([0, 2, 1, 3]).reshape(
            [b * h, -1, w]).transpose([0, 2, 1])

        proj_k = self.k_conv(x)
        proj_k_h = proj_k.transpose([0, 3, 1, 2]).reshape([b * w, -1, h])
        proj_k_w = proj_k.transpose([0, 2, 1, 3]).reshape([b * h, -1, w])

        proj_v = self.v_conv(x)
        proj_v_h = proj_v.transpose([0, 3, 1, 2]).reshape([b * w, -1, h])
        proj_v_w = proj_v.transpose([0, 2, 1, 3]).reshape([b * h, -1, w])

        # The -inf diagonal removes each position from its own column
        # energies; the position itself is still covered by the row energies.
        energy_h = (paddle.bmm(proj_q_h, proj_k_h) + self.Inf(b, h, w)).reshape(
            [b, w, h, h]).transpose([0, 2, 1, 3])
        energy_w = paddle.bmm(proj_q_w, proj_k_w).reshape([b, h, w, w])
        # Joint softmax over the h column entries and the w row entries.
        concate = self.softmax(paddle.concat([energy_h, energy_w], axis=3))

        attn_h = concate[:, :, :, 0:h].transpose([0, 2, 1, 3]).reshape(
            [b * w, h, h])
        attn_w = concate[:, :, :, h:h + w].reshape([b * h, w, w])
        out_h = paddle.bmm(proj_v_h, attn_h.transpose([0, 2, 1])).reshape(
            [b, w, -1, h]).transpose([0, 2, 3, 1])
        out_w = paddle.bmm(proj_v_w, attn_w.transpose([0, 2, 1])).reshape(
            [b, h, -1, w]).transpose([0, 2, 1, 3])
        return self.gamma * (out_h + out_w) + x

    def Inf(self, B, H, W):
        """Return a [B * W, H, H] tensor with -inf on every diagonal."""
        return -paddle.tile(
            paddle.diag(paddle.tile(self.inf_tensor, [H]), 0).unsqueeze(0),
            [B * W, 1, 1])
import paddle
import paddle.nn as nn
import paddle.nn.functional as F

from paddleseg.cvlibs import manager, param_init
from paddleseg.models import layers
from paddleseg.utils import utils


class DualResNet(nn.Layer):
    """
    The DDRNet implementation based on PaddlePaddle.

    The original article refers to
    Yuanduo Hong, Huihui Pan, Weichao Sun, et al. "Deep Dual-resolution Networks for Real-time and Accurate Semantic Segmentation of Road Scenes"
    (https://arxiv.org/abs/2101.06085)

    Args:
        num_classes (int): The unique number of target classes.
        in_channels (int, optional): Number of input channels. Default: 3.
        block_layers (list, tuple): The numbers of layers in different blocks. Default: (2, 2, 2, 2).
        planes (int): Base channels in network. Default: 64.
        spp_planes (int): Branch channels for DAPPM. Default: 128.
        head_planes (int): Mid channels of segmentation head. Default: 128.
        enable_auxiliary_loss (bool): Whether use auxiliary head for stage3. Default: False.
        pretrained (str, optional): The path or url of pretrained model. Default: None.
    """

    def __init__(self,
                 num_classes,
                 in_channels=3,
                 block_layers=(2, 2, 2, 2),
                 planes=64,
                 spp_planes=128,
                 head_planes=128,
                 enable_auxiliary_loss=False,
                 pretrained=None):
        super().__init__()
        highres_planes = planes * 2
        self.enable_auxiliary_loss = enable_auxiliary_loss
        # Stem: two stride-2 convs.
        self.conv1 = nn.Sequential(
            layers.ConvBNReLU(
                in_channels, planes, kernel_size=3, stride=2, padding=1),
            layers.ConvBNReLU(
                planes, planes, kernel_size=3, stride=2, padding=1), )
        self.relu = nn.ReLU()
        self.layer1 = self._make_layers(BasicBlock, planes, planes,
                                        block_layers[0])
        self.layer2 = self._make_layers(
            BasicBlock, planes, planes * 2, block_layers[1], stride=2)
        self.layer3 = self._make_layers(
            BasicBlock, planes * 2, planes * 4, block_layers[2], stride=2)
        self.layer4 = self._make_layers(
            BasicBlock, planes * 4, planes * 8, block_layers[3], stride=2)

        # 1x1 projections from the low-resolution branch into the
        # high-resolution branch.
        self.compression3 = layers.ConvBN(
            planes * 4, highres_planes, kernel_size=1, bias_attr=False)

        self.compression4 = layers.ConvBN(
            planes * 8, highres_planes, kernel_size=1, bias_attr=False)

        # Strided convs from the high-resolution branch into the
        # low-resolution branch.
        self.down3 = layers.ConvBN(
            highres_planes,
            planes * 4,
            kernel_size=3,
            stride=2,
            bias_attr=False)

        self.down4 = nn.Sequential(
            layers.ConvBNReLU(
                highres_planes,
                planes * 4,
                kernel_size=3,
                stride=2,
                padding=1,
                bias_attr=False),
            layers.ConvBN(
                planes * 4,
                planes * 8,
                kernel_size=3,
                stride=2,
                padding=1,
                bias_attr=False))

        self.layer3_ = self._make_layers(BasicBlock, planes * 2, highres_planes,
                                         2)
        self.layer4_ = self._make_layers(BasicBlock, highres_planes,
                                         highres_planes, 2)
        self.layer5_ = self._make_layers(Bottleneck, highres_planes,
                                         highres_planes, 1)
        self.layer5 = self._make_layers(
            Bottleneck, planes * 8, planes * 8, 1, stride=2)

        self.spp = DAPPM(planes * 16, spp_planes, planes * 4)
        if self.enable_auxiliary_loss:
            self.aux_head = DDRNetHead(highres_planes, head_planes, num_classes)
        self.head = DDRNetHead(planes * 4, head_planes, num_classes)

        self.pretrained = pretrained
        self.init_weight()

    def init_weight(self):
        """Load pretrained weights, or initialise conv and BN layers."""
        if self.pretrained is not None:
            utils.load_entire_model(self, self.pretrained)
        else:
            for m in self.sublayers():
                if isinstance(m, nn.Conv2D):
                    param_init.kaiming_normal_init(m.weight)
                elif isinstance(m, nn.BatchNorm2D):
                    param_init.constant_init(m.weight, value=1)
                    param_init.constant_init(m.bias, value=0)

    def _make_layers(self, block, inplanes, planes, blocks, stride=1):
        """Stack ``blocks`` residual blocks of type ``block``.

        The first block carries the stride and, when the shape changes, a
        1x1 conv + BN shortcut. Of the following blocks only the last one is
        built with ``no_relu=True``.
        """
        downsample = None
        if stride != 1 or inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2D(
                    inplanes,
                    planes * block.expansion,
                    kernel_size=1,
                    stride=stride,
                    bias_attr=False),
                nn.BatchNorm2D(planes * block.expansion), )
        # Named `stages` so the imported `layers` module is not shadowed.
        stages = [block(inplanes, planes, stride, downsample)]
        inplanes = planes * block.expansion
        for i in range(1, blocks):
            stages.append(
                block(
                    inplanes, planes, stride=1, no_relu=(i == blocks - 1)))
        return nn.Sequential(*stages)

    def forward(self, x):
        _, _, h, w = paddle.shape(x)
        # The high-resolution branch works at 1/8 of the input size.
        width_output = w // 8
        height_output = h // 8

        x = self.conv1(x)
        stage1_out = self.layer1(x)
        stage2_out = self.layer2(self.relu(stage1_out))
        stage3_out = self.layer3(self.relu(stage2_out))
        stage3_out_dual = self.layer3_(self.relu(stage2_out))
        # Bilateral fusion after stage 3.
        x = stage3_out + self.down3(self.relu(stage3_out_dual))
        stage3_merge = stage3_out_dual + F.interpolate(
            self.compression3(self.relu(stage3_out)),
            size=[height_output, width_output],
            mode='bilinear')

        stage4_out = self.layer4(self.relu(x))
        stage4_out_dual = self.layer4_(self.relu(stage3_merge))

        # Bilateral fusion after stage 4.
        x = stage4_out + self.down4(self.relu(stage4_out_dual))
        stage4_merge = stage4_out_dual + F.interpolate(
            self.compression4(self.relu(stage4_out)),
            size=[height_output, width_output],
            mode='bilinear')

        stage5_out_dual = self.layer5_(self.relu(stage4_merge))
        x = F.interpolate(
            self.spp(self.layer5(self.relu(x))),
            size=[height_output, width_output],
            mode='bilinear')

        output = self.head(x + stage5_out_dual)
        logit_list = [output]

        if self.enable_auxiliary_loss:
            aux_out = self.aux_head(stage3_merge)
            logit_list.append(aux_out)
        # Resize every logit map back to the input's spatial size.
        return [
            F.interpolate(
                logit, [h, w], mode='bilinear') for logit in logit_list
        ]


class BasicBlock(nn.Layer):
    """Two 3x3 convs with a residual connection.

    With ``no_relu=True`` the output is returned without the final ReLU.
    """

    expansion = 1

    def __init__(self,
                 inplanes,
                 planes,
                 stride=1,
                 downsample=None,
                 no_relu=False):
        super().__init__()
        self.conv_bn_relu = layers.ConvBNReLU(
            inplanes,
            planes,
            kernel_size=3,
            stride=stride,
            padding=1,
            bias_attr=False)
        self.relu = nn.ReLU()
        self.conv_bn = layers.ConvBN(
            planes, planes, kernel_size=3, stride=1, padding=1, bias_attr=False)
        self.downsample = downsample
        self.stride = stride
        self.no_relu = no_relu

    def forward(self, x):
        residual = x
        out = self.conv_bn_relu(x)
        out = self.conv_bn(out)
        if self.downsample is not None:
            residual = self.downsample(x)
        out += residual
        if self.no_relu:
            return out
        return self.relu(out)


class Bottleneck(nn.Layer):
    """1x1 -> 3x3 -> 1x1 bottleneck with a residual connection.

    The last conv expands to ``planes * expansion`` channels. With
    ``no_relu=True`` (the default) no final ReLU is applied.
    """

    expansion = 2

    def __init__(self,
                 inplanes,
                 planes,
                 stride=1,
                 downsample=None,
                 no_relu=True):
        super().__init__()
        self.conv_bn_relu1 = layers.ConvBNReLU(
            inplanes, planes, kernel_size=1, bias_attr=False)
        self.conv_bn_relu2 = layers.ConvBNReLU(
            planes,
            planes,
            kernel_size=3,
            stride=stride,
            padding=1,
            bias_attr=False)
        self.conv_bn = layers.ConvBN(
            planes, planes * self.expansion, kernel_size=1, bias_attr=False)
        self.relu = nn.ReLU()
        self.downsample = downsample
        self.stride = stride
        self.no_relu = no_relu

    def forward(self, x):
        residual = x
        out = self.conv_bn_relu1(x)
        out = self.conv_bn_relu2(out)
        out = self.conv_bn(out)
        if self.downsample is not None:
            residual = self.downsample(x)
        out += residual
        if self.no_relu:
            return out
        return self.relu(out)


class DAPPM(nn.Layer):
    """Pyramid pooling module used on the low-resolution branch.

    Four pooled branches plus one un-pooled branch are computed from the
    input. Each pooled branch is upsampled to the input size, added to the
    previous branch's result and refined by a 3x3 conv. All five results are
    concatenated, compressed with a 1x1 conv and added to a 1x1 shortcut.

    Args:
        inplanes (int): Channels of the input feature map.
        branch_planes (int): Channels of every branch.
        outplanes (int): Channels of the output feature map.
    """

    def __init__(self, inplanes, branch_planes, outplanes):
        super().__init__()
        # Sublayers are created in the same order and under the same names
        # as before, so parameter names are unchanged.
        self.scale1 = self._bn_relu_conv(
            inplanes,
            branch_planes,
            pool=nn.AvgPool2D(
                kernel_size=5, stride=2, padding=2))
        self.scale2 = self._bn_relu_conv(
            inplanes,
            branch_planes,
            pool=nn.AvgPool2D(
                kernel_size=9, stride=4, padding=4))
        self.scale3 = self._bn_relu_conv(
            inplanes,
            branch_planes,
            pool=nn.AvgPool2D(
                kernel_size=17, stride=8, padding=8))
        self.scale4 = self._bn_relu_conv(
            inplanes, branch_planes, pool=nn.AdaptiveAvgPool2D((1, 1)))
        self.scale0 = self._bn_relu_conv(inplanes, branch_planes)
        self.process1 = self._bn_relu_conv(
            branch_planes, branch_planes, kernel_size=3, padding=1)
        self.process2 = self._bn_relu_conv(
            branch_planes, branch_planes, kernel_size=3, padding=1)
        self.process3 = self._bn_relu_conv(
            branch_planes, branch_planes, kernel_size=3, padding=1)
        self.process4 = self._bn_relu_conv(
            branch_planes, branch_planes, kernel_size=3, padding=1)
        self.compression = self._bn_relu_conv(branch_planes * 5, outplanes)
        self.shortcut = self._bn_relu_conv(inplanes, outplanes)

    @staticmethod
    def _bn_relu_conv(in_channels,
                      out_channels,
                      kernel_size=1,
                      padding=0,
                      pool=None):
        """Build [pool ->] SyncBatchNorm -> ReLU -> bias-free Conv2D."""
        stack = [] if pool is None else [pool]
        stack.append(layers.SyncBatchNorm(in_channels))
        stack.append(nn.ReLU())
        stack.append(
            nn.Conv2D(
                in_channels,
                out_channels,
                kernel_size=kernel_size,
                padding=padding,
                bias_attr=False))
        return nn.Sequential(*stack)

    def forward(self, x):
        _, _, h, w = paddle.shape(x)
        feats = [self.scale0(x)]
        branches = (
            (self.scale1, self.process1),
            (self.scale2, self.process2),
            (self.scale3, self.process3),
            (self.scale4, self.process4), )
        for scale, process in branches:
            # Upsample the pooled branch and fuse it with the previous one.
            upsampled = F.interpolate(scale(x), size=[h, w], mode='bilinear')
            feats.append(process(upsampled + feats[-1]))

        out = self.compression(paddle.concat(feats, 1)) + self.shortcut(x)
        return out


class DDRNetHead(nn.Layer):
    """Segmentation head: BN -> ReLU -> 3x3 ConvBNReLU -> 1x1 conv.

    When ``scale_factor`` is given, the logits are bilinearly upsampled by
    that factor.
    """

    def __init__(self, inplanes, interplanes, outplanes, scale_factor=None):
        super().__init__()
        self.bn1 = nn.BatchNorm2D(inplanes)
        self.relu = nn.ReLU()
        self.conv_bn_relu = layers.ConvBNReLU(
            inplanes, interplanes, kernel_size=3, padding=1, bias_attr=False)
        self.conv = nn.Conv2D(
            interplanes, outplanes, kernel_size=1, padding=0, bias_attr=True)

        self.scale_factor = scale_factor

    def forward(self, x):
        x = self.bn1(x)
        x = self.relu(x)
        x = self.conv_bn_relu(x)
        out = self.conv(x)

        if self.scale_factor is not None:
            out = F.interpolate(
                out, scale_factor=self.scale_factor, mode='bilinear')
        return out


@manager.MODELS.add_component
def DDRNet_23(**kwargs):
    """Create the DDRNet-23 configuration of ``DualResNet``."""
    return DualResNet(
        block_layers=[2, 2, 2, 2],
        planes=64,
        spp_planes=128,
        head_planes=128,
        **kwargs)
paddlers.models.ppseg.models.backbones import resnet_vd -from paddlers.models.ppseg.models import deeplab -from paddlers.models.ppseg.utils import utils +from paddleseg.cvlibs import manager +from paddleseg.models import layers +from paddleseg.models.backbones import resnet_vd +from paddleseg.models import deeplab +from paddleseg.utils import utils @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/deeplab.py b/paddlers/models/ppseg/models/deeplab.py index 3e50572..07863d5 100644 --- a/paddlers/models/ppseg/models/deeplab.py +++ b/paddlers/models/ppseg/models/deeplab.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.models import layers -from paddlers.models.ppseg.utils import utils +from paddleseg.cvlibs import manager +from paddleseg.models import layers +from paddleseg.utils import utils __all__ = ['DeepLabV3P', 'DeepLabV3'] diff --git a/paddlers/models/ppseg/models/dmnet.py b/paddlers/models/ppseg/models/dmnet.py index 52499a4..c150ac6 100644 --- a/paddlers/models/ppseg/models/dmnet.py +++ b/paddlers/models/ppseg/models/dmnet.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.models import layers -from paddlers.models.ppseg.utils import utils +from paddleseg.cvlibs import manager +from paddleseg.models import layers +from paddleseg.utils import utils @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/dnlnet.py b/paddlers/models/ppseg/models/dnlnet.py index 82e020a..527eab0 100644 --- a/paddlers/models/ppseg/models/dnlnet.py +++ b/paddlers/models/ppseg/models/dnlnet.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddlers.models.ppseg.models import layers -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.utils 
import utils +from paddleseg.models import layers +from paddleseg.cvlibs import manager +from paddleseg.utils import utils @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/emanet.py b/paddlers/models/ppseg/models/emanet.py index 0d1827e..33eedcd 100644 --- a/paddlers/models/ppseg/models/emanet.py +++ b/paddlers/models/ppseg/models/emanet.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddlers.models.ppseg.models import layers -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.utils import utils +from paddleseg.models import layers +from paddleseg.cvlibs import manager +from paddleseg.utils import utils @manager.MODELS.add_component @@ -209,7 +209,9 @@ class EMAU(nn.Layer): mu = F.normalize(mu, axis=1, p=2) mu = self.mu * (1 - self.momentum) + mu * self.momentum if paddle.distributed.get_world_size() > 1: - mu = paddle.distributed.all_reduce(mu) + out = paddle.distributed.all_reduce(mu) + if out is not None: + mu = out mu /= paddle.distributed.get_world_size() self.mu = mu diff --git a/paddlers/models/ppseg/models/encnet.py b/paddlers/models/ppseg/models/encnet.py index ce58414..81bb9ef 100644 --- a/paddlers/models/ppseg/models/encnet.py +++ b/paddlers/models/ppseg/models/encnet.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.models import layers -from paddlers.models.ppseg.utils import utils +from paddleseg.cvlibs import manager +from paddleseg.models import layers +from paddleseg.utils import utils @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/enet.py b/paddlers/models/ppseg/models/enet.py index 39f99c8..c677e46 100644 --- a/paddlers/models/ppseg/models/enet.py +++ b/paddlers/models/ppseg/models/enet.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddlers.models.ppseg 
import utils -from paddlers.models.ppseg.models import layers -from paddlers.models.ppseg.cvlibs import manager, param_init +from paddleseg import utils +from paddleseg.models import layers +from paddleseg.cvlibs import manager, param_init __all__ = ['ENet'] @@ -34,6 +34,7 @@ class ENet(nn.Layer): Args: num_classes (int): The unique number of target classes. + in_channels (int, optional): The channels of input image. Default: 3. pretrained (str, optional): The path or url of pretrained model. Default: None. encoder_relu (bool, optional): When ``True`` ReLU is used as the activation function; otherwise, PReLU is used. Default: False. @@ -43,13 +44,14 @@ class ENet(nn.Layer): def __init__(self, num_classes, + in_channels=3, pretrained=None, encoder_relu=False, decoder_relu=True): super(ENet, self).__init__() self.numclasses = num_classes - self.initial_block = InitialBlock(3, 16, relu=encoder_relu) + self.initial_block = InitialBlock(in_channels, 16, relu=encoder_relu) self.downsample1_0 = DownsamplingBottleneck( 16, 64, return_indices=True, dropout_prob=0.01, relu=encoder_relu) diff --git a/paddlers/models/ppseg/models/espnet.py b/paddlers/models/ppseg/models/espnet.py index 50de246..1751f0e 100644 --- a/paddlers/models/ppseg/models/espnet.py +++ b/paddlers/models/ppseg/models/espnet.py @@ -18,9 +18,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddlers.models.ppseg import utils -from paddlers.models.ppseg.cvlibs import manager, param_init -from paddlers.models.ppseg.models import layers +from paddleseg import utils +from paddleseg.cvlibs import manager, param_init +from paddleseg.models import layers @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/espnetv1.py b/paddlers/models/ppseg/models/espnetv1.py index 4515aa6..7f1142e 100644 --- a/paddlers/models/ppseg/models/espnetv1.py +++ b/paddlers/models/ppseg/models/espnetv1.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as 
F -from paddlers.models.ppseg.models import layers -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.utils import utils +from paddleseg.models import layers +from paddleseg.cvlibs import manager +from paddleseg.utils import utils @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/fast_scnn.py b/paddlers/models/ppseg/models/fast_scnn.py index e553a8f..aa8905f 100644 --- a/paddlers/models/ppseg/models/fast_scnn.py +++ b/paddlers/models/ppseg/models/fast_scnn.py @@ -16,9 +16,9 @@ import paddle.nn as nn import paddle.nn.functional as F import paddle -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.models import layers -from paddlers.models.ppseg.utils import utils +from paddleseg.cvlibs import manager +from paddleseg.models import layers +from paddleseg.utils import utils __all__ = ['FastSCNN'] @@ -34,6 +34,7 @@ class FastSCNN(nn.Layer): (https://arxiv.org/pdf/1902.04502.pdf). Args: num_classes (int): The unique number of target classes. + in_channels (int, optional): The channels of input image. Default: 3. enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss. If true, auxiliary loss will be added after LearningToDownsample module. Default: False. align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature @@ -43,13 +44,15 @@ class FastSCNN(nn.Layer): def __init__(self, num_classes, + in_channels=3, enable_auxiliary_loss=True, align_corners=False, pretrained=None): super().__init__() - self.learning_to_downsample = LearningToDownsample(32, 48, 64) + self.learning_to_downsample = LearningToDownsample(in_channels, 32, 48, + 64) self.global_feature_extractor = GlobalFeatureExtractor( in_channels=64, block_channels=[64, 96, 128], @@ -108,11 +111,18 @@ class LearningToDownsample(nn.Layer): out_channels (int, optional): The output channels of LearningToDownsample module. Default: 64. 
""" - def __init__(self, dw_channels1=32, dw_channels2=48, out_channels=64): + def __init__(self, + in_channels=3, + dw_channels1=32, + dw_channels2=48, + out_channels=64): super(LearningToDownsample, self).__init__() self.conv_bn_relu = layers.ConvBNReLU( - in_channels=3, out_channels=dw_channels1, kernel_size=3, stride=2) + in_channels=in_channels, + out_channels=dw_channels1, + kernel_size=3, + stride=2) self.dsconv_bn_relu1 = layers.SeparableConvBNReLU( in_channels=dw_channels1, out_channels=dw_channels2, diff --git a/paddlers/models/ppseg/models/fastfcn.py b/paddlers/models/ppseg/models/fastfcn.py index 2fc9c74..87c86eb 100644 --- a/paddlers/models/ppseg/models/fastfcn.py +++ b/paddlers/models/ppseg/models/fastfcn.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.models import layers -from paddlers.models.ppseg.utils import utils +from paddleseg.cvlibs import manager +from paddleseg.models import layers +from paddleseg.utils import utils @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/fcn.py b/paddlers/models/ppseg/models/fcn.py index 19554ca..e12aacd 100644 --- a/paddlers/models/ppseg/models/fcn.py +++ b/paddlers/models/ppseg/models/fcn.py @@ -16,9 +16,9 @@ import paddle.nn as nn import paddle.nn.functional as F import paddle -from paddlers.models.ppseg import utils -from paddlers.models.ppseg.cvlibs import manager, param_init -from paddlers.models.ppseg.models import layers +from paddleseg import utils +from paddleseg.cvlibs import manager, param_init +from paddleseg.models import layers @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/gcnet.py b/paddlers/models/ppseg/models/gcnet.py index c5fe7ae..fb5d8e3 100644 --- a/paddlers/models/ppseg/models/gcnet.py +++ b/paddlers/models/ppseg/models/gcnet.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from 
paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.models import layers -from paddlers.models.ppseg.utils import utils +from paddleseg.cvlibs import manager +from paddleseg.models import layers +from paddleseg.utils import utils @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/ginet.py b/paddlers/models/ppseg/models/ginet.py index 99f2888..fe4b9ae 100644 --- a/paddlers/models/ppseg/models/ginet.py +++ b/paddlers/models/ppseg/models/ginet.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn from paddle.nn import functional as F -from paddlers.models.ppseg.utils import utils -from paddlers.models.ppseg.models import layers -from paddlers.models.ppseg.cvlibs import manager +from paddleseg.utils import utils +from paddleseg.models import layers +from paddleseg.cvlibs import manager @manager.MODELS.add_component @@ -92,7 +92,7 @@ class GINet(nn.Layer): return [ F.interpolate( - logit, (h, w), + logit, [h, w], mode='bilinear', align_corners=self.align_corners) for logit in logit_list ] diff --git a/paddlers/models/ppseg/models/glore.py b/paddlers/models/ppseg/models/glore.py new file mode 100644 index 0000000..12a26c1 --- /dev/null +++ b/paddlers/models/ppseg/models/glore.py @@ -0,0 +1,198 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + +from paddleseg.cvlibs import manager +from paddleseg.models import layers +from paddleseg.utils import utils + + +@manager.MODELS.add_component +class GloRe(nn.Layer): + """ + The GloRe implementation based on PaddlePaddle. + + The original article refers to: + Chen, Yunpeng, et al. "Graph-Based Global Reasoning Networks" + (https://arxiv.org/pdf/1811.12814.pdf) + + Args: + num_classes (int): The unique number of target classes. + backbone (Paddle.nn.Layer): Backbone network, currently support Resnet50/101. + backbone_indices (tuple, optional): Two values in the tuple indicate the indices of output of backbone. + gru_channels (int, optional): The number of input channels in GloRe Unit. Default: 512. + gru_num_state (int, optional): The number of states in GloRe Unit. Default: 128. + gru_num_node (tuple, optional): The number of nodes in GloRe Unit. Default: Default: 128. + enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss. Default: True. + align_corners (bool, optional): An argument of F.interpolate. It should be set to False when the feature size is even, + e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False. + pretrained (str, optional): The path or url of pretrained model. Default: None. 
+ """ + + def __init__(self, + num_classes, + backbone, + backbone_indices=(2, 3), + gru_channels=512, + gru_num_state=128, + gru_num_node=64, + enable_auxiliary_loss=True, + align_corners=False, + pretrained=None): + super().__init__() + + self.backbone = backbone + backbone_channels = [ + backbone.feat_channels[i] for i in backbone_indices + ] + + self.head = GloReHead(num_classes, backbone_indices, backbone_channels, + gru_channels, gru_num_state, gru_num_node, + enable_auxiliary_loss) + self.align_corners = align_corners + self.pretrained = pretrained + self.init_weight() + + def forward(self, x): + feat_list = self.backbone(x) + logit_list = self.head(feat_list) + return [ + F.interpolate( + logit, + paddle.shape(x)[2:], + mode='bilinear', + align_corners=self.align_corners) for logit in logit_list + ] + + def init_weight(self): + if self.pretrained is not None: + utils.load_entire_model(self, self.pretrained) + + +class GloReHead(nn.Layer): + def __init__(self, + num_classes, + backbone_indices, + backbone_channels, + gru_channels=512, + gru_num_state=128, + gru_num_node=64, + enable_auxiliary_loss=True): + super().__init__() + + in_channels = backbone_channels[1] + self.conv_bn_relu = layers.ConvBNReLU( + in_channels, gru_channels, 1, bias_attr=False) + self.gru_module = GruModule( + num_input=gru_channels, + num_state=gru_num_state, + num_node=gru_num_node) + + self.dropout = nn.Dropout(0.1) + self.classifier = nn.Conv2D(512, num_classes, kernel_size=1) + self.auxlayer = layers.AuxLayer( + in_channels=backbone_channels[0], + inter_channels=backbone_channels[0] // 4, + out_channels=num_classes) + + self.backbone_indices = backbone_indices + self.enable_auxiliary_loss = enable_auxiliary_loss + + def forward(self, feat_list): + + logit_list = [] + x = feat_list[self.backbone_indices[1]] + + feature = self.conv_bn_relu(x) + gru_output = self.gru_module(feature) + output = self.dropout(gru_output) + logit = self.classifier(output) + logit_list.append(logit) + + 
if self.enable_auxiliary_loss: + low_level_feat = feat_list[self.backbone_indices[0]] + auxiliary_logit = self.auxlayer(low_level_feat) + logit_list.append(auxiliary_logit) + + return logit_list + + +class GCN(nn.Layer): + def __init__(self, num_state, num_node, bias=False): + super(GCN, self).__init__() + self.conv1 = nn.Conv1D(num_node, num_node, kernel_size=1) + self.relu = nn.ReLU() + self.conv2 = nn.Conv1D( + num_state, num_state, kernel_size=1, bias_attr=bias) + + def forward(self, x): + h = self.conv1(paddle.transpose(x, perm=(0, 2, 1))) + h = paddle.transpose(h, perm=(0, 2, 1)) + h = h + x + h = self.relu(self.conv2(h)) + return h + + +class GruModule(nn.Layer): + def __init__(self, + num_input=512, + num_state=128, + num_node=64, + normalize=False): + super(GruModule, self).__init__() + self.normalize = normalize + self.num_state = num_state + self.num_node = num_node + self.reduction_dim = nn.Conv2D(num_input, num_state, kernel_size=1) + self.projection_mat = nn.Conv2D(num_input, num_node, kernel_size=1) + self.gcn = GCN(num_state=self.num_state, num_node=self.num_node) + self.extend_dim = nn.Conv2D( + self.num_state, num_input, kernel_size=1, bias_attr=False) + self.extend_bn = layers.SyncBatchNorm(num_input, epsilon=1e-4) + + def forward(self, input): + n, c, h, w = input.shape + # B, C, H, W + reduction_dim = self.reduction_dim(input) + # B, N, H, W + mat_B = self.projection_mat(input) + # B, C, H*W + reshaped_reduction = paddle.reshape( + reduction_dim, shape=[n, self.num_state, h * w]) + # B, N, H*W + reshaped_B = paddle.reshape(mat_B, shape=[n, self.num_node, h * w]) + # B, N, H*W + reproject = reshaped_B + # B, C, N + node_state_V = paddle.matmul( + reshaped_reduction, paddle.transpose( + reshaped_B, perm=[0, 2, 1])) + + if self.normalize: + node_state_V = node_state_V * (1. 
/ reshaped_reduction.shape[2]) + + # B, C, N + gcn_out = self.gcn(node_state_V) + # B, C, H*W + Y = paddle.matmul(gcn_out, reproject) + # B, C, H, W + Y = paddle.reshape(Y, shape=[n, self.num_state, h, w]) + Y_extend = self.extend_dim(Y) + Y_extend = self.extend_bn(Y_extend) + + out = input + Y_extend + return out diff --git a/paddlers/models/ppseg/models/gscnn.py b/paddlers/models/ppseg/models/gscnn.py index 50895aa..9352751 100644 --- a/paddlers/models/ppseg/models/gscnn.py +++ b/paddlers/models/ppseg/models/gscnn.py @@ -18,11 +18,11 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.models import layers -from paddlers.models.ppseg.models.backbones import resnet_vd -from paddlers.models.ppseg.models import deeplab -from paddlers.models.ppseg.utils import utils +from paddleseg.cvlibs import manager +from paddleseg.models import layers +from paddleseg.models.backbones import resnet_vd +from paddleseg.models import deeplab +from paddleseg.utils import utils @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/hardnet.py b/paddlers/models/ppseg/models/hardnet.py index befa2c4..8de3154 100644 --- a/paddlers/models/ppseg/models/hardnet.py +++ b/paddlers/models/ppseg/models/hardnet.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.models import layers -from paddlers.models.ppseg.utils import utils +from paddleseg.cvlibs import manager +from paddleseg.models import layers +from paddleseg.utils import utils @manager.MODELS.add_component @@ -31,6 +31,7 @@ class HarDNet(nn.Layer): Args: num_classes (int): The unique number of target classes. + in_channels (int, optional): The channels of input image. Default: 3. stem_channels (tuple|list, optional): The number of channels before the encoder. Default: (16, 24, 32, 48). 
ch_list (tuple|list, optional): The number of channels at each block in the encoder. Default: (64, 96, 160, 224, 320). grmul (float, optional): The channel multiplying factor in HarDBlock, which is m in the paper. Default: 1.7. @@ -43,6 +44,7 @@ class HarDNet(nn.Layer): def __init__(self, num_classes, + in_channels=3, stem_channels=(16, 24, 32, 48), ch_list=(64, 96, 160, 224, 320), grmul=1.7, @@ -60,7 +62,7 @@ class HarDNet(nn.Layer): self.stem = nn.Sequential( layers.ConvBNReLU( - 3, stem_channels[0], kernel_size=3, bias_attr=False), + in_channels, stem_channels[0], kernel_size=3, bias_attr=False), layers.ConvBNReLU( stem_channels[0], stem_channels[1], diff --git a/paddlers/models/ppseg/models/hrnet_contrast.py b/paddlers/models/ppseg/models/hrnet_contrast.py index 30f289b..dd5a206 100644 --- a/paddlers/models/ppseg/models/hrnet_contrast.py +++ b/paddlers/models/ppseg/models/hrnet_contrast.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.models import layers -from paddlers.models.ppseg.utils import utils +from paddleseg.cvlibs import manager +from paddleseg.models import layers +from paddleseg.utils import utils @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/isanet.py b/paddlers/models/ppseg/models/isanet.py index 13bd4f2..e8bb3df 100644 --- a/paddlers/models/ppseg/models/isanet.py +++ b/paddlers/models/ppseg/models/isanet.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddlers.models.ppseg.models import layers -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.utils import utils +from paddleseg.models import layers +from paddleseg.cvlibs import manager +from paddleseg.utils import utils @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/layers/__init__.py b/paddlers/models/ppseg/models/layers/__init__.py index 
f66ed44..509641c 100644 --- a/paddlers/models/ppseg/models/layers/__init__.py +++ b/paddlers/models/ppseg/models/layers/__init__.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .layer_libs import ConvBNReLU, ConvBN, SeparableConvBNReLU, DepthwiseConvBN, AuxLayer, SyncBatchNorm, JPU, ConvBNPReLU +from .layer_libs import ConvBNReLU, ConvBN, SeparableConvBNReLU, DepthwiseConvBN, AuxLayer, SyncBatchNorm, JPU, ConvBNPReLU, ConvBNAct, ConvBNLeakyReLU from .activation import Activation from .pyramid_pool import ASPPModule, PPModule from .attention import AttentionBlock from .nonlocal2d import NonLocal2D from .wrap_functions import * +from .tensor_fusion import UAFM_SpAtten, UAFM_SpAtten_S, UAFM_ChAtten, UAFM_ChAtten_S, UAFM, UAFMMobile, UAFMMobile_SpAtten diff --git a/paddlers/models/ppseg/models/layers/activation.py b/paddlers/models/ppseg/models/layers/activation.py index 7eb9173..046ba87 100644 --- a/paddlers/models/ppseg/models/layers/activation.py +++ b/paddlers/models/ppseg/models/layers/activation.py @@ -33,7 +33,7 @@ class Activation(nn.Layer): Examples: - from paddlers.models.ppseg.models.common.activation import Activation + from paddleseg.models.common.activation import Activation relu = Activation("relu") print(relu) diff --git a/paddlers/models/ppseg/models/layers/attention.py b/paddlers/models/ppseg/models/layers/attention.py index c6e4a9a..e2db9ea 100644 --- a/paddlers/models/ppseg/models/layers/attention.py +++ b/paddlers/models/ppseg/models/layers/attention.py @@ -16,7 +16,7 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddlers.models.ppseg.models import layers +from paddleseg.models import layers class AttentionBlock(nn.Layer): @@ -144,3 +144,129 @@ class AttentionBlock(nn.Layer): if self.out_project is not None: context = self.out_project(context) return context + + +class DualAttentionModule(nn.Layer): + """ + Dual attention module. 
+ + Args: + in_channels (int): The number of input channels. + out_channels (int): The number of output channels. + """ + + def __init__(self, in_channels, out_channels): + super().__init__() + inter_channels = in_channels // 4 + + self.channel_conv = layers.ConvBNReLU(in_channels, inter_channels, 1) + self.position_conv = layers.ConvBNReLU(in_channels, inter_channels, 1) + self.pam = PAM(inter_channels) + self.cam = CAM(inter_channels) + self.conv1 = layers.ConvBNReLU(inter_channels, inter_channels, 3) + self.conv2 = layers.ConvBNReLU(inter_channels, inter_channels, 3) + self.conv3 = layers.ConvBNReLU(inter_channels, out_channels, 3) + + def forward(self, feats): + channel_feats = self.channel_conv(feats) + channel_feats = self.cam(channel_feats) + channel_feats = self.conv1(channel_feats) + + position_feats = self.position_conv(feats) + position_feats = self.pam(position_feats) + position_feats = self.conv2(position_feats) + + feats_sum = position_feats + channel_feats + out = self.conv3(feats_sum) + return out + + +class PAM(nn.Layer): + """ + Position attention module. + Args: + in_channels (int): The number of input channels. 
+ """ + + def __init__(self, in_channels): + super().__init__() + mid_channels = in_channels // 8 + self.mid_channels = mid_channels + self.in_channels = in_channels + + self.query_conv = nn.Conv2D(in_channels, mid_channels, 1, 1) + self.key_conv = nn.Conv2D(in_channels, mid_channels, 1, 1) + self.value_conv = nn.Conv2D(in_channels, in_channels, 1, 1) + + self.gamma = self.create_parameter( + shape=[1], + dtype='float32', + default_initializer=nn.initializer.Constant(0)) + + def forward(self, x): + x_shape = paddle.shape(x) + + # query: n, h * w, c1 + query = self.query_conv(x) + query = paddle.reshape(query, (0, self.mid_channels, -1)) + query = paddle.transpose(query, (0, 2, 1)) + + # key: n, c1, h * w + key = self.key_conv(x) + key = paddle.reshape(key, (0, self.mid_channels, -1)) + + # sim: n, h * w, h * w + sim = paddle.bmm(query, key) + sim = F.softmax(sim, axis=-1) + + value = self.value_conv(x) + value = paddle.reshape(value, (0, self.in_channels, -1)) + sim = paddle.transpose(sim, (0, 2, 1)) + + # feat: from (n, c2, h * w) -> (n, c2, h, w) + feat = paddle.bmm(value, sim) + feat = paddle.reshape(feat, + (0, self.in_channels, x_shape[2], x_shape[3])) + + out = self.gamma * feat + x + return out + + +class CAM(nn.Layer): + """ + Channel attention module. + Args: + in_channels (int): The number of input channels. 
+ """ + + def __init__(self, channels): + super().__init__() + + self.channels = channels + self.gamma = self.create_parameter( + shape=[1], + dtype='float32', + default_initializer=nn.initializer.Constant(0)) + + def forward(self, x): + x_shape = paddle.shape(x) + # query: n, c, h * w + query = paddle.reshape(x, (0, self.channels, -1)) + # key: n, h * w, c + key = paddle.reshape(x, (0, self.channels, -1)) + key = paddle.transpose(key, (0, 2, 1)) + + # sim: n, c, c + sim = paddle.bmm(query, key) + # The danet author claims that this can avoid gradient divergence + sim = paddle.max(sim, axis=-1, keepdim=True).tile( + [1, 1, self.channels]) - sim + sim = F.softmax(sim, axis=-1) + + # feat: from (n, c, h * w) to (n, c, h, w) + value = paddle.reshape(x, (0, self.channels, -1)) + feat = paddle.bmm(sim, value) + feat = paddle.reshape(feat, (0, self.channels, x_shape[2], x_shape[3])) + + out = self.gamma * feat + x + return out diff --git a/paddlers/models/ppseg/models/layers/layer_libs.py b/paddlers/models/ppseg/models/layers/layer_libs.py index 3f90814..9a9b332 100644 --- a/paddlers/models/ppseg/models/layers/layer_libs.py +++ b/paddlers/models/ppseg/models/layers/layer_libs.py @@ -17,7 +17,7 @@ import os import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddlers.models.ppseg.models import layers +from paddleseg.models import layers def SyncBatchNorm(*args, **kwargs): @@ -56,6 +56,37 @@ class ConvBNReLU(nn.Layer): return x +class ConvBNAct(nn.Layer): + def __init__(self, + in_channels, + out_channels, + kernel_size, + padding='same', + act_type=None, + **kwargs): + super().__init__() + + self._conv = nn.Conv2D( + in_channels, out_channels, kernel_size, padding=padding, **kwargs) + + if 'data_format' in kwargs: + data_format = kwargs['data_format'] + else: + data_format = 'NCHW' + self._batch_norm = SyncBatchNorm(out_channels, data_format=data_format) + + self._act_type = act_type + if act_type is not None: + self._act = 
layers.Activation(act_type) + + def forward(self, x): + x = self._conv(x) + x = self._batch_norm(x) + if self._act_type is not None: + x = self._act(x) + return x + + class ConvBN(nn.Layer): def __init__(self, in_channels, @@ -293,3 +324,29 @@ class ConvBNPReLU(nn.Layer): x = self._batch_norm(x) x = self._prelu(x) return x + + +class ConvBNLeakyReLU(nn.Layer): + def __init__(self, + in_channels, + out_channels, + kernel_size, + padding='same', + **kwargs): + super().__init__() + + self._conv = nn.Conv2D( + in_channels, out_channels, kernel_size, padding=padding, **kwargs) + + if 'data_format' in kwargs: + data_format = kwargs['data_format'] + else: + data_format = 'NCHW' + self._batch_norm = SyncBatchNorm(out_channels, data_format=data_format) + self._relu = layers.Activation("leakyrelu") + + def forward(self, x): + x = self._conv(x) + x = self._batch_norm(x) + x = self._relu(x) + return x diff --git a/paddlers/models/ppseg/models/layers/nonlocal2d.py b/paddlers/models/ppseg/models/layers/nonlocal2d.py index 9d6386d..7552ff4 100644 --- a/paddlers/models/ppseg/models/layers/nonlocal2d.py +++ b/paddlers/models/ppseg/models/layers/nonlocal2d.py @@ -16,7 +16,7 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddlers.models.ppseg.models import layers +from paddleseg.models import layers class NonLocal2D(nn.Layer): diff --git a/paddlers/models/ppseg/models/layers/pyramid_pool.py b/paddlers/models/ppseg/models/layers/pyramid_pool.py index 3694437..9e26912 100644 --- a/paddlers/models/ppseg/models/layers/pyramid_pool.py +++ b/paddlers/models/ppseg/models/layers/pyramid_pool.py @@ -16,7 +16,7 @@ import paddle import paddle.nn.functional as F from paddle import nn -from paddlers.models.ppseg.models import layers +from paddleseg.models import layers class ASPPModule(nn.Layer): diff --git a/paddlers/models/ppseg/models/layers/tensor_fusion.py b/paddlers/models/ppseg/models/layers/tensor_fusion.py new file mode 100644 index 0000000..da212b4 --- 
/dev/null +++ b/paddlers/models/ppseg/models/layers/tensor_fusion.py @@ -0,0 +1,285 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle import ParamAttr +from paddle.nn.initializer import Constant +from paddleseg.models import layers +from paddleseg.models.layers import tensor_fusion_helper as helper + + +class UAFM(nn.Layer): + """ + The base of Unified Attention Fusion Module. + Args: + x_ch (int): The channel of x tensor, which is the low level feature. + y_ch (int): The channel of y tensor, which is the high level feature. + out_ch (int): The channel of output tensor. + ksize (int, optional): The kernel size of the conv for x tensor. Default: 3. + resize_mode (str, optional): The resize model in unsampling y tensor. Default: bilinear. 
+ """ + + def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'): + super().__init__() + + self.conv_x = layers.ConvBNReLU( + x_ch, y_ch, kernel_size=ksize, padding=ksize // 2, bias_attr=False) + self.conv_out = layers.ConvBNReLU( + y_ch, out_ch, kernel_size=3, padding=1, bias_attr=False) + self.resize_mode = resize_mode + + def check(self, x, y): + assert x.ndim == 4 and y.ndim == 4 + x_h, x_w = x.shape[2:] + y_h, y_w = y.shape[2:] + assert x_h >= y_h and x_w >= y_w + + def prepare(self, x, y): + x = self.prepare_x(x, y) + y = self.prepare_y(x, y) + return x, y + + def prepare_x(self, x, y): + x = self.conv_x(x) + return x + + def prepare_y(self, x, y): + y_up = F.interpolate(y, paddle.shape(x)[2:], mode=self.resize_mode) + return y_up + + def fuse(self, x, y): + out = x + y + out = self.conv_out(out) + return out + + def forward(self, x, y): + """ + Args: + x (Tensor): The low level feature. + y (Tensor): The high level feature. + """ + self.check(x, y) + x, y = self.prepare(x, y) + out = self.fuse(x, y) + return out + + +class UAFM_ChAtten(UAFM): + """ + The UAFM with channel attention, which uses mean and max values. + Args: + x_ch (int): The channel of x tensor, which is the low level feature. + y_ch (int): The channel of y tensor, which is the high level feature. + out_ch (int): The channel of output tensor. + ksize (int, optional): The kernel size of the conv for x tensor. Default: 3. + resize_mode (str, optional): The resize model in unsampling y tensor. Default: bilinear. + """ + + def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'): + super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode) + + self.conv_xy_atten = nn.Sequential( + layers.ConvBNAct( + 4 * y_ch, + y_ch // 2, + kernel_size=1, + bias_attr=False, + act_type="leakyrelu"), + layers.ConvBN( + y_ch // 2, y_ch, kernel_size=1, bias_attr=False)) + + def fuse(self, x, y): + """ + Args: + x (Tensor): The low level feature. + y (Tensor): The high level feature. 
+ """ + atten = helper.avg_max_reduce_hw([x, y], self.training) + atten = F.sigmoid(self.conv_xy_atten(atten)) + + out = x * atten + y * (1 - atten) + out = self.conv_out(out) + return out + + +class UAFM_ChAtten_S(UAFM): + """ + The UAFM with channel attention, which uses mean values. + Args: + x_ch (int): The channel of x tensor, which is the low level feature. + y_ch (int): The channel of y tensor, which is the high level feature. + out_ch (int): The channel of output tensor. + ksize (int, optional): The kernel size of the conv for x tensor. Default: 3. + resize_mode (str, optional): The resize model in unsampling y tensor. Default: bilinear. + """ + + def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'): + super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode) + + self.conv_xy_atten = nn.Sequential( + layers.ConvBNAct( + 2 * y_ch, + y_ch // 2, + kernel_size=1, + bias_attr=False, + act_type="leakyrelu"), + layers.ConvBN( + y_ch // 2, y_ch, kernel_size=1, bias_attr=False)) + + def fuse(self, x, y): + """ + Args: + x (Tensor): The low level feature. + y (Tensor): The high level feature. + """ + atten = helper.avg_reduce_hw([x, y]) + atten = F.sigmoid(self.conv_xy_atten(atten)) + + out = x * atten + y * (1 - atten) + out = self.conv_out(out) + return out + + +class UAFM_SpAtten(UAFM): + """ + The UAFM with spatial attention, which uses mean and max values. + Args: + x_ch (int): The channel of x tensor, which is the low level feature. + y_ch (int): The channel of y tensor, which is the high level feature. + out_ch (int): The channel of output tensor. + ksize (int, optional): The kernel size of the conv for x tensor. Default: 3. + resize_mode (str, optional): The resize model in unsampling y tensor. Default: bilinear. 
+ """ + + def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'): + super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode) + + self.conv_xy_atten = nn.Sequential( + layers.ConvBNReLU( + 4, 2, kernel_size=3, padding=1, bias_attr=False), + layers.ConvBN( + 2, 1, kernel_size=3, padding=1, bias_attr=False)) + self._scale = self.create_parameter( + shape=[1], + attr=ParamAttr(initializer=Constant(value=1.)), + dtype="float32") + self._scale.stop_gradient = True + + def fuse(self, x, y): + """ + Args: + x (Tensor): The low level feature. + y (Tensor): The high level feature. + """ + atten = helper.avg_max_reduce_channel([x, y]) + atten = F.sigmoid(self.conv_xy_atten(atten)) + + out = x * atten + y * (self._scale - atten) + out = self.conv_out(out) + return out + + +class UAFM_SpAtten_S(UAFM): + """ + The UAFM with spatial attention, which uses mean values. + Args: + x_ch (int): The channel of x tensor, which is the low level feature. + y_ch (int): The channel of y tensor, which is the high level feature. + out_ch (int): The channel of output tensor. + ksize (int, optional): The kernel size of the conv for x tensor. Default: 3. + resize_mode (str, optional): The resize model in unsampling y tensor. Default: bilinear. + """ + + def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'): + super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode) + + self.conv_xy_atten = nn.Sequential( + layers.ConvBNReLU( + 2, 2, kernel_size=3, padding=1, bias_attr=False), + layers.ConvBN( + 2, 1, kernel_size=3, padding=1, bias_attr=False)) + + def fuse(self, x, y): + """ + Args: + x (Tensor): The low level feature. + y (Tensor): The high level feature. + """ + atten = helper.avg_reduce_channel([x, y]) + atten = F.sigmoid(self.conv_xy_atten(atten)) + + out = x * atten + y * (1 - atten) + out = self.conv_out(out) + return out + + +class UAFMMobile(UAFM): + """ + Unified Attention Fusion Module for mobile. 
+ Args: + x_ch (int): The channel of x tensor, which is the low level feature. + y_ch (int): The channel of y tensor, which is the high level feature. + out_ch (int): The channel of output tensor. + ksize (int, optional): The kernel size of the conv for x tensor. Default: 3. + resize_mode (str, optional): The resize model in unsampling y tensor. Default: bilinear. + """ + + def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'): + super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode) + + self.conv_x = layers.SeparableConvBNReLU( + x_ch, y_ch, kernel_size=ksize, padding=ksize // 2, bias_attr=False) + self.conv_out = layers.SeparableConvBNReLU( + y_ch, out_ch, kernel_size=3, padding=1, bias_attr=False) + + +class UAFMMobile_SpAtten(UAFM): + """ + Unified Attention Fusion Module with spatial attention for mobile. + Args: + x_ch (int): The channel of x tensor, which is the low level feature. + y_ch (int): The channel of y tensor, which is the high level feature. + out_ch (int): The channel of output tensor. + ksize (int, optional): The kernel size of the conv for x tensor. Default: 3. + resize_mode (str, optional): The resize model in unsampling y tensor. Default: bilinear. + """ + + def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'): + super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode) + + self.conv_x = layers.SeparableConvBNReLU( + x_ch, y_ch, kernel_size=ksize, padding=ksize // 2, bias_attr=False) + self.conv_out = layers.SeparableConvBNReLU( + y_ch, out_ch, kernel_size=3, padding=1, bias_attr=False) + + self.conv_xy_atten = nn.Sequential( + layers.ConvBNReLU( + 4, 2, kernel_size=3, padding=1, bias_attr=False), + layers.ConvBN( + 2, 1, kernel_size=3, padding=1, bias_attr=False)) + + def fuse(self, x, y): + """ + Args: + x (Tensor): The low level feature. + y (Tensor): The high level feature. 
+ """ + atten = helper.avg_max_reduce_channel([x, y]) + atten = F.sigmoid(self.conv_xy_atten(atten)) + + out = x * atten + y * (1 - atten) + out = self.conv_out(out) + return out diff --git a/paddlers/models/ppseg/models/layers/tensor_fusion_helper.py b/paddlers/models/ppseg/models/layers/tensor_fusion_helper.py new file mode 100644 index 0000000..f47d14b --- /dev/null +++ b/paddlers/models/ppseg/models/layers/tensor_fusion_helper.py @@ -0,0 +1,133 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + + +def avg_reduce_hw(x): + # Reduce hw by avg + # Return cat([avg_pool_0, avg_pool_1, ...]) + if not isinstance(x, (list, tuple)): + return F.adaptive_avg_pool2d(x, 1) + elif len(x) == 1: + return F.adaptive_avg_pool2d(x[0], 1) + else: + res = [] + for xi in x: + res.append(F.adaptive_avg_pool2d(xi, 1)) + return paddle.concat(res, axis=1) + + +def avg_max_reduce_hw_helper(x, is_training, use_concat=True): + assert not isinstance(x, (list, tuple)) + avg_pool = F.adaptive_avg_pool2d(x, 1) + # TODO(pjc): when axis=[2, 3], the paddle.max api has bug for training. 
+ if is_training: + max_pool = F.adaptive_max_pool2d(x, 1) + else: + max_pool = paddle.max(x, axis=[2, 3], keepdim=True) + + if use_concat: + res = paddle.concat([avg_pool, max_pool], axis=1) + else: + res = [avg_pool, max_pool] + return res + + +def avg_max_reduce_hw(x, is_training): + # Reduce hw by avg and max + # Return cat([avg_pool_0, avg_pool_1, ..., max_pool_0, max_pool_1, ...]) + if not isinstance(x, (list, tuple)): + return avg_max_reduce_hw_helper(x, is_training) + elif len(x) == 1: + return avg_max_reduce_hw_helper(x[0], is_training) + else: + res_avg = [] + res_max = [] + for xi in x: + avg, max = avg_max_reduce_hw_helper(xi, is_training, False) + res_avg.append(avg) + res_max.append(max) + res = res_avg + res_max + return paddle.concat(res, axis=1) + + +def avg_reduce_channel(x): + # Reduce channel by avg + # Return cat([avg_ch_0, avg_ch_1, ...]) + if not isinstance(x, (list, tuple)): + return paddle.mean(x, axis=1, keepdim=True) + elif len(x) == 1: + return paddle.mean(x[0], axis=1, keepdim=True) + else: + res = [] + for xi in x: + res.append(paddle.mean(xi, axis=1, keepdim=True)) + return paddle.concat(res, axis=1) + + +def max_reduce_channel(x): + # Reduce channel by max + # Return cat([max_ch_0, max_ch_1, ...]) + if not isinstance(x, (list, tuple)): + return paddle.max(x, axis=1, keepdim=True) + elif len(x) == 1: + return paddle.max(x[0], axis=1, keepdim=True) + else: + res = [] + for xi in x: + res.append(paddle.max(xi, axis=1, keepdim=True)) + return paddle.concat(res, axis=1) + + +def avg_max_reduce_channel_helper(x, use_concat=True): + # Reduce hw by avg and max, only support single input + assert not isinstance(x, (list, tuple)) + mean_value = paddle.mean(x, axis=1, keepdim=True) + max_value = paddle.max(x, axis=1, keepdim=True) + + if use_concat: + res = paddle.concat([mean_value, max_value], axis=1) + else: + res = [mean_value, max_value] + return res + + +def avg_max_reduce_channel(x): + # Reduce hw by avg and max + # Return cat([avg_ch_0, 
max_ch_0, avg_ch_1, max_ch_1, ...]) + if not isinstance(x, (list, tuple)): + return avg_max_reduce_channel_helper(x) + elif len(x) == 1: + return avg_max_reduce_channel_helper(x[0]) + else: + res = [] + for xi in x: + res.extend(avg_max_reduce_channel_helper(xi, False)) + return paddle.concat(res, axis=1) + + +def cat_avg_max_reduce_channel(x): + # Reduce hw by cat+avg+max + assert isinstance(x, (list, tuple)) and len(x) > 1 + + x = paddle.concat(x, axis=1) + + mean_value = paddle.mean(x, axis=1, keepdim=True) + max_value = paddle.max(x, axis=1, keepdim=True) + res = paddle.concat([mean_value, max_value], axis=1) + + return res \ No newline at end of file diff --git a/paddlers/models/ppseg/models/losses/binary_cross_entropy_loss.py b/paddlers/models/ppseg/models/losses/binary_cross_entropy_loss.py index 5a02ba6..4bf7bc7 100644 --- a/paddlers/models/ppseg/models/losses/binary_cross_entropy_loss.py +++ b/paddlers/models/ppseg/models/losses/binary_cross_entropy_loss.py @@ -16,7 +16,7 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddlers.models.ppseg.cvlibs import manager +from paddleseg.cvlibs import manager @manager.LOSSES.add_component @@ -99,7 +99,7 @@ class BCELoss(nn.Layer): raise ValueError( "if type of `weight` is str, it should equal to 'dynamic', but it is {}" .format(self.weight)) - elif isinstance(self.weight, paddle.VarBase): + elif not isinstance(self.weight, paddle.Tensor): raise TypeError( 'The type of `weight` is wrong, it should be Tensor or str, but it is {}' .format(type(self.weight))) diff --git a/paddlers/models/ppseg/models/losses/bootstrapped_cross_entropy.py b/paddlers/models/ppseg/models/losses/bootstrapped_cross_entropy.py index b1807fa..a9d09ef 100644 --- a/paddlers/models/ppseg/models/losses/bootstrapped_cross_entropy.py +++ b/paddlers/models/ppseg/models/losses/bootstrapped_cross_entropy.py @@ -16,7 +16,7 @@ import paddle from paddle import nn import paddle.nn.functional as F -from 
paddlers.models.ppseg.cvlibs import manager +from paddleseg.cvlibs import manager @manager.LOSSES.add_component diff --git a/paddlers/models/ppseg/models/losses/cross_entropy_loss.py b/paddlers/models/ppseg/models/losses/cross_entropy_loss.py index 74af92a..c934a0a 100644 --- a/paddlers/models/ppseg/models/losses/cross_entropy_loss.py +++ b/paddlers/models/ppseg/models/losses/cross_entropy_loss.py @@ -16,7 +16,7 @@ import paddle from paddle import nn import paddle.nn.functional as F -from paddlers.models.ppseg.cvlibs import manager +from paddleseg.cvlibs import manager @manager.LOSSES.add_component @@ -78,8 +78,6 @@ class CrossEntropyLoss(nn.Layer): logit = paddle.transpose(logit, [0, 2, 3, 1]) label = label.astype('int64') - # In F.cross_entropy, the ignore_index is invalid, which needs to be fixed. - # When there is 255 in the label and paddle version <= 2.1.3, the cross_entropy OP will report an error, which is fixed in paddle develop version. loss = F.cross_entropy( logit, label, @@ -121,7 +119,7 @@ class CrossEntropyLoss(nn.Layer): loss = loss * semantic_weights if self.weight is not None: - _one_hot = F.one_hot(label, logit.shape[-1]) + _one_hot = F.one_hot(label * mask, logit.shape[-1]) coef = paddle.sum(_one_hot * self.weight, axis=-1) else: coef = paddle.ones_like(label) diff --git a/paddlers/models/ppseg/models/losses/decoupledsegnet_relax_boundary_loss.py b/paddlers/models/ppseg/models/losses/decoupledsegnet_relax_boundary_loss.py index af78cf2..eb02389 100644 --- a/paddlers/models/ppseg/models/losses/decoupledsegnet_relax_boundary_loss.py +++ b/paddlers/models/ppseg/models/losses/decoupledsegnet_relax_boundary_loss.py @@ -16,9 +16,9 @@ import numpy as np import paddle from paddle import nn import paddle.nn.functional as F -from scipy.ndimage.interpolation import shift +from scipy.ndimage import shift -from paddlers.models.ppseg.cvlibs import manager +from paddleseg.cvlibs import manager @manager.LOSSES.add_component diff --git 
a/paddlers/models/ppseg/models/losses/detail_aggregate_loss.py b/paddlers/models/ppseg/models/losses/detail_aggregate_loss.py index 24cde37..d6b49c6 100644 --- a/paddlers/models/ppseg/models/losses/detail_aggregate_loss.py +++ b/paddlers/models/ppseg/models/losses/detail_aggregate_loss.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -16,7 +16,7 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddlers.models.ppseg.cvlibs import manager +from paddleseg.cvlibs import manager @manager.LOSSES.add_component diff --git a/paddlers/models/ppseg/models/losses/dice_loss.py b/paddlers/models/ppseg/models/losses/dice_loss.py index cca8058..e7b8cef 100644 --- a/paddlers/models/ppseg/models/losses/dice_loss.py +++ b/paddlers/models/ppseg/models/losses/dice_loss.py @@ -13,44 +13,65 @@ import paddle from paddle import nn import paddle.nn.functional as F -from paddlers.models.ppseg.cvlibs import manager +from paddleseg.cvlibs import manager @manager.LOSSES.add_component class DiceLoss(nn.Layer): """ - Implements the dice loss function. + The implements of the dice loss. Args: - ignore_index (int64): Specifies a target value that is ignored - and does not contribute to the input gradient. Default ``255``. - smooth (float32): laplace smoothing, - to smooth dice loss and accelerate convergence. following: - https://github.com/pytorch/pytorch/issues/1249#issuecomment-337999895 + weight (list[float], optional): The weight for each class. Default: None. + ignore_index (int64): ignore_index (int64, optional): Specifies a target value that + is ignored and does not contribute to the input gradient. Default ``255``. + smooth (float32): Laplace smoothing to smooth dice loss and accelerate convergence. 
+ Default: 1.0 """ - def __init__(self, ignore_index=255, smooth=0.): - super(DiceLoss, self).__init__() + def __init__(self, weight=None, ignore_index=255, smooth=1.0): + super().__init__() + self.weight = weight self.ignore_index = ignore_index - self.eps = 1e-5 self.smooth = smooth + self.eps = 1e-8 def forward(self, logits, labels): - labels = paddle.cast(labels, dtype='int32') - labels_one_hot = F.one_hot(labels, num_classes=logits.shape[1]) - labels_one_hot = paddle.transpose(labels_one_hot, [0, 3, 1, 2]) - labels_one_hot = paddle.cast(labels_one_hot, dtype='float32') + num_class = logits.shape[1] + if self.weight is not None: + assert num_class == len(self.weight), \ + "The lenght of weight should be euqal to the num class" + + mask = labels != self.ignore_index + mask = paddle.cast(paddle.unsqueeze(mask, 1), 'float32') + labels[labels == self.ignore_index] = 0 + labels_one_hot = F.one_hot(labels, num_class) + labels_one_hot = paddle.transpose(labels_one_hot, [0, 3, 1, 2]) logits = F.softmax(logits, axis=1) - mask = (paddle.unsqueeze(labels, 1) != self.ignore_index) - logits = logits * mask - labels_one_hot = labels_one_hot * mask + dice_loss = 0.0 + for i in range(num_class): + dice_loss_i = dice_loss_helper(logits[:, i], labels_one_hot[:, i], + mask, self.smooth, self.eps) + if self.weight is not None: + dice_loss_i *= self.weight[i] + dice_loss += dice_loss_i + dice_loss = dice_loss / num_class + + return dice_loss - dims = (0, ) + tuple(range(2, labels.ndimension() + 1)) - intersection = paddle.sum(logits * labels_one_hot, dims) - cardinality = paddle.sum(logits + labels_one_hot, dims) - dice_loss = ((2. 
* intersection + self.smooth) / - (cardinality + self.eps + self.smooth)).mean() - return 1 - dice_loss +def dice_loss_helper(logit, label, mask, smooth, eps): + assert logit.shape == label.shape, \ + "The shape of logit and label should be the same" + logit = paddle.reshape(logit, [0, -1]) + label = paddle.reshape(label, [0, -1]) + mask = paddle.reshape(mask, [0, -1]) + logit *= mask + label *= mask + intersection = paddle.sum(logit * label, axis=1) + cardinality = paddle.sum(logit + label, axis=1) + dice_loss = 1 - (2 * intersection + smooth) / (cardinality + smooth + eps) + dice_loss = dice_loss.mean() + return dice_loss diff --git a/paddlers/models/ppseg/models/losses/edge_attention_loss.py b/paddlers/models/ppseg/models/losses/edge_attention_loss.py index 44012bc..b000b75 100644 --- a/paddlers/models/ppseg/models/losses/edge_attention_loss.py +++ b/paddlers/models/ppseg/models/losses/edge_attention_loss.py @@ -16,8 +16,8 @@ import paddle from paddle import nn import paddle.nn.functional as F -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.models import losses +from paddleseg.cvlibs import manager +from paddleseg.models import losses @manager.LOSSES.add_component diff --git a/paddlers/models/ppseg/models/losses/focal_loss.py b/paddlers/models/ppseg/models/losses/focal_loss.py index c578345..4b5edd0 100644 --- a/paddlers/models/ppseg/models/losses/focal_loss.py +++ b/paddlers/models/ppseg/models/losses/focal_loss.py @@ -17,44 +17,116 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddlers.models.ppseg.cvlibs import manager +from paddleseg.cvlibs import manager @manager.LOSSES.add_component class FocalLoss(nn.Layer): """ - Focal Loss. + The implement of focal loss. - Code referenced from: - https://github.com/clcarwin/focal_loss_pytorch/blob/master/focalloss.py + The focal loss requires the label is 0 or 1 for now. Args: - gamma (float): the coefficient of Focal Loss. 
- ignore_index (int64): Specifies a target value that is ignored + alpha (float, list, optional): The alpha of focal loss. alpha is the weight + of class 1, 1-alpha is the weight of class 0. Default: 0.25 + gamma (float, optional): The gamma of Focal Loss. Default: 2.0 + ignore_index (int64, optional): Specifies a target value that is ignored and does not contribute to the input gradient. Default ``255``. """ - def __init__(self, gamma=2.0, ignore_index=255, edge_label=False): - super(FocalLoss, self).__init__() + def __init__(self, alpha=0.25, gamma=2.0, ignore_index=255): + super().__init__() + self.alpha = alpha self.gamma = gamma self.ignore_index = ignore_index - self.edge_label = edge_label + self.EPS = 1e-10 def forward(self, logit, label): - logit = paddle.reshape( - logit, [logit.shape[0], logit.shape[1], -1]) # N,C,H,W => N,C,H*W - logit = paddle.transpose(logit, [0, 2, 1]) # N,C,H*W => N,H*W,C - logit = paddle.reshape(logit, - [-1, logit.shape[2]]) # N,H*W,C => N*H*W,C - label = paddle.reshape(label, [-1, 1]) - range_ = paddle.arange(0, label.shape[0]) - range_ = paddle.unsqueeze(range_, axis=-1) - label = paddle.cast(label, dtype='int64') - label = paddle.concat([range_, label], axis=-1) - logpt = F.log_softmax(logit) - logpt = paddle.gather_nd(logpt, label) - - pt = paddle.exp(logpt.detach()) - loss = -1 * (1 - pt)**self.gamma * logpt - loss = paddle.mean(loss) - return loss + """ + Forward computation. + + Args: + logit (Tensor): Logit tensor, the data type is float32, float64. Shape is + (N, C, H, W), where C is number of classes. + label (Tensor): Label tensor, the data type is int64. Shape is (N, W, W), + where each value is 0 <= label[i] <= C-1. + Returns: + (Tensor): The average loss. + """ + assert logit.ndim == 4, "The ndim of logit should be 4." + assert logit.shape[1] == 2, "The channel of logit should be 2." + assert label.ndim == 3, "The ndim of label should be 3." 
+ + class_num = logit.shape[1] # class num is 2 + logit = paddle.transpose(logit, [0, 2, 3, 1]) # N,C,H,W => N,H,W,C + + mask = label != self.ignore_index # N,H,W + mask = paddle.unsqueeze(mask, 3) + mask = paddle.cast(mask, 'float32') + mask.stop_gradient = True + + label = F.one_hot(label, class_num) # N,H,W,C + label = paddle.cast(label, logit.dtype) + label.stop_gradient = True + + loss = F.sigmoid_focal_loss( + logit=logit, + label=label, + alpha=self.alpha, + gamma=self.gamma, + reduction='none') + loss = loss * mask + avg_loss = paddle.sum(loss) / ( + paddle.sum(paddle.cast(mask != 0., 'int32')) * class_num + self.EPS) + return avg_loss + + +@manager.LOSSES.add_component +class MultiClassFocalLoss(nn.Layer): + """ + The implement of focal loss for multi class. + + Args: + alpha (float, list, optional): The alpha of focal loss. alpha is the weight + of class 1, 1-alpha is the weight of class 0. Default: 0.25 + gamma (float, optional): The gamma of Focal Loss. Default: 2.0 + ignore_index (int64, optional): Specifies a target value that is ignored + and does not contribute to the input gradient. Default ``255``. + """ + + def __init__(self, num_class, alpha=1.0, gamma=2.0, ignore_index=255): + super().__init__() + self.num_class = num_class + self.alpha = alpha + self.gamma = gamma + self.ignore_index = ignore_index + self.EPS = 1e-10 + + def forward(self, logit, label): + """ + Forward computation. + + Args: + logit (Tensor): Logit tensor, the data type is float32, float64. Shape is + (N, C, H, W), where C is number of classes. + label (Tensor): Label tensor, the data type is int64. Shape is (N, W, W), + where each value is 0 <= label[i] <= C-1. + Returns: + (Tensor): The average loss. + """ + assert logit.ndim == 4, "The ndim of logit should be 4." + assert label.ndim == 3, "The ndim of label should be 3." 
+ + logit = paddle.transpose(logit, [0, 2, 3, 1]) + label = label.astype('int64') + ce_loss = F.cross_entropy( + logit, label, ignore_index=self.ignore_index, reduction='none') + + pt = paddle.exp(-ce_loss) + focal_loss = self.alpha * ((1 - pt)**self.gamma) * ce_loss + + mask = paddle.cast(label != self.ignore_index, 'float32') + focal_loss *= mask + avg_loss = paddle.mean(focal_loss) / (paddle.mean(mask) + self.EPS) + return avg_loss diff --git a/paddlers/models/ppseg/models/losses/gscnn_dual_task_loss.py b/paddlers/models/ppseg/models/losses/gscnn_dual_task_loss.py index 777e57a..6a3d81b 100644 --- a/paddlers/models/ppseg/models/losses/gscnn_dual_task_loss.py +++ b/paddlers/models/ppseg/models/losses/gscnn_dual_task_loss.py @@ -13,7 +13,7 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddlers.models.ppseg.cvlibs import manager +from paddleseg.cvlibs import manager @manager.LOSSES.add_component diff --git a/paddlers/models/ppseg/models/losses/kl_loss.py b/paddlers/models/ppseg/models/losses/kl_loss.py index 23a4a6e..30cb925 100644 --- a/paddlers/models/ppseg/models/losses/kl_loss.py +++ b/paddlers/models/ppseg/models/losses/kl_loss.py @@ -16,7 +16,7 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddlers.models.ppseg.cvlibs import manager +from paddleseg.cvlibs import manager @manager.LOSSES.add_component diff --git a/paddlers/models/ppseg/models/losses/l1_loss.py b/paddlers/models/ppseg/models/losses/l1_loss.py index 03f84cf..5d41f36 100644 --- a/paddlers/models/ppseg/models/losses/l1_loss.py +++ b/paddlers/models/ppseg/models/losses/l1_loss.py @@ -16,7 +16,7 @@ import paddle from paddle import nn import paddle.nn.functional as F -from paddlers.models.ppseg.cvlibs import manager +from paddleseg.cvlibs import manager @manager.LOSSES.add_component @@ -74,3 +74,25 @@ class L1Loss(nn.L1Loss): def __init__(self, reduction='mean', ignore_index=255): super().__init__(reduction=reduction) + 
self.ignore_index = ignore_index + self.EPS = 1e-10 + + def forward(self, input, label): + mask = label != self.ignore_index + mask = paddle.cast(mask, "float32") + label.stop_gradient = True + mask.stop_gradient = True + + output = paddle.nn.functional.l1_loss( + input, label, "none", name=self.name) * mask + + if self.reduction == "mean": + return paddle.mean(output) / (paddle.mean(mask) + self.EPS) + elif self.reduction == "none": + return output + elif self.reduction == "sum": + return paddle.sum(output) + else: + raise ValueError( + "The value of 'reduction' in L1Loss should be 'sum', 'mean' or 'none', but " + "received %s, which is not allowed." % self.reduction) diff --git a/paddlers/models/ppseg/models/losses/lovasz_loss.py b/paddlers/models/ppseg/models/losses/lovasz_loss.py index 506bd72..82dfaf9 100644 --- a/paddlers/models/ppseg/models/losses/lovasz_loss.py +++ b/paddlers/models/ppseg/models/losses/lovasz_loss.py @@ -22,7 +22,7 @@ import paddle from paddle import nn import paddle.nn.functional as F -from paddlers.models.ppseg.cvlibs import manager +from paddleseg.cvlibs import manager @manager.LOSSES.add_component @@ -124,8 +124,12 @@ def lovasz_hinge_flat(logits, labels): signs = 2. * labels - 1. signs.stop_gradient = True errors = 1. 
- logits * signs - errors_sorted, perm = paddle.fluid.core.ops.argsort(errors, 'axis', 0, - 'descending', True) + if hasattr(paddle, "_legacy_C_ops"): + errors_sorted, perm = paddle._legacy_C_ops.argsort(errors, 'axis', 0, + 'descending', True) + else: + errors_sorted, perm = paddle._C_ops.argsort(errors, 'axis', 0, + 'descending', True) errors_sorted.stop_gradient = False gt_sorted = paddle.gather(labels, perm) grad = lovasz_grad(gt_sorted) @@ -181,8 +185,12 @@ def lovasz_softmax_flat(probas, labels, classes='present'): else: class_pred = probas[:, c] errors = paddle.abs(fg - class_pred) - errors_sorted, perm = paddle.fluid.core.ops.argsort(errors, 'axis', 0, - 'descending', True) + if hasattr(paddle, "_legacy_C_ops"): + errors_sorted, perm = paddle._legacy_C_ops.argsort( + errors, 'axis', 0, 'descending', True) + else: + errors_sorted, perm = paddle._C_ops.argsort(errors, 'axis', 0, + 'descending', True) errors_sorted.stop_gradient = False fg_sorted = paddle.gather(fg, perm) diff --git a/paddlers/models/ppseg/models/losses/mean_square_error_loss.py b/paddlers/models/ppseg/models/losses/mean_square_error_loss.py index 3365268..e6fc891 100644 --- a/paddlers/models/ppseg/models/losses/mean_square_error_loss.py +++ b/paddlers/models/ppseg/models/losses/mean_square_error_loss.py @@ -16,7 +16,7 @@ import paddle from paddle import nn import paddle.nn.functional as F -from paddlers.models.ppseg.cvlibs import manager +from paddleseg.cvlibs import manager @manager.LOSSES.add_component diff --git a/paddlers/models/ppseg/models/losses/mixed_loss.py b/paddlers/models/ppseg/models/losses/mixed_loss.py index c850fa0..563b5c2 100644 --- a/paddlers/models/ppseg/models/losses/mixed_loss.py +++ b/paddlers/models/ppseg/models/losses/mixed_loss.py @@ -16,7 +16,7 @@ import paddle from paddle import nn import paddle.nn.functional as F -from paddlers.models.ppseg.cvlibs import manager +from paddleseg.cvlibs import manager @manager.LOSSES.add_component diff --git 
a/paddlers/models/ppseg/models/losses/ohem_cross_entropy_loss.py b/paddlers/models/ppseg/models/losses/ohem_cross_entropy_loss.py index ab424d4..c69d81e 100644 --- a/paddlers/models/ppseg/models/losses/ohem_cross_entropy_loss.py +++ b/paddlers/models/ppseg/models/losses/ohem_cross_entropy_loss.py @@ -16,7 +16,7 @@ import paddle from paddle import nn import paddle.nn.functional as F -from paddlers.models.ppseg.cvlibs import manager +from paddleseg.cvlibs import manager @manager.LOSSES.add_component @@ -55,7 +55,7 @@ class OhemCrossEntropyLoss(nn.Layer): # get the label after ohem n, c, h, w = logit.shape - label = label.reshape((-1, )) + label = label.reshape((-1, )).astype('int64') valid_mask = (label != self.ignore_index).astype('int64') num_valid = valid_mask.sum() label = label * valid_mask diff --git a/paddlers/models/ppseg/models/losses/ohem_edge_attention_loss.py b/paddlers/models/ppseg/models/losses/ohem_edge_attention_loss.py index 56db270..f37fe07 100644 --- a/paddlers/models/ppseg/models/losses/ohem_edge_attention_loss.py +++ b/paddlers/models/ppseg/models/losses/ohem_edge_attention_loss.py @@ -16,8 +16,8 @@ import paddle from paddle import nn import paddle.nn.functional as F -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.models import losses +from paddleseg.cvlibs import manager +from paddleseg.models import losses @manager.LOSSES.add_component diff --git a/paddlers/models/ppseg/models/losses/pixel_contrast_cross_entropy_loss.py b/paddlers/models/ppseg/models/losses/pixel_contrast_cross_entropy_loss.py index 2fc486e..7abe865 100644 --- a/paddlers/models/ppseg/models/losses/pixel_contrast_cross_entropy_loss.py +++ b/paddlers/models/ppseg/models/losses/pixel_contrast_cross_entropy_loss.py @@ -16,7 +16,7 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddlers.models.ppseg.cvlibs import manager +from paddleseg.cvlibs import manager @manager.LOSSES.add_component @@ -101,9 +101,12 @@ class 
PixelContrastCrossEntropyLoss(nn.Layer): elif num_hard >= n_view / 2: num_easy_keep = num_easy num_hard_keep = n_view - num_easy_keep - else: + elif num_easy >= n_view / 2: num_hard_keep = num_hard num_easy_keep = n_view - num_hard_keep + else: + num_hard_keep = num_hard + num_easy_keep = num_easy indices = None if num_hard > 0: diff --git a/paddlers/models/ppseg/models/losses/point_cross_entropy_loss.py b/paddlers/models/ppseg/models/losses/point_cross_entropy_loss.py index ffea59a..d43f6b2 100644 --- a/paddlers/models/ppseg/models/losses/point_cross_entropy_loss.py +++ b/paddlers/models/ppseg/models/losses/point_cross_entropy_loss.py @@ -16,7 +16,7 @@ import paddle from paddle import nn import paddle.nn.functional as F -from paddlers.models.ppseg.cvlibs import manager +from paddleseg.cvlibs import manager @manager.LOSSES.add_component diff --git a/paddlers/models/ppseg/models/losses/rmi_loss.py b/paddlers/models/ppseg/models/losses/rmi_loss.py index 964df91..271f738 100644 --- a/paddlers/models/ppseg/models/losses/rmi_loss.py +++ b/paddlers/models/ppseg/models/losses/rmi_loss.py @@ -17,7 +17,7 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddlers.models.ppseg.cvlibs import manager +from paddleseg.cvlibs import manager _euler_num = 2.718281828 _pi = 3.14159265 diff --git a/paddlers/models/ppseg/models/losses/semantic_connectivity_loss.py b/paddlers/models/ppseg/models/losses/semantic_connectivity_loss.py index 15050e3..b54d545 100644 --- a/paddlers/models/ppseg/models/losses/semantic_connectivity_loss.py +++ b/paddlers/models/ppseg/models/losses/semantic_connectivity_loss.py @@ -18,7 +18,7 @@ import paddle from paddle import nn import paddle.nn.functional as F -from paddlers.models.ppseg.cvlibs import manager +from paddleseg.cvlibs import manager @manager.LOSSES.add_component @@ -92,6 +92,7 @@ class SemanticConnectivityLoss(nn.Layer): label_num_conn, label_conn = cv2.connectedComponents( labels_np_class.astype(np.uint8)) + 
origin_pred_num_conn = pred_num_conn if pred_num_conn > 2 * label_num_conn: pred_num_conn = min(pred_num_conn, self.max_pred_num_conn) real_pred_num = pred_num_conn - 1 @@ -100,8 +101,9 @@ class SemanticConnectivityLoss(nn.Layer): # Connected Components Matching and SC Loss Calculation if real_label_num > 0 and real_pred_num > 0: img_connectivity = compute_class_connectiveity( - pred_conn, label_conn, pred_num_conn, label_num_conn, - pred_i, real_label_num, real_pred_num, zero) + pred_conn, label_conn, pred_num_conn, + origin_pred_num_conn, label_num_conn, pred_i, + real_label_num, real_pred_num, zero) sc_loss += 1 - img_connectivity elif real_label_num == 0 and real_pred_num == 0: # if no connected component, SC Loss = 0, so pass @@ -122,12 +124,12 @@ class SemanticConnectivityLoss(nn.Layer): def compute_class_connectiveity(pred_conn, label_conn, pred_num_conn, - label_num_conn, pred, real_label_num, - real_pred_num, zero): + origin_pred_num_conn, label_num_conn, pred, + real_label_num, real_pred_num, zero): pred_conn = paddle.to_tensor(pred_conn) label_conn = paddle.to_tensor(label_conn) - pred_conn = F.one_hot(pred_conn, pred_num_conn) + pred_conn = F.one_hot(pred_conn, origin_pred_num_conn) label_conn = F.one_hot(label_conn, label_num_conn) ious = paddle.zeros((real_label_num, real_pred_num)) diff --git a/paddlers/models/ppseg/models/losses/semantic_encode_cross_entropy_loss.py b/paddlers/models/ppseg/models/losses/semantic_encode_cross_entropy_loss.py index 1293521..648ed35 100644 --- a/paddlers/models/ppseg/models/losses/semantic_encode_cross_entropy_loss.py +++ b/paddlers/models/ppseg/models/losses/semantic_encode_cross_entropy_loss.py @@ -16,7 +16,7 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddlers.models.ppseg.cvlibs import manager +from paddleseg.cvlibs import manager @manager.LOSSES.add_component diff --git a/paddlers/models/ppseg/models/lraspp.py b/paddlers/models/ppseg/models/lraspp.py new file mode 100644 index 
0000000..6f7db42 --- /dev/null +++ b/paddlers/models/ppseg/models/lraspp.py @@ -0,0 +1,162 @@ +# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import partial + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + +from paddleseg import utils +from paddleseg.models import layers +from paddleseg.cvlibs import manager + + +@manager.MODELS.add_component +class LRASPP(nn.Layer): + """ + Semantic segmentation model with a light R-ASPP head. + + The original article refers to + Howard, Andrew, et al. "Searching for mobilenetv3." + (https://arxiv.org/pdf/1909.11065.pdf) + + Args: + num_classes (int): The number of target classes. + backbone(nn.Layer): Backbone network, such as stdc1net and resnet18. The backbone must + has feat_channels, of which the length is 5. + backbone_indices (List(int), optional): The values indicate the indices of backbone output + used as the input of the LR-ASPP head. + Default: [0, 1, 3]. + lraspp_head_inter_chs (List(int), optional): The intermediate channels of LR-ASPP head. + Default: [32, 64]. + lraspp_head_out_ch (int, optional): The output channels of each ASPP branch in the LR-ASPP head. + Default: 128 + resize_mode (str, optional): The resize mode for the upsampling operation in the LR-ASPP head. + Default: bilinear. 
+ use_gap (bool, optional): If true, use global average pooling in the LR-ASPP head; otherwise, use + a 49x49 kernel for average pooling. + Default: True. + pretrained (str, optional): The path or url of pretrained model. Default: None. + """ + + def __init__(self, + num_classes, + backbone, + backbone_indices=[0, 1, 3], + lraspp_head_inter_chs=[32, 64], + lraspp_head_out_ch=128, + resize_mode='bilinear', + use_gap=True, + pretrained=None): + super().__init__() + + # backbone + assert hasattr(backbone, 'feat_channels'), \ + "The backbone should has feat_channels." + assert len(backbone.feat_channels) >= len(backbone_indices), \ + f"The length of input backbone_indices ({len(backbone_indices)}) should not be" \ + f"greater than the length of feat_channels ({len(backbone.feat_channels)})." + assert len(backbone.feat_channels) > max(backbone_indices), \ + f"The max value ({max(backbone_indices)}) of backbone_indices should be " \ + f"less than the length of feat_channels ({len(backbone.feat_channels)})." + self.backbone = backbone + + assert len(backbone_indices) >= 1, "The lenght of backbone_indices " \ + "should not be lesser than 1" + + # head + assert len(backbone_indices) == len( + lraspp_head_inter_chs + ) + 1, "The length of backbone_indices should be 1 greater than lraspp_head_inter_chs." 
+ self.backbone_indices = backbone_indices + + self.lraspp_head = LRASPPHead(backbone_indices, backbone.feat_channels, + lraspp_head_inter_chs, lraspp_head_out_ch, + num_classes, resize_mode, use_gap) + + # pretrained + self.pretrained = pretrained + self.init_weight() + + def forward(self, x): + x_hw = paddle.shape(x)[2:] + + feats_backbone = self.backbone(x) + assert len(feats_backbone) >= len(self.backbone_indices), \ + f"The nums of backbone feats ({len(feats_backbone)}) should be greater or " \ + f"equal than the nums of backbone_indices ({len(self.backbone_indices)})" + + y = self.lraspp_head(feats_backbone) + y = F.interpolate(y, x_hw, mode='bilinear', align_corners=False) + logit_list = [y] + + return logit_list + + def init_weight(self): + if self.pretrained is not None: + utils.load_entire_model(self, self.pretrained) + + +class LRASPPHead(nn.Layer): + def __init__(self, + indices, + in_chs, + mid_chs, + out_ch, + n_classes, + resize_mode, + use_gap, + align_corners=False): + super().__init__() + + self.indices = indices[-2::-1] + self.in_chs = [in_chs[i] for i in indices[::-1]] + self.mid_chs = mid_chs[::-1] + self.convs = nn.LayerList() + self.conv_ups = nn.LayerList() + for in_ch, mid_ch in zip(self.in_chs[1:], self.mid_chs): + self.convs.append( + nn.Conv2D( + in_ch, mid_ch, kernel_size=1, bias_attr=False)) + self.conv_ups.append(layers.ConvBNReLU(out_ch + mid_ch, out_ch, 1)) + self.conv_w = nn.Sequential( + nn.AvgPool2D( + kernel_size=(49, 49), stride=(16, 20)) + if not use_gap else nn.AdaptiveAvgPool2D(1), + nn.Conv2D( + self.in_chs[0], out_ch, 1, bias_attr=False), + nn.Sigmoid()) + self.conv_v = layers.ConvBNReLU(self.in_chs[0], out_ch, 1) + self.conv_t = nn.Conv2D(out_ch, out_ch, kernel_size=1, bias_attr=False) + self.conv_out = nn.Conv2D( + out_ch, n_classes, kernel_size=1, bias_attr=False) + + self.interp = partial( + F.interpolate, mode=resize_mode, align_corners=align_corners) + + def forward(self, in_feat_list): + x = in_feat_list[-1] + + x = 
self.conv_v(x) * self.interp(self.conv_w(x), paddle.shape(x)[2:]) + y = self.conv_t(x) + + for idx, conv, conv_up in zip(self.indices, self.convs, self.conv_ups): + feat = in_feat_list[idx] + y = self.interp(y, paddle.shape(feat)[2:]) + y = paddle.concat([y, conv(feat)], axis=1) + y = conv_up(y) + + y = self.conv_out(y) + return y diff --git a/paddlers/models/ppseg/models/mla_transformer.py b/paddlers/models/ppseg/models/mla_transformer.py index c99a358..d5647e7 100644 --- a/paddlers/models/ppseg/models/mla_transformer.py +++ b/paddlers/models/ppseg/models/mla_transformer.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddlers.models.ppseg.models import layers -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.utils import utils +from paddleseg.models import layers +from paddleseg.cvlibs import manager +from paddleseg.utils import utils class MLAHeads(nn.Layer): diff --git a/paddlers/models/ppseg/models/mobileseg.py b/paddlers/models/ppseg/models/mobileseg.py new file mode 100644 index 0000000..8d4ea5b --- /dev/null +++ b/paddlers/models/ppseg/models/mobileseg.py @@ -0,0 +1,289 @@ +# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + +from paddleseg import utils +from paddleseg.models import layers +from paddleseg.cvlibs import manager + + +@manager.MODELS.add_component +class MobileSeg(nn.Layer): + """ + The semantic segmentation models for mobile devices. + + Args: + num_classes (int): The number of target classes. + backbone(nn.Layer): Backbone network, such as stdc1net and resnet18. The backbone must + have feat_channels, of which the length is 5. + backbone_indices (List(int), optional): The values indicate the indices of output of backbone. + Default: [1, 2, 3]. + cm_bin_sizes (List(int), optional): The bin size of context module. Default: [1, 2]. + cm_out_ch (int, optional): The output channel of the last context module. Default: 64. + arm_type (str, optional): The type of attention refinement module. Default: UAFMMobile. + arm_out_chs (List(int), optional): The out channels of each arm module. Default: [32, 48, 64]. + seg_head_inter_chs (List(int), optional): The intermediate channels of segmentation head. + Default: [32, 32, 32]. + resize_mode (str, optional): The resize mode for the upsampling operation in decoder. + Default: bilinear. + use_last_fuse (bool, optional): Whether use fusion in the last. Default: False. + pretrained (str, optional): The path or url of pretrained model. Default: None. + """ + + def __init__(self, + num_classes, + backbone, + backbone_indices=[1, 2, 3], + cm_bin_sizes=[1, 2], + cm_out_ch=64, + arm_type='UAFMMobile', + arm_out_chs=[32, 48, 64], + seg_head_inter_chs=[32, 32, 32], + resize_mode='bilinear', + use_last_fuse=False, + pretrained=None): + super().__init__() + + # backbone + assert hasattr(backbone, 'feat_channels'), \ + "The backbone should has feat_channels."
+ assert len(backbone.feat_channels) >= len(backbone_indices), \ + f"The length of input backbone_indices ({len(backbone_indices)}) should not be" \ + f"greater than the length of feat_channels ({len(backbone.feat_channels)})." + assert len(backbone.feat_channels) > max(backbone_indices), \ + f"The max value ({max(backbone_indices)}) of backbone_indices should be " \ + f"less than the length of feat_channels ({len(backbone.feat_channels)})." + self.backbone = backbone + + assert len(backbone_indices) >= 1, "The lenght of backbone_indices " \ + "should not be lesser than 1" + self.backbone_indices = backbone_indices # [..., x16_id, x32_id] + backbone_out_chs = [backbone.feat_channels[i] for i in backbone_indices] + + # head + if len(arm_out_chs) == 1: + arm_out_chs = arm_out_chs * len(backbone_indices) + assert len(arm_out_chs) == len(backbone_indices), "The length of " \ + "arm_out_chs and backbone_indices should be equal" + + self.ppseg_head = MobileSegHead(backbone_out_chs, arm_out_chs, + cm_bin_sizes, cm_out_ch, arm_type, + resize_mode, use_last_fuse) + + if len(seg_head_inter_chs) == 1: + seg_head_inter_chs = seg_head_inter_chs * len(backbone_indices) + assert len(seg_head_inter_chs) == len(backbone_indices), "The length of " \ + "seg_head_inter_chs and backbone_indices should be equal" + self.seg_heads = nn.LayerList() # [..., head_16, head32] + for in_ch, mid_ch in zip(arm_out_chs, seg_head_inter_chs): + self.seg_heads.append(SegHead(in_ch, mid_ch, num_classes)) + + # pretrained + self.pretrained = pretrained + self.init_weight() + + def forward(self, x): + x_hw = paddle.shape(x)[2:] + + feats_backbone = self.backbone(x) # [x4, x8, x16, x32] + assert len(feats_backbone) >= len(self.backbone_indices), \ + f"The nums of backbone feats ({len(feats_backbone)}) should be greater or " \ + f"equal than the nums of backbone_indices ({len(self.backbone_indices)})" + + feats_selected = [feats_backbone[i] for i in self.backbone_indices] + feats_head = 
self.ppseg_head(feats_selected) # [..., x8, x16, x32] + + if self.training: + logit_list = [] + for x, seg_head in zip(feats_head, self.seg_heads): + x = seg_head(x) + logit_list.append(x) + logit_list = [ + F.interpolate( + x, x_hw, mode='bilinear', align_corners=False) + for x in logit_list + ] + else: + x = self.seg_heads[0](feats_head[0]) + x = F.interpolate(x, x_hw, mode='bilinear', align_corners=False) + logit_list = [x] + + return logit_list + + def init_weight(self): + if self.pretrained is not None: + utils.load_entire_model(self, self.pretrained) + + +class MobileSegHead(nn.Layer): + """ + The head of MobileSeg. + + Args: + backbone_out_chs (List(Tensor)): The channels of output tensors in the backbone. + arm_out_chs (List(int)): The out channels of each arm module. + cm_bin_sizes (List(int)): The bin size of context module. + cm_out_ch (int): The output channel of the last context module. + arm_type (str): The type of attention refinement module. + resize_mode (str): The resize mode for the upsampling operation in decoder. + """ + + def __init__(self, backbone_out_chs, arm_out_chs, cm_bin_sizes, cm_out_ch, + arm_type, resize_mode, use_last_fuse): + super().__init__() + + self.cm = MobileContextModule(backbone_out_chs[-1], cm_out_ch, + cm_out_ch, cm_bin_sizes) + + assert hasattr(layers,arm_type), \ + "Not support arm_type ({})".format(arm_type) + arm_class = eval("layers." 
+ arm_type) + + self.arm_list = nn.LayerList() # [..., arm8, arm16, arm32] + for i in range(len(backbone_out_chs)): + low_chs = backbone_out_chs[i] + high_ch = cm_out_ch if i == len( + backbone_out_chs) - 1 else arm_out_chs[i + 1] + out_ch = arm_out_chs[i] + arm = arm_class( + low_chs, high_ch, out_ch, ksize=3, resize_mode=resize_mode) + self.arm_list.append(arm) + + self.use_last_fuse = use_last_fuse + if self.use_last_fuse: + self.fuse_convs = nn.LayerList() + for i in range(1, len(arm_out_chs)): + conv = layers.SeparableConvBNReLU( + arm_out_chs[i], + arm_out_chs[0], + kernel_size=3, + bias_attr=False) + self.fuse_convs.append(conv) + self.last_conv = layers.SeparableConvBNReLU( + len(arm_out_chs) * arm_out_chs[0], + arm_out_chs[0], + kernel_size=3, + bias_attr=False) + + def forward(self, in_feat_list): + """ + Args: + in_feat_list (List(Tensor)): Such as [x2, x4, x8, x16, x32]. + x2, x4 and x8 are optional. + Returns: + out_feat_list (List(Tensor)): Such as [x2, x4, x8, x16, x32]. + x2, x4 and x8 are optional. + The length of in_feat_list and out_feat_list are the same. + """ + + high_feat = self.cm(in_feat_list[-1]) + out_feat_list = [] + + for i in reversed(range(len(in_feat_list))): + low_feat = in_feat_list[i] + arm = self.arm_list[i] + high_feat = arm(low_feat, high_feat) + out_feat_list.insert(0, high_feat) + + if self.use_last_fuse: + x_list = [out_feat_list[0]] + size = paddle.shape(out_feat_list[0])[2:] + for i, (x, conv + ) in enumerate(zip(out_feat_list[1:], self.fuse_convs)): + x = conv(x) + x = F.interpolate( + x, size=size, mode='bilinear', align_corners=False) + x_list.append(x) + x = paddle.concat(x_list, axis=1) + x = self.last_conv(x) + out_feat_list[0] = x + + return out_feat_list + + +class MobileContextModule(nn.Layer): + """ + Context Module for Mobile Model. + + Args: + in_channels (int): The number of input channels to pyramid pooling module. + inter_channels (int): The number of inter channels to pyramid pooling module. 
+ out_channels (int): The number of output channels after pyramid pooling module. + bin_sizes (tuple, optional): The out size of pooled feature maps. Default: (1, 3). + align_corners (bool): An argument of F.interpolate. It should be set to False + when the output size of feature is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. + """ + + def __init__(self, + in_channels, + inter_channels, + out_channels, + bin_sizes, + align_corners=False): + super().__init__() + + self.stages = nn.LayerList([ + self._make_stage(in_channels, inter_channels, size) + for size in bin_sizes + ]) + + self.conv_out = layers.SeparableConvBNReLU( + in_channels=inter_channels, + out_channels=out_channels, + kernel_size=3, + bias_attr=False) + + self.align_corners = align_corners + + def _make_stage(self, in_channels, out_channels, size): + prior = nn.AdaptiveAvgPool2D(output_size=size) + conv = layers.ConvBNReLU( + in_channels=in_channels, out_channels=out_channels, kernel_size=1) + return nn.Sequential(prior, conv) + + def forward(self, input): + out = None + input_shape = paddle.shape(input)[2:] + + for stage in self.stages: + x = stage(input) + x = F.interpolate( + x, + input_shape, + mode='bilinear', + align_corners=self.align_corners) + if out is None: + out = x + else: + out += x + + out = self.conv_out(out) + return out + + +class SegHead(nn.Layer): + def __init__(self, in_chan, mid_chan, n_classes): + super().__init__() + self.conv = layers.SeparableConvBNReLU( + in_chan, mid_chan, kernel_size=3, bias_attr=False) + self.conv_out = nn.Conv2D( + mid_chan, n_classes, kernel_size=1, bias_attr=False) + + def forward(self, x): + x = self.conv(x) + x = self.conv_out(x) + return x diff --git a/paddlers/models/ppseg/models/ocrnet.py b/paddlers/models/ppseg/models/ocrnet.py index 1225642..b1eb73f 100644 --- a/paddlers/models/ppseg/models/ocrnet.py +++ b/paddlers/models/ppseg/models/ocrnet.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F 
-from paddlers.models.ppseg import utils -from paddlers.models.ppseg.cvlibs import manager, param_init -from paddlers.models.ppseg.models import layers +from paddleseg import utils +from paddleseg.cvlibs import manager, param_init +from paddleseg.models import layers @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/pfpnnet.py b/paddlers/models/ppseg/models/pfpnnet.py index 3b6c17b..25143ac 100644 --- a/paddlers/models/ppseg/models/pfpnnet.py +++ b/paddlers/models/ppseg/models/pfpnnet.py @@ -18,9 +18,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddlers.models.ppseg.models import layers -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.utils import utils +from paddleseg.models import layers +from paddleseg.cvlibs import manager +from paddleseg.utils import utils @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/pointrend.py b/paddlers/models/ppseg/models/pointrend.py index 3f0b4ba..b1f9b01 100644 --- a/paddlers/models/ppseg/models/pointrend.py +++ b/paddlers/models/ppseg/models/pointrend.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -18,9 +18,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.models import layers -from paddlers.models.ppseg.utils import utils +from paddleseg.cvlibs import manager +from paddleseg.models import layers +from paddleseg.utils import utils @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/portraitnet.py b/paddlers/models/ppseg/models/portraitnet.py index 23f32df..255eaca 100644 --- a/paddlers/models/ppseg/models/portraitnet.py +++ b/paddlers/models/ppseg/models/portraitnet.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,8 +14,8 @@ import paddle.nn as nn -from paddlers.models.ppseg import utils -from paddlers.models.ppseg.cvlibs import manager +from paddleseg import utils +from paddleseg.cvlibs import manager @manager.MODELS.add_component @@ -50,19 +50,9 @@ class PortraitNet(nn.Layer): self.init_weight() def forward(self, x): - img = x[:, :3, :, :] - img_ori = x[:, 3:, :, :] - - feat_list = self.backbone(img) + feat_list = self.backbone(x) logits_list = self.head(feat_list) - - feat_list = self.backbone(img_ori) - logits_ori_list = self.head(feat_list) - - return [ - logits_list[0], logits_ori_list[0], logits_list[1], - logits_ori_list[1] - ] + return [logits_list] def init_weight(self): if self.pretrained is not None: diff --git a/paddlers/models/ppseg/models/pp_liteseg.py b/paddlers/models/ppseg/models/pp_liteseg.py new file mode 100644 index 0000000..8467011 --- /dev/null +++ b/paddlers/models/ppseg/models/pp_liteseg.py @@ -0,0 +1,273 @@ +# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + +from paddleseg import utils +from paddleseg.models import layers +from paddleseg.cvlibs import manager +from paddleseg.utils import utils + + +@manager.MODELS.add_component +class PPLiteSeg(nn.Layer): + """ + The PP_LiteSeg implementation based on PaddlePaddle. + + The original article refers to "Juncai Peng, Yi Liu, Shiyu Tang, Yuying Hao, Lutao Chu, + Guowei Chen, Zewu Wu, Zeyu Chen, Zhiliang Yu, Yuning Du, Qingqing Dang, Baohua Lai, + Qiwen Liu, Xiaoguang Hu, Dianhai Yu, Yanjun Ma. PP-LiteSeg: A Superior Real-Time Semantic + Segmentation Model. https://arxiv.org/abs/2204.02681". + + Args: + num_classes (int): The number of target classes. + backbone(nn.Layer): Backbone network, such as stdc1net and resnet18. The backbone must + have feat_channels, of which the length is 5. + backbone_indices (List(int), optional): The values indicate the indices of output of backbone. + Default: [2, 3, 4]. + arm_type (str, optional): The type of attention refinement module. Default: UAFM_SpAtten. + cm_bin_sizes (List(int), optional): The bin size of context module. Default: [1,2,4]. + cm_out_ch (int, optional): The output channel of the last context module. Default: 128. + arm_out_chs (List(int), optional): The out channels of each arm module. Default: [64, 96, 128].
+ seg_head_inter_chs (List(int), optional): The intermediate channels of segmentation head. + Default: [64, 64, 64]. + resize_mode (str, optional): The resize mode for the upsampling operation in decoder. + Default: bilinear. + pretrained (str, optional): The path or url of pretrained model. Default: None. + + """ + + def __init__(self, + num_classes, + backbone, + backbone_indices=[2, 3, 4], + arm_type='UAFM_SpAtten', + cm_bin_sizes=[1, 2, 4], + cm_out_ch=128, + arm_out_chs=[64, 96, 128], + seg_head_inter_chs=[64, 64, 64], + resize_mode='bilinear', + pretrained=None): + super().__init__() + + # backbone + assert hasattr(backbone, 'feat_channels'), \ + "The backbone should has feat_channels." + assert len(backbone.feat_channels) >= len(backbone_indices), \ + f"The length of input backbone_indices ({len(backbone_indices)}) should not be" \ + f"greater than the length of feat_channels ({len(backbone.feat_channels)})." + assert len(backbone.feat_channels) > max(backbone_indices), \ + f"The max value ({max(backbone_indices)}) of backbone_indices should be " \ + f"less than the length of feat_channels ({len(backbone.feat_channels)})." 
+ self.backbone = backbone + + assert len(backbone_indices) > 1, "The lenght of backbone_indices " \ + "should be greater than 1" + self.backbone_indices = backbone_indices # [..., x16_id, x32_id] + backbone_out_chs = [backbone.feat_channels[i] for i in backbone_indices] + + # head + if len(arm_out_chs) == 1: + arm_out_chs = arm_out_chs * len(backbone_indices) + assert len(arm_out_chs) == len(backbone_indices), "The length of " \ + "arm_out_chs and backbone_indices should be equal" + + self.ppseg_head = PPLiteSegHead(backbone_out_chs, arm_out_chs, + cm_bin_sizes, cm_out_ch, arm_type, + resize_mode) + + if len(seg_head_inter_chs) == 1: + seg_head_inter_chs = seg_head_inter_chs * len(backbone_indices) + assert len(seg_head_inter_chs) == len(backbone_indices), "The length of " \ + "seg_head_inter_chs and backbone_indices should be equal" + self.seg_heads = nn.LayerList() # [..., head_16, head32] + for in_ch, mid_ch in zip(arm_out_chs, seg_head_inter_chs): + self.seg_heads.append(SegHead(in_ch, mid_ch, num_classes)) + + # pretrained + self.pretrained = pretrained + self.init_weight() + + def forward(self, x): + x_hw = paddle.shape(x)[2:] + + feats_backbone = self.backbone(x) # [x2, x4, x8, x16, x32] + assert len(feats_backbone) >= len(self.backbone_indices), \ + f"The nums of backbone feats ({len(feats_backbone)}) should be greater or " \ + f"equal than the nums of backbone_indices ({len(self.backbone_indices)})" + + feats_selected = [feats_backbone[i] for i in self.backbone_indices] + + feats_head = self.ppseg_head(feats_selected) # [..., x8, x16, x32] + + if self.training: + logit_list = [] + + for x, seg_head in zip(feats_head, self.seg_heads): + x = seg_head(x) + logit_list.append(x) + + logit_list = [ + F.interpolate( + x, x_hw, mode='bilinear', align_corners=False) + for x in logit_list + ] + else: + x = self.seg_heads[0](feats_head[0]) + x = F.interpolate(x, x_hw, mode='bilinear', align_corners=False) + logit_list = [x] + + return logit_list + + def 
init_weight(self): + if self.pretrained is not None: + utils.load_entire_model(self, self.pretrained) + + +class PPLiteSegHead(nn.Layer): + """ + The head of PPLiteSeg. + + Args: + backbone_out_chs (List(Tensor)): The channels of output tensors in the backbone. + arm_out_chs (List(int)): The out channels of each arm module. + cm_bin_sizes (List(int)): The bin size of context module. + cm_out_ch (int): The output channel of the last context module. + arm_type (str): The type of attention refinement module. + resize_mode (str): The resize mode for the upsampling operation in decoder. + """ + + def __init__(self, backbone_out_chs, arm_out_chs, cm_bin_sizes, cm_out_ch, + arm_type, resize_mode): + super().__init__() + + self.cm = PPContextModule(backbone_out_chs[-1], cm_out_ch, cm_out_ch, + cm_bin_sizes) + + assert hasattr(layers,arm_type), \ + "Not support arm_type ({})".format(arm_type) + arm_class = eval("layers." + arm_type) + + self.arm_list = nn.LayerList() # [..., arm8, arm16, arm32] + for i in range(len(backbone_out_chs)): + low_chs = backbone_out_chs[i] + high_ch = cm_out_ch if i == len( + backbone_out_chs) - 1 else arm_out_chs[i + 1] + out_ch = arm_out_chs[i] + arm = arm_class( + low_chs, high_ch, out_ch, ksize=3, resize_mode=resize_mode) + self.arm_list.append(arm) + + def forward(self, in_feat_list): + """ + Args: + in_feat_list (List(Tensor)): Such as [x2, x4, x8, x16, x32]. + x2, x4 and x8 are optional. + Returns: + out_feat_list (List(Tensor)): Such as [x2, x4, x8, x16, x32]. + x2, x4 and x8 are optional. + The length of in_feat_list and out_feat_list are the same. + """ + + high_feat = self.cm(in_feat_list[-1]) + out_feat_list = [] + + for i in reversed(range(len(in_feat_list))): + low_feat = in_feat_list[i] + arm = self.arm_list[i] + high_feat = arm(low_feat, high_feat) + out_feat_list.insert(0, high_feat) + + return out_feat_list + + +class PPContextModule(nn.Layer): + """ + Simple Context module. 
+ + Args: + in_channels (int): The number of input channels to pyramid pooling module. + inter_channels (int): The number of inter channels to pyramid pooling module. + out_channels (int): The number of output channels after pyramid pooling module. + bin_sizes (tuple, optional): The out size of pooled feature maps. Default: (1, 3). + align_corners (bool): An argument of F.interpolate. It should be set to False + when the output size of feature is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. + """ + + def __init__(self, + in_channels, + inter_channels, + out_channels, + bin_sizes, + align_corners=False): + super().__init__() + + self.stages = nn.LayerList([ + self._make_stage(in_channels, inter_channels, size) + for size in bin_sizes + ]) + + self.conv_out = layers.ConvBNReLU( + in_channels=inter_channels, + out_channels=out_channels, + kernel_size=3, + padding=1) + + self.align_corners = align_corners + + def _make_stage(self, in_channels, out_channels, size): + prior = nn.AdaptiveAvgPool2D(output_size=size) + conv = layers.ConvBNReLU( + in_channels=in_channels, out_channels=out_channels, kernel_size=1) + return nn.Sequential(prior, conv) + + def forward(self, input): + out = None + input_shape = paddle.shape(input)[2:] + + for stage in self.stages: + x = stage(input) + x = F.interpolate( + x, + input_shape, + mode='bilinear', + align_corners=self.align_corners) + if out is None: + out = x + else: + out += x + + out = self.conv_out(out) + return out + + +class SegHead(nn.Layer): + def __init__(self, in_chan, mid_chan, n_classes): + super().__init__() + self.conv = layers.ConvBNReLU( + in_chan, + mid_chan, + kernel_size=3, + stride=1, + padding=1, + bias_attr=False) + self.conv_out = nn.Conv2D( + mid_chan, n_classes, kernel_size=1, bias_attr=False) + + def forward(self, x): + x = self.conv(x) + x = self.conv_out(x) + return x diff --git a/paddlers/models/ppseg/models/pphumanseg_lite.py b/paddlers/models/ppseg/models/pphumanseg_lite.py index 
af52712..c206651 100644 --- a/paddlers/models/ppseg/models/pphumanseg_lite.py +++ b/paddlers/models/ppseg/models/pphumanseg_lite.py @@ -16,24 +16,28 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddlers.models.ppseg.cvlibs import manager, param_init -from paddlers.models.ppseg.models import layers -from paddlers.models.ppseg.utils import utils +from paddleseg.cvlibs import manager, param_init +from paddleseg.models import layers +from paddleseg.utils import utils __all__ = ['PPHumanSegLite'] @manager.MODELS.add_component class PPHumanSegLite(nn.Layer): - "A self-developed ultra lightweight model from paddlers.models.ppseg, is suitable for real-time scene segmentation on web or mobile terminals." + "A self-developed ultra lightweight model from PaddleSeg, is suitable for real-time scene segmentation on web or mobile terminals." - def __init__(self, num_classes, pretrained=None, align_corners=False): + def __init__(self, + num_classes, + in_channels=3, + pretrained=None, + align_corners=False): super().__init__() self.pretrained = pretrained self.num_classes = num_classes self.align_corners = align_corners - self.conv_bn0 = _ConvBNReLU(3, 36, 3, 2, 1) + self.conv_bn0 = _ConvBNReLU(in_channels, 36, 3, 2, 1) self.conv_bn1 = _ConvBNReLU(36, 18, 1, 1, 0) self.block1 = nn.Sequential( diff --git a/paddlers/models/ppseg/models/pspnet.py b/paddlers/models/ppseg/models/pspnet.py index 5a6f6b7..77283b6 100644 --- a/paddlers/models/ppseg/models/pspnet.py +++ b/paddlers/models/ppseg/models/pspnet.py @@ -16,9 +16,9 @@ import paddle.nn as nn import paddle.nn.functional as F import paddle -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.models import layers -from paddlers.models.ppseg.utils import utils +from paddleseg.cvlibs import manager +from paddleseg.models import layers +from paddleseg.utils import utils @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/segformer.py 
b/paddlers/models/ppseg/models/segformer.py index 7a7a1db..af9c27b 100644 --- a/paddlers/models/ppseg/models/segformer.py +++ b/paddlers/models/ppseg/models/segformer.py @@ -7,9 +7,9 @@ import paddle.nn as nn import paddle.nn.functional as F import numpy as np -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.models import layers -from paddlers.models.ppseg.utils import utils +from paddleseg.cvlibs import manager +from paddleseg.models import layers +from paddleseg.utils import utils class MLP(nn.Layer): @@ -127,51 +127,3 @@ class SegFormer(nn.Layer): mode='bilinear', align_corners=self.align_corners) ] - - -@manager.MODELS.add_component -def SegFormer_B0(**kwargs): - return SegFormer( - backbone=manager.BACKBONES['MixVisionTransformer_B0'](), - embedding_dim=256, - **kwargs) - - -@manager.MODELS.add_component -def SegFormer_B1(**kwargs): - return SegFormer( - backbone=manager.BACKBONES['MixVisionTransformer_B1'](), - embedding_dim=256, - **kwargs) - - -@manager.MODELS.add_component -def SegFormer_B2(**kwargs): - return SegFormer( - backbone=manager.BACKBONES['MixVisionTransformer_B2'](), - embedding_dim=768, - **kwargs) - - -@manager.MODELS.add_component -def SegFormer_B3(**kwargs): - return SegFormer( - backbone=manager.BACKBONES['MixVisionTransformer_B3'](), - embedding_dim=768, - **kwargs) - - -@manager.MODELS.add_component -def SegFormer_B4(**kwargs): - return SegFormer( - backbone=manager.BACKBONES['MixVisionTransformer_B4'](), - embedding_dim=768, - **kwargs) - - -@manager.MODELS.add_component -def SegFormer_B5(**kwargs): - return SegFormer( - backbone=manager.BACKBONES['MixVisionTransformer_B5'](), - embedding_dim=768, - **kwargs) diff --git a/paddlers/models/ppseg/models/segmenter.py b/paddlers/models/ppseg/models/segmenter.py index 4bfb743..84f9495 100644 --- a/paddlers/models/ppseg/models/segmenter.py +++ b/paddlers/models/ppseg/models/segmenter.py @@ -17,9 +17,9 @@ import paddle.nn as nn import paddle.nn.functional as F 
import numpy as np -from paddlers.models.ppseg.utils import utils -from paddlers.models.ppseg.cvlibs import manager, param_init -from paddlers.models.ppseg.models.backbones import vision_transformer, transformer_utils +from paddleseg.utils import utils +from paddleseg.cvlibs import manager, param_init +from paddleseg.models.backbones import vision_transformer, transformer_utils __all__ = ['LinearSegmenter', 'MaskSegmenter'] diff --git a/paddlers/models/ppseg/models/segnet.py b/paddlers/models/ppseg/models/segnet.py index d35bf9d..e861d13 100644 --- a/paddlers/models/ppseg/models/segnet.py +++ b/paddlers/models/ppseg/models/segnet.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddlers.models.ppseg.cvlibs import manager, param_init -from paddlers.models.ppseg.models import layers -from paddlers.models.ppseg.utils import utils +from paddleseg.cvlibs import manager, param_init +from paddleseg.models import layers +from paddleseg.utils import utils @manager.MODELS.add_component @@ -32,14 +32,14 @@ class SegNet(nn.Layer): num_classes (int): The unique number of target classes. 
""" - def __init__(self, num_classes, pretrained=None): + def __init__(self, num_classes, in_channels=3, pretrained=None): super().__init__() # Encoder Module self.enco1 = nn.Sequential( layers.ConvBNReLU( - 3, 64, 3, padding=1), + in_channels, 64, 3, padding=1), layers.ConvBNReLU( 64, 64, 3, padding=1)) diff --git a/paddlers/models/ppseg/models/setr.py b/paddlers/models/ppseg/models/setr.py index 8475987..0b95f16 100644 --- a/paddlers/models/ppseg/models/setr.py +++ b/paddlers/models/ppseg/models/setr.py @@ -16,9 +16,9 @@ import paddle.nn as nn import paddle.nn.functional as F import paddle -from paddlers.models.ppseg.cvlibs import manager, param_init -from paddlers.models.ppseg.models import layers -from paddlers.models.ppseg.utils import utils +from paddleseg.cvlibs import manager, param_init +from paddleseg.models import layers +from paddleseg.utils import utils @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/sfnet.py b/paddlers/models/ppseg/models/sfnet.py index b9f9924..d04dfbe 100644 --- a/paddlers/models/ppseg/models/sfnet.py +++ b/paddlers/models/ppseg/models/sfnet.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddlers.models.ppseg.models import layers -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.utils import utils +from paddleseg.models import layers +from paddleseg.cvlibs import manager +from paddleseg.utils import utils @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/sinet.py b/paddlers/models/ppseg/models/sinet.py new file mode 100644 index 0000000..9381b44 --- /dev/null +++ b/paddlers/models/ppseg/models/sinet.py @@ -0,0 +1,449 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Refer to the origin implementation: https://github.com/clovaai/c3_sinet/blob/master/models/SINet.py + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + +from paddleseg.models import layers +from paddleseg.cvlibs import manager +from paddleseg.utils import utils + +CFG = [[[3, 1], [5, 1]], [[3, 1], [3, 1]], [[3, 1], [5, 1]], [[3, 1], [3, 1]], + [[5, 1], [3, 2]], [[5, 2], [3, 4]], [[3, 1], [3, 1]], [[5, 1], [5, 1]], + [[3, 2], [3, 4]], [[3, 1], [5, 2]]] + + +@manager.MODELS.add_component +class SINet(nn.Layer): + """ + The SINet implementation based on PaddlePaddle. + + The original article refers to + Hyojin Park, Lars Lowe Sjösund, YoungJoon Yoo, Nicolas Monet, Jihwan Bang, Nojun Kwak + "SINet: Extreme Lightweight Portrait Segmentation Networks with Spatial Squeeze Modules + and Information Blocking Decoder", (https://arxiv.org/abs/1911.09099). + + Args: + num_classes (int): The unique number of target classes. + config (List, optional): The config for SINet. Defualt use the CFG. + stage2_blocks (int, optional): The num of blocks in stage2. Default: 2. + stage3_blocks (int, optional): The num of blocks in stage3. Default: 8. + in_channels (int, optional): The channels of input image. Default: 3. + pretrained (str, optional): The path or url of pretrained model. Default: None. 
+ """ + + def __init__(self, + num_classes=2, + config=CFG, + stage2_blocks=2, + stage3_blocks=8, + in_channels=3, + pretrained=None): + super().__init__() + dim1 = 16 + dim2 = 48 + dim3 = 96 + + self.encoder = SINetEncoder(config, in_channels, num_classes, + stage2_blocks, stage3_blocks) + + self.up = nn.UpsamplingBilinear2D(scale_factor=2) + self.bn_3 = nn.BatchNorm(num_classes) + + self.level2_C = CBR(dim2, num_classes, 1, 1) + self.bn_2 = nn.BatchNorm(num_classes) + + self.classifier = nn.Sequential( + nn.UpsamplingBilinear2D(scale_factor=2), + nn.Conv2D( + num_classes, num_classes, 3, 1, 1, bias_attr=False)) + + self.pretrained = pretrained + self.init_weight() + + def init_weight(self): + if self.pretrained is not None: + utils.load_entire_model(self, self.pretrained) + + def forward(self, input): + output1 = self.encoder.level1(input) # x2 + + output2_0 = self.encoder.level2_0(output1) # x4 + for i, layer in enumerate(self.encoder.level2): + if i == 0: + output2 = layer(output2_0) + else: + output2 = layer(output2) + output2_cat = self.encoder.BR2(paddle.concat([output2_0, output2], 1)) + + output3_0 = self.encoder.level3_0(output2_cat) # x8 + for i, layer in enumerate(self.encoder.level3): + if i == 0: + output3 = layer(output3_0) + else: + output3 = layer(output3) + output3_cat = self.encoder.BR3(paddle.concat([output3_0, output3], 1)) + enc_final = self.encoder.classifier(output3_cat) # x8 + + dec_stage1 = self.bn_3(self.up(enc_final)) # x4 + stage1_confidence = paddle.max(F.softmax(dec_stage1), axis=1) + stage1_gate = (1 - stage1_confidence).unsqueeze(1) + + dec_stage2_0 = self.level2_C(output2) # x4 + dec_stage2 = self.bn_2( + self.up(dec_stage2_0 * stage1_gate + dec_stage1)) # x2 + + out = self.classifier(dec_stage2) # x + + return [out] + + +def channel_shuffle(x, groups): + x_shape = paddle.shape(x) + batch_size, height, width = x_shape[0], x_shape[2], x_shape[3] + num_channels = x.shape[1] + channels_per_group = num_channels // groups + + # reshape 
+ x = paddle.reshape( + x=x, shape=[batch_size, groups, channels_per_group, height, width]) + + # transpose + x = paddle.transpose(x=x, perm=[0, 2, 1, 3, 4]) + + # flatten + x = paddle.reshape(x=x, shape=[batch_size, num_channels, height, width]) + + return x + + +class CBR(nn.Layer): + ''' + This class defines the convolution layer with batch normalization and PReLU activation + ''' + + def __init__(self, nIn, nOut, kSize, stride=1): + super().__init__() + padding = int((kSize - 1) / 2) + + self.conv = nn.Conv2D( + nIn, + nOut, (kSize, kSize), + stride=stride, + padding=(padding, padding), + bias_attr=False) + self.bn = nn.BatchNorm(nOut) + self.act = nn.PReLU(nOut) + + def forward(self, input): + output = self.conv(input) + output = self.bn(output) + output = self.act(output) + return output + + +class SeparableCBR(nn.Layer): + ''' + This class defines the convolution layer with batch normalization and PReLU activation + ''' + + def __init__(self, nIn, nOut, kSize, stride=1): + super().__init__() + padding = int((kSize - 1) / 2) + + self.conv = nn.Sequential( + nn.Conv2D( + nIn, + nIn, (kSize, kSize), + stride=stride, + padding=(padding, padding), + groups=nIn, + bias_attr=False), + nn.Conv2D( + nIn, nOut, kernel_size=1, stride=1, bias_attr=False), ) + self.bn = nn.BatchNorm(nOut) + self.act = nn.PReLU(nOut) + + def forward(self, input): + output = self.conv(input) + output = self.bn(output) + output = self.act(output) + return output + + +class SqueezeBlock(nn.Layer): + def __init__(self, exp_size, divide=4.0): + super(SqueezeBlock, self).__init__() + + if divide > 1: + self.dense = nn.Sequential( + nn.Linear(exp_size, int(exp_size / divide)), + nn.PReLU(int(exp_size / divide)), + nn.Linear(int(exp_size / divide), exp_size), + nn.PReLU(exp_size), ) + else: + self.dense = nn.Sequential( + nn.Linear(exp_size, exp_size), nn.PReLU(exp_size)) + + def forward(self, x): + alpha = F.adaptive_avg_pool2d(x, [1, 1]) + alpha = paddle.squeeze(alpha, axis=[2, 3]) + alpha = 
self.dense(alpha) + alpha = paddle.unsqueeze(alpha, axis=[2, 3]) + out = x * alpha + return out + + +class SESeparableCBR(nn.Layer): + ''' + This class defines the convolution layer with batch normalization and PReLU activation + ''' + + def __init__(self, nIn, nOut, kSize, stride=1, divide=2.0): + super().__init__() + padding = int((kSize - 1) / 2) + + self.conv = nn.Sequential( + nn.Conv2D( + nIn, + nIn, (kSize, kSize), + stride=stride, + padding=(padding, padding), + groups=nIn, + bias_attr=False), + SqueezeBlock( + nIn, divide=divide), + nn.Conv2D( + nIn, nOut, kernel_size=1, stride=1, bias_attr=False), ) + + self.bn = nn.BatchNorm(nOut) + self.act = nn.PReLU(nOut) + + def forward(self, input): + output = self.conv(input) + output = self.bn(output) + output = self.act(output) + return output + + +class BR(nn.Layer): + ''' + This class groups the batch normalization and PReLU activation + ''' + + def __init__(self, nOut): + super().__init__() + self.bn = nn.BatchNorm(nOut) + self.act = nn.PReLU(nOut) + + def forward(self, input): + output = self.bn(input) + output = self.act(output) + return output + + +class CB(nn.Layer): + ''' + This class groups the convolution and batch normalization + ''' + + def __init__(self, nIn, nOut, kSize, stride=1): + super().__init__() + padding = int((kSize - 1) / 2) + self.conv = nn.Conv2D( + nIn, + nOut, (kSize, kSize), + stride=stride, + padding=(padding, padding), + bias_attr=False) + self.bn = nn.BatchNorm(nOut) + + def forward(self, input): + output = self.conv(input) + output = self.bn(output) + return output + + +class C(nn.Layer): + ''' + This class is for a convolutional layer. 
+ ''' + + def __init__(self, nIn, nOut, kSize, stride=1, group=1): + super().__init__() + padding = int((kSize - 1) / 2) + self.conv = nn.Conv2D( + nIn, + nOut, (kSize, kSize), + stride=stride, + padding=(padding, padding), + bias_attr=False, + groups=group) + + def forward(self, input): + output = self.conv(input) + return output + + +class S2block(nn.Layer): + ''' + This class defines the dilated convolution. + ''' + + def __init__(self, nIn, nOut, kSize, avgsize): + super().__init__() + + self.resolution_down = False + if avgsize > 1: + self.resolution_down = True + self.down_res = nn.AvgPool2D(avgsize, avgsize) + self.up_res = nn.UpsamplingBilinear2D(scale_factor=avgsize) + self.avgsize = avgsize + + padding = int((kSize - 1) / 2) + self.conv = nn.Sequential( + nn.Conv2D( + nIn, + nIn, + kernel_size=(kSize, kSize), + stride=1, + padding=(padding, padding), + groups=nIn, + bias_attr=False), + nn.BatchNorm(nIn)) + + self.act_conv1x1 = nn.Sequential( + nn.PReLU(nIn), + nn.Conv2D( + nIn, nOut, kernel_size=1, stride=1, bias_attr=False), ) + + self.bn = nn.BatchNorm(nOut) + + def forward(self, input): + if self.resolution_down: + input = self.down_res(input) + output = self.conv(input) + + output = self.act_conv1x1(output) + if self.resolution_down: + output = self.up_res(output) + return self.bn(output) + + +class S2module(nn.Layer): + ''' + This class defines the ESP block, which is based on the following principle + Reduce ---> Split ---> Transform --> Merge + ''' + + def __init__(self, nIn, nOut, add=True, config=[[3, 1], [5, 1]]): + super().__init__() + + group_n = len(config) + assert group_n == 2 + n = int(nOut / group_n) + n1 = nOut - group_n * n + + self.c1 = C(nIn, n, 1, 1, group=group_n) + # self.c1 = C(nIn, n, 1, 1) + + for i in range(group_n): + if i == 0: + self.layer_0 = S2block( + n, n + n1, kSize=config[i][0], avgsize=config[i][1]) + else: + self.layer_1 = S2block( + n, n, kSize=config[i][0], avgsize=config[i][1]) + + self.BR = BR(nOut) + self.add = 
add + self.group_n = group_n + + def forward(self, input): + output1 = self.c1(input) + output1 = channel_shuffle(output1, self.group_n) + res_0 = self.layer_0(output1) + res_1 = self.layer_1(output1) + combine = paddle.concat([res_0, res_1], 1) + + if self.add: + combine = input + combine + output = self.BR(combine) + return output + + +class SINetEncoder(nn.Layer): + def __init__(self, + config, + in_channels=3, + num_classes=2, + stage2_blocks=2, + stage3_blocks=8): + super().__init__() + assert stage2_blocks == 2 + dim1 = 16 + dim2 = 48 + dim3 = 96 + + self.level1 = CBR(in_channels, 12, 3, 2) + + self.level2_0 = SESeparableCBR(12, dim1, 3, 2, divide=1) + + self.level2 = nn.LayerList() + for i in range(0, stage2_blocks): + if i == 0: + self.level2.append( + S2module( + dim1, dim2, config=config[i], add=False)) + else: + self.level2.append(S2module(dim2, dim2, config=config[i])) + self.BR2 = BR(dim2 + dim1) + + self.level3_0 = SESeparableCBR(dim2 + dim1, dim2, 3, 2, divide=2) + self.level3 = nn.LayerList() + for i in range(0, stage3_blocks): + if i == 0: + self.level3.append( + S2module( + dim2, dim3, config=config[2 + i], add=False)) + else: + self.level3.append(S2module(dim3, dim3, config=config[2 + i])) + self.BR3 = BR(dim3 + dim2) + + self.classifier = C(dim3 + dim2, num_classes, 1, 1) + + def forward(self, input): + output1 = self.level1(input) # x2 + + output2_0 = self.level2_0(output1) # x4 + for i, layer in enumerate(self.level2): + if i == 0: + output2 = layer(output2_0) + else: + output2 = layer(output2) + + output3_0 = self.level3_0( + self.BR2(paddle.concat([output2_0, output2], 1))) # x8 + for i, layer in enumerate(self.level3): + if i == 0: + output3 = layer(output3_0) + else: + output3 = layer(output3) + + output3_cat = self.BR3(paddle.concat([output3_0, output3], 1)) + classifier = self.classifier(output3_cat) + return classifier diff --git a/paddlers/models/ppseg/models/stdcseg.py b/paddlers/models/ppseg/models/stdcseg.py index 0b0b044..a772881 
100644 --- a/paddlers/models/ppseg/models/stdcseg.py +++ b/paddlers/models/ppseg/models/stdcseg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -16,10 +16,10 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddlers.models.ppseg import utils -from paddlers.models.ppseg.models import layers -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.utils import utils +from paddleseg import utils +from paddleseg.models import layers +from paddleseg.cvlibs import manager +from paddleseg.utils import utils @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/topformer.py b/paddlers/models/ppseg/models/topformer.py new file mode 100644 index 0000000..29241cd --- /dev/null +++ b/paddlers/models/ppseg/models/topformer.py @@ -0,0 +1,155 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import warnings + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + +from paddleseg.cvlibs import manager +from paddleseg.models import layers +from paddleseg.utils import utils +from paddleseg.models.backbones.top_transformer import ConvBNAct + + +@manager.MODELS.add_component +class TopFormer(nn.Layer): + """ + The Token Pyramid Transformer(TopFormer) implementation based on PaddlePaddle. + + The original article refers to + Zhang, Wenqiang, Zilong Huang, Guozhong Luo, Tao Chen, Xinggang Wang, Wenyu Liu, Gang Yu, + and Chunhua Shen. "TopFormer: Token Pyramid Transformer for Mobile Semantic Segmentation." + In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition, + pp. 12083-12093. 2022. + + This model refers to https://github.com/hustvl/TopFormer. + + Args: + num_classes(int,optional): The unique number of target classes. + backbone(nn.Layer): Backbone network. + head_use_dw (bool, optional): Whether the head use depthwise convolutions. Default: False. + align_corners (bool, optional): Set the align_corners in resizing. Default: False. + pretrained (str, optional): The path or url of pretrained model. Default: None. 
+ """ + + def __init__(self, + num_classes, + backbone, + head_use_dw=False, + align_corners=False, + pretrained=None): + super().__init__() + self.backbone = backbone + + head_in_channels = [ + i for i in backbone.injection_out_channels if i is not None + ] + self.decode_head = TopFormerHead( + num_classes=num_classes, + in_channels=head_in_channels, + use_dw=head_use_dw, + align_corners=align_corners) + + self.align_corners = align_corners + self.pretrained = pretrained + self.init_weight() + + def init_weight(self): + if self.pretrained is not None: + utils.load_entire_model(self, self.pretrained) + + def forward(self, x): + x_hw = paddle.shape(x)[2:] + x = self.backbone(x) # len=3, 1/8,1/16,1/32 + x = self.decode_head(x) + x = F.interpolate( + x, x_hw, mode='bilinear', align_corners=self.align_corners) + + return [x] + + +class TopFormerHead(nn.Layer): + def __init__(self, + num_classes, + in_channels, + in_index=[0, 1, 2], + in_transform='multiple_select', + use_dw=False, + dropout_ratio=0.1, + align_corners=False): + super().__init__() + + self.in_index = in_index + self.in_transform = in_transform + self.align_corners = align_corners + + self._init_inputs(in_channels, in_index, in_transform) + self.linear_fuse = ConvBNAct( + in_channels=self.last_channels, + out_channels=self.last_channels, + kernel_size=1, + stride=1, + groups=self.last_channels if use_dw else 1, + act=nn.ReLU) + self.dropout = nn.Dropout2D(dropout_ratio) + self.conv_seg = nn.Conv2D( + self.last_channels, num_classes, kernel_size=1) + + def _init_inputs(self, in_channels, in_index, in_transform): + assert in_transform in [None, 'resize_concat', 'multiple_select'] + if in_transform is not None: + assert len(in_channels) == len(in_index) + if in_transform == 'resize_concat': + self.last_channels = sum(in_channels) + else: + self.last_channels = in_channels[0] + else: + assert isinstance(in_channels, int) + assert isinstance(in_index, int) + self.last_channels = in_channels + + def 
_transform_inputs(self, inputs): + if self.in_transform == 'resize_concat': + inputs = [inputs[i] for i in self.in_index] + inputs = [ + F.interpolate( + input_data=x, + size=paddle.shape(inputs[0])[2:], + mode='bilinear', + align_corners=self.align_corners) for x in inputs + ] + inputs = paddle.concat(inputs, axis=1) + elif self.in_transform == 'multiple_select': + inputs_tmp = [inputs[i] for i in self.in_index] + inputs = inputs_tmp[0] + for x in inputs_tmp[1:]: + x = F.interpolate( + x, + size=paddle.shape(inputs)[2:], + mode='bilinear', + align_corners=self.align_corners) + inputs += x + else: + inputs = inputs[self.in_index] + + return inputs + + def forward(self, x): + x = self._transform_inputs(x) + x = self.linear_fuse(x) + x = self.dropout(x) + x = self.conv_seg(x) + return x diff --git a/paddlers/models/ppseg/models/u2net.py b/paddlers/models/ppseg/models/u2net.py index 2f24d06..c0f12b2 100644 --- a/paddlers/models/ppseg/models/u2net.py +++ b/paddlers/models/ppseg/models/u2net.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.models import layers -from paddlers.models.ppseg.utils import utils +from paddleseg.cvlibs import manager +from paddleseg.models import layers +from paddleseg.utils import utils __all__ = ['U2Net', 'U2Netp'] @@ -34,15 +34,15 @@ class U2Net(nn.Layer): Args: num_classes (int): The unique number of target classes. - in_ch (int, optional): Input channels. Default: 3. + in_channels (int, optional): Input channels. Default: 3. pretrained (str, optional): The path or url of pretrained model for fine tuning. Default: None. 
""" - def __init__(self, num_classes, in_ch=3, pretrained=None): + def __init__(self, num_classes, in_channels=3, pretrained=None): super(U2Net, self).__init__() - self.stage1 = RSU7(in_ch, 32, 64) + self.stage1 = RSU7(in_channels, 32, 64) self.pool12 = nn.MaxPool2D(2, stride=2, ceil_mode=True) self.stage2 = RSU6(64, 32, 128) @@ -153,10 +153,10 @@ class U2Net(nn.Layer): class U2Netp(nn.Layer): """Please Refer to U2Net above.""" - def __init__(self, num_classes, in_ch=3, pretrained=None): + def __init__(self, num_classes, in_channels=3, pretrained=None): super(U2Netp, self).__init__() - self.stage1 = RSU7(in_ch, 16, 64) + self.stage1 = RSU7(in_channels, 16, 64) self.pool12 = nn.MaxPool2D(2, stride=2, ceil_mode=True) self.stage2 = RSU6(64, 16, 64) diff --git a/paddlers/models/ppseg/models/unet.py b/paddlers/models/ppseg/models/unet.py index bb2a99a..8ef1cd8 100644 --- a/paddlers/models/ppseg/models/unet.py +++ b/paddlers/models/ppseg/models/unet.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddlers.models.ppseg import utils -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.models import layers +from paddleseg import utils +from paddleseg.cvlibs import manager +from paddleseg.models import layers @manager.MODELS.add_component @@ -36,18 +36,19 @@ class UNet(nn.Layer): is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False. use_deconv (bool, optional): A bool value indicates whether using deconvolution in upsampling. If False, use resize_bilinear. Default: False. + in_channels (int, optional): The channels of input image. Default: 3. pretrained (str, optional): The path or url of pretrained model for fine tuning. Default: None. 
""" def __init__(self, num_classes, - input_channel=3, align_corners=False, use_deconv=False, + in_channels=3, pretrained=None): super().__init__() - self.encode = Encoder(input_channel) + self.encode = Encoder(in_channels) self.decode = Decoder(align_corners, use_deconv=use_deconv) self.cls = self.conv = nn.Conv2D( in_channels=64, @@ -73,12 +74,11 @@ class UNet(nn.Layer): class Encoder(nn.Layer): - def __init__(self, input_channel=3): + def __init__(self, in_channels=3): super().__init__() self.double_conv = nn.Sequential( - layers.ConvBNReLU(input_channel, 64, 3), - layers.ConvBNReLU(64, 64, 3)) + layers.ConvBNReLU(in_channels, 64, 3), layers.ConvBNReLU(64, 64, 3)) down_channels = [[64, 128], [128, 256], [256, 512], [512, 512]] self.down_sample_list = nn.LayerList([ self.down_sampling(channel[0], channel[1]) diff --git a/paddlers/models/ppseg/models/unet_3plus.py b/paddlers/models/ppseg/models/unet_3plus.py index 57e55b9..55c4520 100644 --- a/paddlers/models/ppseg/models/unet_3plus.py +++ b/paddlers/models/ppseg/models/unet_3plus.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.models.layers.layer_libs import SyncBatchNorm -from paddlers.models.ppseg.cvlibs.param_init import kaiming_normal_init +from paddleseg.cvlibs import manager +from paddleseg.models.layers.layer_libs import SyncBatchNorm +from paddleseg.cvlibs.param_init import kaiming_normal_init @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/unet_plusplus.py b/paddlers/models/ppseg/models/unet_plusplus.py index e0cfe1b..0d728d6 100644 --- a/paddlers/models/ppseg/models/unet_plusplus.py +++ b/paddlers/models/ppseg/models/unet_plusplus.py @@ -15,10 +15,10 @@ import paddle import paddle.nn as nn -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.utils import load_entire_model -from paddlers.models.ppseg.cvlibs.param_init import 
kaiming_normal_init -from paddlers.models.ppseg.models.layers.layer_libs import SyncBatchNorm +from paddleseg.cvlibs import manager +from paddleseg.utils import load_entire_model +from paddleseg.cvlibs.param_init import kaiming_normal_init +from paddleseg.models.layers.layer_libs import SyncBatchNorm @manager.MODELS.add_component @@ -31,8 +31,8 @@ class UNetPlusPlus(nn.Layer): (https://arxiv.org/abs/1807.10165). Args: - in_channels (int): The channel number of input image. num_classes (int): The unique number of target classes. + in_channels (int, optional): The channel number of input image. Default: 3. use_deconv (bool, optional): A bool value indicates whether using deconvolution in upsampling. If False, use resize_bilinear. Default: False. align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature @@ -42,8 +42,8 @@ class UNetPlusPlus(nn.Layer): """ def __init__(self, - in_channels, num_classes, + in_channels=3, use_deconv=False, align_corners=False, pretrained=None, diff --git a/paddlers/models/ppseg/models/upernet.py b/paddlers/models/ppseg/models/upernet.py new file mode 100644 index 0000000..e024632 --- /dev/null +++ b/paddlers/models/ppseg/models/upernet.py @@ -0,0 +1,173 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + +from paddleseg import utils +from paddleseg.cvlibs import manager +from paddleseg.models import layers + + +@manager.MODELS.add_component +class UPerNet(nn.Layer): + """ + The UPerNet implementation based on PaddlePaddle. + + The original article refers to + Tete Xiao, et, al. "Unified Perceptual Parsing for Scene Understanding" + (https://arxiv.org/abs/1807.10221). + + Args: + num_classes (int): The unique number of target classes. + backbone (Paddle.nn.Layer): Backbone network, currently support Resnet50/101. + backbone_indices (tuple): Four values in the tuple indicate the indices of output of backbone. + channels (int): The channels of inter layers. Default: 512. + enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss. Default: False. + align_corners (bool, optional): An argument of F.interpolate. It should be set to False when the feature size is even, + e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False. + dropout_prob (float): Dropout ratio for upernet head. Default: 0.1. + pretrained (str, optional): The path or url of pretrained model. Default: None. 
+ """ + + def __init__(self, + num_classes, + backbone, + backbone_indices, + channels=512, + enable_auxiliary_loss=False, + align_corners=False, + dropout_prob=0.1, + pretrained=None): + super().__init__() + self.backbone = backbone + self.backbone_indices = backbone_indices + self.in_channels = [ + self.backbone.feat_channels[i] for i in backbone_indices + ] + self.align_corners = align_corners + self.pretrained = pretrained + self.enable_auxiliary_loss = enable_auxiliary_loss + + fpn_inplanes = [ + self.backbone.feat_channels[i] for i in backbone_indices + ] + self.head = UPerNetHead( + num_classes=num_classes, + fpn_inplanes=fpn_inplanes, + dropout_prob=dropout_prob, + channels=channels, + enable_auxiliary_loss=self.enable_auxiliary_loss) + self.init_weight() + + def forward(self, x): + feats = self.backbone(x) + feats = [feats[i] for i in self.backbone_indices] + logit_list = self.head(feats) + logit_list = [ + F.interpolate( + logit, + paddle.shape(x)[2:], + mode='bilinear', + align_corners=self.align_corners) for logit in logit_list + ] + return logit_list + + def init_weight(self): + if self.pretrained is not None: + utils.load_entire_model(self, self.pretrained) + + +class UPerNetHead(nn.Layer): + def __init__(self, + num_classes, + fpn_inplanes, + channels, + dropout_prob=0.1, + enable_auxiliary_loss=False, + align_corners=True): + super(UPerNetHead, self).__init__() + self.align_corners = align_corners + self.ppm = layers.PPModule( + in_channels=fpn_inplanes[-1], + out_channels=channels, + bin_sizes=(1, 2, 3, 6), + dim_reduction=True, + align_corners=True) + self.enable_auxiliary_loss = enable_auxiliary_loss + self.lateral_convs = nn.LayerList() + self.fpn_convs = nn.LayerList() + + for fpn_inplane in fpn_inplanes[:-1]: + self.lateral_convs.append( + layers.ConvBNReLU(fpn_inplane, channels, 1)) + self.fpn_convs.append( + layers.ConvBNReLU( + channels, channels, 3, bias_attr=False)) + + if self.enable_auxiliary_loss: + self.aux_head = layers.AuxLayer( + 
fpn_inplanes[2], + fpn_inplanes[2], + num_classes, + dropout_prob=dropout_prob) + + self.fpn_bottleneck = layers.ConvBNReLU( + len(fpn_inplanes) * channels, channels, 3, padding=1) + + self.conv_last = nn.Sequential( + layers.ConvBNReLU( + len(fpn_inplanes) * channels, channels, 3, bias_attr=False), + nn.Conv2D( + channels, num_classes, kernel_size=1)) + self.conv_seg = nn.Conv2D(channels, num_classes, kernel_size=1) + + def forward(self, inputs): + laterals = [] + for i, lateral_conv in enumerate(self.lateral_convs): + laterals.append(lateral_conv(inputs[i])) + + laterals.append(self.ppm(inputs[-1])) + fpn_levels = len(laterals) + for i in range(fpn_levels - 1, 0, -1): + prev_shape = paddle.shape(laterals[i - 1]) + laterals[i - 1] = laterals[i - 1] + F.interpolate( + laterals[i], + size=prev_shape[2:], + mode='bilinear', + align_corners=self.align_corners) + + fpn_outs = [] + for i in range(fpn_levels - 1): + fpn_outs.append(self.fpn_convs[i](laterals[i])) + fpn_outs.append(laterals[-1]) + + for i in range(fpn_levels - 1, 0, -1): + fpn_outs[i] = F.interpolate( + fpn_outs[i], + size=paddle.shape(fpn_outs[0])[2:], + mode='bilinear', + align_corners=self.align_corners) + fuse_out = paddle.concat(fpn_outs, axis=1) + x = self.fpn_bottleneck(fuse_out) + + x = self.conv_seg(x) + logits_list = [x] + if self.enable_auxiliary_loss: + aux_out = self.aux_head(inputs[2]) + logits_list.append(aux_out) + return logits_list + else: + return logits_list diff --git a/paddlers/models/ppseg/transforms/functional.py b/paddlers/models/ppseg/transforms/functional.py index cb26b6e..ca8b475 100644 --- a/paddlers/models/ppseg/transforms/functional.py +++ b/paddlers/models/ppseg/transforms/functional.py @@ -15,7 +15,14 @@ import cv2 import numpy as np from PIL import Image, ImageEnhance -from scipy.ndimage.morphology import distance_transform_edt +from scipy.ndimage import distance_transform_edt + + +def rescale_size(img_size, target_size): + scale = min( + max(target_size) / max(img_size), 
min(target_size) / min(img_size)) + rescaled_size = [round(i * scale) for i in img_size] + return rescaled_size, scale def normalize(im, mean, std): diff --git a/paddlers/models/ppseg/transforms/transforms.py b/paddlers/models/ppseg/transforms/transforms.py index 5f2b640..f73186d 100644 --- a/paddlers/models/ppseg/transforms/transforms.py +++ b/paddlers/models/ppseg/transforms/transforms.py @@ -19,8 +19,9 @@ import cv2 import numpy as np from PIL import Image -from paddlers.models.ppseg.cvlibs import manager -from paddlers.models.ppseg.transforms import functional +from paddleseg.cvlibs import manager +from paddleseg.transforms import functional +from paddleseg.utils import logger @manager.TRANSFORMS.add_component @@ -32,43 +33,63 @@ class Compose: Args: transforms (list): A list contains data pre-processing or augmentation. Empty list means only reading images, no transformation. to_rgb (bool, optional): If converting image to RGB color space. Default: True. + img_channels (int, optional): The image channels used to check the loaded image. Default: 3. Raises: TypeError: When 'transforms' is not a list. ValueError: when the length of 'transforms' is less than 1. """ - def __init__(self, transforms, to_rgb=True): + def __init__(self, transforms, to_rgb=True, img_channels=3): if not isinstance(transforms, list): raise TypeError('The transforms must be a list!') self.transforms = transforms self.to_rgb = to_rgb + self.img_channels = img_channels + self.read_flag = cv2.IMREAD_GRAYSCALE if img_channels == 1 else cv2.IMREAD_COLOR - def __call__(self, im, label=None): + def __call__(self, data): """ Args: - im (str|np.ndarray): It is either image path or image object. - label (str|np.ndarray): It is either label path or label ndarray. + data: A dict to deal with. It may include keys: 'img', 'label', 'trans_info' and 'gt_fields'. + 'trans_info' reserve the image shape informating. 
And the 'gt_fields' save the key need to transforms + together with 'img' - Returns: - (tuple). A tuple including image, image info, and label after transformation. + Returns: A dict after process。 """ - if isinstance(im, str): - im = cv2.imread(im).astype('float32') - if isinstance(label, str): - label = np.asarray(Image.open(label)) - if im is None: - raise ValueError('Can\'t read The image file {}!'.format(im)) - if self.to_rgb: - im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) + if 'img' not in data.keys(): + raise ValueError("`data` must include `img` key.") + if isinstance(data['img'], str): + data['img'] = cv2.imread(data['img'], + self.read_flag).astype('float32') + if data['img'] is None: + raise ValueError('Can\'t read The image file {}!'.format(data[ + 'img'])) + if not isinstance(data['img'], np.ndarray): + raise TypeError("Image type is not numpy.") + + img_channels = 1 if data['img'].ndim == 2 else data['img'].shape[2] + if img_channels != self.img_channels: + raise ValueError( + 'The img_channels ({}) is not equal to the channel of loaded image ({})'. + format(self.img_channels, img_channels)) + if self.to_rgb and img_channels == 3: + data['img'] = cv2.cvtColor(data['img'], cv2.COLOR_BGR2RGB) + + if 'label' in data.keys() and isinstance(data['label'], str): + data['label'] = np.asarray(Image.open(data['label'])) + + # the `trans_info` will save the process of image shape, and will be used in evaluation and prediction. 
+ if 'trans_info' not in data.keys(): + data['trans_info'] = [] for op in self.transforms: - outputs = op(im, label) - im = outputs[0] - if len(outputs) == 2: - label = outputs[1] - im = np.transpose(im, (2, 0, 1)) - return (im, label) + data = op(data) + + if data['img'].ndim == 2: + data['img'] = data['img'][..., np.newaxis] + data['img'] = np.transpose(data['img'], (2, 0, 1)) + return data @manager.TRANSFORMS.add_component @@ -83,15 +104,12 @@ class RandomHorizontalFlip: def __init__(self, prob=0.5): self.prob = prob - def __call__(self, im, label=None): + def __call__(self, data): if random.random() < self.prob: - im = functional.horizontal_flip(im) - if label is not None: - label = functional.horizontal_flip(label) - if label is None: - return (im, ) - else: - return (im, label) + data['img'] = functional.horizontal_flip(data['img']) + for key in data.get('gt_fields', []): + data[key] = functional.horizontal_flip(data[key]) + return data @manager.TRANSFORMS.add_component @@ -106,15 +124,12 @@ class RandomVerticalFlip: def __init__(self, prob=0.1): self.prob = prob - def __call__(self, im, label=None): + def __call__(self, data): if random.random() < self.prob: - im = functional.vertical_flip(im) - if label is not None: - label = functional.vertical_flip(label) - if label is None: - return (im, ) - else: - return (im, label) + data['img'] = functional.vertical_flip(data['img']) + for key in data.get('gt_fields', []): + data[key] = functional.vertical_flip(data[key]) + return data @manager.TRANSFORMS.add_component @@ -123,7 +138,11 @@ class Resize: Resize an image. Args: - target_size (list|tuple, optional): The target size of image. Default: (512, 512). + target_size (list|tuple, optional): The target size (w, h) of image. Default: (512, 512). + keep_ratio (bool, optional): Whether to keep the same ratio for width and height in resizing. + Default: False. 
+ size_divisor (int, optional): If size_divisor is not None, make the width and height be the times + of size_divisor. Default: None. interp (str, optional): The interpolation mode of resize is consistent with opencv. ['NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4', 'RANDOM']. Note that when it is 'RANDOM', a random interpolation mode would be specified. Default: "LINEAR". @@ -143,11 +162,11 @@ class Resize: 'LANCZOS4': cv2.INTER_LANCZOS4 } - def __init__(self, target_size=(512, 512), interp='LINEAR'): - self.interp = interp - if not (interp == "RANDOM" or interp in self.interp_dict): - raise ValueError("`interp` should be one of {}".format( - self.interp_dict.keys())) + def __init__(self, + target_size=(512, 512), + keep_ratio=False, + size_divisor=None, + interp='LINEAR'): if isinstance(target_size, list) or isinstance(target_size, tuple): if len(target_size) != 2: raise ValueError( @@ -157,40 +176,42 @@ class Resize: raise TypeError( "Type of `target_size` is invalid. It should be list or tuple, but it is {}" .format(type(target_size))) + if not (interp == "RANDOM" or interp in self.interp_dict): + raise ValueError("`interp` should be one of {}".format( + self.interp_dict.keys())) + if size_divisor is not None: + assert isinstance(size_divisor, + int), "size_divisor should be None or int" self.target_size = target_size + self.keep_ratio = keep_ratio + self.size_divisor = size_divisor + self.interp = interp - def __call__(self, im, label=None): - """ - Args: - im (np.ndarray): The Image data. - label (np.ndarray, optional): The label data. Default: None. - - Returns: - (tuple). When label is None, it returns (im, ), otherwise it returns (im, label), - - Raises: - TypeError: When the 'img' type is not numpy. - ValueError: When the length of "im" shape is not 3. 
- """ - - if not isinstance(im, np.ndarray): - raise TypeError("Resize: image type is not numpy.") - if len(im.shape) != 3: - raise ValueError('Resize: image is not 3-dimensional.') + def __call__(self, data): + data['trans_info'].append(('resize', data['img'].shape[0:2])) if self.interp == "RANDOM": interp = random.choice(list(self.interp_dict.keys())) else: interp = self.interp - im = functional.resize(im, self.target_size, self.interp_dict[interp]) - if label is not None: - label = functional.resize(label, self.target_size, - cv2.INTER_NEAREST) - if label is None: - return (im, ) - else: - return (im, label) + target_size = self.target_size + if self.keep_ratio: + h, w = data['img'].shape[0:2] + target_size, _ = functional.rescale_size((w, h), self.target_size) + if self.size_divisor: + target_size = [ + math.ceil(i / self.size_divisor) * self.size_divisor + for i in target_size + ] + + data['img'] = functional.resize(data['img'], target_size, + self.interp_dict[interp]) + for key in data.get('gt_fields', []): + data[key] = functional.resize(data[key], target_size, + cv2.INTER_NEAREST) + + return data @manager.TRANSFORMS.add_component @@ -205,25 +226,14 @@ class ResizeByLong: def __init__(self, long_size): self.long_size = long_size - def __call__(self, im, label=None): - """ - Args: - im (np.ndarray): The Image data. - label (np.ndarray, optional): The label data. Default: None. - - Returns: - (tuple). When label is None, it returns (im, ), otherwise it returns (im, label). 
- """ - - im = functional.resize_long(im, self.long_size) - if label is not None: - label = functional.resize_long(label, self.long_size, - cv2.INTER_NEAREST) + def __call__(self, data): + data['trans_info'].append(('resize', data['img'].shape[0:2])) + data['img'] = functional.resize_long(data['img'], self.long_size) + for key in data.get('gt_fields', []): + data[key] = functional.resize_long(data[key], self.long_size, + cv2.INTER_NEAREST) - if label is None: - return (im, ) - else: - return (im, label) + return data @manager.TRANSFORMS.add_component @@ -238,25 +248,14 @@ class ResizeByShort: def __init__(self, short_size): self.short_size = short_size - def __call__(self, im, label=None): - """ - Args: - im (np.ndarray): The Image data. - label (np.ndarray, optional): The label data. Default: None. - - Returns: - (tuple). When label is None, it returns (im, ), otherwise it returns (im, label). - """ + def __call__(self, data): + data['trans_info'].append(('resize', data['img'].shape[0:2])) + data['img'] = functional.resize_short(data['img'], self.short_size) + for key in data.get('gt_fields', []): + data[key] = functional.resize_short(data[key], self.short_size, + cv2.INTER_NEAREST) - im = functional.resize_short(im, self.short_size) - if label is not None: - label = functional.resize_short(label, self.short_size, - cv2.INTER_NEAREST) - - if label is None: - return (im, ) - else: - return (im, label) + return data @manager.TRANSFORMS.add_component @@ -296,16 +295,10 @@ class LimitLong: self.max_long = max_long self.min_long = min_long - def __call__(self, im, label=None): - """ - Args: - im (np.ndarray): The Image data. - label (np.ndarray, optional): The label data. Default: None. + def __call__(self, data): + data['trans_info'].append(('resize', data['img'].shape[0:2])) - Returns: - (tuple). When label is None, it returns (im, ), otherwise it returns (im, label). 
- """ - h, w = im.shape[0], im.shape[1] + h, w = data['img'].shape[0], data['img'].shape[1] long_edge = max(h, w) target = long_edge if (self.max_long is not None) and (long_edge > self.max_long): @@ -314,14 +307,12 @@ class LimitLong: target = self.min_long if target != long_edge: - im = functional.resize_long(im, target) - if label is not None: - label = functional.resize_long(label, target, cv2.INTER_NEAREST) + data['img'] = functional.resize_long(data['img'], target) + for key in data.get('gt_fields', []): + data[key] = functional.resize_long(data[key], target, + cv2.INTER_NEAREST) - if label is None: - return (im, ) - else: - return (im, label) + return data @manager.TRANSFORMS.add_component @@ -342,30 +333,20 @@ class ResizeRangeScaling: self.min_value = min_value self.max_value = max_value - def __call__(self, im, label=None): - """ - Args: - im (np.ndarray): The Image data. - label (np.ndarray, optional): The label data. Default: None. - - Returns: - (tuple). When label is None, it returns (im, ), otherwise it returns (im, label). - """ + def __call__(self, data): if self.min_value == self.max_value: random_size = self.max_value else: random_size = int( np.random.uniform(self.min_value, self.max_value) + 0.5) - im = functional.resize_long(im, random_size, cv2.INTER_LINEAR) - if label is not None: - label = functional.resize_long(label, random_size, - cv2.INTER_NEAREST) + data['img'] = functional.resize_long(data['img'], random_size, + cv2.INTER_LINEAR) + for key in data.get('gt_fields', []): + data[key] = functional.resize_long(data[key], random_size, + cv2.INTER_NEAREST) - if label is None: - return (im, ) - else: - return (im, label) + return data @manager.TRANSFORMS.add_component @@ -395,15 +376,7 @@ class ResizeStepScaling: self.max_scale_factor = max_scale_factor self.scale_step_size = scale_step_size - def __call__(self, im, label=None): - """ - Args: - im (np.ndarray): The Image data. - label (np.ndarray, optional): The label data. Default: None. 
- - Returns: - (tuple). When label is None, it returns (im, ), otherwise it returns (im, label). - """ + def __call__(self, data): if self.min_scale_factor == self.max_scale_factor: scale_factor = self.min_scale_factor @@ -420,17 +393,14 @@ class ResizeStepScaling: num_steps).tolist() np.random.shuffle(scale_factors) scale_factor = scale_factors[0] - w = int(round(scale_factor * im.shape[1])) - h = int(round(scale_factor * im.shape[0])) + w = int(round(scale_factor * data['img'].shape[1])) + h = int(round(scale_factor * data['img'].shape[0])) - im = functional.resize(im, (w, h), cv2.INTER_LINEAR) - if label is not None: - label = functional.resize(label, (w, h), cv2.INTER_NEAREST) + data['img'] = functional.resize(data['img'], (w, h), cv2.INTER_LINEAR) + for key in data.get('gt_fields', []): + data[key] = functional.resize(data[key], (w, h), cv2.INTER_NEAREST) - if label is None: - return (im, ) - else: - return (im, label) + return data @manager.TRANSFORMS.add_component @@ -439,55 +409,40 @@ class Normalize: Normalize an image. Args: - mean (list, optional): The mean value of a data set. Default: [0.5, 0.5, 0.5]. - std (list, optional): The standard deviation of a data set. Default: [0.5, 0.5, 0.5]. + mean (list, optional): The mean value of a data set. Default: [0.5,]. + std (list, optional): The standard deviation of a data set. Default: [0.5,]. Raises: ValueError: When mean/std is not list or any value in std is 0. """ - def __init__(self, mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)): - self.mean = mean - self.std = std - if not (isinstance(self.mean, - (list, tuple)) and isinstance(self.std, - (list, tuple))): + def __init__(self, mean=(0.5, ), std=(0.5, )): + if not (isinstance(mean, (list, tuple)) and isinstance(std, (list, tuple))) \ + and (len(mean) not in [1, 3]) and (len(std) not in [1, 3]): raise ValueError( - "{}: input type is invalid. It should be list or tuple".format( - self)) + "{}: input type is invalid. 
It should be list or tuple with the lenght of 1 or 3". + format(self)) + self.mean = np.array(mean) + self.std = np.array(std) + from functools import reduce if reduce(lambda x, y: x * y, self.std) == 0: raise ValueError('{}: std is invalid!'.format(self)) - def __call__(self, im, label=None): - """ - Args: - im (np.ndarray): The Image data. - label (np.ndarray, optional): The label data. Default: None. - - Returns: - (tuple). When label is None, it returns (im, ), otherwise it returns (im, label). - """ - - mean = np.array(self.mean)[np.newaxis, np.newaxis, :] - std = np.array(self.std)[np.newaxis, np.newaxis, :] - im = functional.normalize(im, mean, std) - - if label is None: - return (im, ) - else: - return (im, label) + def __call__(self, data): + data['img'] = functional.normalize(data['img'], self.mean, self.std) + return data @manager.TRANSFORMS.add_component -class Pad: +class Padding: """ Add bottom-right padding to a raw image or annotation image. Args: target_size (list|tuple): The target size after padding. - im_padding_value (list, optional): The padding value of raw image. - Default: [127.5, 127.5, 127.5]. + im_padding_value (float, optional): The padding value of raw image. + Default: 127.5. label_padding_value (int, optional): The padding value of annotation image. Default: 255. Raises: @@ -497,7 +452,7 @@ class Pad: def __init__(self, target_size, - im_padding_value=(127.5, 127.5, 127.5), + im_padding_value=127.5, label_padding_value=255): if isinstance(target_size, list) or isinstance(target_size, tuple): if len(target_size) != 2: @@ -512,17 +467,9 @@ class Pad: self.im_padding_value = im_padding_value self.label_padding_value = label_padding_value - def __call__(self, im, label=None): - """ - Args: - im (np.ndarray): The Image data. - label (np.ndarray, optional): The label data. Default: None. - - Returns: - (tuple). When label is None, it returns (im, ), otherwise it returns (im, label). 
- """ - - im_height, im_width = im.shape[0], im.shape[1] + def __call__(self, data): + data['trans_info'].append(('padding', data['img'].shape[0:2])) + im_height, im_width = data['img'].shape[0], data['img'].shape[1] if isinstance(self.target_size, int): target_height = self.target_size target_width = self.target_size @@ -536,83 +483,72 @@ class Pad: 'The size of image should be less than `target_size`, but the size of image ({}, {}) is larger than `target_size` ({}, {})' .format(im_width, im_height, target_width, target_height)) else: - im = cv2.copyMakeBorder( - im, + img_channels = 1 if data['img'].ndim == 2 else data['img'].shape[2] + data['img'] = cv2.copyMakeBorder( + data['img'], 0, pad_height, 0, pad_width, cv2.BORDER_CONSTANT, - value=self.im_padding_value) - if label is not None: - label = cv2.copyMakeBorder( - label, + value=(self.im_padding_value, ) * img_channels) + for key in data.get('gt_fields', []): + data[key] = cv2.copyMakeBorder( + data[key], 0, pad_height, 0, pad_width, cv2.BORDER_CONSTANT, value=self.label_padding_value) - if label is None: - return (im, ) - else: - return (im, label) + return data @manager.TRANSFORMS.add_component -class PadByAspectRatio: +class PaddingByAspectRatio: """ Args: aspect_ratio (int|float, optional): The aspect ratio = width / height. Default: 1. + im_padding_value (float, optional): The padding value of raw image. Default: 127.5. + label_padding_value (int, optional): The padding value of annotation image. Default: 255. """ def __init__(self, aspect_ratio=1, - im_padding_value=(127.5, 127.5, 127.5), + im_padding_value=127.5, label_padding_value=255): self.aspect_ratio = aspect_ratio self.im_padding_value = im_padding_value self.label_padding_value = label_padding_value - def __call__(self, im, label=None): - """ - Args: - im (np.ndarray): The Image data. - label (np.ndarray, optional): The label data. Default: None. - - Returns: - (tuple). When label is None, it returns (im, ), otherwise it returns (im, label). 
- """ + def __call__(self, data): - img_height = im.shape[0] - img_width = im.shape[1] + img_height = data['img'].shape[0] + img_width = data['img'].shape[1] ratio = img_width / img_height if ratio == self.aspect_ratio: - if label is None: - return (im, ) - else: - return (im, label) + return data elif ratio > self.aspect_ratio: img_height = int(img_width / self.aspect_ratio) else: img_width = int(img_height * self.aspect_ratio) - padding = Pad((img_width, img_height), - im_padding_value=self.im_padding_value, - label_padding_value=self.label_padding_value) - return padding(im, label) + padding = Padding( + (img_width, img_height), + im_padding_value=self.im_padding_value, + label_padding_value=self.label_padding_value) + return padding(data) @manager.TRANSFORMS.add_component -class RandomPadCrop: +class RandomPaddingCrop: """ Crop a sub-image from a raw image and annotation image randomly. If the target cropping size is larger than original image, then the bottom-right padding will be added. Args: crop_size (tuple, optional): The target cropping size. Default: (512, 512). - im_padding_value (list, optional): The padding value of raw image. - Default: [127.5, 127.5, 127.5]. + im_padding_value (float, optional): The padding value of raw image. Default: 127.5. label_padding_value (int, optional): The padding value of annotation image. Default: 255. Raises: @@ -622,7 +558,7 @@ class RandomPadCrop: def __init__(self, crop_size=(512, 512), - im_padding_value=(127.5, 127.5, 127.5), + im_padding_value=127.5, label_padding_value=255): if isinstance(crop_size, list) or isinstance(crop_size, tuple): if len(crop_size) != 2: @@ -637,15 +573,7 @@ class RandomPadCrop: self.im_padding_value = im_padding_value self.label_padding_value = label_padding_value - def __call__(self, im, label=None): - """ - Args: - im (np.ndarray): The Image data. - label (np.ndarray, optional): The label data. Default: None. - - Returns: - (tuple). 
When label is None, it returns (im, ), otherwise it returns (im, label). - """ + def __call__(self, data): if isinstance(self.crop_size, int): crop_width = self.crop_size @@ -654,51 +582,50 @@ class RandomPadCrop: crop_width = self.crop_size[0] crop_height = self.crop_size[1] - img_height = im.shape[0] - img_width = im.shape[1] + img_height = data['img'].shape[0] + img_width = data['img'].shape[1] if img_height == crop_height and img_width == crop_width: - if label is None: - return (im, ) - else: - return (im, label) + return data else: pad_height = max(crop_height - img_height, 0) pad_width = max(crop_width - img_width, 0) + img_channels = 1 if data['img'].ndim == 2 else data['img'].shape[2] if (pad_height > 0 or pad_width > 0): - im = cv2.copyMakeBorder( - im, + data['img'] = cv2.copyMakeBorder( + data['img'], 0, pad_height, 0, pad_width, cv2.BORDER_CONSTANT, - value=self.im_padding_value) - if label is not None: - label = cv2.copyMakeBorder( - label, + value=(self.im_padding_value, ) * img_channels) + for key in data.get('gt_fields', []): + data[key] = cv2.copyMakeBorder( + data[key], 0, pad_height, 0, pad_width, cv2.BORDER_CONSTANT, value=self.label_padding_value) - img_height = im.shape[0] - img_width = im.shape[1] + img_height = data['img'].shape[0] + img_width = data['img'].shape[1] if crop_height > 0 and crop_width > 0: h_off = np.random.randint(img_height - crop_height + 1) w_off = np.random.randint(img_width - crop_width + 1) - im = im[h_off:(crop_height + h_off), w_off:(w_off + crop_width - ), :] - if label is not None: - label = label[h_off:(crop_height + h_off), w_off:( + if data['img'].ndim == 2: + data['img'] = data['img'][h_off:(crop_height + h_off), + w_off:(w_off + crop_width)] + else: + data['img'] = data['img'][h_off:(crop_height + h_off), + w_off:(w_off + crop_width), :] + for key in data.get('gt_fields', []): + data[key] = data[key][h_off:(crop_height + h_off), w_off:( w_off + crop_width)] - if label is None: - return (im, ) - else: - return 
(im, label) + return data @manager.TRANSFORMS.add_component @@ -718,8 +645,8 @@ class RandomCenterCrop: if isinstance(retain_ratio, list) or isinstance(retain_ratio, tuple): if len(retain_ratio) != 2: raise ValueError( - 'When type of `retain_ratio` is list or tuple, it shoule include 2 elements, but it is {}'. - format(retain_ratio)) + 'When type of `retain_ratio` is list or tuple, it shoule include 2 elements, but it is {}' + .format(retain_ratio)) if retain_ratio[0] > 1 or retain_ratio[1] > 1 or retain_ratio[ 0] < 0 or retain_ratio[1] < 0: raise ValueError( @@ -731,51 +658,40 @@ class RandomCenterCrop: .format(type(retain_ratio))) self.retain_ratio = retain_ratio - def __call__(self, im, label=None): - """ - Args: - im (np.ndarray): The Image data. - label (np.ndarray, optional): The label data. Default: None. - Returns: - (tuple). When label is None, it returns (im, ), otherwise it returns (im, label). - """ + def __call__(self, data): retain_width = self.retain_ratio[0] retain_height = self.retain_ratio[1] - img_height = im.shape[0] - img_width = im.shape[1] + img_height = data['img'].shape[0] + img_width = data['img'].shape[1] if retain_width == 1. 
and retain_height == 1.: - if label is None: - return (im, ) - else: - return (im, label) + return data else: randw = np.random.randint(img_width * (1 - retain_width)) randh = np.random.randint(img_height * (1 - retain_height)) offsetw = 0 if randw == 0 else np.random.randint(randw) offseth = 0 if randh == 0 else np.random.randint(randh) p0, p1, p2, p3 = offseth, img_height + offseth - randh, offsetw, img_width + offsetw - randw - im = im[p0:p1, p2:p3, :] - if label is not None: - label = label[p0:p1, p2:p3, :] + if data['img'].ndim == 2: + data['img'] = data['img'][p0:p1, p2:p3] + else: + data['img'] = data['img'][p0:p1, p2:p3, :] + for key in data.get('gt_fields', []): + data[key] = data[key][p0:p1, p2:p3] - if label is None: - return (im, ) - else: - return (im, label) + return data @manager.TRANSFORMS.add_component -class ScalePad: +class ScalePadding: """ Add center padding to a raw image or annotation image,then scale the image to target size. Args: target_size (list|tuple, optional): The target size of image. Default: (512, 512). - im_padding_value (list, optional): The padding value of raw image. - Default: [127.5, 127.5, 127.5]. + im_padding_value (float, optional): The padding value of raw image. Default: 127.5 label_padding_value (int, optional): The padding value of annotation image. Default: 255. Raises: @@ -785,7 +701,7 @@ class ScalePad: def __init__(self, target_size=(512, 512), - im_padding_value=(127.5, 127.5, 127.5), + im_padding_value=127.5, label_padding_value=255): if isinstance(target_size, list) or isinstance(target_size, tuple): if len(target_size) != 2: @@ -801,45 +717,43 @@ class ScalePad: self.im_padding_value = im_padding_value self.label_padding_value = label_padding_value - def __call__(self, im, label=None): - """ - Args: - im (np.ndarray): The Image data. - label (np.ndarray, optional): The label data. Default: None. - - Returns: - (tuple). When label is None, it returns (im, ), otherwise it returns (im, label). 
- """ - height = im.shape[0] - width = im.shape[1] + def __call__(self, data): + height = data['img'].shape[0] + width = data['img'].shape[1] - new_im = np.zeros( - (max(height, width), max(height, width), 3)) + self.im_padding_value - if label is not None: + img_channels = 1 if data['img'].ndim == 2 else data['img'].shape[2] + if data['img'].ndim == 2: + new_im = np.zeros((max(height, width), max(height, width) + )) + self.im_padding_value + else: + new_im = np.zeros((max(height, width), max(height, width), + img_channels)) + self.im_padding_value + if 'label' in data['gt_fields']: new_label = np.zeros((max(height, width), max(height, width) )) + self.label_padding_value if height > width: padding = int((height - width) / 2) - new_im[:, padding:padding + width, :] = im - if label is not None: - new_label[:, padding:padding + width] = label + if data['img'].ndim == 2: + new_im[:, padding:padding + width] = data['img'] + else: + new_im[:, padding:padding + width, :] = data['img'] + if 'label' in data['gt_fields']: + new_label[:, padding:padding + width] = data['label'] else: padding = int((width - height) / 2) - new_im[padding:padding + height, :, :] = im - if label is not None: - new_label[padding:padding + height, :] = label - - im = np.uint8(new_im) - im = functional.resize(im, self.target_size, interp=cv2.INTER_CUBIC) - if label is not None: - label = np.uint8(new_label) - label = functional.resize( - label, self.target_size, interp=cv2.INTER_CUBIC) - if label is None: - return (im, ) - else: - return (im, label) + new_im[padding:padding + height, :] = data['img'] + if 'label' in data['gt_fields']: + new_label[padding:padding + height, :] = data['label'] + + data['img'] = np.uint8(new_im) + data['img'] = functional.resize( + data['img'], self.target_size, interp=cv2.INTER_CUBIC) + if 'label' in data['gt_fields']: + data['label'] = np.uint8(new_label) + data['label'] = functional.resize( + data['label'], self.target_size, interp=cv2.INTER_CUBIC) + return data 
@manager.TRANSFORMS.add_component @@ -857,27 +771,16 @@ class RandomNoise: self.prob = prob self.max_sigma = max_sigma - def __call__(self, im, label=None): - """ - Args: - im (np.ndarray): The Image data. - label (np.ndarray, optional): The label data. Default: None. - - Returns: - (tuple). When label is None, it returns (im, ), otherwise it returns (im, label). - """ + def __call__(self, data): if random.random() < self.prob: mu = 0 sigma = random.random() * self.max_sigma - im = np.array(im, dtype=np.float32) - im += np.random.normal(mu, sigma, im.shape) - im[im > 255] = 255 - im[im < 0] = 0 + data['img'] = np.array(data['img'], dtype=np.float32) + data['img'] += np.random.normal(mu, sigma, data['img'].shape) + data['img'][data['img'] > 255] = 255 + data['img'][data['img'] < 0] = 0 - if label is None: - return (im, ) - else: - return (im, label) + return data @manager.TRANSFORMS.add_component @@ -899,15 +802,7 @@ class RandomBlur: self.prob = prob self.blur_type = blur_type - def __call__(self, im, label=None): - """ - Args: - im (np.ndarray): The Image data. - label (np.ndarray, optional): The label data. Default: None. - - Returns: - (tuple). When label is None, it returns (im, ), otherwise it returns (im, label). 
- """ + def __call__(self, data): if self.prob <= 0: n = 0 @@ -922,28 +817,28 @@ class RandomBlur: radius = radius + 1 if radius > 9: radius = 9 - im = np.array(im, dtype='uint8') + data['img'] = np.array(data['img'], dtype='uint8') if self.blur_type == "gaussian": - im = cv2.GaussianBlur(im, (radius, radius), 0, 0) + data['img'] = cv2.GaussianBlur(data['img'], + (radius, radius), 0, 0) elif self.blur_type == "median": - im = cv2.medianBlur(im, radius) + data['img'] = cv2.medianBlur(data['img'], radius) elif self.blur_type == "blur": - im = cv2.blur(im, (radius, radius)) + data['img'] = cv2.blur(data['img'], (radius, radius)) elif self.blur_type == "random": select = random.random() if select < 0.3: - im = cv2.GaussianBlur(im, (radius, radius), 0) + data['img'] = cv2.GaussianBlur(data['img'], + (radius, radius), 0) elif select < 0.6: - im = cv2.medianBlur(im, radius) + data['img'] = cv2.medianBlur(data['img'], radius) else: - im = cv2.blur(im, (radius, radius)) + data['img'] = cv2.blur(data['img'], (radius, radius)) else: - im = cv2.GaussianBlur(im, (radius, radius), 0, 0) - im = np.array(im, dtype='float32') - if label is None: - return (im, ) - else: - return (im, label) + data['img'] = cv2.GaussianBlur(data['img'], + (radius, radius), 0, 0) + data['img'] = np.array(data['img'], dtype='float32') + return data @manager.TRANSFORMS.add_component @@ -953,31 +848,23 @@ class RandomRotation: Args: max_rotation (float, optional): The maximum rotation degree. Default: 15. - im_padding_value (list, optional): The padding value of raw image. - Default: [127.5, 127.5, 127.5]. + im_padding_value (float, optional): The padding value of raw image. Default: 127.5. label_padding_value (int, optional): The padding value of annotation image. Default: 255. 
""" def __init__(self, max_rotation=15, - im_padding_value=(127.5, 127.5, 127.5), + im_padding_value=127.5, label_padding_value=255): self.max_rotation = max_rotation self.im_padding_value = im_padding_value self.label_padding_value = label_padding_value - def __call__(self, im, label=None): - """ - Args: - im (np.ndarray): The Image data. - label (np.ndarray, optional): The label data. Default: None. - - Returns: - (tuple). When label is None, it returns (im, ), otherwise it returns (im, label). - """ + def __call__(self, data): if self.max_rotation > 0: - (h, w) = im.shape[:2] + (h, w) = data['img'].shape[:2] + img_channels = 1 if data['img'].ndim == 2 else data['img'].shape[2] do_rotation = np.random.uniform(-self.max_rotation, self.max_rotation) pc = (w // 2, h // 2) @@ -992,26 +879,23 @@ class RandomRotation: r[0, 2] += (nw / 2) - cx r[1, 2] += (nh / 2) - cy dsize = (nw, nh) - im = cv2.warpAffine( - im, + data['img'] = cv2.warpAffine( + data['img'], r, dsize=dsize, flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT, - borderValue=self.im_padding_value) - if label is not None: - label = cv2.warpAffine( - label, + borderValue=(self.im_padding_value, ) * img_channels) + for key in data.get('gt_fields', []): + data[key] = cv2.warpAffine( + data[key], r, dsize=dsize, flags=cv2.INTER_NEAREST, borderMode=cv2.BORDER_CONSTANT, borderValue=self.label_padding_value) - if label is None: - return (im, ) - else: - return (im, label) + return data @manager.TRANSFORMS.add_component @@ -1029,19 +913,11 @@ class RandomScaleAspect: self.min_scale = min_scale self.aspect_ratio = aspect_ratio - def __call__(self, im, label=None): - """ - Args: - im (np.ndarray): The Image data. - label (np.ndarray, optional): The label data. Default: None. - - Returns: - (tuple). When label is None, it returns (im, ), otherwise it returns (im, label). 
- """ + def __call__(self, data): if self.min_scale != 0 and self.aspect_ratio != 0: - img_height = im.shape[0] - img_width = im.shape[1] + img_height = data['img'].shape[0] + img_width = data['img'].shape[1] for i in range(0, 10): area = img_height * img_width target_area = area * np.random.uniform(self.min_scale, 1.0) @@ -1059,20 +935,20 @@ class RandomScaleAspect: h1 = np.random.randint(0, img_height - dh) w1 = np.random.randint(0, img_width - dw) - im = im[h1:(h1 + dh), w1:(w1 + dw), :] - im = cv2.resize( - im, (img_width, img_height), + if data['img'].ndim == 2: + data['img'] = data['img'][h1:(h1 + dh), w1:(w1 + dw)] + else: + data['img'] = data['img'][h1:(h1 + dh), w1:(w1 + dw), :] + data['img'] = cv2.resize( + data['img'], (img_width, img_height), interpolation=cv2.INTER_LINEAR) - if label is not None: - label = label[h1:(h1 + dh), w1:(w1 + dw)] - label = cv2.resize( - label, (img_width, img_height), + for key in data.get('gt_fields', []): + data[key] = data[key][h1:(h1 + dh), w1:(w1 + dw)] + data[key] = cv2.resize( + data[key], (img_width, img_height), interpolation=cv2.INTER_NEAREST) break - if label is None: - return (im, ) - else: - return (im, label) + return data @manager.TRANSFORMS.add_component @@ -1115,15 +991,7 @@ class RandomDistort: self.sharpness_range = sharpness_range self.sharpness_prob = sharpness_prob - def __call__(self, im, label=None): - """ - Args: - im (np.ndarray): The Image data. - label (np.ndarray, optional): The label data. Default: None. - - Returns: - (tuple). When label is None, it returns (im, ), otherwise it returns (im, label). 
- """ + def __call__(self, data): brightness_lower = 1 - self.brightness_range brightness_upper = 1 + self.brightness_range @@ -1137,8 +1005,10 @@ class RandomDistort: sharpness_upper = 1 + self.sharpness_range ops = [ functional.brightness, functional.contrast, functional.saturation, - functional.hue, functional.sharpness + functional.sharpness ] + if data['img'].ndim > 2: + ops.append(functional.hue) random.shuffle(ops) params_dict = { 'brightness': { @@ -1169,19 +1039,16 @@ class RandomDistort: 'hue': self.hue_prob, 'sharpness': self.sharpness_prob } - im = im.astype('uint8') - im = Image.fromarray(im) + data['img'] = data['img'].astype('uint8') + data['img'] = Image.fromarray(data['img']) for id in range(len(ops)): params = params_dict[ops[id].__name__] prob = prob_dict[ops[id].__name__] - params['im'] = im + params['im'] = data['img'] if np.random.uniform(0, 1) < prob: - im = ops[id](**params) - im = np.asarray(im).astype('float32') - if label is None: - return (im, ) - else: - return (im, label) + data['img'] = ops[id](**params) + data['img'] = np.asarray(data['img']).astype('float32') + return data @manager.TRANSFORMS.add_component @@ -1195,7 +1062,7 @@ class RandomAffine: max_rotation (float, optional): The maximum rotation degree. Default: 15. min_scale_factor (float, optional): The minimum scale. Default: 0.75. max_scale_factor (float, optional): The maximum scale. Default: 1.25. - im_padding_value (float, optional): The padding value of raw image. Default: (128, 128, 128). + im_padding_value (float, optional): The padding value of raw image. Default: 128. label_padding_value (int, optional): The padding value of annotation image. Default: (255, 255, 255). 
""" @@ -1205,8 +1072,8 @@ class RandomAffine: max_rotation=15, min_scale_factor=0.75, max_scale_factor=1.25, - im_padding_value=(128, 128, 128), - label_padding_value=(255, 255, 255)): + im_padding_value=128, + label_padding_value=255): self.size = size self.translation_offset = translation_offset self.max_rotation = max_rotation @@ -1215,18 +1082,10 @@ class RandomAffine: self.im_padding_value = im_padding_value self.label_padding_value = label_padding_value - def __call__(self, im, label=None): - """ - Args: - im (np.ndarray): The Image data. - label (np.ndarray, optional): The label data. Default: None. - - Returns: - (tuple). When label is None, it returns (im, ), otherwise it returns (im, label). - """ + def __call__(self, data): w, h = self.size - bbox = [0, 0, im.shape[1] - 1, im.shape[0] - 1] + bbox = [0, 0, data['img'].shape[1] - 1, data['img'].shape[0] - 1] x_offset = (random.random() - 0.5) * 2 * self.translation_offset y_offset = (random.random() - 0.5) * 2 * self.translation_offset dx = (w - (bbox[2] + bbox[0])) / 2.0 @@ -1250,21 +1109,20 @@ class RandomAffine: [0, 0, 1.0]]) matrix = matrix.dot(matrix_trans)[0:2, :] - im = cv2.warpAffine( - np.uint8(im), + img_channels = 1 if data['img'].ndim == 2 else data['img'].shape[2] + data['img'] = cv2.warpAffine( + np.uint8(data['img']), matrix, tuple(self.size), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT, - borderValue=self.im_padding_value) - if label is not None: - label = cv2.warpAffine( - np.uint8(label), + borderValue=(self.im_padding_value, ) * img_channels) + for key in data.get('gt_fields', []): + data[key] = cv2.warpAffine( + np.uint8(data[key]), matrix, tuple(self.size), flags=cv2.INTER_NEAREST, - borderMode=cv2.BORDER_CONSTANT) - if label is None: - return (im, ) - else: - return (im, label) + borderMode=cv2.BORDER_CONSTANT, + borderValue=self.label_padding_value) + return data diff --git a/paddlers/models/ppseg/utils/__init__.py b/paddlers/models/ppseg/utils/__init__.py index 
63c7894..dc01765 100644 --- a/paddlers/models/ppseg/utils/__init__.py +++ b/paddlers/models/ppseg/utils/__init__.py @@ -19,5 +19,4 @@ from .env import seg_env, get_sys_env from .utils import * from .timer import TimeAverager, calculate_eta from . import visualize -from .config_check import config_check from .ema import EMA diff --git a/paddlers/models/ppseg/utils/config_check.py b/paddlers/models/ppseg/utils/config_check.py deleted file mode 100644 index 47a7049..0000000 --- a/paddlers/models/ppseg/utils/config_check.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np - - -def config_check(cfg, train_dataset=None, val_dataset=None): - """ - To check config。 - - Args: - cfg (paddleseg.cvlibs.Config): An object of paddleseg.cvlibs.Config. - train_dataset (paddle.io.Dataset): Used to read and process training datasets. - val_dataset (paddle.io.Dataset, optional): Used to read and process validation datasets. - """ - - num_classes_check(cfg, train_dataset, val_dataset) - - -def num_classes_check(cfg, train_dataset, val_dataset): - """" - Check that the num_classes in model, train_dataset and val_dataset is consistent. 
- """ - num_classes_set = set() - if train_dataset and hasattr(train_dataset, 'num_classes'): - num_classes_set.add(train_dataset.num_classes) - if val_dataset and hasattr(val_dataset, 'num_classes'): - num_classes_set.add(val_dataset.num_classes) - if cfg.dic.get('model', None) and cfg.dic['model'].get('num_classes', None): - num_classes_set.add(cfg.dic['model'].get('num_classes')) - if (not cfg.train_dataset) and (not cfg.val_dataset): - raise ValueError( - 'One of `train_dataset` or `val_dataset should be given, but there are none.' - ) - if len(num_classes_set) == 0: - raise ValueError( - '`num_classes` is not found. Please set it in model, train_dataset or val_dataset' - ) - elif len(num_classes_set) > 1: - raise ValueError( - '`num_classes` is not consistent: {}. Please set it consistently in model or train_dataset or val_dataset' - .format(num_classes_set)) - else: - num_classes = num_classes_set.pop() - if train_dataset: - train_dataset.num_classes = num_classes - if val_dataset: - val_dataset.num_classes = num_classes diff --git a/paddlers/models/ppseg/utils/env/__init__.py b/paddlers/models/ppseg/utils/env/__init__.py index 9518a96..7af6e06 100644 --- a/paddlers/models/ppseg/utils/env/__init__.py +++ b/paddlers/models/ppseg/utils/env/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" # you may not use this file except in compliance with the License. diff --git a/paddlers/models/ppseg/utils/env/seg_env.py b/paddlers/models/ppseg/utils/env/seg_env.py index 791987e..cf11dbd 100644 --- a/paddlers/models/ppseg/utils/env/seg_env.py +++ b/paddlers/models/ppseg/utils/env/seg_env.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License" # you may not use this file except in compliance with the License. @@ -22,7 +22,7 @@ PRETRAINED_MODEL_HOME : The directory to store the automatically downloaded pret import os -from paddlers.models.ppseg.utils import logger +from paddleseg.utils import logger def _get_user_home(): diff --git a/paddlers/models/ppseg/utils/env/sys_env.py b/paddlers/models/ppseg/utils/env/sys_env.py index 921feb9..8681509 100644 --- a/paddlers/models/ppseg/utils/env/sys_env.py +++ b/paddlers/models/ppseg/utils/env/sys_env.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ import sys import cv2 import paddle +import paddleseg IS_WINDOWS = sys.platform == 'win32' @@ -57,8 +58,12 @@ def _get_nvcc_info(cuda_home): if cuda_home is not None and os.path.isdir(cuda_home): try: nvcc = os.path.join(cuda_home, 'bin/nvcc') - nvcc = subprocess.check_output( - "{} -V".format(nvcc), shell=True).decode() + if not IS_WINDOWS: + nvcc = subprocess.check_output( + "{} -V".format(nvcc), shell=True).decode() + else: + nvcc = subprocess.check_output( + "\"{}\" -V".format(nvcc), shell=True).decode() nvcc = nvcc.strip().split('\n')[-1] except subprocess.SubprocessError: nvcc = "Not Available" @@ -116,6 +121,7 @@ def get_sys_env(): except: pass + env_info['PaddleSeg'] = paddleseg.__version__ env_info['PaddlePaddle'] = paddle.__version__ env_info['OpenCV'] = cv2.__version__ diff --git a/paddlers/models/ppseg/utils/metrics.py b/paddlers/models/ppseg/utils/metrics.py index d457a08..ca765e5 100644 --- a/paddlers/models/ppseg/utils/metrics.py +++ b/paddlers/models/ppseg/utils/metrics.py @@ -135,37 +135,6 @@ def mean_iou(intersect_area, pred_area, label_area): return np.array(class_iou), miou -def 
fwiou(intersect_area, pred_area, label_area): - """ - Calculate iou. - - Args: - intersect_area (Tensor): The intersection area of prediction and ground truth on all classes. - pred_area (Tensor): The prediction area on all classes. - label_area (Tensor): The ground truth area on all classes. - - Returns: - np.ndarray: iou on all classes. - float: Frequency Weighted iou of all classes. - np.ndarray: Frequency of all classes. - """ - intersect_area = intersect_area.numpy() - pred_area = pred_area.numpy() - label_area = label_area.numpy() - union = pred_area + label_area - intersect_area - class_iou = [] - for i in range(len(intersect_area)): - if union[i] == 0: - iou = 0 - else: - iou = intersect_area[i] / union[i] - class_iou.append(iou) - fw = label_area / np.sum(label_area) - fwious = np.array(fw) * np.array(class_iou) - fwiou = np.sum(fwious) - return np.array(class_iou), fwiou, fw - - def dice(intersect_area, pred_area, label_area): """ Calculate DICE. @@ -194,6 +163,7 @@ def dice(intersect_area, pred_area, label_area): return np.array(class_dice), mdice +# This is a deprecated function, please use class_measurement function. def accuracy(intersect_area, pred_area): """ Calculate accuracy @@ -219,6 +189,38 @@ def accuracy(intersect_area, pred_area): return np.array(class_acc), macc +def class_measurement(intersect_area, pred_area, label_area): + """ + Calculate accuracy, class precision and class recall. + + Args: + intersect_area (Tensor): The intersection area of prediction and ground truth on all classes. + pred_area (Tensor): The prediction area on all classes. + label_area (Tensor): The ground truth area on all classes. + + Returns: + float: The mean accuracy. + np.ndarray: The precision of all classes. + np.ndarray: The recall of all classes. 
+ """ + intersect_area = intersect_area.numpy() + pred_area = pred_area.numpy() + label_area = label_area.numpy() + + mean_acc = np.sum(intersect_area) / np.sum(pred_area) + class_precision = [] + class_recall = [] + for i in range(len(intersect_area)): + precision = 0 if pred_area[i] == 0 \ + else intersect_area[i] / pred_area[i] + recall = 0 if label_area[i] == 0 \ + else intersect_area[i] / label_area[i] + class_precision.append(precision) + class_recall.append(recall) + + return mean_acc, np.array(class_precision), np.array(class_recall) + + def kappa(intersect_area, pred_area, label_area): """ Calculate kappa coefficient @@ -231,9 +233,9 @@ def kappa(intersect_area, pred_area, label_area): Returns: float: kappa coefficient. """ - intersect_area = intersect_area.numpy() - pred_area = pred_area.numpy() - label_area = label_area.numpy() + intersect_area = intersect_area.numpy().astype(np.float64) + pred_area = pred_area.numpy().astype(np.float64) + label_area = label_area.numpy().astype(np.float64) total_area = np.sum(label_area) po = np.sum(intersect_area) / total_area pe = np.sum(pred_area * label_area) / (total_area * total_area) diff --git a/paddlers/models/ppseg/utils/train_profiler.py b/paddlers/models/ppseg/utils/train_profiler.py index 2eedb2e..4b4d53b 100644 --- a/paddlers/models/ppseg/utils/train_profiler.py +++ b/paddlers/models/ppseg/utils/train_profiler.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/paddlers/models/ppseg/utils/utils.py b/paddlers/models/ppseg/utils/utils.py index 329b399..526d04e 100644 --- a/paddlers/models/ppseg/utils/utils.py +++ b/paddlers/models/ppseg/utils/utils.py @@ -22,8 +22,8 @@ from urllib.parse import urlparse, unquote import paddle -from paddlers.models.ppseg.utils import logger, seg_env -from paddlers.models.ppseg.utils.download import download_file_and_uncompress +from paddleseg.utils import logger, seg_env +from paddleseg.utils.download import download_file_and_uncompress @contextlib.contextmanager @@ -160,6 +160,8 @@ def get_image_list(image_path): for f in files: if '.ipynb_checkpoints' in root: continue + if f.startswith('.'): + continue if os.path.splitext(f)[-1] in valid_suffix: image_list.append(os.path.join(root, f)) else: diff --git a/paddlers/models/ppseg/utils/visualize.py b/paddlers/models/ppseg/utils/visualize.py index bc2947c..27211c4 100644 --- a/paddlers/models/ppseg/utils/visualize.py +++ b/paddlers/models/ppseg/utils/visualize.py @@ -63,7 +63,7 @@ def get_pseudo_color_map(pred, color_map=None): pred (numpy.ndarray): the origin predicted image. color_map (list, optional): the palette color map. Default: None, use paddleseg's default color map. - + Returns: (numpy.ndarray): the pseduo image. """ @@ -103,3 +103,41 @@ def get_color_map_list(num_classes, custom_color=None): if custom_color: color_map[:len(custom_color)] = custom_color return color_map + + +def paste_images(image_list): + """ + Paste all images side by side (left to right) into a single image. + Args: + image_list (List or Tuple): The images to be pasted and their size are the same. + Returns: + result_img (PIL.Image): The pasted image. 
+ """ + assert isinstance(image_list, + (list, tuple)), "image_list should be a list or tuple" + assert len( + image_list) > 1, "The length of image_list should be greater than 1" + + pil_img_list = [] + for img in image_list: + if isinstance(img, str): + assert os.path.exists(img), "The image is not existed: {}".format( + img) + img = PILImage.open(img) + img.load() # keep the PIL image: an ndarray here breaks .size unpacking and paste() below + elif isinstance(img, np.ndarray): + img = PILImage.fromarray(img) + pil_img_list.append(img) + + sample_img = pil_img_list[0] + size = sample_img.size + for img in pil_img_list: + assert size == img.size, "The image size in image_list should be the same" + + width, height = sample_img.size + result_img = PILImage.new(sample_img.mode, + (width * len(pil_img_list), height)) + for i, img in enumerate(pil_img_list): + result_img.paste(img, box=(width * i, 0)) + + return result_img From d51b6839425ab6b74ada88b0bb0863bab05d3c40 Mon Sep 17 00:00:00 2001 From: Bobholamovic Date: Mon, 12 Sep 2022 14:18:27 +0800 Subject: [PATCH 03/15] Add hash --- paddlers/models/hash.txt | 1 + 1 file changed, 1 insertion(+) create mode 100644 paddlers/models/hash.txt diff --git a/paddlers/models/hash.txt b/paddlers/models/hash.txt new file mode 100644 index 0000000..3307510 --- /dev/null +++ b/paddlers/models/hash.txt @@ -0,0 +1 @@ +ppseg f6c73b478cdf00f40ae69edd35bf6bce2a1687ef \ No newline at end of file From a895234700e77fbe5ea586b7286e1c27f5d115ce Mon Sep 17 00:00:00 2001 From: Bobholamovic Date: Mon, 12 Sep 2022 14:19:46 +0800 Subject: [PATCH 04/15] Add CondenseNet V2 --- .../{condensenet_v2.py => condensenetv2.py} | 884 +++++++++--------- paddlers/tasks/classifier.py | 24 +- .../condensenetv2/condensenetv2_ucmerced.yaml | 10 + .../clas/condensenetv2/train_infer_python.txt | 53 ++ test_tipc/configs/clas/hrnet/hrnet.yaml | 10 - .../configs/clas/hrnet/hrnet_ucmerced.yaml | 2 +- tests/rs_models/test_clas_models.py | 35 +- 
.../train/classification/condensenetv2.py | 90 ++ 8 files changed, 644 insertions(+), 464 
deletions(-) rename paddlers/rs_models/clas/{condensenet_v2.py => condensenetv2.py} (95%) create mode 100644 test_tipc/configs/clas/condensenetv2/condensenetv2_ucmerced.yaml create mode 100644 test_tipc/configs/clas/condensenetv2/train_infer_python.txt delete mode 100644 test_tipc/configs/clas/hrnet/hrnet.yaml create mode 100644 tutorials/train/classification/condensenetv2.py diff --git a/paddlers/rs_models/clas/condensenet_v2.py b/paddlers/rs_models/clas/condensenetv2.py similarity index 95% rename from paddlers/rs_models/clas/condensenet_v2.py rename to paddlers/rs_models/clas/condensenetv2.py index 53bb5aa..ca2b222 100644 --- a/paddlers/rs_models/clas/condensenet_v2.py +++ b/paddlers/rs_models/clas/condensenetv2.py @@ -1,442 +1,442 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-""" -This code is based on https://github.com/AgentMaker/Paddle-Image-Models -Ths copyright of AgentMaker/Paddle-Image-Models is as follows: -Apache License [see LICENSE for details] -""" - -import paddle -import paddle.nn as nn - -__all__ = ["CondenseNetV2_a", "CondenseNetV2_b", "CondenseNetV2_c"] - - -class SELayer(nn.Layer): - def __init__(self, inplanes, reduction=16): - super(SELayer, self).__init__() - self.avg_pool = nn.AdaptiveAvgPool2D(1) - self.fc = nn.Sequential( - nn.Linear( - inplanes, inplanes // reduction, bias_attr=False), - nn.ReLU(), - nn.Linear( - inplanes // reduction, inplanes, bias_attr=False), - nn.Sigmoid(), ) - - def forward(self, x): - b, c, _, _ = x.shape - y = self.avg_pool(x).reshape((b, c)) - y = self.fc(y).reshape((b, c, 1, 1)) - return x * paddle.expand(y, shape=x.shape) - - -class HS(nn.Layer): - def __init__(self): - super(HS, self).__init__() - self.relu6 = nn.ReLU6() - - def forward(self, inputs): - return inputs * self.relu6(inputs + 3) / 6 - - -class Conv(nn.Sequential): - def __init__( - self, - in_channels, - out_channels, - kernel_size, - stride=1, - padding=0, - groups=1, - activation="ReLU", - bn_momentum=0.9, ): - super(Conv, self).__init__() - self.add_sublayer( - "norm", nn.BatchNorm2D( - in_channels, momentum=bn_momentum)) - if activation == "ReLU": - self.add_sublayer("activation", nn.ReLU()) - elif activation == "HS": - self.add_sublayer("activation", HS()) - else: - raise NotImplementedError - self.add_sublayer( - "conv", - nn.Conv2D( - in_channels, - out_channels, - kernel_size=kernel_size, - stride=stride, - padding=padding, - bias_attr=False, - groups=groups, ), ) - - -def ShuffleLayer(x, groups): - batchsize, num_channels, height, width = x.shape - channels_per_group = num_channels // groups - # Reshape - x = x.reshape((batchsize, groups, channels_per_group, height, width)) - # Transpose - x = x.transpose((0, 2, 1, 3, 4)) - # Reshape - x = x.reshape((batchsize, groups * channels_per_group, height, width)) - 
return x - - -def ShuffleLayerTrans(x, groups): - batchsize, num_channels, height, width = x.shape - channels_per_group = num_channels // groups - # Reshape - x = x.reshape((batchsize, channels_per_group, groups, height, width)) - # Transpose - x = x.transpose((0, 2, 1, 3, 4)) - # Reshape - x = x.reshape((batchsize, channels_per_group * groups, height, width)) - return x - - -class CondenseLGC(nn.Layer): - def __init__( - self, - in_channels, - out_channels, - kernel_size, - stride=1, - padding=0, - groups=1, - activation="ReLU", ): - super(CondenseLGC, self).__init__() - self.in_channels = in_channels - self.out_channels = out_channels - self.groups = groups - self.norm = nn.BatchNorm2D(self.in_channels) - if activation == "ReLU": - self.activation = nn.ReLU() - elif activation == "HS": - self.activation = HS() - else: - raise NotImplementedError - self.conv = nn.Conv2D( - self.in_channels, - self.out_channels, - kernel_size=kernel_size, - stride=stride, - padding=padding, - groups=self.groups, - bias_attr=False, ) - self.register_buffer( - "index", paddle.zeros( - (self.in_channels, ), dtype="int64")) - - def forward(self, x): - x = paddle.index_select(x, self.index, axis=1) - x = self.norm(x) - x = self.activation(x) - x = self.conv(x) - x = ShuffleLayer(x, self.groups) - return x - - -class CondenseSFR(nn.Layer): - def __init__( - self, - in_channels, - out_channels, - kernel_size, - stride=1, - padding=0, - groups=1, - activation="ReLU", ): - super(CondenseSFR, self).__init__() - self.in_channels = in_channels - self.out_channels = out_channels - self.groups = groups - self.norm = nn.BatchNorm2D(self.in_channels) - if activation == "ReLU": - self.activation = nn.ReLU() - elif activation == "HS": - self.activation = HS() - else: - raise NotImplementedError - self.conv = nn.Conv2D( - self.in_channels, - self.out_channels, - kernel_size=kernel_size, - padding=padding, - groups=self.groups, - bias_attr=False, - stride=stride, ) - self.register_buffer("index", - 
paddle.zeros( - (self.out_channels, self.out_channels))) - - def forward(self, x): - x = self.norm(x) - x = self.activation(x) - x = ShuffleLayerTrans(x, self.groups) - x = self.conv(x) # SIZE: N, C, H, W - N, C, H, W = x.shape - x = x.reshape((N, C, H * W)) - x = x.transpose((0, 2, 1)) # SIZE: N, HW, C - # x SIZE: N, HW, C; self.index SIZE: C, C; OUTPUT SIZE: N, HW, C - x = paddle.matmul(x, self.index) - x = x.transpose((0, 2, 1)) # SIZE: N, C, HW - x = x.reshape((N, C, H, W)) # SIZE: N, C, HW - return x - - -class _SFR_DenseLayer(nn.Layer): - def __init__( - self, - in_channels, - growth_rate, - group_1x1, - group_3x3, - group_trans, - bottleneck, - activation, - use_se=False, ): - super(_SFR_DenseLayer, self).__init__() - self.group_1x1 = group_1x1 - self.group_3x3 = group_3x3 - self.group_trans = group_trans - self.use_se = use_se - # 1x1 conv i --> b*k - self.conv_1 = CondenseLGC( - in_channels, - bottleneck * growth_rate, - kernel_size=1, - groups=self.group_1x1, - activation=activation, ) - # 3x3 conv b*k --> k - self.conv_2 = Conv( - bottleneck * growth_rate, - growth_rate, - kernel_size=3, - padding=1, - groups=self.group_3x3, - activation=activation, ) - # 1x1 res conv k(8-16-32)--> i (k*l) - self.sfr = CondenseSFR( - growth_rate, - in_channels, - kernel_size=1, - groups=self.group_trans, - activation=activation, ) - if self.use_se: - self.se = SELayer(inplanes=growth_rate, reduction=1) - - def forward(self, x): - x_ = x - x = self.conv_1(x) - x = self.conv_2(x) - if self.use_se: - x = self.se(x) - sfr_feature = self.sfr(x) - y = x_ + sfr_feature - return paddle.concat([y, x], 1) - - -class _SFR_DenseBlock(nn.Sequential): - def __init__( - self, - num_layers, - in_channels, - growth_rate, - group_1x1, - group_3x3, - group_trans, - bottleneck, - activation, - use_se, ): - super(_SFR_DenseBlock, self).__init__() - for i in range(num_layers): - layer = _SFR_DenseLayer( - in_channels + i * growth_rate, - growth_rate, - group_1x1, - group_3x3, - group_trans, - 
bottleneck, - activation, - use_se, ) - self.add_sublayer("denselayer_%d" % (i + 1), layer) - - -class _Transition(nn.Layer): - def __init__(self): - super(_Transition, self).__init__() - self.pool = nn.AvgPool2D(kernel_size=2, stride=2) - - def forward(self, x): - x = self.pool(x) - return x - - -class CondenseNetV2(nn.Layer): - def __init__( - self, - stages, - growth, - HS_start_block, - SE_start_block, - fc_channel, - group_1x1, - group_3x3, - group_trans, - bottleneck, - last_se_reduction, - in_channels=3, - class_num=1000, ): - super(CondenseNetV2, self).__init__() - self.stages = stages - self.growth = growth - self.in_channels = in_channels - self.class_num = class_num - self.last_se_reduction = last_se_reduction - assert len(self.stages) == len(self.growth) - self.progress = 0.0 - - self.init_stride = 2 - self.pool_size = 7 - - self.features = nn.Sequential() - # Initial nChannels should be 3 - self.num_features = 2 * self.growth[0] - # Dense-block 1 (224x224) - self.features.add_sublayer( - "init_conv", - nn.Conv2D( - in_channels, - self.num_features, - kernel_size=3, - stride=self.init_stride, - padding=1, - bias_attr=False, ), ) - for i in range(len(self.stages)): - activation = "HS" if i >= HS_start_block else "ReLU" - use_se = True if i >= SE_start_block else False - # Dense-block i - self.add_block(i, group_1x1, group_3x3, group_trans, bottleneck, - activation, use_se) - - self.fc = nn.Linear(self.num_features, fc_channel) - self.fc_act = HS() - - # Classifier layer - if class_num > 0: - self.classifier = nn.Linear(fc_channel, class_num) - self._initialize() - - def add_block(self, i, group_1x1, group_3x3, group_trans, bottleneck, - activation, use_se): - # Check if ith is the last one - last = i == len(self.stages) - 1 - block = _SFR_DenseBlock( - num_layers=self.stages[i], - in_channels=self.num_features, - growth_rate=self.growth[i], - group_1x1=group_1x1, - group_3x3=group_3x3, - group_trans=group_trans, - bottleneck=bottleneck, - 
activation=activation, - use_se=use_se, ) - self.features.add_sublayer("denseblock_%d" % (i + 1), block) - self.num_features += self.stages[i] * self.growth[i] - if not last: - trans = _Transition() - self.features.add_sublayer("transition_%d" % (i + 1), trans) - else: - self.features.add_sublayer("norm_last", - nn.BatchNorm2D(self.num_features)) - self.features.add_sublayer("relu_last", nn.ReLU()) - self.features.add_sublayer("pool_last", - nn.AvgPool2D(self.pool_size)) - # if useSE: - self.features.add_sublayer( - "se_last", - SELayer( - self.num_features, reduction=self.last_se_reduction)) - - def forward(self, x): - features = self.features(x) - out = features.reshape((features.shape[0], features.shape[1] * - features.shape[2] * features.shape[3])) - out = self.fc(out) - out = self.fc_act(out) - - if self.class_num > 0: - out = self.classifier(out) - - return out - - def _initialize(self): - # Initialize - for m in self.sublayers(): - if isinstance(m, nn.Conv2D): - nn.initializer.KaimingNormal()(m.weight) - elif isinstance(m, nn.BatchNorm2D): - nn.initializer.Constant(value=1.0)(m.weight) - nn.initializer.Constant(value=0.0)(m.bias) - - -def CondenseNetV2_a(**kwargs): - model = CondenseNetV2( - stages=[1, 1, 4, 6, 8], - growth=[8, 8, 16, 32, 64], - HS_start_block=2, - SE_start_block=3, - fc_channel=828, - group_1x1=8, - group_3x3=8, - group_trans=8, - bottleneck=4, - last_se_reduction=16, - **kwargs) - return model - - -def CondenseNetV2_b(**kwargs): - model = CondenseNetV2( - stages=[2, 4, 6, 8, 6], - growth=[6, 12, 24, 48, 96], - HS_start_block=2, - SE_start_block=3, - fc_channel=1024, - group_1x1=6, - group_3x3=6, - group_trans=6, - bottleneck=4, - last_se_reduction=16, - **kwargs) - return model - - -def CondenseNetV2_c(**kwargs): - model = CondenseNetV2( - stages=[4, 6, 8, 10, 8], - growth=[8, 16, 32, 64, 128], - HS_start_block=2, - SE_start_block=3, - fc_channel=1024, - group_1x1=8, - group_3x3=8, - group_trans=8, - bottleneck=4, - last_se_reduction=16, - 
**kwargs) - return model +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +This code is based on https://github.com/AgentMaker/Paddle-Image-Models +Ths copyright of AgentMaker/Paddle-Image-Models is as follows: +Apache License [see LICENSE for details] +""" + +import paddle +import paddle.nn as nn + +__all__ = ["CondenseNetV2_A", "CondenseNetV2_B", "CondenseNetV2_C"] + + +class SELayer(nn.Layer): + def __init__(self, inplanes, reduction=16): + super(SELayer, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2D(1) + self.fc = nn.Sequential( + nn.Linear( + inplanes, inplanes // reduction, bias_attr=False), + nn.ReLU(), + nn.Linear( + inplanes // reduction, inplanes, bias_attr=False), + nn.Sigmoid(), ) + + def forward(self, x): + b, c, _, _ = x.shape + y = self.avg_pool(x).reshape((b, c)) + y = self.fc(y).reshape((b, c, 1, 1)) + return x * paddle.expand(y, shape=x.shape) + + +class HS(nn.Layer): + def __init__(self): + super(HS, self).__init__() + self.relu6 = nn.ReLU6() + + def forward(self, inputs): + return inputs * self.relu6(inputs + 3) / 6 + + +class Conv(nn.Sequential): + def __init__( + self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + groups=1, + activation="ReLU", + bn_momentum=0.9, ): + super(Conv, self).__init__() + self.add_sublayer( + "norm", nn.BatchNorm2D( + in_channels, momentum=bn_momentum)) + if activation == "ReLU": + self.add_sublayer("activation", 
nn.ReLU()) + elif activation == "HS": + self.add_sublayer("activation", HS()) + else: + raise NotImplementedError + self.add_sublayer( + "conv", + nn.Conv2D( + in_channels, + out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + bias_attr=False, + groups=groups, ), ) + + +def ShuffleLayer(x, groups): + batchsize, num_channels, height, width = x.shape + channels_per_group = num_channels // groups + # Reshape + x = x.reshape((batchsize, groups, channels_per_group, height, width)) + # Transpose + x = x.transpose((0, 2, 1, 3, 4)) + # Reshape + x = x.reshape((batchsize, groups * channels_per_group, height, width)) + return x + + +def ShuffleLayerTrans(x, groups): + batchsize, num_channels, height, width = x.shape + channels_per_group = num_channels // groups + # Reshape + x = x.reshape((batchsize, channels_per_group, groups, height, width)) + # Transpose + x = x.transpose((0, 2, 1, 3, 4)) + # Reshape + x = x.reshape((batchsize, channels_per_group * groups, height, width)) + return x + + +class CondenseLGC(nn.Layer): + def __init__( + self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + groups=1, + activation="ReLU", ): + super(CondenseLGC, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.groups = groups + self.norm = nn.BatchNorm2D(self.in_channels) + if activation == "ReLU": + self.activation = nn.ReLU() + elif activation == "HS": + self.activation = HS() + else: + raise NotImplementedError + self.conv = nn.Conv2D( + self.in_channels, + self.out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + groups=self.groups, + bias_attr=False, ) + self.register_buffer( + "index", paddle.zeros( + (self.in_channels, ), dtype="int64")) + + def forward(self, x): + x = paddle.index_select(x, self.index, axis=1) + x = self.norm(x) + x = self.activation(x) + x = self.conv(x) + x = ShuffleLayer(x, self.groups) + return x + + +class CondenseSFR(nn.Layer): + def __init__( 
+ self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + groups=1, + activation="ReLU", ): + super(CondenseSFR, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.groups = groups + self.norm = nn.BatchNorm2D(self.in_channels) + if activation == "ReLU": + self.activation = nn.ReLU() + elif activation == "HS": + self.activation = HS() + else: + raise NotImplementedError + self.conv = nn.Conv2D( + self.in_channels, + self.out_channels, + kernel_size=kernel_size, + padding=padding, + groups=self.groups, + bias_attr=False, + stride=stride, ) + self.register_buffer("index", + paddle.zeros( + (self.out_channels, self.out_channels))) + + def forward(self, x): + x = self.norm(x) + x = self.activation(x) + x = ShuffleLayerTrans(x, self.groups) + x = self.conv(x) # SIZE: N, C, H, W + N, C, H, W = x.shape + x = x.reshape((N, C, H * W)) + x = x.transpose((0, 2, 1)) # SIZE: N, HW, C + # x SIZE: N, HW, C; self.index SIZE: C, C; OUTPUT SIZE: N, HW, C + x = paddle.matmul(x, self.index) + x = x.transpose((0, 2, 1)) # SIZE: N, C, HW + x = x.reshape((N, C, H, W)) # SIZE: N, C, HW + return x + + +class _SFR_DenseLayer(nn.Layer): + def __init__( + self, + in_channels, + growth_rate, + group_1x1, + group_3x3, + group_trans, + bottleneck, + activation, + use_se=False, ): + super(_SFR_DenseLayer, self).__init__() + self.group_1x1 = group_1x1 + self.group_3x3 = group_3x3 + self.group_trans = group_trans + self.use_se = use_se + # 1x1 conv i --> b*k + self.conv_1 = CondenseLGC( + in_channels, + bottleneck * growth_rate, + kernel_size=1, + groups=self.group_1x1, + activation=activation, ) + # 3x3 conv b*k --> k + self.conv_2 = Conv( + bottleneck * growth_rate, + growth_rate, + kernel_size=3, + padding=1, + groups=self.group_3x3, + activation=activation, ) + # 1x1 res conv k(8-16-32)--> i (k*l) + self.sfr = CondenseSFR( + growth_rate, + in_channels, + kernel_size=1, + groups=self.group_trans, + activation=activation, ) + if 
self.use_se: + self.se = SELayer(inplanes=growth_rate, reduction=1) + + def forward(self, x): + x_ = x + x = self.conv_1(x) + x = self.conv_2(x) + if self.use_se: + x = self.se(x) + sfr_feature = self.sfr(x) + y = x_ + sfr_feature + return paddle.concat([y, x], 1) + + +class _SFR_DenseBlock(nn.Sequential): + def __init__( + self, + num_layers, + in_channels, + growth_rate, + group_1x1, + group_3x3, + group_trans, + bottleneck, + activation, + use_se, ): + super(_SFR_DenseBlock, self).__init__() + for i in range(num_layers): + layer = _SFR_DenseLayer( + in_channels + i * growth_rate, + growth_rate, + group_1x1, + group_3x3, + group_trans, + bottleneck, + activation, + use_se, ) + self.add_sublayer("denselayer_%d" % (i + 1), layer) + + +class _Transition(nn.Layer): + def __init__(self): + super(_Transition, self).__init__() + self.pool = nn.AvgPool2D(kernel_size=2, stride=2) + + def forward(self, x): + x = self.pool(x) + return x + + +class CondenseNetV2(nn.Layer): + def __init__( + self, + stages, + growth, + HS_start_block, + SE_start_block, + fc_channel, + group_1x1, + group_3x3, + group_trans, + bottleneck, + last_se_reduction, + in_channels=3, + class_num=1000, ): + super(CondenseNetV2, self).__init__() + self.stages = stages + self.growth = growth + self.in_channels = in_channels + self.class_num = class_num + self.last_se_reduction = last_se_reduction + assert len(self.stages) == len(self.growth) + self.progress = 0.0 + + self.init_stride = 2 + self.pool_size = 7 + + self.features = nn.Sequential() + # Initial nChannels should be 3 + self.num_features = 2 * self.growth[0] + # Dense-block 1 (224x224) + self.features.add_sublayer( + "init_conv", + nn.Conv2D( + in_channels, + self.num_features, + kernel_size=3, + stride=self.init_stride, + padding=1, + bias_attr=False, ), ) + for i in range(len(self.stages)): + activation = "HS" if i >= HS_start_block else "ReLU" + use_se = True if i >= SE_start_block else False + # Dense-block i + self.add_block(i, group_1x1, 
group_3x3, group_trans, bottleneck, + activation, use_se) + + self.fc = nn.Linear(self.num_features, fc_channel) + self.fc_act = HS() + + # Classifier layer + if class_num > 0: + self.classifier = nn.Linear(fc_channel, class_num) + self._initialize() + + def add_block(self, i, group_1x1, group_3x3, group_trans, bottleneck, + activation, use_se): + # Check if ith is the last one + last = i == len(self.stages) - 1 + block = _SFR_DenseBlock( + num_layers=self.stages[i], + in_channels=self.num_features, + growth_rate=self.growth[i], + group_1x1=group_1x1, + group_3x3=group_3x3, + group_trans=group_trans, + bottleneck=bottleneck, + activation=activation, + use_se=use_se, ) + self.features.add_sublayer("denseblock_%d" % (i + 1), block) + self.num_features += self.stages[i] * self.growth[i] + if not last: + trans = _Transition() + self.features.add_sublayer("transition_%d" % (i + 1), trans) + else: + self.features.add_sublayer("norm_last", + nn.BatchNorm2D(self.num_features)) + self.features.add_sublayer("relu_last", nn.ReLU()) + self.features.add_sublayer("pool_last", + nn.AvgPool2D(self.pool_size)) + # if useSE: + self.features.add_sublayer( + "se_last", + SELayer( + self.num_features, reduction=self.last_se_reduction)) + + def forward(self, x): + features = self.features(x) + out = features.reshape((features.shape[0], features.shape[1] * + features.shape[2] * features.shape[3])) + out = self.fc(out) + out = self.fc_act(out) + + if self.class_num > 0: + out = self.classifier(out) + + return out + + def _initialize(self): + # Initialize + for m in self.sublayers(): + if isinstance(m, nn.Conv2D): + nn.initializer.KaimingNormal()(m.weight) + elif isinstance(m, nn.BatchNorm2D): + nn.initializer.Constant(value=1.0)(m.weight) + nn.initializer.Constant(value=0.0)(m.bias) + + +def CondenseNetV2_A(**kwargs): + model = CondenseNetV2( + stages=[1, 1, 4, 6, 8], + growth=[8, 8, 16, 32, 64], + HS_start_block=2, + SE_start_block=3, + fc_channel=828, + group_1x1=8, + group_3x3=8, + 
group_trans=8, + bottleneck=4, + last_se_reduction=16, + **kwargs) + return model + + +def CondenseNetV2_B(**kwargs): + model = CondenseNetV2( + stages=[2, 4, 6, 8, 6], + growth=[6, 12, 24, 48, 96], + HS_start_block=2, + SE_start_block=3, + fc_channel=1024, + group_1x1=6, + group_3x3=6, + group_trans=6, + bottleneck=4, + last_se_reduction=16, + **kwargs) + return model + + +def CondenseNetV2_C(**kwargs): + model = CondenseNetV2( + stages=[4, 6, 8, 10, 8], + growth=[8, 16, 32, 64, 128], + HS_start_block=2, + SE_start_block=3, + fc_channel=1024, + group_1x1=8, + group_3x3=8, + group_trans=8, + bottleneck=4, + last_se_reduction=16, + **kwargs) + return model diff --git a/paddlers/tasks/classifier.py b/paddlers/tasks/classifier.py index 83c20fb..7af2c02 100644 --- a/paddlers/tasks/classifier.py +++ b/paddlers/tasks/classifier.py @@ -34,9 +34,7 @@ from paddlers.utils.checkpoint import cls_pretrain_weights_dict from paddlers.transforms import Resize, decode_image from .base import BaseModel -__all__ = [ - "ResNet50_vd", "MobileNetV3_small_x1_0", "HRNet_W18_C", "CondenseNetV2_b" -] +__all__ = ["ResNet50_vd", "MobileNetV3", "HRNet", "CondenseNetV2"] class BaseClassifier(BaseModel): @@ -600,13 +598,13 @@ class ResNet50_vd(BaseClassifier): **params) -class MobileNetV3_small_x1_0(BaseClassifier): +class MobileNetV3(BaseClassifier): def __init__(self, num_classes=2, use_mixed_loss=False, losses=None, **params): - super(MobileNetV3_small_x1_0, self).__init__( + super(MobileNetV3, self).__init__( model_name='MobileNetV3_small_x1_0', num_classes=num_classes, use_mixed_loss=use_mixed_loss, @@ -614,13 +612,13 @@ class MobileNetV3_small_x1_0(BaseClassifier): **params) -class HRNet_W18_C(BaseClassifier): +class HRNet(BaseClassifier): def __init__(self, num_classes=2, use_mixed_loss=False, losses=None, **params): - super(HRNet_W18_C, self).__init__( + super(HRNet, self).__init__( model_name='HRNet_W18_C', num_classes=num_classes, use_mixed_loss=use_mixed_loss, @@ -628,15 +626,21 @@ 
class HRNet_W18_C(BaseClassifier): **params) -class CondenseNetV2_b(BaseClassifier): +class CondenseNetV2(BaseClassifier): def __init__(self, num_classes=2, use_mixed_loss=False, losses=None, + in_channels=3, + arch='A', **params): - super(CondenseNetV2_b, self).__init__( - model_name='CondenseNetV2_b', + if arch not in ('A', 'B', 'C'): + raise ValueError("{} is not a supported architecture.".format(arch)) + model_name = 'CondenseNetV2_' + arch + super(CondenseNetV2, self).__init__( + model_name=model_name, num_classes=num_classes, use_mixed_loss=use_mixed_loss, losses=losses, + in_channels=in_channels, **params) diff --git a/test_tipc/configs/clas/condensenetv2/condensenetv2_ucmerced.yaml b/test_tipc/configs/clas/condensenetv2/condensenetv2_ucmerced.yaml new file mode 100644 index 0000000..9808f00 --- /dev/null +++ b/test_tipc/configs/clas/condensenetv2/condensenetv2_ucmerced.yaml @@ -0,0 +1,10 @@ +# Configurations of CondenseNet V2 with UCMerced dataset + +_base_: ../_base_/ucmerced.yaml + +save_dir: ./test_tipc/output/clas/condensenetv2/ + +model: !Node + type: CondenseNetV2 + args: + num_classes: 21 \ No newline at end of file diff --git a/test_tipc/configs/clas/condensenetv2/train_infer_python.txt b/test_tipc/configs/clas/condensenetv2/train_infer_python.txt new file mode 100644 index 0000000..0e8832b --- /dev/null +++ b/test_tipc/configs/clas/condensenetv2/train_infer_python.txt @@ -0,0 +1,53 @@ +===========================train_params=========================== +model_name:clas:condensenetv2 +python:python +gpu_list:0|0,1 +use_gpu:null|null +--precision:null +--num_epochs:lite_train_lite_infer=3|lite_train_whole_infer=3|whole_train_whole_infer=10 +--save_dir:adaptive +--train_batch_size:lite_train_lite_infer=16|lite_train_whole_infer=16|whole_train_whole_infer=16 +--model_path:null
+--config:lite_train_lite_infer=./test_tipc/configs/clas/condensenetv2/condensenetv2_ucmerced.yaml|lite_train_whole_infer=./test_tipc/configs/clas/condensenetv2/condensenetv2_ucmerced.yaml|whole_train_whole_infer=./test_tipc/configs/clas/condensenetv2/condensenetv2_ucmerced.yaml +train_model_name:best_model +null:null +## +trainer:norm +norm_train:test_tipc/run_task.py train clas +pact_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:null +null:null +## +===========================export_params=========================== +--save_dir:adaptive +--model_dir:adaptive +--fixed_input_shape:[-1,3,256,256] +norm_export:deploy/export/export_model.py +quant_export:null +fpgm_export:null +distill_export:null +export1:null +export2:null +===========================infer_params=========================== +infer_model:null +infer_export:null +infer_quant:False +inference:test_tipc/infer.py +--device:cpu|gpu +--enable_mkldnn:True +--cpu_threads:6 +--batch_size:1 +--use_trt:False +--precision:fp32 +--model_dir:null +--config:null +--save_log_path:null +--benchmark:True +--model_name:condensenetv2 +null:null \ No newline at end of file diff --git a/test_tipc/configs/clas/hrnet/hrnet.yaml b/test_tipc/configs/clas/hrnet/hrnet.yaml deleted file mode 100644 index 4c9879f..0000000 --- a/test_tipc/configs/clas/hrnet/hrnet.yaml +++ /dev/null @@ -1,10 +0,0 @@ -# Basic configurations of HRNet - -_base_: ../_base_/ucmerced.yaml - -save_dir: ./test_tipc/output/clas/hrnet/ - -model: !Node - type: HRNet_W18_C - args: - num_classes: 21 \ No newline at end of file diff --git a/test_tipc/configs/clas/hrnet/hrnet_ucmerced.yaml b/test_tipc/configs/clas/hrnet/hrnet_ucmerced.yaml index 3a09756..3a58807 100644 --- a/test_tipc/configs/clas/hrnet/hrnet_ucmerced.yaml +++ b/test_tipc/configs/clas/hrnet/hrnet_ucmerced.yaml @@ -5,6 +5,6 @@ _base_: ../_base_/ucmerced.yaml save_dir: ./test_tipc/output/clas/hrnet/ 
model: !Node - type: HRNet_W18_C + type: HRNet args: num_classes: 21 \ No newline at end of file diff --git a/tests/rs_models/test_clas_models.py b/tests/rs_models/test_clas_models.py index ab184fa..3c89d78 100644 --- a/tests/rs_models/test_clas_models.py +++ b/tests/rs_models/test_clas_models.py @@ -18,7 +18,7 @@ from rs_models.test_model import TestModel __all__ = [] -class TestCDModel(TestModel): +class TestClasModel(TestModel): DEFAULT_HW = (224, 224) def check_output(self, output, target): @@ -36,3 +36,36 @@ class TestCDModel(TestModel): def set_targets(self): self.targets = [[self.DEFAULT_BATCH_SIZE, spec.get('num_classes', 2)] for spec in self.specs] + + +class TestCondenseNetV2AModel(TestClasModel): + MODEL_CLASS = paddlers.rs_models.clas.CondenseNetV2_A + + def set_specs(self): + self.specs = [ + dict(in_channels=3, num_classes=2), + dict(in_channels=10, num_classes=2), + dict(in_channels=3, num_classes=100) + ] # yapf: disable + + +class TestCondenseNetV2BModel(TestClasModel): + MODEL_CLASS = paddlers.rs_models.clas.CondenseNetV2_B + + def set_specs(self): + self.specs = [ + dict(in_channels=3, num_classes=2), + dict(in_channels=10, num_classes=2), + dict(in_channels=3, num_classes=100) + ] # yapf: disable + + +class TestCondenseNetV2CModel(TestClasModel): + MODEL_CLASS = paddlers.rs_models.clas.CondenseNetV2_C + + def set_specs(self): + self.specs = [ + dict(in_channels=3, num_classes=2), + dict(in_channels=10, num_classes=2), + dict(in_channels=3, num_classes=100) + ] # yapf: disable diff --git a/tutorials/train/classification/condensenetv2.py b/tutorials/train/classification/condensenetv2.py new file mode 100644 index 0000000..62fd4f4 --- /dev/null +++ b/tutorials/train/classification/condensenetv2.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python + +# 场景分类模型CondenseNet V2训练示例脚本 +# 执行此脚本前,请确认已正确安装PaddleRS库 + +import paddlers as pdrs +from paddlers import transforms as T + +# 数据集存放目录 +DATA_DIR = './data/ucmerced/' +# 训练集`file_list`文件路径 +TRAIN_FILE_LIST_PATH = 
'./data/ucmerced/train.txt' +# 验证集`file_list`文件路径 +EVAL_FILE_LIST_PATH = './data/ucmerced/val.txt' +# 数据集类别信息文件路径 +LABEL_LIST_PATH = './data/ucmerced/labels.txt' +# 实验目录,保存输出的模型权重和结果 +EXP_DIR = './output/condensenetv2/' + +# 下载和解压UC Merced数据集 +pdrs.utils.download_and_decompress( + 'https://paddlers.bj.bcebos.com/datasets/ucmerced.zip', path='./data/') + +# 定义训练和验证时使用的数据变换(数据增强、预处理等) +# 使用Compose组合多种变换方式。Compose中包含的变换将按顺序串行执行 +# API说明:https://github.com/PaddlePaddle/PaddleRS/blob/develop/docs/apis/data.md +train_transforms = T.Compose([ + # 读取影像 + T.DecodeImg(), + # 将影像缩放到256x256大小 + T.Resize(target_size=256), + # 以50%的概率实施随机水平翻转 + T.RandomHorizontalFlip(prob=0.5), + # 以50%的概率实施随机垂直翻转 + T.RandomVerticalFlip(prob=0.5), + # 将数据归一化到[-1,1] + T.Normalize( + mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]), + T.ArrangeClassifier('train') +]) + +eval_transforms = T.Compose([ + T.DecodeImg(), + T.Resize(target_size=256), + # 验证阶段与训练阶段的数据归一化方式必须相同 + T.Normalize( + mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]), + T.ArrangeClassifier('eval') +]) + +# 分别构建训练和验证所用的数据集 +train_dataset = pdrs.datasets.ClasDataset( + data_dir=DATA_DIR, + file_list=TRAIN_FILE_LIST_PATH, + label_list=LABEL_LIST_PATH, + transforms=train_transforms, + num_workers=0, + shuffle=True) + +eval_dataset = pdrs.datasets.ClasDataset( + data_dir=DATA_DIR, + file_list=EVAL_FILE_LIST_PATH, + label_list=LABEL_LIST_PATH, + transforms=eval_transforms, + num_workers=0, + shuffle=False) + +# 构建CondenseNet V2模型 +# 目前已支持的模型请参考:https://github.com/PaddlePaddle/PaddleRS/blob/develop/docs/intro/model_zoo.md +# 模型输入参数请参考:https://github.com/PaddlePaddle/PaddleRS/blob/develop/paddlers/tasks/classifier.py +model = pdrs.tasks.clas.CondenseNetV2(num_classes=len(train_dataset.labels)) + +# 执行模型训练 +model.train( + num_epochs=2, + train_dataset=train_dataset, + train_batch_size=16, + eval_dataset=eval_dataset, + save_interval_epochs=1, + # 每多少次迭代记录一次日志 + log_interval_steps=50, + save_dir=EXP_DIR, + # 初始学习率大小 + learning_rate=0.01, + # 是否使用early
stopping策略,当精度不再改善时提前终止训练 + early_stop=False, + # 是否启用VisualDL日志功能 + use_vdl=True, + # 指定从某个检查点继续训练 + resume_checkpoint=None) From 9f5c87e9dd8bc2817b31388b1c1491c577ad7cf6 Mon Sep 17 00:00:00 2001 From: Bobholamovic Date: Mon, 12 Sep 2022 14:20:36 +0800 Subject: [PATCH 05/15] Rename clas models --- paddlers/rs_models/clas/__init__.py | 2 +- test_tipc/README.md | 4 ++++ test_tipc/configs/clas/mobilenetv3/mobilenetv3_ucmerced.yaml | 2 +- tutorials/train/classification/hrnet.py | 2 +- tutorials/train/classification/mobilenetv3.py | 3 +-- 5 files changed, 8 insertions(+), 5 deletions(-) diff --git a/paddlers/rs_models/clas/__init__.py b/paddlers/rs_models/clas/__init__.py index 9ac8d19..9ae6c3d 100644 --- a/paddlers/rs_models/clas/__init__.py +++ b/paddlers/rs_models/clas/__init__.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .condensenet_v2 import CondenseNetV2_a, CondenseNetV2_b, CondenseNetV2_c +from .condensenetv2 import CondenseNetV2_A, CondenseNetV2_B, CondenseNetV2_C diff --git a/test_tipc/README.md b/test_tipc/README.md index 934a83f..759cbd3 100644 --- a/test_tipc/README.md +++ b/test_tipc/README.md @@ -32,6 +32,7 @@ | 变化检测 | FC-Siam-conc | 支持 | - | - | - | | 变化检测 | FC-Siam-diff | 支持 | - | - | - | | 变化检测 | ChangeFormer | 支持 | - | - | - | +| 场景分类 | CondenseNet V2 | 支持 | - | - | - | | 场景分类 | HRNet | 支持 | - | - | - | | 场景分类 | MobileNetV3 | 支持 | - | - | - | | 场景分类 | ResNet50-vd | 支持 | - | - | - | @@ -43,8 +44,11 @@ | 目标检测 | PP-YOLO Tiny | 支持 | - | - | - | | 目标检测 | PP-YOLOv2 | 支持 | - | - | - | | 目标检测 | YOLOv3 | 支持 | - | - | - | +| 图像分割 | BiSeNet V2 | 支持 | - | - | - | | 图像分割 | DeepLab V3+ | 支持 | - | - | - | | 图像分割 | FarSeg | 支持 | - | - | - | +| 图像分割 | Fast-SCNN | 支持 | - | - | - | +| 图像分割 | HRNet | 支持 | - | - | - | | 图像分割 | UNet | 支持 | - | - | - | ## 3 测试工具简介 diff --git a/test_tipc/configs/clas/mobilenetv3/mobilenetv3_ucmerced.yaml 
b/test_tipc/configs/clas/mobilenetv3/mobilenetv3_ucmerced.yaml index becdd5f..dd1ff7e 100644 --- a/test_tipc/configs/clas/mobilenetv3/mobilenetv3_ucmerced.yaml +++ b/test_tipc/configs/clas/mobilenetv3/mobilenetv3_ucmerced.yaml @@ -5,6 +5,6 @@ _base_: ../_base_/ucmerced.yaml save_dir: ./test_tipc/output/clas/mobilenetv3/ model: !Node - type: MobileNetV3_small_x1_0 + type: MobileNetV3 args: num_classes: 21 \ No newline at end of file diff --git a/tutorials/train/classification/hrnet.py b/tutorials/train/classification/hrnet.py index 7a89843..a03a538 100644 --- a/tutorials/train/classification/hrnet.py +++ b/tutorials/train/classification/hrnet.py @@ -68,7 +68,7 @@ eval_dataset = pdrs.datasets.ClasDataset( # 构建HRNet模型 # 目前已支持的模型请参考:https://github.com/PaddlePaddle/PaddleRS/blob/develop/docs/intro/model_zoo.md # 模型输入参数请参考:https://github.com/PaddlePaddle/PaddleRS/blob/develop/paddlers/tasks/classifier.py -model = pdrs.tasks.clas.HRNet_W18_C(num_classes=len(train_dataset.labels)) +model = pdrs.tasks.clas.HRNet(num_classes=len(train_dataset.labels)) # 执行模型训练 model.train( diff --git a/tutorials/train/classification/mobilenetv3.py b/tutorials/train/classification/mobilenetv3.py index 36efe29..7b6177c 100644 --- a/tutorials/train/classification/mobilenetv3.py +++ b/tutorials/train/classification/mobilenetv3.py @@ -68,8 +68,7 @@ eval_dataset = pdrs.datasets.ClasDataset( # 构建MobileNetV3模型 # 目前已支持的模型请参考:https://github.com/PaddlePaddle/PaddleRS/blob/develop/docs/intro/model_zoo.md # 模型输入参数请参考:https://github.com/PaddlePaddle/PaddleRS/blob/develop/paddlers/tasks/classifier.py -model = pdrs.tasks.clas.MobileNetV3_small_x1_0( - num_classes=len(train_dataset.labels)) +model = pdrs.tasks.clas.MobileNetV3(num_classes=len(train_dataset.labels)) # 执行模型训练 model.train( From 69c160404a8c32638e9bb8eb65702c659c9f51a5 Mon Sep 17 00:00:00 2001 From: Bobholamovic Date: Mon, 12 Sep 2022 14:21:02 +0800 Subject: [PATCH 06/15] Add seg models --- docs/intro/model_zoo.md | 23 +++-- 
paddlers/tasks/segmenter.py | 11 ++- test_tipc/configs/cd/bit/bit.yaml | 8 -- .../configs/cd/changeformer/changeformer.yaml | 8 -- test_tipc/configs/cd/fccdn/fccdn.yaml | 13 --- .../seg/bisenetv2/bisenetv2_rsseg.yaml | 11 +++ .../seg/bisenetv2/train_infer_python.txt | 53 +++++++++++ .../seg/fast_scnn/fast_scnn_rsseg.yaml | 11 +++ .../seg/fast_scnn/train_infer_python.txt | 53 +++++++++++ test_tipc/configs/seg/hrnet/hrnet_rsseg.yaml | 11 +++ .../configs/seg/hrnet/train_infer_python.txt | 53 +++++++++++ test_tipc/docs/test_train_inference_python.md | 8 ++ tutorials/train/README.md | 4 + .../train/semantic_segmentation/bisenetv2.py | 93 +++++++++++++++++++ .../train/semantic_segmentation/fast_scnn.py | 93 +++++++++++++++++++ .../train/semantic_segmentation/hrnet.py | 93 +++++++++++++++++++ 16 files changed, 504 insertions(+), 42 deletions(-) delete mode 100644 test_tipc/configs/cd/bit/bit.yaml delete mode 100644 test_tipc/configs/cd/changeformer/changeformer.yaml delete mode 100644 test_tipc/configs/cd/fccdn/fccdn.yaml create mode 100644 test_tipc/configs/seg/bisenetv2/bisenetv2_rsseg.yaml create mode 100644 test_tipc/configs/seg/bisenetv2/train_infer_python.txt create mode 100644 test_tipc/configs/seg/fast_scnn/fast_scnn_rsseg.yaml create mode 100644 test_tipc/configs/seg/fast_scnn/train_infer_python.txt create mode 100644 test_tipc/configs/seg/hrnet/hrnet_rsseg.yaml create mode 100644 test_tipc/configs/seg/hrnet/train_infer_python.txt create mode 100644 tutorials/train/semantic_segmentation/bisenetv2.py create mode 100644 tutorials/train/semantic_segmentation/fast_scnn.py create mode 100644 tutorials/train/semantic_segmentation/hrnet.py diff --git a/docs/intro/model_zoo.md b/docs/intro/model_zoo.md index 9281df8..f8b520d 100644 --- a/docs/intro/model_zoo.md +++ b/docs/intro/model_zoo.md @@ -20,18 +20,21 @@ PaddleRS目前已支持的全部模型如下(标注\*的为遥感专用模型 | 变化检测 | \*FCCDN | 是 | | 变化检测 | \*SNUNet | 是 | | 变化检测 | \*STANet | 是 | -| 场景分类 | CondenseNetV2 | 是 | -| 场景分类 | HRNet | 是 | -| 
场景分类 | MobileNetV3 | 是 | -| 场景分类 | ResNet50-vd | 是 | +| 场景分类 | CondenseNet V2 | 是 | +| 场景分类 | HRNet | 否 | +| 场景分类 | MobileNetV3 | 否 | +| 场景分类 | ResNet50-vd | 否 | | 图像复原 | DRN | 否 | | 图像复原 | ESRGAN | 否 | | 图像复原 | LESRCNN | 否 | -| 目标检测 | Faster R-CNN | 是 | -| 目标检测 | PP-YOLO | 是 | -| 目标检测 | PP-YOLO Tiny | 是 | -| 目标检测 | PP-YOLOv2 | 是 | -| 目标检测 | YOLOv3 | 是 | +| 目标检测 | Faster R-CNN | 否 | +| 目标检测 | PP-YOLO | 否 | +| 目标检测 | PP-YOLO Tiny | 否 | +| 目标检测 | PP-YOLOv2 | 否 | +| 目标检测 | YOLOv3 | 否 | +| 图像分割 | BiSeNet V2 | 是 | | 图像分割 | DeepLab V3+ | 是 | -| 图像分割 | \*FarSeg | 否 | +| 图像分割 | \*FarSeg | 是 | +| 图像分割 | Fast-SCNN | 是 | +| 图像分割 | HRNet | 是 | | 图像分割 | UNet | 是 | diff --git a/paddlers/tasks/segmenter.py b/paddlers/tasks/segmenter.py index 83cbffa..9e6b584 100644 --- a/paddlers/tasks/segmenter.py +++ b/paddlers/tasks/segmenter.py @@ -806,7 +806,7 @@ class UNet(BaseSegmenter): }) super(UNet, self).__init__( model_name='UNet', - input_channel=in_channels, + in_channels=in_channels, num_classes=num_classes, use_mixed_loss=use_mixed_loss, losses=losses, @@ -834,7 +834,7 @@ class DeepLabV3P(BaseSegmenter): if params.get('with_net', True): with DisablePrint(): backbone = getattr(ppseg.models, backbone)( - input_channel=in_channels, output_stride=output_stride) + in_channels=in_channels, output_stride=output_stride) else: backbone = None params.update({ @@ -854,6 +854,7 @@ class DeepLabV3P(BaseSegmenter): class FastSCNN(BaseSegmenter): def __init__(self, + in_channels=3, num_classes=2, use_mixed_loss=False, losses=None, @@ -862,6 +863,7 @@ class FastSCNN(BaseSegmenter): params.update({'align_corners': align_corners}) super(FastSCNN, self).__init__( model_name='FastSCNN', + in_channels=in_channels, num_classes=num_classes, use_mixed_loss=use_mixed_loss, losses=losses, @@ -870,6 +872,7 @@ class FastSCNN(BaseSegmenter): class HRNet(BaseSegmenter): def __init__(self, + in_channels=3, num_classes=2, width=48, use_mixed_loss=False, @@ -884,7 +887,7 @@ class HRNet(BaseSegmenter): if 
params.get('with_net', True): with DisablePrint(): backbone = getattr(ppseg.models, self.backbone_name)( - align_corners=align_corners) + in_channels=in_channels, align_corners=align_corners) else: backbone = None @@ -900,6 +903,7 @@ class HRNet(BaseSegmenter): class BiSeNetV2(BaseSegmenter): def __init__(self, + in_channels=3, num_classes=2, use_mixed_loss=False, losses=None, @@ -908,6 +912,7 @@ class BiSeNetV2(BaseSegmenter): params.update({'align_corners': align_corners}) super(BiSeNetV2, self).__init__( model_name='BiSeNetV2', + in_channels=in_channels, num_classes=num_classes, use_mixed_loss=use_mixed_loss, losses=losses, diff --git a/test_tipc/configs/cd/bit/bit.yaml b/test_tipc/configs/cd/bit/bit.yaml deleted file mode 100644 index 3d3c62b..0000000 --- a/test_tipc/configs/cd/bit/bit.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Basic configurations of BIT - -_base_: ../_base_/airchange.yaml - -save_dir: ./test_tipc/output/cd/bit/ - -model: !Node - type: BIT \ No newline at end of file diff --git a/test_tipc/configs/cd/changeformer/changeformer.yaml b/test_tipc/configs/cd/changeformer/changeformer.yaml deleted file mode 100644 index 785749d..0000000 --- a/test_tipc/configs/cd/changeformer/changeformer.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Basic configurations of ChangeFormer - -_base_: ../_base_/airchange.yaml - -save_dir: ./test_tipc/output/cd/changeformer/ - -model: !Node - type: ChangeFormer \ No newline at end of file diff --git a/test_tipc/configs/cd/fccdn/fccdn.yaml b/test_tipc/configs/cd/fccdn/fccdn.yaml deleted file mode 100644 index 8b93717..0000000 --- a/test_tipc/configs/cd/fccdn/fccdn.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# Basic configurations of FCCDN - -_base_: ../_base_/airchange.yaml - -save_dir: ./test_tipc/output/cd/fccdn/ - -model: !Node - type: FCCDN - -learning_rate: 0.07 -lr_decay_power: 0.6 -log_interval_steps: 100 -save_interval_epochs: 3 diff --git a/test_tipc/configs/seg/bisenetv2/bisenetv2_rsseg.yaml 
b/test_tipc/configs/seg/bisenetv2/bisenetv2_rsseg.yaml new file mode 100644 index 0000000..81c29b6 --- /dev/null +++ b/test_tipc/configs/seg/bisenetv2/bisenetv2_rsseg.yaml @@ -0,0 +1,11 @@ +# Configurations of BiSeNet V2 with RSSeg dataset + +_base_: ../_base_/rsseg.yaml + +save_dir: ./test_tipc/output/seg/bisenetv2/ + +model: !Node + type: BiSeNetV2 + args: + in_channels: 10 + num_classes: 5 \ No newline at end of file diff --git a/test_tipc/configs/seg/bisenetv2/train_infer_python.txt b/test_tipc/configs/seg/bisenetv2/train_infer_python.txt new file mode 100644 index 0000000..2fcad98 --- /dev/null +++ b/test_tipc/configs/seg/bisenetv2/train_infer_python.txt @@ -0,0 +1,53 @@ +===========================train_params=========================== +model_name:seg:bisenetv2 +python:python +gpu_list:0|0,1 +use_gpu:null|null +--precision:null +--num_epochs:lite_train_lite_infer=3|lite_train_whole_infer=3|whole_train_whole_infer=20 +--save_dir:adaptive +--train_batch_size:lite_train_lite_infer=4|lite_train_whole_infer=4|whole_train_whole_infer=4 +--model_path:null +--config:lite_train_lite_infer=./test_tipc/configs/seg/bisenetv2/bisenetv2_rsseg.yaml|lite_train_whole_infer=./test_tipc/configs/seg/bisenetv2/bisenetv2_rsseg.yaml|whole_train_whole_infer=./test_tipc/configs/seg/bisenetv2/bisenetv2_rsseg.yaml +train_model_name:best_model +null:null +## +trainer:norm +norm_train:test_tipc/run_task.py train seg +pact_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:null +null:null +## +===========================export_params=========================== +--save_dir:adaptive +--model_dir:adaptive +--fixed_input_shape:[-1,10,512,512] +norm_export:deploy/export/export_model.py +quant_export:null +fpgm_export:null +distill_export:null +export1:null +export2:null +===========================infer_params=========================== +infer_model:null +infer_export:null +infer_quant:False
+inference:test_tipc/infer.py +--device:cpu|gpu +--enable_mkldnn:True +--cpu_threads:6 +--batch_size:1 +--use_trt:False +--precision:fp32 +--model_dir:null +--config:null +--save_log_path:null +--benchmark:True +--model_name:bisenetv2 +null:null \ No newline at end of file diff --git a/test_tipc/configs/seg/fast_scnn/fast_scnn_rsseg.yaml b/test_tipc/configs/seg/fast_scnn/fast_scnn_rsseg.yaml new file mode 100644 index 0000000..145ce47 --- /dev/null +++ b/test_tipc/configs/seg/fast_scnn/fast_scnn_rsseg.yaml @@ -0,0 +1,11 @@ +# Configurations of Fast-SCNN with RSSeg dataset + +_base_: ../_base_/rsseg.yaml + +save_dir: ./test_tipc/output/seg/fast_scnn/ + +model: !Node + type: FastSCNN + args: + in_channels: 10 + num_classes: 5 \ No newline at end of file diff --git a/test_tipc/configs/seg/fast_scnn/train_infer_python.txt b/test_tipc/configs/seg/fast_scnn/train_infer_python.txt new file mode 100644 index 0000000..f5bdcd5 --- /dev/null +++ b/test_tipc/configs/seg/fast_scnn/train_infer_python.txt @@ -0,0 +1,53 @@ +===========================train_params=========================== +model_name:seg:fast_scnn +python:python +gpu_list:0|0,1 +use_gpu:null|null +--precision:null +--num_epochs:lite_train_lite_infer=3|lite_train_whole_infer=3|whole_train_whole_infer=20 +--save_dir:adaptive +--train_batch_size:lite_train_lite_infer=4|lite_train_whole_infer=4|whole_train_whole_infer=4 +--model_path:null +--config:lite_train_lite_infer=./test_tipc/configs/seg/fast_scnn/fast_scnn_rsseg.yaml|lite_train_whole_infer=./test_tipc/configs/seg/fast_scnn/fast_scnn_rsseg.yaml|whole_train_whole_infer=./test_tipc/configs/seg/fast_scnn/fast_scnn_rsseg.yaml +train_model_name:best_model +null:null +## +trainer:norm +norm_train:test_tipc/run_task.py train seg +pact_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:null +null:null +## +===========================export_params===========================
+--save_dir:adaptive +--model_dir:adaptive +--fixed_input_shape:[-1,10,512,512] +norm_export:deploy/export/export_model.py +quant_export:null +fpgm_export:null +distill_export:null +export1:null +export2:null +===========================infer_params=========================== +infer_model:null +infer_export:null +infer_quant:False +inference:test_tipc/infer.py +--device:cpu|gpu +--enable_mkldnn:True +--cpu_threads:6 +--batch_size:1 +--use_trt:False +--precision:fp32 +--model_dir:null +--config:null +--save_log_path:null +--benchmark:True +--model_name:fast_scnn +null:null \ No newline at end of file diff --git a/test_tipc/configs/seg/hrnet/hrnet_rsseg.yaml b/test_tipc/configs/seg/hrnet/hrnet_rsseg.yaml new file mode 100644 index 0000000..a7eff81 --- /dev/null +++ b/test_tipc/configs/seg/hrnet/hrnet_rsseg.yaml @@ -0,0 +1,11 @@ +# Configurations of HRNet with RSSeg dataset + +_base_: ../_base_/rsseg.yaml + +save_dir: ./test_tipc/output/seg/hrnet/ + +model: !Node + type: HRNet + args: + in_channels: 10 + num_classes: 5 \ No newline at end of file diff --git a/test_tipc/configs/seg/hrnet/train_infer_python.txt b/test_tipc/configs/seg/hrnet/train_infer_python.txt new file mode 100644 index 0000000..d1cafdc --- /dev/null +++ b/test_tipc/configs/seg/hrnet/train_infer_python.txt @@ -0,0 +1,53 @@ +===========================train_params=========================== +model_name:seg:hrnet +python:python +gpu_list:0|0,1 +use_gpu:null|null +--precision:null +--num_epochs:lite_train_lite_infer=3|lite_train_whole_infer=3|whole_train_whole_infer=20 +--save_dir:adaptive +--train_batch_size:lite_train_lite_infer=4|lite_train_whole_infer=4|whole_train_whole_infer=4 +--model_path:null +--config:lite_train_lite_infer=./test_tipc/configs/seg/hrnet/hrnet_rsseg.yaml|lite_train_whole_infer=./test_tipc/configs/seg/hrnet/hrnet_rsseg.yaml|whole_train_whole_infer=./test_tipc/configs/seg/hrnet/hrnet_rsseg.yaml +train_model_name:best_model +null:null +## +trainer:norm 
+norm_train:test_tipc/run_task.py train seg +pact_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:null +null:null +## +===========================export_params=========================== +--save_dir:adaptive +--model_dir:adaptive +--fixed_input_shape:[-1,10,512,512] +norm_export:deploy/export/export_model.py +quant_export:null +fpgm_export:null +distill_export:null +export1:null +export2:null +===========================infer_params=========================== +infer_model:null +infer_export:null +infer_quant:False +inference:test_tipc/infer.py +--device:cpu|gpu +--enable_mkldnn:True +--cpu_threads:6 +--batch_size:1 +--use_trt:False +--precision:fp32 +--model_dir:null +--config:null +--save_log_path:null +--benchmark:True +--model_name:hrnet +null:null \ No newline at end of file diff --git a/test_tipc/docs/test_train_inference_python.md b/test_tipc/docs/test_train_inference_python.md index b117cce..2d01ca3 100644 --- a/test_tipc/docs/test_train_inference_python.md +++ b/test_tipc/docs/test_train_inference_python.md @@ -19,6 +19,7 @@ Linux GPU/CPU 基础训练推理测试的主程序为`test_train_inference_pytho | 变化检测 | FC-Siam-conc | 正常训练 | 正常训练 | IoU=65.79% | | 变化检测 | FC-Siam-diff | 正常训练 | 正常训练 | IoU=61.23% | | 变化检测 | FCCDN | 正常训练 | 正常训练 | IoU=24.42% | +| 场景分类 | CondenseNet V2 | 正常训练 | 正常训练 | Acc(top1)= | | 场景分类 | HRNet | 正常训练 | 正常训练 | Acc(top1)=99.37% | | 场景分类 | MobileNetV3 | 正常训练 | 正常训练 | Acc(top1)=99.58% | | 场景分类 | ResNet50-vd | 正常训练 | 正常训练 | Acc(top1)=99.26% | @@ -30,8 +31,11 @@ Linux GPU/CPU 基础训练推理测试的主程序为`test_train_inference_pytho | 目标检测 | PP-YOLO Tiny | 正常训练 | 正常训练 | mAP=44.27% | | 目标检测 | PP-YOLOv2 | 正常训练 | 正常训练 | mAP=59.37% | | 目标检测 | YOLOv3 | 正常训练 | 正常训练 | mAP=47.33% | +| 图像分割 | BiSeNet V2 | 正常训练 | 正常训练 | mIoU= | | 图像分割 | DeepLab V3+ | 正常训练 | 正常训练 | mIoU=56.05% | | 图像分割 | FarSeg | 正常训练 | 正常训练 | mIoU=49.58% | +| 图像分割 | Fast-SCNN | 正常训练 | 正常训练 | mIoU= | +| 图像分割 | HRNet | 正常训练 | 正常训练 
| mIoU= | | 图像分割 | UNet | 正常训练 | 正常训练 | mIoU=55.50% | *注:参考预测精度为whole_train_whole_infer模式下单卡训练汇报的精度数据。* @@ -50,6 +54,7 @@ Linux GPU/CPU 基础训练推理测试的主程序为`test_train_inference_pytho | 变化检测 | FC-EF | 支持 | 支持 | 1 | | 变化检测 | FC-Siam-conc | 支持 | 支持 | 1 | | 变化检测 | FC-Siam-diff | 支持 | 支持 | 1 | +| 场景分类 | CondenseNet V2 | 支持 | 支持 | 1 | | 场景分类 | HRNet | 支持 | 支持 | 1 | | 场景分类 | MobileNetV3 | 支持 | 支持 | 1 | | 场景分类 | ResNet50-vd | 支持 | 支持 | 1 | @@ -61,8 +66,11 @@ Linux GPU/CPU 基础训练推理测试的主程序为`test_train_inference_pytho | 目标检测 | PP-YOLO Tiny | 支持 | 支持 | 1 | | 目标检测 | PP-YOLOv2 | 支持 | 支持 | 1 | | 目标检测 | YOLOv3 | 支持 | 支持 | 1 | +| 图像分割 | BiSeNet V2 | 支持 | 支持 | 1 | | 图像分割 | DeepLab V3+ | 支持 | 支持 | 1 | | 图像分割 | FarSeg | 支持 | 支持 | 1 | +| 图像分割 | Fast-SCNN | 支持 | 支持 | 1 | +| 图像分割 | HRNet | 支持 | 支持 | 1 | | 图像分割 | UNet | 支持 | 支持 | 1 | ## 2 测试流程 diff --git a/tutorials/train/README.md b/tutorials/train/README.md index 44c2491..105f98a 100644 --- a/tutorials/train/README.md +++ b/tutorials/train/README.md @@ -15,6 +15,7 @@ |change_detection/fccdn.py | 变化检测 | FCCDN | |change_detection/snunet.py | 变化检测 | SNUNet | |change_detection/stanet.py | 变化检测 | STANet | +|classification/condensenetv2.py | 场景分类 | CondenseNet V2 | |classification/hrnet.py | 场景分类 | HRNet | |classification/mobilenetv3.py | 场景分类 | MobileNetV3 | |classification/resnet50_vd.py | 场景分类 | ResNet50-vd | @@ -26,8 +27,11 @@ |object_detection/ppyolo_tiny.py | 目标检测 | PP-YOLO Tiny | |object_detection/ppyolov2.py | 目标检测 | PP-YOLOv2 | |object_detection/yolov3.py | 目标检测 | YOLOv3 | +|semantic_segmentation/bisenetv2.py | 图像分割 | BiSeNet V2 | |semantic_segmentation/deeplabv3p.py | 图像分割 | DeepLab V3+ | |semantic_segmentation/farseg.py | 图像分割 | FarSeg | +|semantic_segmentation/fast_scnn.py | 图像分割 | Fast-SCNN | +|semantic_segmentation/hrnet.py | 图像分割 | HRNet | |semantic_segmentation/unet.py | 图像分割 | UNet | ## 环境准备 diff --git a/tutorials/train/semantic_segmentation/bisenetv2.py b/tutorials/train/semantic_segmentation/bisenetv2.py new file mode 100644 index 
0000000..6d2754d --- /dev/null +++ b/tutorials/train/semantic_segmentation/bisenetv2.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python + +# 图像分割模型BiSeNet V2训练示例脚本 +# 执行此脚本前,请确认已正确安装PaddleRS库 + +import paddlers as pdrs +from paddlers import transforms as T + +# 数据集存放目录 +DATA_DIR = './data/rsseg/' +# 训练集`file_list`文件路径 +TRAIN_FILE_LIST_PATH = './data/rsseg/train.txt' +# 验证集`file_list`文件路径 +EVAL_FILE_LIST_PATH = './data/rsseg/val.txt' +# 数据集类别信息文件路径 +LABEL_LIST_PATH = './data/rsseg/labels.txt' +# 实验目录,保存输出的模型权重和结果 +EXP_DIR = './output/unet/' + +# 影像波段数量 +NUM_BANDS = 10 + +# 下载和解压多光谱地块分类数据集 +pdrs.utils.download_and_decompress( + 'https://paddlers.bj.bcebos.com/datasets/rsseg.zip', path='./data/') + +# 定义训练和验证时使用的数据变换(数据增强、预处理等) +# 使用Compose组合多种变换方式。Compose中包含的变换将按顺序串行执行 +# API说明:https://github.com/PaddlePaddle/PaddleRS/blob/develop/docs/apis/data.md +train_transforms = T.Compose([ + # 读取影像 + T.DecodeImg(), + # 将影像缩放到512x512大小 + T.Resize(target_size=512), + # 以50%的概率实施随机水平翻转 + T.RandomHorizontalFlip(prob=0.5), + # 将数据归一化到[-1,1] + T.Normalize( + mean=[0.5] * NUM_BANDS, std=[0.5] * NUM_BANDS), + T.ArrangeSegmenter('train') +]) + +eval_transforms = T.Compose([ + T.DecodeImg(), + T.Resize(target_size=512), + # 验证阶段与训练阶段的数据归一化方式必须相同 + T.Normalize( + mean=[0.5] * NUM_BANDS, std=[0.5] * NUM_BANDS), + T.ReloadMask(), + T.ArrangeSegmenter('eval') +]) + +# 分别构建训练和验证所用的数据集 +train_dataset = pdrs.datasets.SegDataset( + data_dir=DATA_DIR, + file_list=TRAIN_FILE_LIST_PATH, + label_list=LABEL_LIST_PATH, + transforms=train_transforms, + num_workers=0, + shuffle=True) + +eval_dataset = pdrs.datasets.SegDataset( + data_dir=DATA_DIR, + file_list=EVAL_FILE_LIST_PATH, + label_list=LABEL_LIST_PATH, + transforms=eval_transforms, + num_workers=0, + shuffle=False) + +# 构建BiSeNet V2模型 +# 目前已支持的模型请参考:https://github.com/PaddlePaddle/PaddleRS/blob/develop/docs/intro/model_zoo.md +# 模型输入参数请参考:https://github.com/PaddlePaddle/PaddleRS/blob/develop/paddlers/tasks/segmenter.py +model = pdrs.tasks.seg.BiSeNetV2( 
+ in_channels=NUM_BANDS, num_classes=len(train_dataset.labels)) + +# 执行模型训练 +model.train( + num_epochs=10, + train_dataset=train_dataset, + train_batch_size=4, + eval_dataset=eval_dataset, + save_interval_epochs=5, + # 每多少次迭代记录一次日志 + log_interval_steps=4, + save_dir=EXP_DIR, + # 初始学习率大小 + learning_rate=0.001, + # 是否使用early stopping策略,当精度不再改善时提前终止训练 + early_stop=False, + # 是否启用VisualDL日志功能 + use_vdl=True, + # 指定从某个检查点继续训练 + resume_checkpoint=None) diff --git a/tutorials/train/semantic_segmentation/fast_scnn.py b/tutorials/train/semantic_segmentation/fast_scnn.py new file mode 100644 index 0000000..df15ece --- /dev/null +++ b/tutorials/train/semantic_segmentation/fast_scnn.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python + +# 图像分割模型Fast-SCNN训练示例脚本 +# 执行此脚本前,请确认已正确安装PaddleRS库 + +import paddlers as pdrs +from paddlers import transforms as T + +# 数据集存放目录 +DATA_DIR = './data/rsseg/' +# 训练集`file_list`文件路径 +TRAIN_FILE_LIST_PATH = './data/rsseg/train.txt' +# 验证集`file_list`文件路径 +EVAL_FILE_LIST_PATH = './data/rsseg/val.txt' +# 数据集类别信息文件路径 +LABEL_LIST_PATH = './data/rsseg/labels.txt' +# 实验目录,保存输出的模型权重和结果 +EXP_DIR = './output/unet/' + +# 影像波段数量 +NUM_BANDS = 10 + +# 下载和解压多光谱地块分类数据集 +pdrs.utils.download_and_decompress( + 'https://paddlers.bj.bcebos.com/datasets/rsseg.zip', path='./data/') + +# 定义训练和验证时使用的数据变换(数据增强、预处理等) +# 使用Compose组合多种变换方式。Compose中包含的变换将按顺序串行执行 +# API说明:https://github.com/PaddlePaddle/PaddleRS/blob/develop/docs/apis/data.md +train_transforms = T.Compose([ + # 读取影像 + T.DecodeImg(), + # 将影像缩放到512x512大小 + T.Resize(target_size=512), + # 以50%的概率实施随机水平翻转 + T.RandomHorizontalFlip(prob=0.5), + # 将数据归一化到[-1,1] + T.Normalize( + mean=[0.5] * NUM_BANDS, std=[0.5] * NUM_BANDS), + T.ArrangeSegmenter('train') +]) + +eval_transforms = T.Compose([ + T.DecodeImg(), + T.Resize(target_size=512), + # 验证阶段与训练阶段的数据归一化方式必须相同 + T.Normalize( + mean=[0.5] * NUM_BANDS, std=[0.5] * NUM_BANDS), + T.ReloadMask(), + T.ArrangeSegmenter('eval') +]) + +# 分别构建训练和验证所用的数据集 +train_dataset = 
pdrs.datasets.SegDataset( + data_dir=DATA_DIR, + file_list=TRAIN_FILE_LIST_PATH, + label_list=LABEL_LIST_PATH, + transforms=train_transforms, + num_workers=0, + shuffle=True) + +eval_dataset = pdrs.datasets.SegDataset( + data_dir=DATA_DIR, + file_list=EVAL_FILE_LIST_PATH, + label_list=LABEL_LIST_PATH, + transforms=eval_transforms, + num_workers=0, + shuffle=False) + +# 构建Fast-SCNN模型 +# 目前已支持的模型请参考:https://github.com/PaddlePaddle/PaddleRS/blob/develop/docs/intro/model_zoo.md +# 模型输入参数请参考:https://github.com/PaddlePaddle/PaddleRS/blob/develop/paddlers/tasks/segmenter.py +model = pdrs.tasks.seg.FastSCNN( + in_channels=NUM_BANDS, num_classes=len(train_dataset.labels)) + +# 执行模型训练 +model.train( + num_epochs=10, + train_dataset=train_dataset, + train_batch_size=4, + eval_dataset=eval_dataset, + save_interval_epochs=5, + # 每多少次迭代记录一次日志 + log_interval_steps=4, + save_dir=EXP_DIR, + # 初始学习率大小 + learning_rate=0.001, + # 是否使用early stopping策略,当精度不再改善时提前终止训练 + early_stop=False, + # 是否启用VisualDL日志功能 + use_vdl=True, + # 指定从某个检查点继续训练 + resume_checkpoint=None) diff --git a/tutorials/train/semantic_segmentation/hrnet.py b/tutorials/train/semantic_segmentation/hrnet.py new file mode 100644 index 0000000..e8a9cdb --- /dev/null +++ b/tutorials/train/semantic_segmentation/hrnet.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python + +# 图像分割模型HRNet训练示例脚本 +# 执行此脚本前,请确认已正确安装PaddleRS库 + +import paddlers as pdrs +from paddlers import transforms as T + +# 数据集存放目录 +DATA_DIR = './data/rsseg/' +# 训练集`file_list`文件路径 +TRAIN_FILE_LIST_PATH = './data/rsseg/train.txt' +# 验证集`file_list`文件路径 +EVAL_FILE_LIST_PATH = './data/rsseg/val.txt' +# 数据集类别信息文件路径 +LABEL_LIST_PATH = './data/rsseg/labels.txt' +# 实验目录,保存输出的模型权重和结果 +EXP_DIR = './output/unet/' + +# 影像波段数量 +NUM_BANDS = 10 + +# 下载和解压多光谱地块分类数据集 +pdrs.utils.download_and_decompress( + 'https://paddlers.bj.bcebos.com/datasets/rsseg.zip', path='./data/') + +# 定义训练和验证时使用的数据变换(数据增强、预处理等) +# 使用Compose组合多种变换方式。Compose中包含的变换将按顺序串行执行 +# 
API说明:https://github.com/PaddlePaddle/PaddleRS/blob/develop/docs/apis/data.md +train_transforms = T.Compose([ + # 读取影像 + T.DecodeImg(), + # 将影像缩放到512x512大小 + T.Resize(target_size=512), + # 以50%的概率实施随机水平翻转 + T.RandomHorizontalFlip(prob=0.5), + # 将数据归一化到[-1,1] + T.Normalize( + mean=[0.5] * NUM_BANDS, std=[0.5] * NUM_BANDS), + T.ArrangeSegmenter('train') +]) + +eval_transforms = T.Compose([ + T.DecodeImg(), + T.Resize(target_size=512), + # 验证阶段与训练阶段的数据归一化方式必须相同 + T.Normalize( + mean=[0.5] * NUM_BANDS, std=[0.5] * NUM_BANDS), + T.ReloadMask(), + T.ArrangeSegmenter('eval') +]) + +# 分别构建训练和验证所用的数据集 +train_dataset = pdrs.datasets.SegDataset( + data_dir=DATA_DIR, + file_list=TRAIN_FILE_LIST_PATH, + label_list=LABEL_LIST_PATH, + transforms=train_transforms, + num_workers=0, + shuffle=True) + +eval_dataset = pdrs.datasets.SegDataset( + data_dir=DATA_DIR, + file_list=EVAL_FILE_LIST_PATH, + label_list=LABEL_LIST_PATH, + transforms=eval_transforms, + num_workers=0, + shuffle=False) + +# 构建HRNet模型 +# 目前已支持的模型请参考:https://github.com/PaddlePaddle/PaddleRS/blob/develop/docs/intro/model_zoo.md +# 模型输入参数请参考:https://github.com/PaddlePaddle/PaddleRS/blob/develop/paddlers/tasks/segmenter.py +model = pdrs.tasks.seg.HRNet( + in_channels=NUM_BANDS, num_classes=len(train_dataset.labels)) + +# 执行模型训练 +model.train( + num_epochs=10, + train_dataset=train_dataset, + train_batch_size=4, + eval_dataset=eval_dataset, + save_interval_epochs=5, + # 每多少次迭代记录一次日志 + log_interval_steps=4, + save_dir=EXP_DIR, + # 初始学习率大小 + learning_rate=0.001, + # 是否使用early stopping策略,当精度不再改善时提前终止训练 + early_stop=False, + # 是否启用VisualDL日志功能 + use_vdl=True, + # 指定从某个检查点继续训练 + resume_checkpoint=None) From 3929c7e26ea33bb8c02d166228d14fe687c05fa1 Mon Sep 17 00:00:00 2001 From: Bobholamovic Date: Tue, 13 Sep 2022 21:13:33 +0800 Subject: [PATCH 07/15] Fix bugs --- paddlers/models/ppseg/core/predict.py | 6 +++--- paddlers/models/ppseg/core/train.py | 7 ++++--- paddlers/models/ppseg/core/val.py | 4 ++-- 
paddlers/models/ppseg/cvlibs/callbacks.py | 4 ++-- paddlers/models/ppseg/cvlibs/config.py | 6 +++--- paddlers/models/ppseg/cvlibs/manager.py | 4 ++-- paddlers/models/ppseg/cvlibs/param_init.py | 10 ++++----- paddlers/models/ppseg/datasets/ade.py | 12 +++++------ paddlers/models/ppseg/datasets/chase_db1.py | 10 ++++----- paddlers/models/ppseg/datasets/cityscapes.py | 6 +++--- paddlers/models/ppseg/datasets/cocostuff.py | 6 +++--- paddlers/models/ppseg/datasets/dataset.py | 10 ++++----- paddlers/models/ppseg/datasets/drive.py | 10 ++++----- paddlers/models/ppseg/datasets/eg1800.py | 12 +++++------ paddlers/models/ppseg/datasets/hrf.py | 10 ++++----- .../mini_deep_globe_road_extraction.py | 8 +++---- .../models/ppseg/datasets/optic_disc_seg.py | 8 +++---- .../models/ppseg/datasets/pascal_context.py | 6 +++--- .../models/ppseg/datasets/pp_humanseg14k.py | 4 ++-- paddlers/models/ppseg/datasets/pssl.py | 6 +++--- paddlers/models/ppseg/datasets/stare.py | 10 ++++----- paddlers/models/ppseg/datasets/supervisely.py | 12 +++++------ paddlers/models/ppseg/datasets/voc.py | 10 ++++----- paddlers/models/ppseg/models/ann.py | 6 +++--- .../models/ppseg/models/attention_unet.py | 6 +++--- .../models/ppseg/models/backbones/ghostnet.py | 4 ++-- .../models/ppseg/models/backbones/hrnet.py | 6 +++--- .../ppseg/models/backbones/lite_hrnet.py | 4 ++-- .../ppseg/models/backbones/mix_transformer.py | 6 +++--- .../ppseg/models/backbones/mobilenetv2.py | 4 ++-- .../ppseg/models/backbones/mobilenetv3.py | 6 +++--- .../ppseg/models/backbones/resnet_vd.py | 6 +++--- .../ppseg/models/backbones/shufflenetv2.py | 4 ++-- .../models/ppseg/models/backbones/stdcnet.py | 6 +++--- .../models/backbones/swin_transformer.py | 6 +++--- .../ppseg/models/backbones/top_transformer.py | 6 +++--- .../models/backbones/vision_transformer.py | 6 +++--- .../models/backbones/xception_deeplab.py | 6 +++--- paddlers/models/ppseg/models/bisenet.py | 6 +++--- paddlers/models/ppseg/models/bisenetv1.py | 6 +++--- 
paddlers/models/ppseg/models/ccnet.py | 6 +++--- paddlers/models/ppseg/models/danet.py | 6 +++--- paddlers/models/ppseg/models/ddrnet.py | 6 +++--- .../models/ppseg/models/decoupled_segnet.py | 10 ++++----- paddlers/models/ppseg/models/deeplab.py | 6 +++--- paddlers/models/ppseg/models/dmnet.py | 6 +++--- paddlers/models/ppseg/models/dnlnet.py | 6 +++--- paddlers/models/ppseg/models/emanet.py | 6 +++--- paddlers/models/ppseg/models/encnet.py | 6 +++--- paddlers/models/ppseg/models/enet.py | 6 +++--- paddlers/models/ppseg/models/espnet.py | 6 +++--- paddlers/models/ppseg/models/espnetv1.py | 6 +++--- paddlers/models/ppseg/models/fast_scnn.py | 6 +++--- paddlers/models/ppseg/models/fastfcn.py | 6 +++--- paddlers/models/ppseg/models/fcn.py | 6 +++--- paddlers/models/ppseg/models/gcnet.py | 6 +++--- paddlers/models/ppseg/models/ginet.py | 6 +++--- paddlers/models/ppseg/models/glore.py | 6 +++--- paddlers/models/ppseg/models/gscnn.py | 10 ++++----- paddlers/models/ppseg/models/hardnet.py | 6 +++--- .../models/ppseg/models/hrnet_contrast.py | 6 +++--- paddlers/models/ppseg/models/isanet.py | 6 +++--- .../models/ppseg/models/layers/activation.py | 2 +- .../models/ppseg/models/layers/attention.py | 2 +- .../models/ppseg/models/layers/layer_libs.py | 2 +- .../models/ppseg/models/layers/nonlocal2d.py | 2 +- .../ppseg/models/layers/pyramid_pool.py | 2 +- .../ppseg/models/layers/tensor_fusion.py | 4 ++-- .../losses/binary_cross_entropy_loss.py | 2 +- .../losses/bootstrapped_cross_entropy.py | 2 +- .../ppseg/models/losses/cross_entropy_loss.py | 2 +- .../decoupledsegnet_relax_boundary_loss.py | 2 +- .../models/losses/detail_aggregate_loss.py | 2 +- .../models/ppseg/models/losses/dice_loss.py | 2 +- .../models/losses/edge_attention_loss.py | 4 ++-- .../models/ppseg/models/losses/focal_loss.py | 2 +- .../models/losses/gscnn_dual_task_loss.py | 2 +- .../models/ppseg/models/losses/kl_loss.py | 2 +- .../models/ppseg/models/losses/l1_loss.py | 2 +- 
.../models/ppseg/models/losses/lovasz_loss.py | 2 +- .../models/losses/mean_square_error_loss.py | 2 +- .../models/ppseg/models/losses/mixed_loss.py | 2 +- .../models/losses/ohem_cross_entropy_loss.py | 2 +- .../models/losses/ohem_edge_attention_loss.py | 4 ++-- .../pixel_contrast_cross_entropy_loss.py | 2 +- .../models/losses/point_cross_entropy_loss.py | 2 +- .../models/ppseg/models/losses/rmi_loss.py | 2 +- .../losses/semantic_connectivity_loss.py | 2 +- .../semantic_encode_cross_entropy_loss.py | 2 +- paddlers/models/ppseg/models/lraspp.py | 6 +++--- .../models/ppseg/models/mla_transformer.py | 6 +++--- paddlers/models/ppseg/models/mobileseg.py | 6 +++--- paddlers/models/ppseg/models/ocrnet.py | 6 +++--- paddlers/models/ppseg/models/pfpnnet.py | 6 +++--- paddlers/models/ppseg/models/pointrend.py | 6 +++--- paddlers/models/ppseg/models/portraitnet.py | 4 ++-- paddlers/models/ppseg/models/pp_liteseg.py | 8 +++---- .../models/ppseg/models/pphumanseg_lite.py | 8 +++---- paddlers/models/ppseg/models/pspnet.py | 6 +++--- paddlers/models/ppseg/models/segformer.py | 6 +++--- paddlers/models/ppseg/models/segmenter.py | 6 +++--- paddlers/models/ppseg/models/segnet.py | 6 +++--- paddlers/models/ppseg/models/setr.py | 6 +++--- paddlers/models/ppseg/models/sfnet.py | 6 +++--- paddlers/models/ppseg/models/sinet.py | 6 +++--- paddlers/models/ppseg/models/stdcseg.py | 8 +++---- paddlers/models/ppseg/models/topformer.py | 8 +++---- paddlers/models/ppseg/models/u2net.py | 6 +++--- paddlers/models/ppseg/models/unet.py | 6 +++--- paddlers/models/ppseg/models/unet_3plus.py | 6 +++--- paddlers/models/ppseg/models/unet_plusplus.py | 8 +++---- paddlers/models/ppseg/models/upernet.py | 6 +++--- .../models/ppseg/transforms/transforms.py | 6 +++--- paddlers/models/ppseg/utils/env/seg_env.py | 2 +- paddlers/models/ppseg/utils/env/sys_env.py | 4 ++-- paddlers/models/ppseg/utils/utils.py | 4 ++-- paddlers/tasks/classifier.py | 2 +- paddlers/tasks/segmenter.py | 21 ++++++++++++------- 
paddlers/utils/checkpoint.py | 9 ++++---- .../train/classification/condensenetv2.py | 2 +- .../train/semantic_segmentation/bisenetv2.py | 2 +- .../train/semantic_segmentation/fast_scnn.py | 2 +- .../train/semantic_segmentation/hrnet.py | 2 +- 123 files changed, 342 insertions(+), 335 deletions(-) diff --git a/paddlers/models/ppseg/core/predict.py b/paddlers/models/ppseg/core/predict.py index 98097c7..68b6ea2 100644 --- a/paddlers/models/ppseg/core/predict.py +++ b/paddlers/models/ppseg/core/predict.py @@ -19,9 +19,9 @@ import cv2 import numpy as np import paddle -from paddleseg import utils -from paddleseg.core import infer -from paddleseg.utils import logger, progbar, visualize +from paddlers.models.ppseg import utils +from paddlers.models.ppseg.core import infer +from paddlers.models.ppseg.utils import logger, progbar, visualize def mkdir(path): diff --git a/paddlers/models/ppseg/core/train.py b/paddlers/models/ppseg/core/train.py index fae72d2..0ef168c 100644 --- a/paddlers/models/ppseg/core/train.py +++ b/paddlers/models/ppseg/core/train.py @@ -20,9 +20,10 @@ import shutil import paddle import paddle.nn.functional as F -from paddleseg.utils import (TimeAverager, calculate_eta, resume, logger, - worker_init_fn, train_profiler, op_flops_funs) -from paddleseg.core.val import evaluate +from paddlers.models.ppseg.utils import (TimeAverager, calculate_eta, resume, + logger, worker_init_fn, train_profiler, + op_flops_funs) +from paddlers.models.ppseg.core.val import evaluate def check_logits_losses(logits_list, losses): diff --git a/paddlers/models/ppseg/core/val.py b/paddlers/models/ppseg/core/val.py index 80a820b..7e3915d 100644 --- a/paddlers/models/ppseg/core/val.py +++ b/paddlers/models/ppseg/core/val.py @@ -19,8 +19,8 @@ import time import paddle import paddle.nn.functional as F -from paddleseg.utils import metrics, TimeAverager, calculate_eta, logger, progbar -from paddleseg.core import infer +from paddlers.models.ppseg.utils import metrics, TimeAverager, 
calculate_eta, logger, progbar +from paddlers.models.ppseg.core import infer np.set_printoptions(suppress=True) diff --git a/paddlers/models/ppseg/cvlibs/callbacks.py b/paddlers/models/ppseg/cvlibs/callbacks.py index 1188b2c..075e1eb 100644 --- a/paddlers/models/ppseg/cvlibs/callbacks.py +++ b/paddlers/models/ppseg/cvlibs/callbacks.py @@ -19,8 +19,8 @@ import numpy as np import paddle from paddle.distributed.parallel import ParallelEnv from visualdl import LogWriter -from paddleseg.utils.progbar import Progbar -import paddleseg.utils.logger as logger +from paddlers.models.ppseg.utils.progbar import Progbar +import paddlers.models.ppseg.utils.logger as logger class CallbackList(object): diff --git a/paddlers/models/ppseg/cvlibs/config.py b/paddlers/models/ppseg/cvlibs/config.py index af9c76c..7dd96c5 100644 --- a/paddlers/models/ppseg/cvlibs/config.py +++ b/paddlers/models/ppseg/cvlibs/config.py @@ -22,8 +22,8 @@ import paddle import yaml import six -from paddleseg.cvlibs import manager -from paddleseg.utils import logger +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.utils import logger class Config(object): @@ -54,7 +54,7 @@ class Config(object): Examples: - from paddleseg.cvlibs.config import Config + from paddlers.models.ppseg.cvlibs.config import Config # Create a cfg object with yaml file path. cfg = Config(yaml_cfg_path) diff --git a/paddlers/models/ppseg/cvlibs/manager.py b/paddlers/models/ppseg/cvlibs/manager.py index 8437445..18c8e21 100644 --- a/paddlers/models/ppseg/cvlibs/manager.py +++ b/paddlers/models/ppseg/cvlibs/manager.py @@ -31,7 +31,7 @@ class ComponentManager: Examples 1: - from paddleseg.cvlibs.manager import ComponentManager + from paddlers.models.ppseg.cvlibs.manager import ComponentManager model_manager = ComponentManager() @@ -49,7 +49,7 @@ class ComponentManager: Examples 2: # Or an easier way, using it as a Python decorator, while just add it above the class declaration. 
- from paddleseg.cvlibs.manager import ComponentManager + from paddlers.models.ppseg.cvlibs.manager import ComponentManager model_manager = ComponentManager() diff --git a/paddlers/models/ppseg/cvlibs/param_init.py b/paddlers/models/ppseg/cvlibs/param_init.py index 59dce4d..3d66695 100644 --- a/paddlers/models/ppseg/cvlibs/param_init.py +++ b/paddlers/models/ppseg/cvlibs/param_init.py @@ -24,7 +24,7 @@ def constant_init(param, **kwargs): Examples: - from paddleseg.cvlibs import param_init + from paddlers.models.ppseg.cvlibs import param_init import paddle.nn as nn linear = nn.Linear(2, 4) @@ -46,7 +46,7 @@ def normal_init(param, **kwargs): Examples: - from paddleseg.cvlibs import param_init + from paddlers.models.ppseg.cvlibs import param_init import paddle.nn as nn linear = nn.Linear(2, 4) @@ -79,7 +79,7 @@ def kaiming_normal_init(param, **kwargs): Examples: - from paddleseg.cvlibs import param_init + from paddlers.models.ppseg.cvlibs import param_init import paddle.nn as nn linear = nn.Linear(2, 4) @@ -109,7 +109,7 @@ def kaiming_uniform(param, **kwargs): Examples: - from paddleseg.cvlibs import param_init + from paddlers.models.ppseg.cvlibs import param_init import paddle.nn as nn linear = nn.Linear(2, 4) @@ -136,7 +136,7 @@ def xavier_uniform(param, **kwargs): Examples: - from paddleseg.cvlibs import param_init + from paddlers.models.ppseg.cvlibs import param_init import paddle.nn as nn linear = nn.Linear(2, 4) diff --git a/paddlers/models/ppseg/datasets/ade.py b/paddlers/models/ppseg/datasets/ade.py index 8a9a2e9..26579ff 100644 --- a/paddlers/models/ppseg/datasets/ade.py +++ b/paddlers/models/ppseg/datasets/ade.py @@ -17,12 +17,12 @@ import os import numpy as np from PIL import Image -from paddleseg.datasets import Dataset -from paddleseg.utils.download import download_file_and_uncompress -from paddleseg.utils import seg_env -from paddleseg.cvlibs import manager -from paddleseg.transforms import Compose -import paddleseg.transforms.functional as F +from 
paddlers.models.ppseg.datasets import Dataset +from paddlers.models.ppseg.utils.download import download_file_and_uncompress +from paddlers.models.ppseg.utils import seg_env +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.transforms import Compose +import paddlers.models.ppseg.transforms.functional as F URL = "http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip" diff --git a/paddlers/models/ppseg/datasets/chase_db1.py b/paddlers/models/ppseg/datasets/chase_db1.py index 1b25380..9ddec59 100644 --- a/paddlers/models/ppseg/datasets/chase_db1.py +++ b/paddlers/models/ppseg/datasets/chase_db1.py @@ -14,11 +14,11 @@ import os -from paddleseg.utils.download import download_file_and_uncompress -from paddleseg.utils import seg_env -from paddleseg.cvlibs import manager -from paddleseg.transforms import Compose -from paddleseg.datasets import Dataset +from paddlers.models.ppseg.utils.download import download_file_and_uncompress +from paddlers.models.ppseg.utils import seg_env +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.transforms import Compose +from paddlers.models.ppseg.datasets import Dataset URL = 'https://bj.bcebos.com/paddleseg/dataset/chase_db1/chase_db1.zip' diff --git a/paddlers/models/ppseg/datasets/cityscapes.py b/paddlers/models/ppseg/datasets/cityscapes.py index 564926e..c750bb2 100644 --- a/paddlers/models/ppseg/datasets/cityscapes.py +++ b/paddlers/models/ppseg/datasets/cityscapes.py @@ -15,9 +15,9 @@ import os import glob -from paddleseg.datasets import Dataset -from paddleseg.cvlibs import manager -from paddleseg.transforms import Compose +from paddlers.models.ppseg.datasets import Dataset +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.transforms import Compose @manager.DATASETS.add_component diff --git a/paddlers/models/ppseg/datasets/cocostuff.py b/paddlers/models/ppseg/datasets/cocostuff.py index ae66461..2b37176 100644 --- 
a/paddlers/models/ppseg/datasets/cocostuff.py +++ b/paddlers/models/ppseg/datasets/cocostuff.py @@ -15,9 +15,9 @@ import os import glob -from paddleseg.datasets import Dataset -from paddleseg.cvlibs import manager -from paddleseg.transforms import Compose +from paddlers.models.ppseg.datasets import Dataset +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.transforms import Compose @manager.DATASETS.add_component diff --git a/paddlers/models/ppseg/datasets/dataset.py b/paddlers/models/ppseg/datasets/dataset.py index d518f5b..a52f0ee 100644 --- a/paddlers/models/ppseg/datasets/dataset.py +++ b/paddlers/models/ppseg/datasets/dataset.py @@ -18,9 +18,9 @@ import paddle import numpy as np from PIL import Image -from paddleseg.cvlibs import manager -from paddleseg.transforms import Compose -import paddleseg.transforms.functional as F +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.transforms import Compose +import paddlers.models.ppseg.transforms.functional as F @manager.DATASETS.add_component @@ -46,8 +46,8 @@ class Dataset(paddle.io.Dataset): Examples: - import paddleseg.transforms as T - from paddleseg.datasets import Dataset + import paddlers.models.ppseg.transforms as T + from paddlers.models.ppseg.datasets import Dataset transforms = [T.RandomPaddingCrop(crop_size=(512,512)), T.Normalize()] dataset_root = 'dataset_root_path' diff --git a/paddlers/models/ppseg/datasets/drive.py b/paddlers/models/ppseg/datasets/drive.py index 8984aa0..f4180fc 100644 --- a/paddlers/models/ppseg/datasets/drive.py +++ b/paddlers/models/ppseg/datasets/drive.py @@ -14,11 +14,11 @@ import os -from paddleseg.utils.download import download_file_and_uncompress -from paddleseg.utils import seg_env -from paddleseg.cvlibs import manager -from paddleseg.transforms import Compose -from paddleseg.datasets import Dataset +from paddlers.models.ppseg.utils.download import download_file_and_uncompress +from paddlers.models.ppseg.utils
import seg_env +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.transforms import Compose +from paddlers.models.ppseg.datasets import Dataset URL = 'https://bj.bcebos.com/paddleseg/dataset/drive/drive.zip' diff --git a/paddlers/models/ppseg/datasets/eg1800.py b/paddlers/models/ppseg/datasets/eg1800.py index 9005083..b43850a 100644 --- a/paddlers/models/ppseg/datasets/eg1800.py +++ b/paddlers/models/ppseg/datasets/eg1800.py @@ -18,12 +18,12 @@ import copy import cv2 import numpy as np -from paddleseg.datasets import Dataset -from paddleseg.cvlibs import manager -from paddleseg.transforms import Compose -from paddleseg.utils.download import download_file_and_uncompress -from paddleseg.utils import seg_env -import paddleseg.transforms.functional as F +from paddlers.models.ppseg.datasets import Dataset +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.transforms import Compose +from paddlers.models.ppseg.utils.download import download_file_and_uncompress +from paddlers.models.ppseg.utils import seg_env +import paddlers.models.ppseg.transforms.functional as F URL = "https://paddleseg.bj.bcebos.com/dataset/EG1800.zip" diff --git a/paddlers/models/ppseg/datasets/hrf.py b/paddlers/models/ppseg/datasets/hrf.py index fb378a7..eadd8b2 100644 --- a/paddlers/models/ppseg/datasets/hrf.py +++ b/paddlers/models/ppseg/datasets/hrf.py @@ -14,11 +14,11 @@ import os -from paddleseg.utils.download import download_file_and_uncompress -from paddleseg.utils import seg_env -from paddleseg.cvlibs import manager -from paddleseg.transforms import Compose -from paddleseg.datasets import Dataset +from paddlers.models.ppseg.utils.download import download_file_and_uncompress +from paddlers.models.ppseg.utils import seg_env +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.transforms import Compose +from paddlers.models.ppseg.datasets import Dataset URL = 'https://bj.bcebos.com/paddleseg/dataset/hrf/hrf.zip' diff 
--git a/paddlers/models/ppseg/datasets/mini_deep_globe_road_extraction.py b/paddlers/models/ppseg/datasets/mini_deep_globe_road_extraction.py index 7180a9d..253967a 100644 --- a/paddlers/models/ppseg/datasets/mini_deep_globe_road_extraction.py +++ b/paddlers/models/ppseg/datasets/mini_deep_globe_road_extraction.py @@ -15,10 +15,10 @@ import os from .dataset import Dataset -from paddleseg.utils.download import download_file_and_uncompress -from paddleseg.utils import seg_env -from paddleseg.cvlibs import manager -from paddleseg.transforms import Compose +from paddlers.models.ppseg.utils.download import download_file_and_uncompress +from paddlers.models.ppseg.utils import seg_env +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.transforms import Compose URL = "https://paddleseg.bj.bcebos.com/dataset/MiniDeepGlobeRoadExtraction.zip" diff --git a/paddlers/models/ppseg/datasets/optic_disc_seg.py b/paddlers/models/ppseg/datasets/optic_disc_seg.py index 36332b0..805c80d 100644 --- a/paddlers/models/ppseg/datasets/optic_disc_seg.py +++ b/paddlers/models/ppseg/datasets/optic_disc_seg.py @@ -15,10 +15,10 @@ import os from .dataset import Dataset -from paddleseg.utils.download import download_file_and_uncompress -from paddleseg.utils import seg_env -from paddleseg.cvlibs import manager -from paddleseg.transforms import Compose +from paddlers.models.ppseg.utils.download import download_file_and_uncompress +from paddlers.models.ppseg.utils import seg_env +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.transforms import Compose URL = "https://paddleseg.bj.bcebos.com/dataset/optic_disc_seg.zip" diff --git a/paddlers/models/ppseg/datasets/pascal_context.py b/paddlers/models/ppseg/datasets/pascal_context.py index d76ce31..b7156a3 100644 --- a/paddlers/models/ppseg/datasets/pascal_context.py +++ b/paddlers/models/ppseg/datasets/pascal_context.py @@ -15,9 +15,9 @@ import os from PIL import Image -from paddleseg.datasets 
import Dataset -from paddleseg.cvlibs import manager -from paddleseg.transforms import Compose +from paddlers.models.ppseg.datasets import Dataset +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.transforms import Compose @manager.DATASETS.add_component diff --git a/paddlers/models/ppseg/datasets/pp_humanseg14k.py b/paddlers/models/ppseg/datasets/pp_humanseg14k.py index e809611..ba124a3 100644 --- a/paddlers/models/ppseg/datasets/pp_humanseg14k.py +++ b/paddlers/models/ppseg/datasets/pp_humanseg14k.py @@ -15,8 +15,8 @@ import os from .dataset import Dataset -from paddleseg.cvlibs import manager -from paddleseg.transforms import Compose +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.transforms import Compose @manager.DATASETS.add_component diff --git a/paddlers/models/ppseg/datasets/pssl.py b/paddlers/models/ppseg/datasets/pssl.py index 6ebe7fb..4816af4 100644 --- a/paddlers/models/ppseg/datasets/pssl.py +++ b/paddlers/models/ppseg/datasets/pssl.py @@ -15,9 +15,9 @@ import os import numpy as np -from paddleseg.datasets import Dataset -from paddleseg.cvlibs import manager -from paddleseg.transforms import Compose +from paddlers.models.ppseg.datasets import Dataset +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.transforms import Compose @manager.DATASETS.add_component diff --git a/paddlers/models/ppseg/datasets/stare.py b/paddlers/models/ppseg/datasets/stare.py index 5de8be5..1acf64e 100644 --- a/paddlers/models/ppseg/datasets/stare.py +++ b/paddlers/models/ppseg/datasets/stare.py @@ -14,11 +14,11 @@ import os -from paddleseg.utils.download import download_file_and_uncompress -from paddleseg.utils import seg_env -from paddleseg.cvlibs import manager -from paddleseg.transforms import Compose -from paddleseg.datasets import Dataset +from paddlers.models.ppseg.utils.download import download_file_and_uncompress +from paddlers.models.ppseg.utils import seg_env +from 
paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.transforms import Compose +from paddlers.models.ppseg.datasets import Dataset URL = 'https://bj.bcebos.com/paddleseg/dataset/stare/stare.zip' diff --git a/paddlers/models/ppseg/datasets/supervisely.py b/paddlers/models/ppseg/datasets/supervisely.py index accfa46..3508cdd 100644 --- a/paddlers/models/ppseg/datasets/supervisely.py +++ b/paddlers/models/ppseg/datasets/supervisely.py @@ -18,12 +18,12 @@ import copy import cv2 import numpy as np -from paddleseg.cvlibs import manager -from paddleseg.transforms import Compose -from paddleseg.datasets import Dataset -from paddleseg.utils.download import download_file_and_uncompress -from paddleseg.utils import seg_env -import paddleseg.transforms.functional as F +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.transforms import Compose +from paddlers.models.ppseg.datasets import Dataset +from paddlers.models.ppseg.utils.download import download_file_and_uncompress +from paddlers.models.ppseg.utils import seg_env +import paddlers.models.ppseg.transforms.functional as F URL = "https://paddleseg.bj.bcebos.com/dataset/Supervisely_face.zip" diff --git a/paddlers/models/ppseg/datasets/voc.py b/paddlers/models/ppseg/datasets/voc.py index f48ad50..ffaf5d3 100644 --- a/paddlers/models/ppseg/datasets/voc.py +++ b/paddlers/models/ppseg/datasets/voc.py @@ -14,11 +14,11 @@ import os -from paddleseg.datasets import Dataset -from paddleseg.utils.download import download_file_and_uncompress -from paddleseg.utils import seg_env -from paddleseg.cvlibs import manager -from paddleseg.transforms import Compose +from paddlers.models.ppseg.datasets import Dataset +from paddlers.models.ppseg.utils.download import download_file_and_uncompress +from paddlers.models.ppseg.utils import seg_env +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.transforms import Compose URL = 
"http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar" diff --git a/paddlers/models/ppseg/models/ann.py b/paddlers/models/ppseg/models/ann.py index aa2af1e..20b81b3 100644 --- a/paddlers/models/ppseg/models/ann.py +++ b/paddlers/models/ppseg/models/ann.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg.cvlibs import manager -from paddleseg.models import layers -from paddleseg.utils import utils +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.models import layers +from paddlers.models.ppseg.utils import utils @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/attention_unet.py b/paddlers/models/ppseg/models/attention_unet.py index 9b05c87..350401f 100644 --- a/paddlers/models/ppseg/models/attention_unet.py +++ b/paddlers/models/ppseg/models/attention_unet.py @@ -14,9 +14,9 @@ import paddle import paddle.nn as nn -from paddleseg.cvlibs import manager -from paddleseg.models import layers -from paddleseg import utils +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.models import layers +from paddlers.models.ppseg import utils import numpy as np diff --git a/paddlers/models/ppseg/models/backbones/ghostnet.py b/paddlers/models/ppseg/models/backbones/ghostnet.py index c545515..1ce3415 100644 --- a/paddlers/models/ppseg/models/backbones/ghostnet.py +++ b/paddlers/models/ppseg/models/backbones/ghostnet.py @@ -23,8 +23,8 @@ from paddle.nn import Conv2D, BatchNorm, AdaptiveAvgPool2D, Linear from paddle.regularizer import L2Decay from paddle.nn.initializer import Uniform, KaimingNormal -from paddleseg.cvlibs import manager -from paddleseg.utils import utils, logger +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.utils import utils, logger __all__ = ["GhostNet_x0_5", "GhostNet_x1_0", "GhostNet_x1_3"] diff --git a/paddlers/models/ppseg/models/backbones/hrnet.py 
b/paddlers/models/ppseg/models/backbones/hrnet.py index 96eb80f..1e45c18 100644 --- a/paddlers/models/ppseg/models/backbones/hrnet.py +++ b/paddlers/models/ppseg/models/backbones/hrnet.py @@ -18,9 +18,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg.cvlibs import manager, param_init -from paddleseg.models import layers -from paddleseg.utils import utils +from paddlers.models.ppseg.cvlibs import manager, param_init +from paddlers.models.ppseg.models import layers +from paddlers.models.ppseg.utils import utils __all__ = [ "HRNet_W18_Small_V1", "HRNet_W18_Small_V2", "HRNet_W18", "HRNet_W30", diff --git a/paddlers/models/ppseg/models/backbones/lite_hrnet.py b/paddlers/models/ppseg/models/backbones/lite_hrnet.py index 36e575b..18c244db 100644 --- a/paddlers/models/ppseg/models/backbones/lite_hrnet.py +++ b/paddlers/models/ppseg/models/backbones/lite_hrnet.py @@ -24,8 +24,8 @@ from paddle import ParamAttr from paddle.regularizer import L2Decay from paddle.nn.initializer import Normal, Constant -from paddleseg.cvlibs import manager -from paddleseg import utils +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg import utils __all__ = [ "Lite_HRNet_18", "Lite_HRNet_30", "Lite_HRNet_naive", diff --git a/paddlers/models/ppseg/models/backbones/mix_transformer.py b/paddlers/models/ppseg/models/backbones/mix_transformer.py index 62e4e99..87a638f 100644 --- a/paddlers/models/ppseg/models/backbones/mix_transformer.py +++ b/paddlers/models/ppseg/models/backbones/mix_transformer.py @@ -20,9 +20,9 @@ import paddle.nn as nn import paddle.nn.functional as F import paddle.nn.initializer as paddle_init -from paddleseg.cvlibs import manager -from paddleseg.utils import utils -from paddleseg.models.backbones.transformer_utils import * +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.utils import utils +from paddlers.models.ppseg.models.backbones.transformer_utils import * class Mlp(nn.Layer): 
diff --git a/paddlers/models/ppseg/models/backbones/mobilenetv2.py b/paddlers/models/ppseg/models/backbones/mobilenetv2.py index 16cf4e0..ea25eb1 100644 --- a/paddlers/models/ppseg/models/backbones/mobilenetv2.py +++ b/paddlers/models/ppseg/models/backbones/mobilenetv2.py @@ -19,8 +19,8 @@ import paddle.nn.functional as F from paddle.nn import Conv2D, BatchNorm, Linear, Dropout from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D -from paddleseg.cvlibs import manager -from paddleseg import utils +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg import utils __all__ = [ "MobileNetV2_x0_25", diff --git a/paddlers/models/ppseg/models/backbones/mobilenetv3.py b/paddlers/models/ppseg/models/backbones/mobilenetv3.py index a8d37db..a673dec 100644 --- a/paddlers/models/ppseg/models/backbones/mobilenetv3.py +++ b/paddlers/models/ppseg/models/backbones/mobilenetv3.py @@ -18,9 +18,9 @@ from paddle import ParamAttr from paddle.regularizer import L2Decay from paddle.nn import AdaptiveAvgPool2D, BatchNorm, Conv2D, Dropout, Linear -from paddleseg.cvlibs import manager -from paddleseg.utils import utils, logger -from paddleseg.models import layers +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.utils import utils, logger +from paddlers.models.ppseg.models import layers __all__ = [ "MobileNetV3_small_x0_35", "MobileNetV3_small_x0_5", diff --git a/paddlers/models/ppseg/models/backbones/resnet_vd.py b/paddlers/models/ppseg/models/backbones/resnet_vd.py index 0820e61..059a8d0 100644 --- a/paddlers/models/ppseg/models/backbones/resnet_vd.py +++ b/paddlers/models/ppseg/models/backbones/resnet_vd.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg.cvlibs import manager -from paddleseg.models import layers -from paddleseg.utils import utils +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.models import layers +from 
paddlers.models.ppseg.utils import utils __all__ = [ "ResNet18_vd", "ResNet34_vd", "ResNet50_vd", "ResNet101_vd", "ResNet152_vd" diff --git a/paddlers/models/ppseg/models/backbones/shufflenetv2.py b/paddlers/models/ppseg/models/backbones/shufflenetv2.py index 0921e73..da9bece 100644 --- a/paddlers/models/ppseg/models/backbones/shufflenetv2.py +++ b/paddlers/models/ppseg/models/backbones/shufflenetv2.py @@ -18,8 +18,8 @@ from paddle.nn import Layer, Conv2D, MaxPool2D, AdaptiveAvgPool2D, BatchNorm, Li from paddle.nn.initializer import KaimingNormal from paddle.nn.functional import swish -from paddleseg.cvlibs import manager -from paddleseg.utils import utils, logger +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.utils import utils, logger __all__ = [ 'ShuffleNetV2_x0_25', 'ShuffleNetV2_x0_33', 'ShuffleNetV2_x0_5', diff --git a/paddlers/models/ppseg/models/backbones/stdcnet.py b/paddlers/models/ppseg/models/backbones/stdcnet.py index 7dc89cf..1349288 100644 --- a/paddlers/models/ppseg/models/backbones/stdcnet.py +++ b/paddlers/models/ppseg/models/backbones/stdcnet.py @@ -17,9 +17,9 @@ import math import paddle import paddle.nn as nn -from paddleseg.utils import utils -from paddleseg.cvlibs import manager, param_init -from paddleseg.models.layers.layer_libs import SyncBatchNorm +from paddlers.models.ppseg.utils import utils +from paddlers.models.ppseg.cvlibs import manager, param_init +from paddlers.models.ppseg.models.layers.layer_libs import SyncBatchNorm __all__ = ["STDC1", "STDC2"] diff --git a/paddlers/models/ppseg/models/backbones/swin_transformer.py b/paddlers/models/ppseg/models/backbones/swin_transformer.py index ab5cb53..62a39f3 100644 --- a/paddlers/models/ppseg/models/backbones/swin_transformer.py +++ b/paddlers/models/ppseg/models/backbones/swin_transformer.py @@ -17,9 +17,9 @@ import paddle.nn as nn import paddle.nn.functional as F import numpy as np -from paddleseg.cvlibs import manager -from paddleseg.utils import utils 
-from paddleseg.models.backbones.transformer_utils import * +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.utils import utils +from paddlers.models.ppseg.models.backbones.transformer_utils import * class Mlp(nn.Layer): diff --git a/paddlers/models/ppseg/models/backbones/top_transformer.py b/paddlers/models/ppseg/models/backbones/top_transformer.py index 7f13ae5..f8555a4 100644 --- a/paddlers/models/ppseg/models/backbones/top_transformer.py +++ b/paddlers/models/ppseg/models/backbones/top_transformer.py @@ -19,9 +19,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg.cvlibs import manager -from paddleseg import utils -from paddleseg.models.backbones.transformer_utils import Identity, DropPath +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg import utils +from paddlers.models.ppseg.models.backbones.transformer_utils import Identity, DropPath __all__ = ["TopTransformer_Base", "TopTransformer_Small", "TopTransformer_Tiny"] diff --git a/paddlers/models/ppseg/models/backbones/vision_transformer.py b/paddlers/models/ppseg/models/backbones/vision_transformer.py index ecc25d7..95190a5 100644 --- a/paddlers/models/ppseg/models/backbones/vision_transformer.py +++ b/paddlers/models/ppseg/models/backbones/vision_transformer.py @@ -20,9 +20,9 @@ import paddle.nn as nn import paddle.nn.functional as F import numpy as np -from paddleseg.cvlibs import manager -from paddleseg.utils import utils, logger -from paddleseg.models.backbones.transformer_utils import to_2tuple, DropPath, Identity +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.utils import utils, logger +from paddlers.models.ppseg.models.backbones.transformer_utils import to_2tuple, DropPath, Identity class Mlp(nn.Layer): diff --git a/paddlers/models/ppseg/models/backbones/xception_deeplab.py b/paddlers/models/ppseg/models/backbones/xception_deeplab.py index 4480096..a287581 100644 --- 
a/paddlers/models/ppseg/models/backbones/xception_deeplab.py +++ b/paddlers/models/ppseg/models/backbones/xception_deeplab.py @@ -15,9 +15,9 @@ import paddle.nn as nn import paddle.nn.functional as F -from paddleseg.cvlibs import manager -from paddleseg.utils import utils -from paddleseg.models import layers +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.utils import utils +from paddlers.models.ppseg.models import layers __all__ = ["Xception41_deeplab", "Xception65_deeplab", "Xception71_deeplab"] diff --git a/paddlers/models/ppseg/models/bisenet.py b/paddlers/models/ppseg/models/bisenet.py index a22d617..b552af1 100644 --- a/paddlers/models/ppseg/models/bisenet.py +++ b/paddlers/models/ppseg/models/bisenet.py @@ -18,9 +18,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg import utils -from paddleseg.cvlibs import manager, param_init -from paddleseg.models import layers +from paddlers.models.ppseg import utils +from paddlers.models.ppseg.cvlibs import manager, param_init +from paddlers.models.ppseg.models import layers @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/bisenetv1.py b/paddlers/models/ppseg/models/bisenetv1.py index 1e7b897..15c39ef 100644 --- a/paddlers/models/ppseg/models/bisenetv1.py +++ b/paddlers/models/ppseg/models/bisenetv1.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg.cvlibs import manager -from paddleseg.models import layers -from paddleseg.utils import utils +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.models import layers +from paddlers.models.ppseg.utils import utils @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/ccnet.py b/paddlers/models/ppseg/models/ccnet.py index e42154c..3315c01 100644 --- a/paddlers/models/ppseg/models/ccnet.py +++ b/paddlers/models/ppseg/models/ccnet.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn 
import paddle.nn.functional as F -from paddleseg.cvlibs import manager -from paddleseg.models import layers -from paddleseg.utils import utils +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.models import layers +from paddlers.models.ppseg.utils import utils @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/danet.py b/paddlers/models/ppseg/models/danet.py index c419131..dac6b67 100644 --- a/paddlers/models/ppseg/models/danet.py +++ b/paddlers/models/ppseg/models/danet.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg.cvlibs import manager -from paddleseg.models import layers -from paddleseg.utils import utils +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.models import layers +from paddlers.models.ppseg.utils import utils @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/ddrnet.py b/paddlers/models/ppseg/models/ddrnet.py index b2a6992..63d51f9 100644 --- a/paddlers/models/ppseg/models/ddrnet.py +++ b/paddlers/models/ppseg/models/ddrnet.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg.cvlibs import manager, param_init -from paddleseg.models import layers -from paddleseg.utils import utils +from paddlers.models.ppseg.cvlibs import manager, param_init +from paddlers.models.ppseg.models import layers +from paddlers.models.ppseg.utils import utils class DualResNet(nn.Layer): diff --git a/paddlers/models/ppseg/models/decoupled_segnet.py b/paddlers/models/ppseg/models/decoupled_segnet.py index 8386d6b..80ff4b8 100644 --- a/paddlers/models/ppseg/models/decoupled_segnet.py +++ b/paddlers/models/ppseg/models/decoupled_segnet.py @@ -18,11 +18,11 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg.cvlibs import manager -from paddleseg.models import layers -from paddleseg.models.backbones import resnet_vd -from 
paddleseg.models import deeplab -from paddleseg.utils import utils +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.models import layers +from paddlers.models.ppseg.models.backbones import resnet_vd +from paddlers.models.ppseg.models import deeplab +from paddlers.models.ppseg.utils import utils @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/deeplab.py b/paddlers/models/ppseg/models/deeplab.py index 07863d5..3e50572 100644 --- a/paddlers/models/ppseg/models/deeplab.py +++ b/paddlers/models/ppseg/models/deeplab.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg.cvlibs import manager -from paddleseg.models import layers -from paddleseg.utils import utils +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.models import layers +from paddlers.models.ppseg.utils import utils __all__ = ['DeepLabV3P', 'DeepLabV3'] diff --git a/paddlers/models/ppseg/models/dmnet.py b/paddlers/models/ppseg/models/dmnet.py index c150ac6..52499a4 100644 --- a/paddlers/models/ppseg/models/dmnet.py +++ b/paddlers/models/ppseg/models/dmnet.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg.cvlibs import manager -from paddleseg.models import layers -from paddleseg.utils import utils +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.models import layers +from paddlers.models.ppseg.utils import utils @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/dnlnet.py b/paddlers/models/ppseg/models/dnlnet.py index 527eab0..82e020a 100644 --- a/paddlers/models/ppseg/models/dnlnet.py +++ b/paddlers/models/ppseg/models/dnlnet.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg.models import layers -from paddleseg.cvlibs import manager -from paddleseg.utils import utils +from paddlers.models.ppseg.models import layers 
+from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.utils import utils @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/emanet.py b/paddlers/models/ppseg/models/emanet.py index 33eedcd..a6d6f5b 100644 --- a/paddlers/models/ppseg/models/emanet.py +++ b/paddlers/models/ppseg/models/emanet.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg.models import layers -from paddleseg.cvlibs import manager -from paddleseg.utils import utils +from paddlers.models.ppseg.models import layers +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.utils import utils @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/encnet.py b/paddlers/models/ppseg/models/encnet.py index 81bb9ef..ce58414 100644 --- a/paddlers/models/ppseg/models/encnet.py +++ b/paddlers/models/ppseg/models/encnet.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg.cvlibs import manager -from paddleseg.models import layers -from paddleseg.utils import utils +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.models import layers +from paddlers.models.ppseg.utils import utils @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/enet.py b/paddlers/models/ppseg/models/enet.py index c677e46..832e521 100644 --- a/paddlers/models/ppseg/models/enet.py +++ b/paddlers/models/ppseg/models/enet.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg import utils -from paddleseg.models import layers -from paddleseg.cvlibs import manager, param_init +from paddlers.models.ppseg import utils +from paddlers.models.ppseg.models import layers +from paddlers.models.ppseg.cvlibs import manager, param_init __all__ = ['ENet'] diff --git a/paddlers/models/ppseg/models/espnet.py b/paddlers/models/ppseg/models/espnet.py index 1751f0e..50de246 
100644 --- a/paddlers/models/ppseg/models/espnet.py +++ b/paddlers/models/ppseg/models/espnet.py @@ -18,9 +18,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg import utils -from paddleseg.cvlibs import manager, param_init -from paddleseg.models import layers +from paddlers.models.ppseg import utils +from paddlers.models.ppseg.cvlibs import manager, param_init +from paddlers.models.ppseg.models import layers @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/espnetv1.py b/paddlers/models/ppseg/models/espnetv1.py index 7f1142e..4515aa6 100644 --- a/paddlers/models/ppseg/models/espnetv1.py +++ b/paddlers/models/ppseg/models/espnetv1.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg.models import layers -from paddleseg.cvlibs import manager -from paddleseg.utils import utils +from paddlers.models.ppseg.models import layers +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.utils import utils @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/fast_scnn.py b/paddlers/models/ppseg/models/fast_scnn.py index aa8905f..8e5100e 100644 --- a/paddlers/models/ppseg/models/fast_scnn.py +++ b/paddlers/models/ppseg/models/fast_scnn.py @@ -16,9 +16,9 @@ import paddle.nn as nn import paddle.nn.functional as F import paddle -from paddleseg.cvlibs import manager -from paddleseg.models import layers -from paddleseg.utils import utils +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.models import layers +from paddlers.models.ppseg.utils import utils __all__ = ['FastSCNN'] diff --git a/paddlers/models/ppseg/models/fastfcn.py b/paddlers/models/ppseg/models/fastfcn.py index 87c86eb..2fc9c74 100644 --- a/paddlers/models/ppseg/models/fastfcn.py +++ b/paddlers/models/ppseg/models/fastfcn.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg.cvlibs 
import manager -from paddleseg.models import layers -from paddleseg.utils import utils +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.models import layers +from paddlers.models.ppseg.utils import utils @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/fcn.py b/paddlers/models/ppseg/models/fcn.py index e12aacd..19554ca 100644 --- a/paddlers/models/ppseg/models/fcn.py +++ b/paddlers/models/ppseg/models/fcn.py @@ -16,9 +16,9 @@ import paddle.nn as nn import paddle.nn.functional as F import paddle -from paddleseg import utils -from paddleseg.cvlibs import manager, param_init -from paddleseg.models import layers +from paddlers.models.ppseg import utils +from paddlers.models.ppseg.cvlibs import manager, param_init +from paddlers.models.ppseg.models import layers @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/gcnet.py b/paddlers/models/ppseg/models/gcnet.py index fb5d8e3..c5fe7ae 100644 --- a/paddlers/models/ppseg/models/gcnet.py +++ b/paddlers/models/ppseg/models/gcnet.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg.cvlibs import manager -from paddleseg.models import layers -from paddleseg.utils import utils +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.models import layers +from paddlers.models.ppseg.utils import utils @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/ginet.py b/paddlers/models/ppseg/models/ginet.py index fe4b9ae..046cc39 100644 --- a/paddlers/models/ppseg/models/ginet.py +++ b/paddlers/models/ppseg/models/ginet.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn from paddle.nn import functional as F -from paddleseg.utils import utils -from paddleseg.models import layers -from paddleseg.cvlibs import manager +from paddlers.models.ppseg.utils import utils +from paddlers.models.ppseg.models import layers +from paddlers.models.ppseg.cvlibs import manager 
@manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/glore.py b/paddlers/models/ppseg/models/glore.py index 12a26c1..76a6fe3 100644 --- a/paddlers/models/ppseg/models/glore.py +++ b/paddlers/models/ppseg/models/glore.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg.cvlibs import manager -from paddleseg.models import layers -from paddleseg.utils import utils +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.models import layers +from paddlers.models.ppseg.utils import utils @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/gscnn.py b/paddlers/models/ppseg/models/gscnn.py index 9352751..50895aa 100644 --- a/paddlers/models/ppseg/models/gscnn.py +++ b/paddlers/models/ppseg/models/gscnn.py @@ -18,11 +18,11 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg.cvlibs import manager -from paddleseg.models import layers -from paddleseg.models.backbones import resnet_vd -from paddleseg.models import deeplab -from paddleseg.utils import utils +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.models import layers +from paddlers.models.ppseg.models.backbones import resnet_vd +from paddlers.models.ppseg.models import deeplab +from paddlers.models.ppseg.utils import utils @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/hardnet.py b/paddlers/models/ppseg/models/hardnet.py index 8de3154..2542b02 100644 --- a/paddlers/models/ppseg/models/hardnet.py +++ b/paddlers/models/ppseg/models/hardnet.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg.cvlibs import manager -from paddleseg.models import layers -from paddleseg.utils import utils +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.models import layers +from paddlers.models.ppseg.utils import utils @manager.MODELS.add_component diff --git 
a/paddlers/models/ppseg/models/hrnet_contrast.py b/paddlers/models/ppseg/models/hrnet_contrast.py index dd5a206..30f289b 100644 --- a/paddlers/models/ppseg/models/hrnet_contrast.py +++ b/paddlers/models/ppseg/models/hrnet_contrast.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg.cvlibs import manager -from paddleseg.models import layers -from paddleseg.utils import utils +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.models import layers +from paddlers.models.ppseg.utils import utils @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/isanet.py b/paddlers/models/ppseg/models/isanet.py index e8bb3df..13bd4f2 100644 --- a/paddlers/models/ppseg/models/isanet.py +++ b/paddlers/models/ppseg/models/isanet.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg.models import layers -from paddleseg.cvlibs import manager -from paddleseg.utils import utils +from paddlers.models.ppseg.models import layers +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.utils import utils @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/layers/activation.py b/paddlers/models/ppseg/models/layers/activation.py index 046ba87..7eb9173 100644 --- a/paddlers/models/ppseg/models/layers/activation.py +++ b/paddlers/models/ppseg/models/layers/activation.py @@ -33,7 +33,7 @@ class Activation(nn.Layer): Examples: - from paddleseg.models.common.activation import Activation + from paddlers.models.ppseg.models.common.activation import Activation relu = Activation("relu") print(relu) diff --git a/paddlers/models/ppseg/models/layers/attention.py b/paddlers/models/ppseg/models/layers/attention.py index e2db9ea..bceb642 100644 --- a/paddlers/models/ppseg/models/layers/attention.py +++ b/paddlers/models/ppseg/models/layers/attention.py @@ -16,7 +16,7 @@ import paddle import paddle.nn as nn import 
paddle.nn.functional as F -from paddleseg.models import layers +from paddlers.models.ppseg.models import layers class AttentionBlock(nn.Layer): diff --git a/paddlers/models/ppseg/models/layers/layer_libs.py b/paddlers/models/ppseg/models/layers/layer_libs.py index 9a9b332..6dcf07a 100644 --- a/paddlers/models/ppseg/models/layers/layer_libs.py +++ b/paddlers/models/ppseg/models/layers/layer_libs.py @@ -17,7 +17,7 @@ import os import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg.models import layers +from paddlers.models.ppseg.models import layers def SyncBatchNorm(*args, **kwargs): diff --git a/paddlers/models/ppseg/models/layers/nonlocal2d.py b/paddlers/models/ppseg/models/layers/nonlocal2d.py index 7552ff4..9d6386d 100644 --- a/paddlers/models/ppseg/models/layers/nonlocal2d.py +++ b/paddlers/models/ppseg/models/layers/nonlocal2d.py @@ -16,7 +16,7 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg.models import layers +from paddlers.models.ppseg.models import layers class NonLocal2D(nn.Layer): diff --git a/paddlers/models/ppseg/models/layers/pyramid_pool.py b/paddlers/models/ppseg/models/layers/pyramid_pool.py index 9e26912..3694437 100644 --- a/paddlers/models/ppseg/models/layers/pyramid_pool.py +++ b/paddlers/models/ppseg/models/layers/pyramid_pool.py @@ -16,7 +16,7 @@ import paddle import paddle.nn.functional as F from paddle import nn -from paddleseg.models import layers +from paddlers.models.ppseg.models import layers class ASPPModule(nn.Layer): diff --git a/paddlers/models/ppseg/models/layers/tensor_fusion.py b/paddlers/models/ppseg/models/layers/tensor_fusion.py index da212b4..0e109c7 100644 --- a/paddlers/models/ppseg/models/layers/tensor_fusion.py +++ b/paddlers/models/ppseg/models/layers/tensor_fusion.py @@ -17,8 +17,8 @@ import paddle.nn as nn import paddle.nn.functional as F from paddle import ParamAttr from paddle.nn.initializer import Constant -from paddleseg.models import layers 
-from paddleseg.models.layers import tensor_fusion_helper as helper +from paddlers.models.ppseg.models import layers +from paddlers.models.ppseg.models.layers import tensor_fusion_helper as helper class UAFM(nn.Layer): diff --git a/paddlers/models/ppseg/models/losses/binary_cross_entropy_loss.py b/paddlers/models/ppseg/models/losses/binary_cross_entropy_loss.py index 4bf7bc7..fd1eddd 100644 --- a/paddlers/models/ppseg/models/losses/binary_cross_entropy_loss.py +++ b/paddlers/models/ppseg/models/losses/binary_cross_entropy_loss.py @@ -16,7 +16,7 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg.cvlibs import manager +from paddlers.models.ppseg.cvlibs import manager @manager.LOSSES.add_component diff --git a/paddlers/models/ppseg/models/losses/bootstrapped_cross_entropy.py b/paddlers/models/ppseg/models/losses/bootstrapped_cross_entropy.py index a9d09ef..b1807fa 100644 --- a/paddlers/models/ppseg/models/losses/bootstrapped_cross_entropy.py +++ b/paddlers/models/ppseg/models/losses/bootstrapped_cross_entropy.py @@ -16,7 +16,7 @@ import paddle from paddle import nn import paddle.nn.functional as F -from paddleseg.cvlibs import manager +from paddlers.models.ppseg.cvlibs import manager @manager.LOSSES.add_component diff --git a/paddlers/models/ppseg/models/losses/cross_entropy_loss.py b/paddlers/models/ppseg/models/losses/cross_entropy_loss.py index c934a0a..2dcfcd6 100644 --- a/paddlers/models/ppseg/models/losses/cross_entropy_loss.py +++ b/paddlers/models/ppseg/models/losses/cross_entropy_loss.py @@ -16,7 +16,7 @@ import paddle from paddle import nn import paddle.nn.functional as F -from paddleseg.cvlibs import manager +from paddlers.models.ppseg.cvlibs import manager @manager.LOSSES.add_component diff --git a/paddlers/models/ppseg/models/losses/decoupledsegnet_relax_boundary_loss.py b/paddlers/models/ppseg/models/losses/decoupledsegnet_relax_boundary_loss.py index eb02389..2048560 100644 --- 
a/paddlers/models/ppseg/models/losses/decoupledsegnet_relax_boundary_loss.py +++ b/paddlers/models/ppseg/models/losses/decoupledsegnet_relax_boundary_loss.py @@ -18,7 +18,7 @@ from paddle import nn import paddle.nn.functional as F from scipy.ndimage import shift -from paddleseg.cvlibs import manager +from paddlers.models.ppseg.cvlibs import manager @manager.LOSSES.add_component diff --git a/paddlers/models/ppseg/models/losses/detail_aggregate_loss.py b/paddlers/models/ppseg/models/losses/detail_aggregate_loss.py index d6b49c6..1da63f0 100644 --- a/paddlers/models/ppseg/models/losses/detail_aggregate_loss.py +++ b/paddlers/models/ppseg/models/losses/detail_aggregate_loss.py @@ -16,7 +16,7 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg.cvlibs import manager +from paddlers.models.ppseg.cvlibs import manager @manager.LOSSES.add_component diff --git a/paddlers/models/ppseg/models/losses/dice_loss.py b/paddlers/models/ppseg/models/losses/dice_loss.py index e7b8cef..3187029 100644 --- a/paddlers/models/ppseg/models/losses/dice_loss.py +++ b/paddlers/models/ppseg/models/losses/dice_loss.py @@ -13,7 +13,7 @@ import paddle from paddle import nn import paddle.nn.functional as F -from paddleseg.cvlibs import manager +from paddlers.models.ppseg.cvlibs import manager @manager.LOSSES.add_component diff --git a/paddlers/models/ppseg/models/losses/edge_attention_loss.py b/paddlers/models/ppseg/models/losses/edge_attention_loss.py index b000b75..44012bc 100644 --- a/paddlers/models/ppseg/models/losses/edge_attention_loss.py +++ b/paddlers/models/ppseg/models/losses/edge_attention_loss.py @@ -16,8 +16,8 @@ import paddle from paddle import nn import paddle.nn.functional as F -from paddleseg.cvlibs import manager -from paddleseg.models import losses +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.models import losses @manager.LOSSES.add_component diff --git a/paddlers/models/ppseg/models/losses/focal_loss.py 
b/paddlers/models/ppseg/models/losses/focal_loss.py index 4b5edd0..d63f905 100644 --- a/paddlers/models/ppseg/models/losses/focal_loss.py +++ b/paddlers/models/ppseg/models/losses/focal_loss.py @@ -17,7 +17,7 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg.cvlibs import manager +from paddlers.models.ppseg.cvlibs import manager @manager.LOSSES.add_component diff --git a/paddlers/models/ppseg/models/losses/gscnn_dual_task_loss.py b/paddlers/models/ppseg/models/losses/gscnn_dual_task_loss.py index 6a3d81b..777e57a 100644 --- a/paddlers/models/ppseg/models/losses/gscnn_dual_task_loss.py +++ b/paddlers/models/ppseg/models/losses/gscnn_dual_task_loss.py @@ -13,7 +13,7 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg.cvlibs import manager +from paddlers.models.ppseg.cvlibs import manager @manager.LOSSES.add_component diff --git a/paddlers/models/ppseg/models/losses/kl_loss.py b/paddlers/models/ppseg/models/losses/kl_loss.py index 30cb925..23a4a6e 100644 --- a/paddlers/models/ppseg/models/losses/kl_loss.py +++ b/paddlers/models/ppseg/models/losses/kl_loss.py @@ -16,7 +16,7 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg.cvlibs import manager +from paddlers.models.ppseg.cvlibs import manager @manager.LOSSES.add_component diff --git a/paddlers/models/ppseg/models/losses/l1_loss.py b/paddlers/models/ppseg/models/losses/l1_loss.py index 5d41f36..5376135 100644 --- a/paddlers/models/ppseg/models/losses/l1_loss.py +++ b/paddlers/models/ppseg/models/losses/l1_loss.py @@ -16,7 +16,7 @@ import paddle from paddle import nn import paddle.nn.functional as F -from paddleseg.cvlibs import manager +from paddlers.models.ppseg.cvlibs import manager @manager.LOSSES.add_component diff --git a/paddlers/models/ppseg/models/losses/lovasz_loss.py b/paddlers/models/ppseg/models/losses/lovasz_loss.py index 82dfaf9..16e755c 100644 --- 
a/paddlers/models/ppseg/models/losses/lovasz_loss.py +++ b/paddlers/models/ppseg/models/losses/lovasz_loss.py @@ -22,7 +22,7 @@ import paddle from paddle import nn import paddle.nn.functional as F -from paddleseg.cvlibs import manager +from paddlers.models.ppseg.cvlibs import manager @manager.LOSSES.add_component diff --git a/paddlers/models/ppseg/models/losses/mean_square_error_loss.py b/paddlers/models/ppseg/models/losses/mean_square_error_loss.py index e6fc891..3365268 100644 --- a/paddlers/models/ppseg/models/losses/mean_square_error_loss.py +++ b/paddlers/models/ppseg/models/losses/mean_square_error_loss.py @@ -16,7 +16,7 @@ import paddle from paddle import nn import paddle.nn.functional as F -from paddleseg.cvlibs import manager +from paddlers.models.ppseg.cvlibs import manager @manager.LOSSES.add_component diff --git a/paddlers/models/ppseg/models/losses/mixed_loss.py b/paddlers/models/ppseg/models/losses/mixed_loss.py index 563b5c2..c850fa0 100644 --- a/paddlers/models/ppseg/models/losses/mixed_loss.py +++ b/paddlers/models/ppseg/models/losses/mixed_loss.py @@ -16,7 +16,7 @@ import paddle from paddle import nn import paddle.nn.functional as F -from paddleseg.cvlibs import manager +from paddlers.models.ppseg.cvlibs import manager @manager.LOSSES.add_component diff --git a/paddlers/models/ppseg/models/losses/ohem_cross_entropy_loss.py b/paddlers/models/ppseg/models/losses/ohem_cross_entropy_loss.py index c69d81e..5683521 100644 --- a/paddlers/models/ppseg/models/losses/ohem_cross_entropy_loss.py +++ b/paddlers/models/ppseg/models/losses/ohem_cross_entropy_loss.py @@ -16,7 +16,7 @@ import paddle from paddle import nn import paddle.nn.functional as F -from paddleseg.cvlibs import manager +from paddlers.models.ppseg.cvlibs import manager @manager.LOSSES.add_component diff --git a/paddlers/models/ppseg/models/losses/ohem_edge_attention_loss.py b/paddlers/models/ppseg/models/losses/ohem_edge_attention_loss.py index f37fe07..56db270 100644 --- 
a/paddlers/models/ppseg/models/losses/ohem_edge_attention_loss.py +++ b/paddlers/models/ppseg/models/losses/ohem_edge_attention_loss.py @@ -16,8 +16,8 @@ import paddle from paddle import nn import paddle.nn.functional as F -from paddleseg.cvlibs import manager -from paddleseg.models import losses +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.models import losses @manager.LOSSES.add_component diff --git a/paddlers/models/ppseg/models/losses/pixel_contrast_cross_entropy_loss.py b/paddlers/models/ppseg/models/losses/pixel_contrast_cross_entropy_loss.py index 7abe865..2aa2017 100644 --- a/paddlers/models/ppseg/models/losses/pixel_contrast_cross_entropy_loss.py +++ b/paddlers/models/ppseg/models/losses/pixel_contrast_cross_entropy_loss.py @@ -16,7 +16,7 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg.cvlibs import manager +from paddlers.models.ppseg.cvlibs import manager @manager.LOSSES.add_component diff --git a/paddlers/models/ppseg/models/losses/point_cross_entropy_loss.py b/paddlers/models/ppseg/models/losses/point_cross_entropy_loss.py index d43f6b2..ffea59a 100644 --- a/paddlers/models/ppseg/models/losses/point_cross_entropy_loss.py +++ b/paddlers/models/ppseg/models/losses/point_cross_entropy_loss.py @@ -16,7 +16,7 @@ import paddle from paddle import nn import paddle.nn.functional as F -from paddleseg.cvlibs import manager +from paddlers.models.ppseg.cvlibs import manager @manager.LOSSES.add_component diff --git a/paddlers/models/ppseg/models/losses/rmi_loss.py b/paddlers/models/ppseg/models/losses/rmi_loss.py index 271f738..964df91 100644 --- a/paddlers/models/ppseg/models/losses/rmi_loss.py +++ b/paddlers/models/ppseg/models/losses/rmi_loss.py @@ -17,7 +17,7 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg.cvlibs import manager +from paddlers.models.ppseg.cvlibs import manager _euler_num = 2.718281828 _pi = 3.14159265 diff --git 
a/paddlers/models/ppseg/models/losses/semantic_connectivity_loss.py b/paddlers/models/ppseg/models/losses/semantic_connectivity_loss.py index b54d545..7d51577 100644 --- a/paddlers/models/ppseg/models/losses/semantic_connectivity_loss.py +++ b/paddlers/models/ppseg/models/losses/semantic_connectivity_loss.py @@ -18,7 +18,7 @@ import paddle from paddle import nn import paddle.nn.functional as F -from paddleseg.cvlibs import manager +from paddlers.models.ppseg.cvlibs import manager @manager.LOSSES.add_component diff --git a/paddlers/models/ppseg/models/losses/semantic_encode_cross_entropy_loss.py b/paddlers/models/ppseg/models/losses/semantic_encode_cross_entropy_loss.py index 648ed35..1293521 100644 --- a/paddlers/models/ppseg/models/losses/semantic_encode_cross_entropy_loss.py +++ b/paddlers/models/ppseg/models/losses/semantic_encode_cross_entropy_loss.py @@ -16,7 +16,7 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg.cvlibs import manager +from paddlers.models.ppseg.cvlibs import manager @manager.LOSSES.add_component diff --git a/paddlers/models/ppseg/models/lraspp.py b/paddlers/models/ppseg/models/lraspp.py index 6f7db42..6e86bac 100644 --- a/paddlers/models/ppseg/models/lraspp.py +++ b/paddlers/models/ppseg/models/lraspp.py @@ -18,9 +18,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg import utils -from paddleseg.models import layers -from paddleseg.cvlibs import manager +from paddlers.models.ppseg import utils +from paddlers.models.ppseg.models import layers +from paddlers.models.ppseg.cvlibs import manager @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/mla_transformer.py b/paddlers/models/ppseg/models/mla_transformer.py index d5647e7..2ae2cce 100644 --- a/paddlers/models/ppseg/models/mla_transformer.py +++ b/paddlers/models/ppseg/models/mla_transformer.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from 
paddleseg.models import layers -from paddleseg.cvlibs import manager -from paddleseg.utils import utils +from paddlers.models.ppseg.models import layers +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.utils import utils class MLAHeads(nn.Layer): diff --git a/paddlers/models/ppseg/models/mobileseg.py b/paddlers/models/ppseg/models/mobileseg.py index 8d4ea5b..9e67723 100644 --- a/paddlers/models/ppseg/models/mobileseg.py +++ b/paddlers/models/ppseg/models/mobileseg.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg import utils -from paddleseg.models import layers -from paddleseg.cvlibs import manager +from paddlers.models.ppseg import utils +from paddlers.models.ppseg.models import layers +from paddlers.models.ppseg.cvlibs import manager @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/ocrnet.py b/paddlers/models/ppseg/models/ocrnet.py index b1eb73f..1225642 100644 --- a/paddlers/models/ppseg/models/ocrnet.py +++ b/paddlers/models/ppseg/models/ocrnet.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg import utils -from paddleseg.cvlibs import manager, param_init -from paddleseg.models import layers +from paddlers.models.ppseg import utils +from paddlers.models.ppseg.cvlibs import manager, param_init +from paddlers.models.ppseg.models import layers @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/pfpnnet.py b/paddlers/models/ppseg/models/pfpnnet.py index 25143ac..3b6c17b 100644 --- a/paddlers/models/ppseg/models/pfpnnet.py +++ b/paddlers/models/ppseg/models/pfpnnet.py @@ -18,9 +18,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg.models import layers -from paddleseg.cvlibs import manager -from paddleseg.utils import utils +from paddlers.models.ppseg.models import layers +from paddlers.models.ppseg.cvlibs import manager +from 
paddlers.models.ppseg.utils import utils @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/pointrend.py b/paddlers/models/ppseg/models/pointrend.py index b1f9b01..e36f6a6 100644 --- a/paddlers/models/ppseg/models/pointrend.py +++ b/paddlers/models/ppseg/models/pointrend.py @@ -18,9 +18,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg.cvlibs import manager -from paddleseg.models import layers -from paddleseg.utils import utils +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.models import layers +from paddlers.models.ppseg.utils import utils @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/portraitnet.py b/paddlers/models/ppseg/models/portraitnet.py index 255eaca..649173e 100644 --- a/paddlers/models/ppseg/models/portraitnet.py +++ b/paddlers/models/ppseg/models/portraitnet.py @@ -14,8 +14,8 @@ import paddle.nn as nn -from paddleseg import utils -from paddleseg.cvlibs import manager +from paddlers.models.ppseg import utils +from paddlers.models.ppseg.cvlibs import manager @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/pp_liteseg.py b/paddlers/models/ppseg/models/pp_liteseg.py index 8467011..f76dafe 100644 --- a/paddlers/models/ppseg/models/pp_liteseg.py +++ b/paddlers/models/ppseg/models/pp_liteseg.py @@ -16,10 +16,10 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg import utils -from paddleseg.models import layers -from paddleseg.cvlibs import manager -from paddleseg.utils import utils +from paddlers.models.ppseg import utils +from paddlers.models.ppseg.models import layers +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.utils import utils @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/pphumanseg_lite.py b/paddlers/models/ppseg/models/pphumanseg_lite.py index c206651..c2f731a 100644 --- 
a/paddlers/models/ppseg/models/pphumanseg_lite.py +++ b/paddlers/models/ppseg/models/pphumanseg_lite.py @@ -16,16 +16,16 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg.cvlibs import manager, param_init -from paddleseg.models import layers -from paddleseg.utils import utils +from paddlers.models.ppseg.cvlibs import manager, param_init +from paddlers.models.ppseg.models import layers +from paddlers.models.ppseg.utils import utils __all__ = ['PPHumanSegLite'] @manager.MODELS.add_component class PPHumanSegLite(nn.Layer): - "A self-developed ultra lightweight model from PaddleSeg, is suitable for real-time scene segmentation on web or mobile terminals." + "A self-developed ultra lightweight model from paddlers.models.ppseg, is suitable for real-time scene segmentation on web or mobile terminals." def __init__(self, num_classes, diff --git a/paddlers/models/ppseg/models/pspnet.py b/paddlers/models/ppseg/models/pspnet.py index 77283b6..5a6f6b7 100644 --- a/paddlers/models/ppseg/models/pspnet.py +++ b/paddlers/models/ppseg/models/pspnet.py @@ -16,9 +16,9 @@ import paddle.nn as nn import paddle.nn.functional as F import paddle -from paddleseg.cvlibs import manager -from paddleseg.models import layers -from paddleseg.utils import utils +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.models import layers +from paddlers.models.ppseg.utils import utils @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/segformer.py b/paddlers/models/ppseg/models/segformer.py index af9c27b..9b22bbb 100644 --- a/paddlers/models/ppseg/models/segformer.py +++ b/paddlers/models/ppseg/models/segformer.py @@ -7,9 +7,9 @@ import paddle.nn as nn import paddle.nn.functional as F import numpy as np -from paddleseg.cvlibs import manager -from paddleseg.models import layers -from paddleseg.utils import utils +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.models import layers +from 
paddlers.models.ppseg.utils import utils class MLP(nn.Layer): diff --git a/paddlers/models/ppseg/models/segmenter.py b/paddlers/models/ppseg/models/segmenter.py index 84f9495..4bfb743 100644 --- a/paddlers/models/ppseg/models/segmenter.py +++ b/paddlers/models/ppseg/models/segmenter.py @@ -17,9 +17,9 @@ import paddle.nn as nn import paddle.nn.functional as F import numpy as np -from paddleseg.utils import utils -from paddleseg.cvlibs import manager, param_init -from paddleseg.models.backbones import vision_transformer, transformer_utils +from paddlers.models.ppseg.utils import utils +from paddlers.models.ppseg.cvlibs import manager, param_init +from paddlers.models.ppseg.models.backbones import vision_transformer, transformer_utils __all__ = ['LinearSegmenter', 'MaskSegmenter'] diff --git a/paddlers/models/ppseg/models/segnet.py b/paddlers/models/ppseg/models/segnet.py index e861d13..8e36021 100644 --- a/paddlers/models/ppseg/models/segnet.py +++ b/paddlers/models/ppseg/models/segnet.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg.cvlibs import manager, param_init -from paddleseg.models import layers -from paddleseg.utils import utils +from paddlers.models.ppseg.cvlibs import manager, param_init +from paddlers.models.ppseg.models import layers +from paddlers.models.ppseg.utils import utils @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/setr.py b/paddlers/models/ppseg/models/setr.py index 0b95f16..8475987 100644 --- a/paddlers/models/ppseg/models/setr.py +++ b/paddlers/models/ppseg/models/setr.py @@ -16,9 +16,9 @@ import paddle.nn as nn import paddle.nn.functional as F import paddle -from paddleseg.cvlibs import manager, param_init -from paddleseg.models import layers -from paddleseg.utils import utils +from paddlers.models.ppseg.cvlibs import manager, param_init +from paddlers.models.ppseg.models import layers +from paddlers.models.ppseg.utils import utils 
@manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/sfnet.py b/paddlers/models/ppseg/models/sfnet.py index d04dfbe..b9f9924 100644 --- a/paddlers/models/ppseg/models/sfnet.py +++ b/paddlers/models/ppseg/models/sfnet.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg.models import layers -from paddleseg.cvlibs import manager -from paddleseg.utils import utils +from paddlers.models.ppseg.models import layers +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.utils import utils @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/sinet.py b/paddlers/models/ppseg/models/sinet.py index 9381b44..df3c27a 100644 --- a/paddlers/models/ppseg/models/sinet.py +++ b/paddlers/models/ppseg/models/sinet.py @@ -18,9 +18,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg.models import layers -from paddleseg.cvlibs import manager -from paddleseg.utils import utils +from paddlers.models.ppseg.models import layers +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.utils import utils CFG = [[[3, 1], [5, 1]], [[3, 1], [3, 1]], [[3, 1], [5, 1]], [[3, 1], [3, 1]], [[5, 1], [3, 2]], [[5, 2], [3, 4]], [[3, 1], [3, 1]], [[5, 1], [5, 1]], diff --git a/paddlers/models/ppseg/models/stdcseg.py b/paddlers/models/ppseg/models/stdcseg.py index a772881..cd2419f 100644 --- a/paddlers/models/ppseg/models/stdcseg.py +++ b/paddlers/models/ppseg/models/stdcseg.py @@ -16,10 +16,10 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg import utils -from paddleseg.models import layers -from paddleseg.cvlibs import manager -from paddleseg.utils import utils +from paddlers.models.ppseg import utils +from paddlers.models.ppseg.models import layers +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.utils import utils @manager.MODELS.add_component diff --git 
a/paddlers/models/ppseg/models/topformer.py b/paddlers/models/ppseg/models/topformer.py index 29241cd..b270e71 100644 --- a/paddlers/models/ppseg/models/topformer.py +++ b/paddlers/models/ppseg/models/topformer.py @@ -18,10 +18,10 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg.cvlibs import manager -from paddleseg.models import layers -from paddleseg.utils import utils -from paddleseg.models.backbones.top_transformer import ConvBNAct +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.models import layers +from paddlers.models.ppseg.utils import utils +from paddlers.models.ppseg.models.backbones.top_transformer import ConvBNAct @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/u2net.py b/paddlers/models/ppseg/models/u2net.py index c0f12b2..6e7a52d 100644 --- a/paddlers/models/ppseg/models/u2net.py +++ b/paddlers/models/ppseg/models/u2net.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg.cvlibs import manager -from paddleseg.models import layers -from paddleseg.utils import utils +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.models import layers +from paddlers.models.ppseg.utils import utils __all__ = ['U2Net', 'U2Netp'] diff --git a/paddlers/models/ppseg/models/unet.py b/paddlers/models/ppseg/models/unet.py index 8ef1cd8..4a56006 100644 --- a/paddlers/models/ppseg/models/unet.py +++ b/paddlers/models/ppseg/models/unet.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg import utils -from paddleseg.cvlibs import manager -from paddleseg.models import layers +from paddlers.models.ppseg import utils +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.models import layers @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/unet_3plus.py b/paddlers/models/ppseg/models/unet_3plus.py index 
55c4520..57e55b9 100644 --- a/paddlers/models/ppseg/models/unet_3plus.py +++ b/paddlers/models/ppseg/models/unet_3plus.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg.cvlibs import manager -from paddleseg.models.layers.layer_libs import SyncBatchNorm -from paddleseg.cvlibs.param_init import kaiming_normal_init +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.models.layers.layer_libs import SyncBatchNorm +from paddlers.models.ppseg.cvlibs.param_init import kaiming_normal_init @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/unet_plusplus.py b/paddlers/models/ppseg/models/unet_plusplus.py index 0d728d6..05f2cd7 100644 --- a/paddlers/models/ppseg/models/unet_plusplus.py +++ b/paddlers/models/ppseg/models/unet_plusplus.py @@ -15,10 +15,10 @@ import paddle import paddle.nn as nn -from paddleseg.cvlibs import manager -from paddleseg.utils import load_entire_model -from paddleseg.cvlibs.param_init import kaiming_normal_init -from paddleseg.models.layers.layer_libs import SyncBatchNorm +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.utils import load_entire_model +from paddlers.models.ppseg.cvlibs.param_init import kaiming_normal_init +from paddlers.models.ppseg.models.layers.layer_libs import SyncBatchNorm @manager.MODELS.add_component diff --git a/paddlers/models/ppseg/models/upernet.py b/paddlers/models/ppseg/models/upernet.py index e024632..96c70ff 100644 --- a/paddlers/models/ppseg/models/upernet.py +++ b/paddlers/models/ppseg/models/upernet.py @@ -16,9 +16,9 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleseg import utils -from paddleseg.cvlibs import manager -from paddleseg.models import layers +from paddlers.models.ppseg import utils +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.models import layers @manager.MODELS.add_component diff --git 
a/paddlers/models/ppseg/transforms/transforms.py b/paddlers/models/ppseg/transforms/transforms.py index f73186d..d27ac85 100644 --- a/paddlers/models/ppseg/transforms/transforms.py +++ b/paddlers/models/ppseg/transforms/transforms.py @@ -19,9 +19,9 @@ import cv2 import numpy as np from PIL import Image -from paddleseg.cvlibs import manager -from paddleseg.transforms import functional -from paddleseg.utils import logger +from paddlers.models.ppseg.cvlibs import manager +from paddlers.models.ppseg.transforms import functional +from paddlers.models.ppseg.utils import logger @manager.TRANSFORMS.add_component diff --git a/paddlers/models/ppseg/utils/env/seg_env.py b/paddlers/models/ppseg/utils/env/seg_env.py index cf11dbd..6b91b79 100644 --- a/paddlers/models/ppseg/utils/env/seg_env.py +++ b/paddlers/models/ppseg/utils/env/seg_env.py @@ -22,7 +22,7 @@ PRETRAINED_MODEL_HOME : The directory to store the automatically downloaded pret import os -from paddleseg.utils import logger +from paddlers.models.ppseg.utils import logger def _get_user_home(): diff --git a/paddlers/models/ppseg/utils/env/sys_env.py b/paddlers/models/ppseg/utils/env/sys_env.py index 8681509..43879cd 100644 --- a/paddlers/models/ppseg/utils/env/sys_env.py +++ b/paddlers/models/ppseg/utils/env/sys_env.py @@ -20,7 +20,7 @@ import sys import cv2 import paddle -import paddleseg +import paddlers.models.ppseg as ppseg IS_WINDOWS = sys.platform == 'win32' @@ -121,7 +121,7 @@ def get_sys_env(): except: pass - env_info['PaddleSeg'] = paddleseg.__version__ + env_info['PaddleSeg'] = ppseg.__version__ env_info['PaddlePaddle'] = paddle.__version__ env_info['OpenCV'] = cv2.__version__ diff --git a/paddlers/models/ppseg/utils/utils.py b/paddlers/models/ppseg/utils/utils.py index 526d04e..ebffa40 100644 --- a/paddlers/models/ppseg/utils/utils.py +++ b/paddlers/models/ppseg/utils/utils.py @@ -22,8 +22,8 @@ from urllib.parse import urlparse, unquote import paddle -from paddleseg.utils import logger, seg_env -from 
paddleseg.utils.download import download_file_and_uncompress +from paddlers.models.ppseg.utils import logger, seg_env +from paddlers.models.ppseg.utils.download import download_file_and_uncompress @contextlib.contextmanager diff --git a/paddlers/tasks/classifier.py b/paddlers/tasks/classifier.py index 7af2c02..a29c3f7 100644 --- a/paddlers/tasks/classifier.py +++ b/paddlers/tasks/classifier.py @@ -631,7 +631,7 @@ class CondenseNetV2(BaseClassifier): num_classes=2, use_mixed_loss=False, losses=None, - in_chnanels=3, + in_channels=3, arch='A', **params): if arch not in ('A', 'B', 'C'): diff --git a/paddlers/tasks/segmenter.py b/paddlers/tasks/segmenter.py index 9e6b584..f1418ab 100644 --- a/paddlers/tasks/segmenter.py +++ b/paddlers/tasks/segmenter.py @@ -184,14 +184,7 @@ class BaseSegmenter(BaseModel): ) losses = [getattr(seg_losses, loss)() for loss in losses] loss_type = [seg_losses.MixedLoss(losses=losses, coef=list(coef))] - if self.model_name == 'FastSCNN': - loss_type *= 2 - loss_coef = [1.0, 0.4] - elif self.model_name == 'BiSeNetV2': - loss_type *= 5 - loss_coef = [1.0] * 5 - else: - loss_coef = [1.0] + loss_coef = [1.0] losses = {'types': loss_type, 'coef': loss_coef} return losses @@ -869,6 +862,12 @@ class FastSCNN(BaseSegmenter): losses=losses, **params) + def default_loss(self): + losses = super(FastSCNN, self).default_loss() + losses['types'] *= 2 + losses['coef'] = [1.0, 0.4] + return losses + class HRNet(BaseSegmenter): def __init__(self, @@ -918,6 +917,12 @@ class BiSeNetV2(BaseSegmenter): losses=losses, **params) + def default_loss(self): + losses = super(BiSeNetV2, self).default_loss() + losses['types'] *= 5 + losses['coef'] = [1.0] * 5 + return losses + class FarSeg(BaseSegmenter): def __init__(self, diff --git a/paddlers/utils/checkpoint.py b/paddlers/utils/checkpoint.py index b6ada51..401d7e3 100644 --- a/paddlers/utils/checkpoint.py +++ b/paddlers/utils/checkpoint.py @@ -493,11 +493,12 @@ def load_pretrain_weights(model, pretrain_weights=None, 
model_name=None): num_params_loaded = 0 for k in model_state_dict: if k not in param_state_dict: - logging.warning("{} is not in pretrained model".format(k)) + logging.warning("{} is not in the pretrained model.".format( + k)) elif list(param_state_dict[k].shape) != list(model_state_dict[k] .shape): logging.warning( - "[SKIP] Shape of pretrained params {} doesn't match.(Pretrained: {}, Actual: {})" + "[SKIP] Shape of parameters {} do not match. (pretrained: {} vs actual: {})" .format(k, param_state_dict[k].shape, model_state_dict[ k].shape)) else: @@ -507,11 +508,11 @@ def load_pretrain_weights(model, pretrain_weights=None, model_name=None): logging.info("There are {}/{} variables loaded into {}.".format( num_params_loaded, len(model_state_dict), model_name)) else: - raise ValueError('The pretrained model directory is not Found: {}'. + raise ValueError('The pretrained model directory is not found: {}'. format(pretrain_weights)) else: logging.info( - 'No pretrained model to load, {} will be trained from scratch.'. + 'No pretrained model to load. {} will be trained from scratch.'. 
format(model_name)) diff --git a/tutorials/train/classification/condensenetv2.py b/tutorials/train/classification/condensenetv2.py index 62fd4f4..71f252e 100644 --- a/tutorials/train/classification/condensenetv2.py +++ b/tutorials/train/classification/condensenetv2.py @@ -15,7 +15,7 @@ EVAL_FILE_LIST_PATH = './data/ucmerced/val.txt' # 数据集类别信息文件路径 LABEL_LIST_PATH = './data/ucmerced/labels.txt' # 实验目录,保存输出的模型权重和结果 -EXP_DIR = './output/hrnet/' +EXP_DIR = './output/condensenetv2/' # 下载和解压UC Merced数据集 pdrs.utils.download_and_decompress( diff --git a/tutorials/train/semantic_segmentation/bisenetv2.py b/tutorials/train/semantic_segmentation/bisenetv2.py index 6d2754d..928d614 100644 --- a/tutorials/train/semantic_segmentation/bisenetv2.py +++ b/tutorials/train/semantic_segmentation/bisenetv2.py @@ -15,7 +15,7 @@ EVAL_FILE_LIST_PATH = './data/rsseg/val.txt' # 数据集类别信息文件路径 LABEL_LIST_PATH = './data/rsseg/labels.txt' # 实验目录,保存输出的模型权重和结果 -EXP_DIR = './output/unet/' +EXP_DIR = './output/bisenetv2/' # 影像波段数量 NUM_BANDS = 10 diff --git a/tutorials/train/semantic_segmentation/fast_scnn.py b/tutorials/train/semantic_segmentation/fast_scnn.py index df15ece..3dcd2ed 100644 --- a/tutorials/train/semantic_segmentation/fast_scnn.py +++ b/tutorials/train/semantic_segmentation/fast_scnn.py @@ -15,7 +15,7 @@ EVAL_FILE_LIST_PATH = './data/rsseg/val.txt' # 数据集类别信息文件路径 LABEL_LIST_PATH = './data/rsseg/labels.txt' # 实验目录,保存输出的模型权重和结果 -EXP_DIR = './output/unet/' +EXP_DIR = './output/fast_scnn/' # 影像波段数量 NUM_BANDS = 10 diff --git a/tutorials/train/semantic_segmentation/hrnet.py b/tutorials/train/semantic_segmentation/hrnet.py index e8a9cdb..b500198 100644 --- a/tutorials/train/semantic_segmentation/hrnet.py +++ b/tutorials/train/semantic_segmentation/hrnet.py @@ -15,7 +15,7 @@ EVAL_FILE_LIST_PATH = './data/rsseg/val.txt' # 数据集类别信息文件路径 LABEL_LIST_PATH = './data/rsseg/labels.txt' # 实验目录,保存输出的模型权重和结果 -EXP_DIR = './output/unet/' +EXP_DIR = './output/hrnet/' # 影像波段数量 NUM_BANDS = 10 From 
1ef0067ad9169f1a6c6928544a304aa7de76f403 Mon Sep 17 00:00:00 2001 From: Bobholamovic Date: Tue, 13 Sep 2022 21:13:56 +0800 Subject: [PATCH 08/15] Add fix seed --- examples/rs_research/config_utils.py | 1 + examples/rs_research/run_task.py | 7 +++++++ test_tipc/config_utils.py | 1 + test_tipc/run_task.py | 7 +++++++ 4 files changed, 16 insertions(+) diff --git a/examples/rs_research/config_utils.py b/examples/rs_research/config_utils.py index 10e7129..62996fe 100644 --- a/examples/rs_research/config_utils.py +++ b/examples/rs_research/config_utils.py @@ -133,6 +133,7 @@ def parse_args(*args, **kwargs): # Global settings parser.add_argument('cmd', choices=['train', 'eval']) parser.add_argument('task', choices=['cd', 'clas', 'det', 'res', 'seg']) + parser.add_argument('--seed', type=int, default=None) # Data parser.add_argument('--datasets', type=dict, default={}) diff --git a/examples/rs_research/run_task.py b/examples/rs_research/run_task.py index a487c45..cab073a 100644 --- a/examples/rs_research/run_task.py +++ b/examples/rs_research/run_task.py @@ -15,7 +15,9 @@ # limitations under the License. import os +import random +import numpy as np # Import cv2 and sklearn before paddlers to solve the # "ImportError: dlopen: cannot load any more object with static TLS" issue. 
import cv2 @@ -62,6 +64,11 @@ if __name__ == '__main__': cfg = parse_args() print(format_cfg(cfg)) + if cfg['seed'] is not None: + random.seed(cfg['seed']) + np.random.seed(cfg['seed']) + paddle.seed(cfg['seed']) + # Automatically download data if cfg['download_on']: paddlers.utils.download_and_decompress( diff --git a/test_tipc/config_utils.py b/test_tipc/config_utils.py index 6e677b4..3fb17bd 100644 --- a/test_tipc/config_utils.py +++ b/test_tipc/config_utils.py @@ -119,6 +119,7 @@ def parse_args(*args, **kwargs): # Global settings parser.add_argument('cmd', choices=['train', 'eval']) parser.add_argument('task', choices=['cd', 'clas', 'det', 'res', 'seg']) + parser.add_argument('--seed', type=int, default=None) # Data parser.add_argument('--datasets', type=dict, default={}) diff --git a/test_tipc/run_task.py b/test_tipc/run_task.py index 923415b..7c1d40d 100644 --- a/test_tipc/run_task.py +++ b/test_tipc/run_task.py @@ -1,7 +1,9 @@ #!/usr/bin/env python import os +import random +import numpy as np # Import cv2 and sklearn before paddlers to solve the # "ImportError: dlopen: cannot load any more object with static TLS" issue. 
import cv2 @@ -46,6 +48,11 @@ if __name__ == '__main__': cfg = parse_args() print(format_cfg(cfg)) + if cfg['seed'] is not None: + random.seed(cfg['seed']) + np.random.seed(cfg['seed']) + paddle.seed(cfg['seed']) + # Automatically download data if cfg['download_on']: paddlers.utils.download_and_decompress( From 413d9a660600b86fbc8211a1ce63dfb186c06b3f Mon Sep 17 00:00:00 2001 From: Bobholamovic Date: Tue, 13 Sep 2022 21:14:12 +0800 Subject: [PATCH 09/15] Change seg dataset to rsseg_rgb --- test_tipc/prepare.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test_tipc/prepare.sh b/test_tipc/prepare.sh index 0198213..0896264 100644 --- a/test_tipc/prepare.sh +++ b/test_tipc/prepare.sh @@ -38,7 +38,7 @@ if [[ ${MODE} == 'lite_train_lite_infer' \ elif [[ ${task_name} == 'res' ]]; then download_and_unzip_dataset "${DATA_DIR}" rssr https://paddlers.bj.bcebos.com/datasets/rssr_mini.zip elif [[ ${task_name} == 'seg' ]]; then - download_and_unzip_dataset "${DATA_DIR}" rsseg https://paddlers.bj.bcebos.com/datasets/rsseg_mini.zip + download_and_unzip_dataset "${DATA_DIR}" rsseg https://paddlers.bj.bcebos.com/datasets/rsseg_rgb.zip fi elif [[ ${MODE} == 'whole_train_whole_infer' ]]; then @@ -63,7 +63,7 @@ elif [[ ${MODE} == 'whole_train_whole_infer' ]]; then elif [[ ${task_name} == 'res' ]]; then download_and_unzip_dataset "${DATA_DIR}" rssr https://paddlers.bj.bcebos.com/datasets/rssr.zip elif [[ ${task_name} == 'seg' ]]; then - download_and_unzip_dataset "${DATA_DIR}" rsseg https://paddlers.bj.bcebos.com/datasets/rsseg.zip + download_and_unzip_dataset "${DATA_DIR}" rsseg https://paddlers.bj.bcebos.com/datasets/rsseg_rgb.zip fi fi From e83cda7b46a686451babb63600c1c7ab8f2ee181 Mon Sep 17 00:00:00 2001 From: Bobholamovic Date: Tue, 13 Sep 2022 21:15:09 +0800 Subject: [PATCH 10/15] Fix seed and add statistics --- test_tipc/configs/cd/_base_/airchange.yaml | 2 ++ test_tipc/configs/cd/_base_/levircd.yaml | 2 ++ test_tipc/configs/clas/_base_/ucmerced.yaml | 2 
++ .../clas/condensenetv2/train_infer_python.txt | 2 +- test_tipc/configs/det/_base_/rsod.yaml | 2 ++ test_tipc/configs/det/_base_/sarship.yaml | 2 ++ test_tipc/configs/res/_base_/rssr.yaml | 2 ++ test_tipc/configs/seg/_base_/rsseg.yaml | 10 ++++++---- .../configs/seg/bisenetv2/bisenetv2_rsseg.yaml | 4 ++-- .../configs/seg/bisenetv2/train_infer_python.txt | 2 +- .../configs/seg/deeplabv3p/deeplabv3p_rsseg.yaml | 2 +- .../configs/seg/deeplabv3p/train_infer_python.txt | 4 ++-- test_tipc/configs/seg/farseg/farseg_rsseg.yaml | 2 +- .../configs/seg/farseg/train_infer_python.txt | 2 +- .../configs/seg/fast_scnn/fast_scnn_rsseg.yaml | 4 ++-- .../configs/seg/fast_scnn/train_infer_python.txt | 2 +- test_tipc/configs/seg/hrnet/hrnet_rsseg.yaml | 2 +- test_tipc/configs/seg/hrnet/train_infer_python.txt | 2 +- test_tipc/configs/seg/unet/train_infer_python.txt | 2 +- test_tipc/configs/seg/unet/unet_rsseg.yaml | 2 +- test_tipc/docs/test_train_inference_python.md | 14 +++++++------- 21 files changed, 41 insertions(+), 27 deletions(-) diff --git a/test_tipc/configs/cd/_base_/airchange.yaml b/test_tipc/configs/cd/_base_/airchange.yaml index f41f05a..035819d 100644 --- a/test_tipc/configs/cd/_base_/airchange.yaml +++ b/test_tipc/configs/cd/_base_/airchange.yaml @@ -1,5 +1,7 @@ # Basic configurations of AirChange dataset +seed: 1024 + datasets: train: !Node type: CDDataset diff --git a/test_tipc/configs/cd/_base_/levircd.yaml b/test_tipc/configs/cd/_base_/levircd.yaml index f14607d..3ec2099 100644 --- a/test_tipc/configs/cd/_base_/levircd.yaml +++ b/test_tipc/configs/cd/_base_/levircd.yaml @@ -1,5 +1,7 @@ # Basic configurations of LEVIR-CD dataset +seed: 1024 + datasets: train: !Node type: CDDataset diff --git a/test_tipc/configs/clas/_base_/ucmerced.yaml b/test_tipc/configs/clas/_base_/ucmerced.yaml index 1b3b79d..88027b1 100644 --- a/test_tipc/configs/clas/_base_/ucmerced.yaml +++ b/test_tipc/configs/clas/_base_/ucmerced.yaml @@ -1,5 +1,7 @@ # Basic configurations of UCMerced 
dataset +seed: 1024 + datasets: train: !Node type: ClasDataset diff --git a/test_tipc/configs/clas/condensenetv2/train_infer_python.txt b/test_tipc/configs/clas/condensenetv2/train_infer_python.txt index 0e8832b..d274570 100644 --- a/test_tipc/configs/clas/condensenetv2/train_infer_python.txt +++ b/test_tipc/configs/clas/condensenetv2/train_infer_python.txt @@ -4,7 +4,7 @@ python:python gpu_list:0|0,1 use_gpu:null|null --precision:null ---num_epochs:lite_train_lite_infer=3|lite_train_whole_infer=3|whole_train_whole_infer=10 +--num_epochs:lite_train_lite_infer=3|lite_train_whole_infer=3|whole_train_whole_infer=20 --save_dir:adaptive --train_batch_size:lite_train_lite_infer=16|lite_train_whole_infer=16|whole_train_whole_infer=16 --model_path:null diff --git a/test_tipc/configs/det/_base_/rsod.yaml b/test_tipc/configs/det/_base_/rsod.yaml index a4fcf69..1d348d5 100644 --- a/test_tipc/configs/det/_base_/rsod.yaml +++ b/test_tipc/configs/det/_base_/rsod.yaml @@ -1,5 +1,7 @@ # Basic configurations of RSOD dataset +seed: 1024 + datasets: train: !Node type: VOCDetDataset diff --git a/test_tipc/configs/det/_base_/sarship.yaml b/test_tipc/configs/det/_base_/sarship.yaml index ba38220..7d7b898 100644 --- a/test_tipc/configs/det/_base_/sarship.yaml +++ b/test_tipc/configs/det/_base_/sarship.yaml @@ -1,5 +1,7 @@ # Basic configurations of SARShip dataset +seed: 1024 + datasets: train: !Node type: VOCDetDataset diff --git a/test_tipc/configs/res/_base_/rssr.yaml b/test_tipc/configs/res/_base_/rssr.yaml index c2d5265..80415c4 100644 --- a/test_tipc/configs/res/_base_/rssr.yaml +++ b/test_tipc/configs/res/_base_/rssr.yaml @@ -1,5 +1,7 @@ # Basic configurations of RSSR dataset +seed: 1024 + datasets: train: !Node type: ResDataset diff --git a/test_tipc/configs/seg/_base_/rsseg.yaml b/test_tipc/configs/seg/_base_/rsseg.yaml index de5b469..d80bc3d 100644 --- a/test_tipc/configs/seg/_base_/rsseg.yaml +++ b/test_tipc/configs/seg/_base_/rsseg.yaml @@ -1,5 +1,7 @@ # Basic configurations 
of RSSeg dataset +seed: 1024 + datasets: train: !Node type: SegDataset @@ -32,8 +34,8 @@ transforms: - !Node type: Normalize args: - mean: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5] - std: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5] + mean: [0.5, 0.5, 0.5] + std: [0.5, 0.5, 0.5] - !Node type: ArrangeSegmenter args: ['train'] @@ -47,8 +49,8 @@ transforms: - !Node type: Normalize args: - mean: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5] - std: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5] + mean: [0.5, 0.5, 0.5] + std: [0.5, 0.5, 0.5] - !Node type: ArrangeSegmenter args: ['eval'] diff --git a/test_tipc/configs/seg/bisenetv2/bisenetv2_rsseg.yaml b/test_tipc/configs/seg/bisenetv2/bisenetv2_rsseg.yaml index 81c29b6..5e69ea3 100644 --- a/test_tipc/configs/seg/bisenetv2/bisenetv2_rsseg.yaml +++ b/test_tipc/configs/seg/bisenetv2/bisenetv2_rsseg.yaml @@ -5,7 +5,7 @@ _base_: ../_base_/rsseg.yaml save_dir: ./test_tipc/output/seg/bisenetv2/ model: !Node - type: BiSeNet V2 + type: BiSeNetV2 args: - in_channels: 10 + in_channels: 3 num_classes: 5 \ No newline at end of file diff --git a/test_tipc/configs/seg/bisenetv2/train_infer_python.txt b/test_tipc/configs/seg/bisenetv2/train_infer_python.txt index 2fcad98..fc8eddb 100644 --- a/test_tipc/configs/seg/bisenetv2/train_infer_python.txt +++ b/test_tipc/configs/seg/bisenetv2/train_infer_python.txt @@ -27,7 +27,7 @@ null:null ===========================export_params=========================== --save_dir:adaptive --model_dir:adaptive ---fixed_input_shape:[-1,10,512,512] +--fixed_input_shape:[-1,3,512,512] norm_export:deploy/export/export_model.py quant_export:null fpgm_export:null diff --git a/test_tipc/configs/seg/deeplabv3p/deeplabv3p_rsseg.yaml b/test_tipc/configs/seg/deeplabv3p/deeplabv3p_rsseg.yaml index c7e1248..d75af0b 100644 --- a/test_tipc/configs/seg/deeplabv3p/deeplabv3p_rsseg.yaml +++ b/test_tipc/configs/seg/deeplabv3p/deeplabv3p_rsseg.yaml @@ -7,5 +7,5 @@ save_dir: 
./test_tipc/output/seg/deeplabv3p/ model: !Node type: DeepLabV3P args: - in_channels: 10 + in_channels: 3 num_classes: 5 \ No newline at end of file diff --git a/test_tipc/configs/seg/deeplabv3p/train_infer_python.txt b/test_tipc/configs/seg/deeplabv3p/train_infer_python.txt index de7cac6..bfc38f4 100644 --- a/test_tipc/configs/seg/deeplabv3p/train_infer_python.txt +++ b/test_tipc/configs/seg/deeplabv3p/train_infer_python.txt @@ -4,7 +4,7 @@ python:python gpu_list:0|0,1 use_gpu:null|null --precision:null ---num_epochs:lite_train_lite_infer=3|lite_train_whole_infer=3|whole_train_whole_infer=30 +--num_epochs:lite_train_lite_infer=3|lite_train_whole_infer=3|whole_train_whole_infer=20 --save_dir:adaptive --train_batch_size:lite_train_lite_infer=4|lite_train_whole_infer=4|whole_train_whole_infer=4 --model_path:null @@ -27,7 +27,7 @@ null:null ===========================export_params=========================== --save_dir:adaptive --model_dir:adaptive ---fixed_input_shape:[-1,10,512,512] +--fixed_input_shape:[-1,3,512,512] norm_export:deploy/export/export_model.py quant_export:null fpgm_export:null diff --git a/test_tipc/configs/seg/farseg/farseg_rsseg.yaml b/test_tipc/configs/seg/farseg/farseg_rsseg.yaml index fa6d97b..fa68772 100644 --- a/test_tipc/configs/seg/farseg/farseg_rsseg.yaml +++ b/test_tipc/configs/seg/farseg/farseg_rsseg.yaml @@ -7,5 +7,5 @@ save_dir: ./test_tipc/output/seg/farseg/ model: !Node type: FarSeg args: - in_channels: 10 + in_channels: 3 num_classes: 5 \ No newline at end of file diff --git a/test_tipc/configs/seg/farseg/train_infer_python.txt b/test_tipc/configs/seg/farseg/train_infer_python.txt index 6619052..cf48e48 100644 --- a/test_tipc/configs/seg/farseg/train_infer_python.txt +++ b/test_tipc/configs/seg/farseg/train_infer_python.txt @@ -27,7 +27,7 @@ null:null ===========================export_params=========================== --save_dir:adaptive --model_dir:adaptive ---fixed_input_shape:[-1,10,512,512] +--fixed_input_shape:[-1,3,512,512] 
norm_export:deploy/export/export_model.py quant_export:null fpgm_export:null diff --git a/test_tipc/configs/seg/fast_scnn/fast_scnn_rsseg.yaml b/test_tipc/configs/seg/fast_scnn/fast_scnn_rsseg.yaml index 145ce47..76adcfd 100644 --- a/test_tipc/configs/seg/fast_scnn/fast_scnn_rsseg.yaml +++ b/test_tipc/configs/seg/fast_scnn/fast_scnn_rsseg.yaml @@ -5,7 +5,7 @@ _base_: ../_base_/rsseg.yaml save_dir: ./test_tipc/output/seg/fast_scnn/ model: !Node - type: Fast-SCNN + type: FastSCNN args: - in_channels: 10 + in_channels: 3 num_classes: 5 \ No newline at end of file diff --git a/test_tipc/configs/seg/fast_scnn/train_infer_python.txt b/test_tipc/configs/seg/fast_scnn/train_infer_python.txt index f5bdcd5..169aa88 100644 --- a/test_tipc/configs/seg/fast_scnn/train_infer_python.txt +++ b/test_tipc/configs/seg/fast_scnn/train_infer_python.txt @@ -27,7 +27,7 @@ null:null ===========================export_params=========================== --save_dir:adaptive --model_dir:adaptive ---fixed_input_shape:[-1,10,512,512] +--fixed_input_shape:[-1,3,512,512] norm_export:deploy/export/export_model.py quant_export:null fpgm_export:null diff --git a/test_tipc/configs/seg/hrnet/hrnet_rsseg.yaml b/test_tipc/configs/seg/hrnet/hrnet_rsseg.yaml index a7eff81..5ee67f9 100644 --- a/test_tipc/configs/seg/hrnet/hrnet_rsseg.yaml +++ b/test_tipc/configs/seg/hrnet/hrnet_rsseg.yaml @@ -7,5 +7,5 @@ save_dir: ./test_tipc/output/seg/hrnet/ model: !Node type: HRNet args: - in_channels: 10 + in_channels: 3 num_classes: 5 \ No newline at end of file diff --git a/test_tipc/configs/seg/hrnet/train_infer_python.txt b/test_tipc/configs/seg/hrnet/train_infer_python.txt index d1cafdc..63b128b 100644 --- a/test_tipc/configs/seg/hrnet/train_infer_python.txt +++ b/test_tipc/configs/seg/hrnet/train_infer_python.txt @@ -27,7 +27,7 @@ null:null ===========================export_params=========================== --save_dir:adaptive --model_dir:adaptive ---fixed_input_shape:[-1,10,512,512] 
+--fixed_input_shape:[-1,3,512,512] norm_export:deploy/export/export_model.py quant_export:null fpgm_export:null diff --git a/test_tipc/configs/seg/unet/train_infer_python.txt b/test_tipc/configs/seg/unet/train_infer_python.txt index 8abf325..a98a032 100644 --- a/test_tipc/configs/seg/unet/train_infer_python.txt +++ b/test_tipc/configs/seg/unet/train_infer_python.txt @@ -27,7 +27,7 @@ null:null ===========================export_params=========================== --save_dir:adaptive --model_dir:adaptive ---fixed_input_shape:[-1,10,512,512] +--fixed_input_shape:[-1,3,512,512] norm_export:deploy/export/export_model.py quant_export:null fpgm_export:null diff --git a/test_tipc/configs/seg/unet/unet_rsseg.yaml b/test_tipc/configs/seg/unet/unet_rsseg.yaml index 18211b5..c40f68e 100644 --- a/test_tipc/configs/seg/unet/unet_rsseg.yaml +++ b/test_tipc/configs/seg/unet/unet_rsseg.yaml @@ -7,5 +7,5 @@ save_dir: ./test_tipc/output/seg/unet/ model: !Node type: UNet args: - in_channels: 10 + in_channels: 3 num_classes: 5 \ No newline at end of file diff --git a/test_tipc/docs/test_train_inference_python.md b/test_tipc/docs/test_train_inference_python.md index 2d01ca3..95ecd82 100644 --- a/test_tipc/docs/test_train_inference_python.md +++ b/test_tipc/docs/test_train_inference_python.md @@ -19,7 +19,7 @@ Linux GPU/CPU 基础训练推理测试的主程序为`test_train_inference_pytho | 变化检测 | FC-Siam-conc | 正常训练 | 正常训练 | IoU=65.79% | | 变化检测 | FC-Siam-diff | 正常训练 | 正常训练 | IoU=61.23% | | 变化检测 | FCCDN | 正常训练 | 正常训练 | IoU=24.42% | -| 场景分类 | CondenseNet V2 | 正常训练 | 正常训练 | Acc(top1)= | +| 场景分类 | CondenseNet V2 | 正常训练 | 正常训练 | Acc(top1)=60.42% | | 场景分类 | HRNet | 正常训练 | 正常训练 | Acc(top1)=99.37% | | 场景分类 | MobileNetV3 | 正常训练 | 正常训练 | Acc(top1)=99.58% | | 场景分类 | ResNet50-vd | 正常训练 | 正常训练 | Acc(top1)=99.26% | @@ -31,12 +31,12 @@ Linux GPU/CPU 基础训练推理测试的主程序为`test_train_inference_pytho | 目标检测 | PP-YOLO Tiny | 正常训练 | 正常训练 | mAP=44.27% | | 目标检测 | PP-YOLOv2 | 正常训练 | 正常训练 | mAP=59.37% | | 目标检测 | YOLOv3 | 正常训练 | 正常训练 | 
mAP=47.33% | -| 图像分割 | BiSeNet V2 | 正常训练 | 正常训练 | mIoU= | -| 图像分割 | DeepLab V3+ | 正常训练 | 正常训练 | mIoU=56.05% | -| 图像分割 | FarSeg | 正常训练 | 正常训练 | mIoU=49.58% | -| 图像分割 | Fast-SCNN | 正常训练 | 正常训练 | mIoU= | -| 图像分割 | HRNet | 正常训练 | 正常训练 | mIoU= | -| 图像分割 | UNet | 正常训练 | 正常训练 | mIoU=55.50% | +| 图像分割 | BiSeNet V2 | 正常训练 | 正常训练 | mIoU=70.20 | +| 图像分割 | DeepLab V3+ | 正常训练 | 正常训练 | mIoU=64.59% | +| 图像分割 | FarSeg | 正常训练 | 正常训练 | mIoU=50.45% | +| 图像分割 | Fast-SCNN | 正常训练 | 正常训练 | mIoU=48.97% | +| 图像分割 | HRNet | 正常训练 | 正常训练 | mIoU=33.49% | +| 图像分割 | UNet | 正常训练 | 正常训练 | mIoU=72.64% | *注:参考预测精度为whole_train_whole_infer模式下单卡训练汇报的精度数据。* From 9c1b2ea2fec003459752dc8a28b4cc60baec5b51 Mon Sep 17 00:00:00 2001 From: Bobholamovic Date: Wed, 14 Sep 2022 10:34:59 +0800 Subject: [PATCH 11/15] Add model type check --- paddlers/deploy/predictor.py | 38 ++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/paddlers/deploy/predictor.py b/paddlers/deploy/predictor.py index 24bb337..24cc3ec 100644 --- a/paddlers/deploy/predictor.py +++ b/paddlers/deploy/predictor.py @@ -103,11 +103,11 @@ class Predictor(object): config.enable_use_gpu(200, gpu_id) config.switch_ir_optim(True) if use_trt: - if self._model.model_type == 'segmenter': + if self.model_type == 'segmenter': logging.warning( "Semantic segmentation models do not support TensorRT acceleration, " "TensorRT is forcibly disabled.") - elif self._model.model_type == 'detector' and 'RCNN' in self._model.__class__.__name__: + elif self.model_type == 'detector' and 'RCNN' in self._model.__class__.__name__: logging.warning( "RCNN models do not support TensorRT acceleration, " "TensorRT is forcibly disabled.") @@ -150,30 +150,29 @@ class Predictor(object): def preprocess(self, images, transforms): preprocessed_samples = self._model.preprocess( images, transforms, to_tensor=False) - if self._model.model_type == 'classifier': + if self.model_type == 'classifier': preprocessed_samples = {'image': 
preprocessed_samples[0]} - elif self._model.model_type == 'segmenter': + elif self.model_type == 'segmenter': preprocessed_samples = { 'image': preprocessed_samples[0], 'ori_shape': preprocessed_samples[1] } - elif self._model.model_type == 'detector': + elif self.model_type == 'detector': pass - elif self._model.model_type == 'change_detector': + elif self.model_type == 'change_detector': preprocessed_samples = { 'image': preprocessed_samples[0], 'image2': preprocessed_samples[1], 'ori_shape': preprocessed_samples[2] } - elif self._model.model_type == 'restorer': + elif self.model_type == 'restorer': preprocessed_samples = { 'image': preprocessed_samples[0], 'tar_shape': preprocessed_samples[1] } else: logging.error( - "Invalid model type {}".format(self._model.model_type), - exit=True) + "Invalid model type {}".format(self.model_type), exit=True) return preprocessed_samples def postprocess(self, @@ -182,7 +181,7 @@ class Predictor(object): ori_shape=None, tar_shape=None, transforms=None): - if self._model.model_type == 'classifier': + if self.model_type == 'classifier': true_topk = min(self._model.num_classes, topk) if self._model.postprocess is None: self._model.build_postprocess_from_labels(topk) @@ -198,7 +197,7 @@ class Predictor(object): 'scores_map': s, 'label_names_map': n, } for l, s, n in zip(class_ids, scores, label_names)] - elif self._model.model_type in ('segmenter', 'change_detector'): + elif self.model_type in ('segmenter', 'change_detector'): label_map, score_map = self._model.postprocess( net_outputs, batch_origin_shape=ori_shape, @@ -207,13 +206,13 @@ class Predictor(object): 'label_map': l, 'score_map': s } for l, s in zip(label_map, score_map)] - elif self._model.model_type == 'detector': + elif self.model_type == 'detector': net_outputs = { k: v for k, v in zip(['bbox', 'bbox_num', 'mask'], net_outputs) } preds = self._model.postprocess(net_outputs) - elif self._model.model_type == 'restorer': + elif self.model_type == 'restorer': res_maps = 
self._model.postprocess( net_outputs[0], batch_tar_shape=tar_shape, @@ -221,8 +220,7 @@ class Predictor(object): preds = [{'res_map': res_map} for res_map in res_maps] else: logging.error( - "Invalid model type {}.".format(self._model.model_type), - exit=True) + "Invalid model type {}.".format(self.model_type), exit=True) return preds @@ -360,6 +358,12 @@ class Predictor(object): batch_size (int, optional): Batch size used in inference. Defaults to 1. quiet (bool, optional): If True, disable the progress bar. Defaults to False. """ + + if self.model_type not in ('segmenter', 'change_detector'): + raise RuntimeError( + "Model type is {}, which does not support inference with sliding windows.". + format(self.model_type)) + slider_predict( partial( self.predict, quiet=True), @@ -375,3 +379,7 @@ class Predictor(object): def batch_predict(self, image_list, **params): return self.predict(img_file=image_list, **params) + + @property + def model_type(self): + return self._model.model_type From 92a5086c792494d3a5c95c76d904d371dd8d5024 Mon Sep 17 00:00:00 2001 From: Bobholamovic Date: Wed, 14 Sep 2022 19:36:20 +0800 Subject: [PATCH 12/15] Remove unused classes in fccdn --- paddlers/rs_models/cd/losses/fccdn_loss.py | 63 +++------------------- paddlers/tasks/change_detector.py | 2 +- tutorials/train/change_detection/fccdn.py | 2 +- 3 files changed, 10 insertions(+), 57 deletions(-) diff --git a/paddlers/rs_models/cd/losses/fccdn_loss.py b/paddlers/rs_models/cd/losses/fccdn_loss.py index 49d2b4c..259367f 100644 --- a/paddlers/rs_models/cd/losses/fccdn_loss.py +++ b/paddlers/rs_models/cd/losses/fccdn_loss.py @@ -43,42 +43,13 @@ class DiceLoss(nn.Layer): return self.soft_dice_loss(y_pred.astype(paddle.float32), y_true) -class MultiClassDiceLoss(nn.Layer): - def __init__( - self, - weight, - batch=True, - ignore_index=-1, - do_softmax=False, - **kwargs, ): - super(MultiClassDiceLoss, self).__init__() - self.ignore_index = ignore_index - self.weight = weight - self.do_softmax = 
do_softmax - self.binary_diceloss = DiceLoss(batch) - - def forward(self, y_pred, y_true): - if self.do_softmax: - y_pred = paddle.nn.functional.softmax(y_pred, axis=1) - y_true = F.one_hot(y_true.long(), y_pred.shape[1]).permute(0, 3, 1, 2) - total_loss = 0.0 - tmp_i = 0.0 - for i in range(y_pred.shape[1]): - if i != self.ignore_index: - diceloss = self.binary_diceloss(y_pred[:, i, :, :], - y_true[:, i, :, :]) - total_loss += paddle.multiply(diceloss, self.weight[i]) - tmp_i += 1.0 - return total_loss / tmp_i - - class DiceBCELoss(nn.Layer): """Binary change detection task loss""" def __init__(self): super(DiceBCELoss, self).__init__() self.bce_loss = nn.BCELoss() - self.binnary_dice = DiceLoss() + self.binary_dice = DiceLoss() def forward(self, scores, labels, do_sigmoid=True): if len(scores.shape) > 3: @@ -87,29 +58,11 @@ class DiceBCELoss(nn.Layer): labels = labels.squeeze(1) if do_sigmoid: scores = paddle.nn.functional.sigmoid(scores.clone()) - diceloss = self.binnary_dice(scores, labels) + diceloss = self.binary_dice(scores, labels) bceloss = self.bce_loss(scores, labels) return diceloss + bceloss -class McDiceBCELoss(nn.Layer): - """Multi-class change detection task loss""" - - def __init__(self, weight, do_sigmoid=True): - super(McDiceBCELoss, self).__init__() - self.ce_loss = nn.CrossEntropyLoss(weight) - self.dice = MultiClassDiceLoss(weight, do_sigmoid) - - def forward(self, scores, labels): - if len(scores.shape) < 4: - scores = scores.unsqueeze(1) - if len(labels.shape) < 4: - labels = labels.unsqueeze(1) - diceloss = self.dice(scores, labels) - bceloss = self.ce_loss(scores, labels) - return diceloss + bceloss - - def fccdn_ssl_loss(logits_list, labels): """ Self-supervised learning loss for change detection. 
@@ -160,11 +113,11 @@ def fccdn_ssl_loss(logits_list, labels): # Seg loss labels_downsample = labels_downsample.astype(paddle.float32) - loss_aux = 0.2 * criterion_ssl(out1, pred_seg_post_tmp1, False) - loss_aux += 0.2 * criterion_ssl(out2, pred_seg_pre_tmp1, False) - loss_aux += 0.2 * criterion_ssl( - out3, labels_downsample - pred_seg_post_tmp2, False) - loss_aux += 0.2 * criterion_ssl(out4, labels_downsample - pred_seg_pre_tmp2, - False) + loss_aux = criterion_ssl(out1, pred_seg_post_tmp1, False) + loss_aux += criterion_ssl(out2, pred_seg_pre_tmp1, False) + loss_aux += criterion_ssl(out3, labels_downsample - pred_seg_post_tmp2, + False) + loss_aux += criterion_ssl(out4, labels_downsample - pred_seg_pre_tmp2, + False) return loss_aux diff --git a/paddlers/tasks/change_detector.py b/paddlers/tasks/change_detector.py index 6df35d8..6f6cb69 100644 --- a/paddlers/tasks/change_detector.py +++ b/paddlers/tasks/change_detector.py @@ -1067,7 +1067,7 @@ class FCCDN(BaseChangeDetector): return { 'types': [seg_losses.CrossEntropyLoss(), cmcd.losses.fccdn_ssl_loss], - 'coef': [1.0, 1.0] + 'coef': [1.0, 0.2] } else: raise ValueError( diff --git a/tutorials/train/change_detection/fccdn.py b/tutorials/train/change_detection/fccdn.py index 318fa0e..7ac6fa7 100644 --- a/tutorials/train/change_detection/fccdn.py +++ b/tutorials/train/change_detection/fccdn.py @@ -78,7 +78,7 @@ model = pdrs.tasks.cd.FCCDN() # 执行模型训练 model.train( - num_epochs=10, + num_epochs=15, train_dataset=train_dataset, train_batch_size=4, eval_dataset=eval_dataset, From f0a43b9e0eef3df87cca7fd6f4d2fe12047e3e46 Mon Sep 17 00:00:00 2001 From: Bobholamovic Date: Thu, 15 Sep 2022 10:18:12 +0800 Subject: [PATCH 13/15] Fix restoration bugs --- docs/intro/model_zoo.md | 2 +- paddlers/tasks/restorer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/intro/model_zoo.md b/docs/intro/model_zoo.md index f8b520d..faf7cd2 100644 --- a/docs/intro/model_zoo.md +++ b/docs/intro/model_zoo.md @@ -25,7 
+25,7 @@ PaddleRS目前已支持的全部模型如下(标注\*的为遥感专用模型 | 场景分类 | MobileNetV3 | 否 | | 场景分类 | ResNet50-vd | 否 | | 图像复原 | DRN | 否 | -| 图像复原 | ESRGAN | 否 | +| 图像复原 | ESRGAN | 是 | | 图像复原 | LESRCNN | 否 | | 目标检测 | Faster R-CNN | 否 | | 目标检测 | PP-YOLO | 否 | diff --git a/paddlers/tasks/restorer.py b/paddlers/tasks/restorer.py index ce7e931..ff8708b 100644 --- a/paddlers/tasks/restorer.py +++ b/paddlers/tasks/restorer.py @@ -773,7 +773,7 @@ class LESRCNN(BaseRestorer): group=1, **params): params.update({ - 'scale': sr_factor, + 'scale': sr_factor if sr_factor is not None else 1, 'multi_scale': multi_scale, 'group': group }) From c81b66a39b0fa01115b590c02d67e5a80464b70b Mon Sep 17 00:00:00 2001 From: Bobholamovic Date: Thu, 15 Sep 2022 12:04:40 +0800 Subject: [PATCH 14/15] Add example --- examples/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/README.md b/examples/README.md index 18f0105..d6f6b29 100644 --- a/examples/README.md +++ b/examples/README.md @@ -53,3 +53,4 @@ PaddleRS提供从科学研究到产业应用的丰富示例,希望帮助遥感 |[【官方】第十一届 “中国软件杯”百度遥感赛项:目标检测功能](https://aistudio.baidu.com/aistudio/projectdetail/3792609)|古代飞|竞赛打榜|目标检测,比赛基线| |[【十一届软件杯】遥感解译赛道:变化检测任务——预赛第四名方案分享](https://aistudio.baidu.com/aistudio/projectdetail/4116895)|lzzzzzm|竞赛打榜|变化检测,高分方案| |[【方案分享】第十一届 “中国软件杯”大学生软件设计大赛遥感解译赛道 比赛方案分享](https://aistudio.baidu.com/aistudio/projectdetail/4146154)|trainer|竞赛打榜|变化检测,高分方案| +|[遥感变化检测助力信贷场景下工程进度管控](https://aistudio.baidu.com/aistudio/projectdetail/4543160)|古代飞|产业范例|变化检测,金融风控| From afec4186feb7c7e26e70645d35469ed6330e605d Mon Sep 17 00:00:00 2001 From: Bobholamovic Date: Fri, 16 Sep 2022 15:16:26 +0800 Subject: [PATCH 15/15] Update CE statistics --- .../configs/cd/fc_ef/train_infer_python.txt | 2 +- .../cd/fc_siam_conc/train_infer_python.txt | 2 +- .../cd/fc_siam_diff/train_infer_python.txt | 2 +- test_tipc/configs/cd/fccdn/fccdn_levircd.yaml | 4 +- .../configs/cd/fccdn/train_infer_python.txt | 2 +- .../det/faster_rcnn/train_infer_python.txt | 2 +- 
.../configs/det/yolov3/train_infer_python.txt | 2 +- test_tipc/docs/test_train_inference_python.md | 58 +++++++++---------- 8 files changed, 38 insertions(+), 36 deletions(-) diff --git a/test_tipc/configs/cd/fc_ef/train_infer_python.txt b/test_tipc/configs/cd/fc_ef/train_infer_python.txt index 73da148..fec5049 100644 --- a/test_tipc/configs/cd/fc_ef/train_infer_python.txt +++ b/test_tipc/configs/cd/fc_ef/train_infer_python.txt @@ -4,7 +4,7 @@ python:python gpu_list:0|0,1 use_gpu:null|null --precision:null ---num_epochs:lite_train_lite_infer=5|lite_train_whole_infer=5|whole_train_whole_infer=20 +--num_epochs:lite_train_lite_infer=5|lite_train_whole_infer=5|whole_train_whole_infer=10 --save_dir:adaptive --train_batch_size:lite_train_lite_infer=4|lite_train_whole_infer=4|whole_train_whole_infer=8 --model_path:null diff --git a/test_tipc/configs/cd/fc_siam_conc/train_infer_python.txt b/test_tipc/configs/cd/fc_siam_conc/train_infer_python.txt index db1ade5..47e9bdb 100644 --- a/test_tipc/configs/cd/fc_siam_conc/train_infer_python.txt +++ b/test_tipc/configs/cd/fc_siam_conc/train_infer_python.txt @@ -4,7 +4,7 @@ python:python gpu_list:0|0,1 use_gpu:null|null --precision:null ---num_epochs:lite_train_lite_infer=5|lite_train_whole_infer=5|whole_train_whole_infer=20 +--num_epochs:lite_train_lite_infer=5|lite_train_whole_infer=5|whole_train_whole_infer=10 --save_dir:adaptive --train_batch_size:lite_train_lite_infer=4|lite_train_whole_infer=4|whole_train_whole_infer=8 --model_path:null diff --git a/test_tipc/configs/cd/fc_siam_diff/train_infer_python.txt b/test_tipc/configs/cd/fc_siam_diff/train_infer_python.txt index 245e4ed..cba8b57 100644 --- a/test_tipc/configs/cd/fc_siam_diff/train_infer_python.txt +++ b/test_tipc/configs/cd/fc_siam_diff/train_infer_python.txt @@ -4,7 +4,7 @@ python:python gpu_list:0|0,1 use_gpu:null|null --precision:null ---num_epochs:lite_train_lite_infer=5|lite_train_whole_infer=5|whole_train_whole_infer=20 
+--num_epochs:lite_train_lite_infer=5|lite_train_whole_infer=5|whole_train_whole_infer=10 --save_dir:adaptive --train_batch_size:lite_train_lite_infer=4|lite_train_whole_infer=4|whole_train_whole_infer=8 --model_path:null diff --git a/test_tipc/configs/cd/fccdn/fccdn_levircd.yaml b/test_tipc/configs/cd/fccdn/fccdn_levircd.yaml index 02586cb..2beddde 100644 --- a/test_tipc/configs/cd/fccdn/fccdn_levircd.yaml +++ b/test_tipc/configs/cd/fccdn/fccdn_levircd.yaml @@ -5,4 +5,6 @@ _base_: ../_base_/levircd.yaml save_dir: ./test_tipc/output/cd/fccdn/ model: !Node - type: FCCDN \ No newline at end of file + type: FCCDN + +learning_rate: 0.001 \ No newline at end of file diff --git a/test_tipc/configs/cd/fccdn/train_infer_python.txt b/test_tipc/configs/cd/fccdn/train_infer_python.txt index b18ae87..aea6c3c 100644 --- a/test_tipc/configs/cd/fccdn/train_infer_python.txt +++ b/test_tipc/configs/cd/fccdn/train_infer_python.txt @@ -4,7 +4,7 @@ python:python gpu_list:0|0,1 use_gpu:null|null --precision:null ---num_epochs:lite_train_lite_infer=5|lite_train_whole_infer=5|whole_train_whole_infer=10 +--num_epochs:lite_train_lite_infer=5|lite_train_whole_infer=5|whole_train_whole_infer=20 --save_dir:adaptive --train_batch_size:lite_train_lite_infer=4|lite_train_whole_infer=4|whole_train_whole_infer=8 --model_path:null diff --git a/test_tipc/configs/det/faster_rcnn/train_infer_python.txt b/test_tipc/configs/det/faster_rcnn/train_infer_python.txt index 679d81e..4aed668 100644 --- a/test_tipc/configs/det/faster_rcnn/train_infer_python.txt +++ b/test_tipc/configs/det/faster_rcnn/train_infer_python.txt @@ -4,7 +4,7 @@ python:python gpu_list:0|0,1 use_gpu:null|null --precision:null ---num_epochs:lite_train_lite_infer=3|lite_train_whole_infer=3|whole_train_whole_infer=10 +--num_epochs:lite_train_lite_infer=3|lite_train_whole_infer=3|whole_train_whole_infer=20 --save_dir:adaptive --train_batch_size:lite_train_lite_infer=4|lite_train_whole_infer=4|whole_train_whole_infer=4 --model_path:null 
diff --git a/test_tipc/configs/det/yolov3/train_infer_python.txt b/test_tipc/configs/det/yolov3/train_infer_python.txt index b60be01..f46627c 100644 --- a/test_tipc/configs/det/yolov3/train_infer_python.txt +++ b/test_tipc/configs/det/yolov3/train_infer_python.txt @@ -4,7 +4,7 @@ python:python gpu_list:0|0,1 use_gpu:null|null --precision:null ---num_epochs:lite_train_lite_infer=3|lite_train_whole_infer=3|whole_train_whole_infer=10 +--num_epochs:lite_train_lite_infer=3|lite_train_whole_infer=3|whole_train_whole_infer=20 --save_dir:adaptive --train_batch_size:lite_train_lite_infer=4|lite_train_whole_infer=4|whole_train_whole_infer=4 --model_path:null diff --git a/test_tipc/docs/test_train_inference_python.md b/test_tipc/docs/test_train_inference_python.md index 95ecd82..e5ba50c 100644 --- a/test_tipc/docs/test_train_inference_python.md +++ b/test_tipc/docs/test_train_inference_python.md @@ -8,35 +8,35 @@ Linux GPU/CPU 基础训练推理测试的主程序为`test_train_inference_pytho | 任务类别 | 模型名称 | 单机单卡 | 单机多卡 | 参考预测精度 | | :----: | :----: | :----: | :----: | :----: | -| 变化检测 | BIT | 正常训练 | 正常训练 | IoU=71.02% | -| 变化检测 | CDNet | 正常训练 | 正常训练 | IoU=56.02% | -| 变化检测 | ChangeFormer | 正常训练 | 正常训练 | IoU=61.65% | -| 变化检测 | DSAMNet | 正常训练 | 正常训练 | IoU=69.76% | -| 变化检测 | DSIFN | 正常训练 | 正常训练 | IoU=72.88% | -| 变化检测 | SNUNet | 正常训练 | 正常训练 | IoU=68.46% | -| 变化检测 | STANet | 正常训练 | 正常训练 | IoU=65.11% | -| 变化检测 | FC-EF | 正常训练 | 正常训练 | IoU=64.22% | -| 变化检测 | FC-Siam-conc | 正常训练 | 正常训练 | IoU=65.79% | -| 变化检测 | FC-Siam-diff | 正常训练 | 正常训练 | IoU=61.23% | -| 变化检测 | FCCDN | 正常训练 | 正常训练 | IoU=24.42% | -| 场景分类 | CondenseNet V2 | 正常训练 | 正常训练 | Acc(top1)=60.42% | -| 场景分类 | HRNet | 正常训练 | 正常训练 | Acc(top1)=99.37% | -| 场景分类 | MobileNetV3 | 正常训练 | 正常训练 | Acc(top1)=99.58% | -| 场景分类 | ResNet50-vd | 正常训练 | 正常训练 | Acc(top1)=99.26% | -| 图像复原 | DRN | 正常训练 | 正常训练 | PSNR=24.23 | -| 图像复原 | ESRGAN | 正常训练 | 正常训练 | PSNR=21.30 | -| 图像复原 | LESRCNN | 正常训练 | 正常训练 | PSNR=23.18 | -| 目标检测 | Faster R-CNN | 正常训练 | 正常训练 | mAP=46.99% | -| 目标检测 | 
PP-YOLO | 正常训练 | 正常训练 | mAP=56.02% | -| 目标检测 | PP-YOLO Tiny | 正常训练 | 正常训练 | mAP=44.27% | -| 目标检测 | PP-YOLOv2 | 正常训练 | 正常训练 | mAP=59.37% | -| 目标检测 | YOLOv3 | 正常训练 | 正常训练 | mAP=47.33% | -| 图像分割 | BiSeNet V2 | 正常训练 | 正常训练 | mIoU=70.20 | -| 图像分割 | DeepLab V3+ | 正常训练 | 正常训练 | mIoU=64.59% | -| 图像分割 | FarSeg | 正常训练 | 正常训练 | mIoU=50.45% | -| 图像分割 | Fast-SCNN | 正常训练 | 正常训练 | mIoU=48.97% | -| 图像分割 | HRNet | 正常训练 | 正常训练 | mIoU=33.49% | -| 图像分割 | UNet | 正常训练 | 正常训练 | mIoU=72.64% | +| 变化检测 | BIT | 正常训练 | 正常训练 | IoU=71.01% | +| 变化检测 | CDNet | 正常训练 | 正常训练 | IoU=55.10% | +| 变化检测 | ChangeFormer | 正常训练 | 正常训练 | IoU=61.09% | +| 变化检测 | DSAMNet | 正常训练 | 正常训练 | IoU=69.02% | +| 变化检测 | DSIFN | 正常训练 | 正常训练 | IoU=72.36% | +| 变化检测 | FC-EF | 正常训练 | 正常训练 | IoU=57.18% | +| 变化检测 | FC-Siam-conc | 正常训练 | 正常训练 | IoU=52.82% | +| 变化检测 | FC-Siam-diff | 正常训练 | 正常训练 | IoU=58.30% | +| 变化检测 | FCCDN | 正常训练 | 正常训练 | IoU=23.94% | +| 变化检测 | SNUNet | 正常训练 | 正常训练 | IoU=67.66% | +| 变化检测 | STANet | 正常训练 | 正常训练 | IoU=67.23% | +| 场景分类 | CondenseNet V2 | 正常训练 | 正常训练 | Acc(top1)=60.53% | +| 场景分类 | HRNet | 正常训练 | 正常训练 | Acc(top1)=99.47% | +| 场景分类 | MobileNetV3 | 正常训练 | 正常训练 | Acc(top1)=99.57% | +| 场景分类 | ResNet50-vd | 正常训练 | 正常训练 | Acc(top1)=99.37% | +| 目标检测 | Faster R-CNN | 正常训练 | 正常训练 | 暂无稳定精度 | +| 目标检测 | PP-YOLO | 正常训练 | 正常训练 | 暂无稳定精度 | +| 目标检测 | PP-YOLO Tiny | 正常训练 | 正常训练 | 暂无稳定精度 | +| 目标检测 | PP-YOLOv2 | 正常训练 | 正常训练 | 暂无稳定精度 | +| 目标检测 | YOLOv3 | 正常训练 | 正常训练 | 暂无稳定精度 | +| 图像复原 | DRN | 正常训练 | 正常训练 | PSNR=24.14 | +| 图像复原 | ESRGAN | 正常训练 | 正常训练 | PSNR=21.25 | +| 图像复原 | LESRCNN | 正常训练 | 正常训练 | PSNR=22.96 | +| 图像分割 | BiSeNet V2 | 正常训练 | 正常训练 | mIoU=70.52% | +| 图像分割 | DeepLab V3+ | 正常训练 | 正常训练 | mIoU=64.41% | +| 图像分割 | FarSeg | 正常训练 | 正常训练 | mIoU=50.74% | +| 图像分割 | Fast-SCNN | 正常训练 | 正常训练 | mIoU=49.27% | +| 图像分割 | HRNet | 正常训练 | 正常训练 | mIoU=33.03% | +| 图像分割 | UNet | 正常训练 | 正常训练 | mIoU=72.58% | *注:参考预测精度为whole_train_whole_infer模式下单卡训练汇报的精度数据。*