Merge pull request #40 from Bobholamovic/update_ppseg
[Feat] Update ppseg and Add CondenseNet V2own
commit
ebceda8419
143 changed files with 8360 additions and 1965 deletions
@ -0,0 +1 @@ |
||||
ppseg f6c73b478cdf00f40ae69edd35bf6bce2a1687ef |
@ -0,0 +1,135 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
import os |
||||
import numpy as np |
||||
|
||||
from paddlers.models.ppseg.datasets import Dataset |
||||
from paddlers.models.ppseg.cvlibs import manager |
||||
from paddlers.models.ppseg.transforms import Compose |
||||
|
||||
|
||||
@manager.DATASETS.add_component |
||||
class PSSLDataset(Dataset): |
||||
""" |
||||
The PSSL dataset for segmentation. PSSL is short for Pseudo Semantic Segmentation Labels, where the pseudo label |
||||
is computed by the Consensus explanation algorithm. |
||||
|
||||
The PSSL refers to "Distilling Ensemble of Explanations for Weakly-Supervised Pre-Training of Image Segmentation |
||||
Models" (https://arxiv.org/abs/2207.03335). |
||||
|
||||
The Consensus explanation refers to "Cross-Model Consensus of Explanations and Beyond for Image Classification |
||||
Models: An Empirical Study" (https://arxiv.org/abs/2109.00707). |
||||
|
||||
To use this dataset, we need to additionally prepare the orignal ImageNet dataset, which has the folder structure |
||||
as follows: |
||||
|
||||
imagenet_root |
||||
| |
||||
|--train |
||||
| |--n01440764 |
||||
| | |--n01440764_10026.JPEG |
||||
| | |--... |
||||
| |--nxxxxxxxx |
||||
| |--... |
||||
|
||||
where only the "train" set is needed. |
||||
|
||||
The PSSL dataset has the folder structure as follows: |
||||
|
||||
pssl_root |
||||
| |
||||
|--train |
||||
| |--n01440764 |
||||
| | |--n01440764_10026.JPEG_eiseg.npz |
||||
| | |--... |
||||
| |--nxxxxxxxx |
||||
| |--... |
||||
| |
||||
|--imagenet_lsvrc_2015_synsets.txt |
||||
|--train.txt |
||||
|
||||
where "train.txt" and "imagenet_lsvrc_2015_synsets.txt" are included in the PSSL dataset. |
||||
|
||||
Args: |
||||
transforms (list): Transforms for image. |
||||
imagenet_root (str): The path to the original ImageNet dataset. |
||||
pssl_root (str): The path to the PSSL dataset. |
||||
mode (str, optional): Which part of dataset to use. it is one of ('train', 'val', 'test'). Default: 'train'. |
||||
edge (bool, optional): Whether to compute edge while training. Default: False. |
||||
""" |
||||
ignore_index = 1001 # 0~999 is target class, 1000 is bg |
||||
NUM_CLASSES = 1001 # consider target class and bg |
||||
|
||||
def __init__(self, |
||||
transforms, |
||||
imagenet_root, |
||||
pssl_root, |
||||
mode='train', |
||||
edge=False): |
||||
mode = mode.lower() |
||||
if mode not in ['train']: |
||||
raise ValueError("mode should be 'train', but got {}.".format(mode)) |
||||
if transforms is None: |
||||
raise ValueError("`transforms` is necessary, but it is None.") |
||||
|
||||
self.transforms = Compose(transforms) |
||||
self.mode = mode |
||||
self.edge = edge |
||||
|
||||
self.num_classes = self.NUM_CLASSES |
||||
self.ignore_index = self.num_classes # 1001 |
||||
self.file_list = [] |
||||
self.class_id_dict = {} |
||||
|
||||
if imagenet_root is None or not os.path.isdir(pssl_root): |
||||
raise ValueError( |
||||
"The dataset is not Found or the folder structure is nonconfoumance." |
||||
) |
||||
|
||||
train_list_file = os.path.join(pssl_root, "train.txt") |
||||
if not os.path.exists(train_list_file): |
||||
raise ValueError("Train list file isn't exists.") |
||||
for idx, line in enumerate(open(train_list_file)): |
||||
# line: train/n04118776/n04118776_45912.JPEG_eiseg.npz |
||||
label_path = line.strip() |
||||
img_path = label_path.split('.JPEG')[0] + '.JPEG' |
||||
label_path = os.path.join(pssl_root, label_path) |
||||
img_path = os.path.join(imagenet_root, img_path) |
||||
self.file_list.append([img_path, label_path]) |
||||
|
||||
# mapping class name to class id. |
||||
class_id_file = os.path.join(pssl_root, |
||||
"imagenet_lsvrc_2015_synsets.txt") |
||||
if not os.path.exists(class_id_file): |
||||
raise ValueError("Class id file isn't exists.") |
||||
for idx, line in enumerate(open(class_id_file)): |
||||
class_name = line.strip() |
||||
self.class_id_dict[class_name] = idx |
||||
|
||||
def __getitem__(self, idx): |
||||
image_path, label_path = self.file_list[idx] |
||||
|
||||
# transform label |
||||
class_name = (image_path.split('/')[-1]).split('_')[0] |
||||
class_id = self.class_id_dict[class_name] |
||||
|
||||
pssl_seg = np.load(label_path)['arr_0'] |
||||
gt_semantic_seg = np.zeros_like(pssl_seg, dtype=np.int64) + 1000 |
||||
# [0, 999] for imagenet classes, 1000 for background, others(-1) will be ignored during training. |
||||
gt_semantic_seg[pssl_seg == 1] = class_id |
||||
|
||||
im, label = self.transforms(im=image_path, label=gt_semantic_seg) |
||||
|
||||
return im, label |
@ -0,0 +1,318 @@ |
||||
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
# Code was based on https://github.com/huawei-noah/CV-Backbones/tree/master/ghostnet_pytorch |
||||
|
||||
import math |
||||
import paddle |
||||
from paddle import ParamAttr |
||||
import paddle.nn as nn |
||||
import paddle.nn.functional as F |
||||
from paddle.nn import Conv2D, BatchNorm, AdaptiveAvgPool2D, Linear |
||||
from paddle.regularizer import L2Decay |
||||
from paddle.nn.initializer import Uniform, KaimingNormal |
||||
|
||||
from paddlers.models.ppseg.cvlibs import manager |
||||
from paddlers.models.ppseg.utils import utils, logger |
||||
|
||||
__all__ = ["GhostNet_x0_5", "GhostNet_x1_0", "GhostNet_x1_3"] |
||||
|
||||
|
||||
class ConvBNLayer(nn.Layer): |
||||
def __init__(self, |
||||
in_channels, |
||||
out_channels, |
||||
kernel_size, |
||||
stride=1, |
||||
groups=1, |
||||
act="relu", |
||||
name=None): |
||||
super(ConvBNLayer, self).__init__() |
||||
self._conv = Conv2D( |
||||
in_channels=in_channels, |
||||
out_channels=out_channels, |
||||
kernel_size=kernel_size, |
||||
stride=stride, |
||||
padding=(kernel_size - 1) // 2, |
||||
groups=groups, |
||||
weight_attr=ParamAttr( |
||||
initializer=KaimingNormal(), name=name + "_weights"), |
||||
bias_attr=False) |
||||
bn_name = name + "_bn" |
||||
|
||||
self._batch_norm = BatchNorm( |
||||
num_channels=out_channels, |
||||
act=act, |
||||
param_attr=ParamAttr( |
||||
name=bn_name + "_scale", regularizer=L2Decay(0.0)), |
||||
bias_attr=ParamAttr( |
||||
name=bn_name + "_offset", regularizer=L2Decay(0.0)), |
||||
moving_mean_name=bn_name + "_mean", |
||||
moving_variance_name=bn_name + "_variance") |
||||
|
||||
def forward(self, inputs): |
||||
y = self._conv(inputs) |
||||
y = self._batch_norm(y) |
||||
return y |
||||
|
||||
|
||||
class SEBlock(nn.Layer): |
||||
def __init__(self, num_channels, reduction_ratio=4, name=None): |
||||
super(SEBlock, self).__init__() |
||||
self.pool2d_gap = AdaptiveAvgPool2D(1) |
||||
self._num_channels = num_channels |
||||
stdv = 1.0 / math.sqrt(num_channels * 1.0) |
||||
med_ch = num_channels // reduction_ratio |
||||
self.squeeze = Linear( |
||||
num_channels, |
||||
med_ch, |
||||
weight_attr=ParamAttr( |
||||
initializer=Uniform(-stdv, stdv), name=name + "_1_weights"), |
||||
bias_attr=ParamAttr(name=name + "_1_offset")) |
||||
stdv = 1.0 / math.sqrt(med_ch * 1.0) |
||||
self.excitation = Linear( |
||||
med_ch, |
||||
num_channels, |
||||
weight_attr=ParamAttr( |
||||
initializer=Uniform(-stdv, stdv), name=name + "_2_weights"), |
||||
bias_attr=ParamAttr(name=name + "_2_offset")) |
||||
|
||||
def forward(self, inputs): |
||||
pool = self.pool2d_gap(inputs) |
||||
pool = paddle.squeeze(pool, axis=[2, 3]) |
||||
squeeze = self.squeeze(pool) |
||||
squeeze = F.relu(squeeze) |
||||
excitation = self.excitation(squeeze) |
||||
excitation = paddle.clip(x=excitation, min=0, max=1) |
||||
excitation = paddle.unsqueeze(excitation, axis=[2, 3]) |
||||
out = paddle.multiply(inputs, excitation) |
||||
return out |
||||
|
||||
|
||||
class GhostModule(nn.Layer): |
||||
def __init__(self, |
||||
in_channels, |
||||
output_channels, |
||||
kernel_size=1, |
||||
ratio=2, |
||||
dw_size=3, |
||||
stride=1, |
||||
relu=True, |
||||
name=None): |
||||
super(GhostModule, self).__init__() |
||||
init_channels = int(math.ceil(output_channels / ratio)) |
||||
new_channels = int(init_channels * (ratio - 1)) |
||||
self.primary_conv = ConvBNLayer( |
||||
in_channels=in_channels, |
||||
out_channels=init_channels, |
||||
kernel_size=kernel_size, |
||||
stride=stride, |
||||
groups=1, |
||||
act="relu" if relu else None, |
||||
name=name + "_primary_conv") |
||||
self.cheap_operation = ConvBNLayer( |
||||
in_channels=init_channels, |
||||
out_channels=new_channels, |
||||
kernel_size=dw_size, |
||||
stride=1, |
||||
groups=init_channels, |
||||
act="relu" if relu else None, |
||||
name=name + "_cheap_operation") |
||||
|
||||
def forward(self, inputs): |
||||
x = self.primary_conv(inputs) |
||||
y = self.cheap_operation(x) |
||||
out = paddle.concat([x, y], axis=1) |
||||
return out |
||||
|
||||
|
||||
class GhostBottleneck(nn.Layer): |
||||
def __init__(self, |
||||
in_channels, |
||||
hidden_dim, |
||||
output_channels, |
||||
kernel_size, |
||||
stride, |
||||
use_se, |
||||
name=None): |
||||
super(GhostBottleneck, self).__init__() |
||||
self._stride = stride |
||||
self._use_se = use_se |
||||
self._num_channels = in_channels |
||||
self._output_channels = output_channels |
||||
self.ghost_module_1 = GhostModule( |
||||
in_channels=in_channels, |
||||
output_channels=hidden_dim, |
||||
kernel_size=1, |
||||
stride=1, |
||||
relu=True, |
||||
name=name + "_ghost_module_1") |
||||
if stride == 2: |
||||
self.depthwise_conv = ConvBNLayer( |
||||
in_channels=hidden_dim, |
||||
out_channels=hidden_dim, |
||||
kernel_size=kernel_size, |
||||
stride=stride, |
||||
groups=hidden_dim, |
||||
act=None, |
||||
name=name + |
||||
"_depthwise_depthwise" # looks strange due to an old typo, will be fixed later. |
||||
) |
||||
if use_se: |
||||
self.se_block = SEBlock(num_channels=hidden_dim, name=name + "_se") |
||||
self.ghost_module_2 = GhostModule( |
||||
in_channels=hidden_dim, |
||||
output_channels=output_channels, |
||||
kernel_size=1, |
||||
relu=False, |
||||
name=name + "_ghost_module_2") |
||||
if stride != 1 or in_channels != output_channels: |
||||
self.shortcut_depthwise = ConvBNLayer( |
||||
in_channels=in_channels, |
||||
out_channels=in_channels, |
||||
kernel_size=kernel_size, |
||||
stride=stride, |
||||
groups=in_channels, |
||||
act=None, |
||||
name=name + |
||||
"_shortcut_depthwise_depthwise" # looks strange due to an old typo, will be fixed later. |
||||
) |
||||
self.shortcut_conv = ConvBNLayer( |
||||
in_channels=in_channels, |
||||
out_channels=output_channels, |
||||
kernel_size=1, |
||||
stride=1, |
||||
groups=1, |
||||
act=None, |
||||
name=name + "_shortcut_conv") |
||||
|
||||
def forward(self, inputs): |
||||
x = self.ghost_module_1(inputs) |
||||
if self._stride == 2: |
||||
x = self.depthwise_conv(x) |
||||
if self._use_se: |
||||
x = self.se_block(x) |
||||
x = self.ghost_module_2(x) |
||||
if self._stride == 1 and self._num_channels == self._output_channels: |
||||
shortcut = inputs |
||||
else: |
||||
shortcut = self.shortcut_depthwise(inputs) |
||||
shortcut = self.shortcut_conv(shortcut) |
||||
return paddle.add(x=x, y=shortcut) |
||||
|
||||
|
||||
class GhostNet(nn.Layer): |
||||
def __init__(self, scale, in_channels=3, pretrained=None): |
||||
super(GhostNet, self).__init__() |
||||
self.cfgs = [ |
||||
# k, t, c, SE, s |
||||
[3, 16, 16, 0, 1], |
||||
[3, 48, 24, 0, 2], |
||||
[3, 72, 24, 0, 1], # x4 |
||||
[5, 72, 40, 1, 2], |
||||
[5, 120, 40, 1, 1], # x8 |
||||
[3, 240, 80, 0, 2], |
||||
[3, 200, 80, 0, 1], |
||||
[3, 184, 80, 0, 1], |
||||
[3, 184, 80, 0, 1], |
||||
[3, 480, 112, 1, 1], |
||||
[3, 672, 112, 1, 1], # x16 |
||||
[5, 672, 160, 1, 2], |
||||
[5, 960, 160, 0, 1], |
||||
[5, 960, 160, 1, 1], |
||||
[5, 960, 160, 0, 1], |
||||
[5, 960, 160, 1, 1] # x32 |
||||
] |
||||
self.scale = scale |
||||
self.pretrained = pretrained |
||||
|
||||
output_channels = int(self._make_divisible(16 * self.scale, 4)) |
||||
self.conv1 = ConvBNLayer( |
||||
in_channels=in_channels, |
||||
out_channels=output_channels, |
||||
kernel_size=3, |
||||
stride=2, |
||||
groups=1, |
||||
act="relu", |
||||
name="conv1") |
||||
|
||||
# build inverted residual blocks |
||||
self.out_index = [2, 4, 10, 15] |
||||
self.feat_channels = [] |
||||
self.ghost_bottleneck_list = [] |
||||
for idx, (k, exp_size, c, use_se, s) in enumerate(self.cfgs): |
||||
in_channels = output_channels |
||||
output_channels = int(self._make_divisible(c * self.scale, 4)) |
||||
hidden_dim = int(self._make_divisible(exp_size * self.scale, 4)) |
||||
ghost_bottleneck = self.add_sublayer( |
||||
name="_ghostbottleneck_" + str(idx), |
||||
sublayer=GhostBottleneck( |
||||
in_channels=in_channels, |
||||
hidden_dim=hidden_dim, |
||||
output_channels=output_channels, |
||||
kernel_size=k, |
||||
stride=s, |
||||
use_se=use_se, |
||||
name="_ghostbottleneck_" + str(idx))) |
||||
self.ghost_bottleneck_list.append(ghost_bottleneck) |
||||
if idx in self.out_index: |
||||
self.feat_channels.append(output_channels) |
||||
|
||||
self.init_weight() |
||||
|
||||
def init_weight(self): |
||||
if self.pretrained is not None: |
||||
utils.load_entire_model(self, self.pretrained) |
||||
|
||||
def forward(self, inputs): |
||||
feat_list = [] |
||||
x = self.conv1(inputs) |
||||
for idx, ghost_bottleneck in enumerate(self.ghost_bottleneck_list): |
||||
x = ghost_bottleneck(x) |
||||
if idx in self.out_index: |
||||
feat_list.append(x) |
||||
return feat_list |
||||
|
||||
def _make_divisible(self, v, divisor, min_value=None): |
||||
""" |
||||
This function is taken from the original tf repo. |
||||
It ensures that all layers have a channel number that is divisible by 8 |
||||
It can be seen here: |
||||
https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py |
||||
""" |
||||
if min_value is None: |
||||
min_value = divisor |
||||
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) |
||||
# Make sure that round down does not go down by more than 10%. |
||||
if new_v < 0.9 * v: |
||||
new_v += divisor |
||||
return new_v |
||||
|
||||
|
||||
@manager.BACKBONES.add_component |
||||
def GhostNet_x0_5(**kwargs): |
||||
model = GhostNet(scale=0.5, **kwargs) |
||||
return model |
||||
|
||||
|
||||
@manager.BACKBONES.add_component |
||||
def GhostNet_x1_0(**kwargs): |
||||
model = GhostNet(scale=1.0, **kwargs) |
||||
return model |
||||
|
||||
|
||||
@manager.BACKBONES.add_component |
||||
def GhostNet_x1_3(**kwargs): |
||||
model = GhostNet(scale=1.3, **kwargs) |
||||
return model |
@ -0,0 +1,974 @@ |
||||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
""" |
||||
This code is based on |
||||
https://github.com/HRNet/Lite-HRNet/blob/hrnet/models/backbones/litehrnet.py |
||||
""" |
||||
|
||||
import paddle |
||||
import paddle.nn as nn |
||||
import paddle.nn.functional as F |
||||
from numbers import Integral |
||||
from paddle import ParamAttr |
||||
from paddle.regularizer import L2Decay |
||||
from paddle.nn.initializer import Normal, Constant |
||||
|
||||
from paddlers.models.ppseg.cvlibs import manager |
||||
from paddlers.models.ppseg import utils |
||||
|
||||
__all__ = [ |
||||
"Lite_HRNet_18", "Lite_HRNet_30", "Lite_HRNet_naive", |
||||
"Lite_HRNet_wider_naive", "LiteHRNet" |
||||
] |
||||
|
||||
|
||||
def Conv2d(in_channels, |
||||
out_channels, |
||||
kernel_size, |
||||
stride=1, |
||||
padding=0, |
||||
dilation=1, |
||||
groups=1, |
||||
bias=True, |
||||
weight_init=Normal(std=0.001), |
||||
bias_init=Constant(0.)): |
||||
weight_attr = paddle.framework.ParamAttr(initializer=weight_init) |
||||
if bias: |
||||
bias_attr = paddle.framework.ParamAttr(initializer=bias_init) |
||||
else: |
||||
bias_attr = False |
||||
conv = nn.Conv2D( |
||||
in_channels, |
||||
out_channels, |
||||
kernel_size, |
||||
stride, |
||||
padding, |
||||
dilation, |
||||
groups, |
||||
weight_attr=weight_attr, |
||||
bias_attr=bias_attr) |
||||
return conv |
||||
|
||||
|
||||
def channel_shuffle(x, groups): |
||||
x_shape = paddle.shape(x) |
||||
batch_size, height, width = x_shape[0], x_shape[2], x_shape[3] |
||||
num_channels = x.shape[1] |
||||
channels_per_group = num_channels // groups |
||||
|
||||
x = paddle.reshape( |
||||
x=x, shape=[batch_size, groups, channels_per_group, height, width]) |
||||
x = paddle.transpose(x=x, perm=[0, 2, 1, 3, 4]) |
||||
x = paddle.reshape(x=x, shape=[batch_size, num_channels, height, width]) |
||||
|
||||
return x |
||||
|
||||
|
||||
class ConvNormLayer(nn.Layer): |
||||
def __init__(self, |
||||
ch_in, |
||||
ch_out, |
||||
filter_size, |
||||
stride=1, |
||||
groups=1, |
||||
norm_type=None, |
||||
norm_groups=32, |
||||
norm_decay=0., |
||||
freeze_norm=False, |
||||
act=None): |
||||
super(ConvNormLayer, self).__init__() |
||||
self.act = act |
||||
norm_lr = 0. if freeze_norm else 1. |
||||
if norm_type is not None: |
||||
assert norm_type in ['bn', 'sync_bn', 'gn'], \ |
||||
"norm_type should be one of ['bn', 'sync_bn', 'gn'], but got {}".format(norm_type) |
||||
param_attr = ParamAttr( |
||||
initializer=Constant(1.0), |
||||
learning_rate=norm_lr, |
||||
regularizer=L2Decay(norm_decay), ) |
||||
bias_attr = ParamAttr( |
||||
learning_rate=norm_lr, regularizer=L2Decay(norm_decay)) |
||||
global_stats = True if freeze_norm else None |
||||
if norm_type in ['bn', 'sync_bn']: |
||||
self.norm = nn.BatchNorm2D( |
||||
ch_out, |
||||
weight_attr=param_attr, |
||||
bias_attr=bias_attr, |
||||
use_global_stats=global_stats, ) |
||||
elif norm_type == 'gn': |
||||
self.norm = nn.GroupNorm( |
||||
num_groups=norm_groups, |
||||
num_channels=ch_out, |
||||
weight_attr=param_attr, |
||||
bias_attr=bias_attr) |
||||
norm_params = self.norm.parameters() |
||||
if freeze_norm: |
||||
for param in norm_params: |
||||
param.stop_gradient = True |
||||
conv_bias_attr = False |
||||
else: |
||||
conv_bias_attr = True |
||||
self.norm = None |
||||
|
||||
self.conv = nn.Conv2D( |
||||
in_channels=ch_in, |
||||
out_channels=ch_out, |
||||
kernel_size=filter_size, |
||||
stride=stride, |
||||
padding=(filter_size - 1) // 2, |
||||
groups=groups, |
||||
weight_attr=ParamAttr(initializer=Normal( |
||||
mean=0., std=0.001)), |
||||
bias_attr=conv_bias_attr) |
||||
|
||||
def forward(self, inputs): |
||||
out = self.conv(inputs) |
||||
if self.norm is not None: |
||||
out = self.norm(out) |
||||
|
||||
if self.act == 'relu': |
||||
out = F.relu(out) |
||||
elif self.act == 'sigmoid': |
||||
out = F.sigmoid(out) |
||||
return out |
||||
|
||||
|
||||
class DepthWiseSeparableConvNormLayer(nn.Layer): |
||||
def __init__(self, |
||||
ch_in, |
||||
ch_out, |
||||
filter_size, |
||||
stride=1, |
||||
dw_norm_type=None, |
||||
pw_norm_type=None, |
||||
norm_decay=0., |
||||
freeze_norm=False, |
||||
dw_act=None, |
||||
pw_act=None): |
||||
super(DepthWiseSeparableConvNormLayer, self).__init__() |
||||
self.depthwise_conv = ConvNormLayer( |
||||
ch_in=ch_in, |
||||
ch_out=ch_in, |
||||
filter_size=filter_size, |
||||
stride=stride, |
||||
groups=ch_in, |
||||
norm_type=dw_norm_type, |
||||
act=dw_act, |
||||
norm_decay=norm_decay, |
||||
freeze_norm=freeze_norm, ) |
||||
self.pointwise_conv = ConvNormLayer( |
||||
ch_in=ch_in, |
||||
ch_out=ch_out, |
||||
filter_size=1, |
||||
stride=1, |
||||
norm_type=pw_norm_type, |
||||
act=pw_act, |
||||
norm_decay=norm_decay, |
||||
freeze_norm=freeze_norm, ) |
||||
|
||||
def forward(self, x): |
||||
x = self.depthwise_conv(x) |
||||
x = self.pointwise_conv(x) |
||||
return x |
||||
|
||||
|
||||
class CrossResolutionWeightingModule(nn.Layer): |
||||
def __init__(self, |
||||
channels, |
||||
ratio=16, |
||||
norm_type='bn', |
||||
freeze_norm=False, |
||||
norm_decay=0.): |
||||
super(CrossResolutionWeightingModule, self).__init__() |
||||
self.channels = channels |
||||
total_channel = sum(channels) |
||||
self.conv1 = ConvNormLayer( |
||||
ch_in=total_channel, |
||||
ch_out=total_channel // ratio, |
||||
filter_size=1, |
||||
stride=1, |
||||
norm_type=norm_type, |
||||
act='relu', |
||||
freeze_norm=freeze_norm, |
||||
norm_decay=norm_decay) |
||||
self.conv2 = ConvNormLayer( |
||||
ch_in=total_channel // ratio, |
||||
ch_out=total_channel, |
||||
filter_size=1, |
||||
stride=1, |
||||
norm_type=norm_type, |
||||
act='sigmoid', |
||||
freeze_norm=freeze_norm, |
||||
norm_decay=norm_decay) |
||||
|
||||
def forward(self, x): |
||||
out = [] |
||||
for idx, xi in enumerate(x[:-1]): |
||||
kernel_size = stride = pow(2, len(x) - idx - 1) |
||||
xi = F.avg_pool2d(xi, kernel_size=kernel_size, stride=stride) |
||||
out.append(xi) |
||||
out.append(x[-1]) |
||||
|
||||
out = paddle.concat(out, 1) |
||||
out = self.conv1(out) |
||||
out = self.conv2(out) |
||||
out = paddle.split(out, self.channels, 1) |
||||
out = [ |
||||
s * F.interpolate( |
||||
a, paddle.shape(s)[-2:], mode='nearest') for s, a in zip(x, out) |
||||
] |
||||
return out |
||||
|
||||
|
||||
class SpatialWeightingModule(nn.Layer): |
||||
def __init__(self, in_channel, ratio=16, freeze_norm=False, norm_decay=0.): |
||||
super(SpatialWeightingModule, self).__init__() |
||||
self.global_avgpooling = nn.AdaptiveAvgPool2D(1) |
||||
self.conv1 = ConvNormLayer( |
||||
ch_in=in_channel, |
||||
ch_out=in_channel // ratio, |
||||
filter_size=1, |
||||
stride=1, |
||||
act='relu', |
||||
freeze_norm=freeze_norm, |
||||
norm_decay=norm_decay) |
||||
self.conv2 = ConvNormLayer( |
||||
ch_in=in_channel // ratio, |
||||
ch_out=in_channel, |
||||
filter_size=1, |
||||
stride=1, |
||||
act='sigmoid', |
||||
freeze_norm=freeze_norm, |
||||
norm_decay=norm_decay) |
||||
|
||||
def forward(self, x): |
||||
out = self.global_avgpooling(x) |
||||
out = self.conv1(out) |
||||
out = self.conv2(out) |
||||
return x * out |
||||
|
||||
|
||||
class ConditionalChannelWeightingBlock(nn.Layer): |
||||
def __init__(self, |
||||
in_channels, |
||||
stride, |
||||
reduce_ratio, |
||||
norm_type='bn', |
||||
freeze_norm=False, |
||||
norm_decay=0.): |
||||
super(ConditionalChannelWeightingBlock, self).__init__() |
||||
assert stride in [1, 2] |
||||
branch_channels = [channel // 2 for channel in in_channels] |
||||
|
||||
self.cross_resolution_weighting = CrossResolutionWeightingModule( |
||||
branch_channels, |
||||
ratio=reduce_ratio, |
||||
norm_type=norm_type, |
||||
freeze_norm=freeze_norm, |
||||
norm_decay=norm_decay) |
||||
self.depthwise_convs = nn.LayerList([ |
||||
ConvNormLayer( |
||||
channel, |
||||
channel, |
||||
filter_size=3, |
||||
stride=stride, |
||||
groups=channel, |
||||
norm_type=norm_type, |
||||
freeze_norm=freeze_norm, |
||||
norm_decay=norm_decay) for channel in branch_channels |
||||
]) |
||||
|
||||
self.spatial_weighting = nn.LayerList([ |
||||
SpatialWeightingModule( |
||||
channel, |
||||
ratio=4, |
||||
freeze_norm=freeze_norm, |
||||
norm_decay=norm_decay) for channel in branch_channels |
||||
]) |
||||
|
||||
def forward(self, x): |
||||
x = [s.chunk(2, axis=1) for s in x] |
||||
x1 = [s[0] for s in x] |
||||
x2 = [s[1] for s in x] |
||||
|
||||
x2 = self.cross_resolution_weighting(x2) |
||||
x2 = [dw(s) for s, dw in zip(x2, self.depthwise_convs)] |
||||
x2 = [sw(s) for s, sw in zip(x2, self.spatial_weighting)] |
||||
|
||||
out = [paddle.concat([s1, s2], axis=1) for s1, s2 in zip(x1, x2)] |
||||
out = [channel_shuffle(s, groups=2) for s in out] |
||||
return out |
||||
|
||||
|
||||
class ShuffleUnit(nn.Layer): |
||||
def __init__(self, |
||||
in_channel, |
||||
out_channel, |
||||
stride, |
||||
norm_type='bn', |
||||
freeze_norm=False, |
||||
norm_decay=0.): |
||||
super(ShuffleUnit, self).__init__() |
||||
branch_channel = out_channel // 2 |
||||
self.stride = stride |
||||
if self.stride == 1: |
||||
assert in_channel == branch_channel * 2, \ |
||||
"when stride=1, in_channel {} should equal to branch_channel*2 {}".format(in_channel, branch_channel * 2) |
||||
if stride > 1: |
||||
self.branch1 = nn.Sequential( |
||||
ConvNormLayer( |
||||
ch_in=in_channel, |
||||
ch_out=in_channel, |
||||
filter_size=3, |
||||
stride=self.stride, |
||||
groups=in_channel, |
||||
norm_type=norm_type, |
||||
freeze_norm=freeze_norm, |
||||
norm_decay=norm_decay), |
||||
ConvNormLayer( |
||||
ch_in=in_channel, |
||||
ch_out=branch_channel, |
||||
filter_size=1, |
||||
stride=1, |
||||
norm_type=norm_type, |
||||
act='relu', |
||||
freeze_norm=freeze_norm, |
||||
norm_decay=norm_decay), ) |
||||
self.branch2 = nn.Sequential( |
||||
ConvNormLayer( |
||||
ch_in=branch_channel if stride == 1 else in_channel, |
||||
ch_out=branch_channel, |
||||
filter_size=1, |
||||
stride=1, |
||||
norm_type=norm_type, |
||||
act='relu', |
||||
freeze_norm=freeze_norm, |
||||
norm_decay=norm_decay), |
||||
ConvNormLayer( |
||||
ch_in=branch_channel, |
||||
ch_out=branch_channel, |
||||
filter_size=3, |
||||
stride=self.stride, |
||||
groups=branch_channel, |
||||
norm_type=norm_type, |
||||
freeze_norm=freeze_norm, |
||||
norm_decay=norm_decay), |
||||
ConvNormLayer( |
||||
ch_in=branch_channel, |
||||
ch_out=branch_channel, |
||||
filter_size=1, |
||||
stride=1, |
||||
norm_type=norm_type, |
||||
act='relu', |
||||
freeze_norm=freeze_norm, |
||||
norm_decay=norm_decay), ) |
||||
|
||||
def forward(self, x): |
||||
if self.stride > 1: |
||||
x1 = self.branch1(x) |
||||
x2 = self.branch2(x) |
||||
else: |
||||
x1, x2 = x.chunk(2, axis=1) |
||||
x2 = self.branch2(x2) |
||||
out = paddle.concat([x1, x2], axis=1) |
||||
out = channel_shuffle(out, groups=2) |
||||
return out |
||||
|
||||
|
||||
class IterativeHead(nn.Layer): |
||||
def __init__(self, |
||||
in_channels, |
||||
norm_type='bn', |
||||
freeze_norm=False, |
||||
norm_decay=0.): |
||||
super(IterativeHead, self).__init__() |
||||
num_branches = len(in_channels) |
||||
self.in_channels = in_channels[::-1] |
||||
|
||||
projects = [] |
||||
for i in range(num_branches): |
||||
if i != num_branches - 1: |
||||
projects.append( |
||||
DepthWiseSeparableConvNormLayer( |
||||
ch_in=self.in_channels[i], |
||||
ch_out=self.in_channels[i + 1], |
||||
filter_size=3, |
||||
stride=1, |
||||
dw_act=None, |
||||
pw_act='relu', |
||||
dw_norm_type=norm_type, |
||||
pw_norm_type=norm_type, |
||||
freeze_norm=freeze_norm, |
||||
norm_decay=norm_decay)) |
||||
else: |
||||
projects.append( |
||||
DepthWiseSeparableConvNormLayer( |
||||
ch_in=self.in_channels[i], |
||||
ch_out=self.in_channels[i], |
||||
filter_size=3, |
||||
stride=1, |
||||
dw_act=None, |
||||
pw_act='relu', |
||||
dw_norm_type=norm_type, |
||||
pw_norm_type=norm_type, |
||||
freeze_norm=freeze_norm, |
||||
norm_decay=norm_decay)) |
||||
self.projects = nn.LayerList(projects) |
||||
|
||||
def forward(self, x): |
||||
x = x[::-1] |
||||
y = [] |
||||
last_x = None |
||||
for i, s in enumerate(x): |
||||
if last_x is not None: |
||||
last_x = F.interpolate( |
||||
last_x, |
||||
size=paddle.shape(s)[-2:], |
||||
mode='bilinear', |
||||
align_corners=True) |
||||
s = s + last_x |
||||
s = self.projects[i](s) |
||||
y.append(s) |
||||
last_x = s |
||||
|
||||
return y[::-1] |
||||
|
||||
|
||||
class Stem(nn.Layer): |
||||
def __init__(self, |
||||
in_channel, |
||||
stem_channel, |
||||
out_channel, |
||||
expand_ratio, |
||||
norm_type='bn', |
||||
freeze_norm=False, |
||||
norm_decay=0.): |
||||
super(Stem, self).__init__() |
||||
self.conv1 = ConvNormLayer( |
||||
in_channel, |
||||
stem_channel, |
||||
filter_size=3, |
||||
stride=2, |
||||
norm_type=norm_type, |
||||
act='relu', |
||||
freeze_norm=freeze_norm, |
||||
norm_decay=norm_decay) |
||||
mid_channel = int(round(stem_channel * expand_ratio)) |
||||
branch_channel = stem_channel // 2 |
||||
if stem_channel == out_channel: |
||||
inc_channel = out_channel - branch_channel |
||||
else: |
||||
inc_channel = out_channel - stem_channel |
||||
self.branch1 = nn.Sequential( |
||||
ConvNormLayer( |
||||
ch_in=branch_channel, |
||||
ch_out=branch_channel, |
||||
filter_size=3, |
||||
stride=2, |
||||
groups=branch_channel, |
||||
norm_type=norm_type, |
||||
freeze_norm=freeze_norm, |
||||
norm_decay=norm_decay), |
||||
ConvNormLayer( |
||||
ch_in=branch_channel, |
||||
ch_out=inc_channel, |
||||
filter_size=1, |
||||
stride=1, |
||||
norm_type=norm_type, |
||||
act='relu', |
||||
freeze_norm=freeze_norm, |
||||
norm_decay=norm_decay), ) |
||||
self.expand_conv = ConvNormLayer( |
||||
ch_in=branch_channel, |
||||
ch_out=mid_channel, |
||||
filter_size=1, |
||||
stride=1, |
||||
norm_type=norm_type, |
||||
act='relu', |
||||
freeze_norm=freeze_norm, |
||||
norm_decay=norm_decay) |
||||
self.depthwise_conv = ConvNormLayer( |
||||
ch_in=mid_channel, |
||||
ch_out=mid_channel, |
||||
filter_size=3, |
||||
stride=2, |
||||
groups=mid_channel, |
||||
norm_type=norm_type, |
||||
freeze_norm=freeze_norm, |
||||
norm_decay=norm_decay) |
||||
self.linear_conv = ConvNormLayer( |
||||
ch_in=mid_channel, |
||||
ch_out=branch_channel |
||||
if stem_channel == out_channel else stem_channel, |
||||
filter_size=1, |
||||
stride=1, |
||||
norm_type=norm_type, |
||||
act='relu', |
||||
freeze_norm=freeze_norm, |
||||
norm_decay=norm_decay) |
||||
|
||||
def forward(self, x): |
||||
x = self.conv1(x) |
||||
x1, x2 = x.chunk(2, axis=1) |
||||
x1 = self.branch1(x1) |
||||
x2 = self.expand_conv(x2) |
||||
x2 = self.depthwise_conv(x2) |
||||
x2 = self.linear_conv(x2) |
||||
out = paddle.concat([x1, x2], axis=1) |
||||
out = channel_shuffle(out, groups=2) |
||||
|
||||
return out |
||||
|
||||
|
||||
class LiteHRNetModule(nn.Layer): |
||||
def __init__(self, |
||||
num_branches, |
||||
num_blocks, |
||||
in_channels, |
||||
reduce_ratio, |
||||
module_type, |
||||
multiscale_output=False, |
||||
with_fuse=True, |
||||
norm_type='bn', |
||||
freeze_norm=False, |
||||
norm_decay=0.): |
||||
super(LiteHRNetModule, self).__init__() |
||||
assert num_branches == len(in_channels),\ |
||||
"num_branches {} should equal to num_in_channels {}".format(num_branches, len(in_channels)) |
||||
assert module_type in [ |
||||
'LITE', 'NAIVE' |
||||
], "module_type should be one of ['LITE', 'NAIVE']" |
||||
self.num_branches = num_branches |
||||
self.in_channels = in_channels |
||||
self.multiscale_output = multiscale_output |
||||
self.with_fuse = with_fuse |
||||
self.norm_type = 'bn' |
||||
self.module_type = module_type |
||||
|
||||
if self.module_type == 'LITE': |
||||
self.layers = self._make_weighting_blocks( |
||||
num_blocks, |
||||
reduce_ratio, |
||||
freeze_norm=freeze_norm, |
||||
norm_decay=norm_decay) |
||||
elif self.module_type == 'NAIVE': |
||||
self.layers = self._make_naive_branches( |
||||
num_branches, |
||||
num_blocks, |
||||
freeze_norm=freeze_norm, |
||||
norm_decay=norm_decay) |
||||
|
||||
if self.with_fuse: |
||||
self.fuse_layers = self._make_fuse_layers( |
||||
freeze_norm=freeze_norm, norm_decay=norm_decay) |
||||
self.relu = nn.ReLU() |
||||
|
||||
def _make_weighting_blocks(self, |
||||
num_blocks, |
||||
reduce_ratio, |
||||
stride=1, |
||||
freeze_norm=False, |
||||
norm_decay=0.): |
||||
layers = [] |
||||
for i in range(num_blocks): |
||||
layers.append( |
||||
ConditionalChannelWeightingBlock( |
||||
self.in_channels, |
||||
stride=stride, |
||||
reduce_ratio=reduce_ratio, |
||||
norm_type=self.norm_type, |
||||
freeze_norm=freeze_norm, |
||||
norm_decay=norm_decay)) |
||||
return nn.Sequential(*layers) |
||||
|
||||
def _make_naive_branches(self, |
||||
num_branches, |
||||
num_blocks, |
||||
freeze_norm=False, |
||||
norm_decay=0.): |
||||
branches = [] |
||||
for branch_idx in range(num_branches): |
||||
layers = [] |
||||
for i in range(num_blocks): |
||||
layers.append( |
||||
ShuffleUnit( |
||||
self.in_channels[branch_idx], |
||||
self.in_channels[branch_idx], |
||||
stride=1, |
||||
norm_type=self.norm_type, |
||||
freeze_norm=freeze_norm, |
||||
norm_decay=norm_decay)) |
||||
branches.append(nn.Sequential(*layers)) |
||||
return nn.LayerList(branches) |
||||
|
||||
def _make_fuse_layers(self, freeze_norm=False, norm_decay=0.): |
||||
if self.num_branches == 1: |
||||
return None |
||||
fuse_layers = [] |
||||
num_out_branches = self.num_branches if self.multiscale_output else 1 |
||||
for i in range(num_out_branches): |
||||
fuse_layer = [] |
||||
for j in range(self.num_branches): |
||||
if j > i: |
||||
fuse_layer.append( |
||||
nn.Sequential( |
||||
Conv2d( |
||||
self.in_channels[j], |
||||
self.in_channels[i], |
||||
kernel_size=1, |
||||
stride=1, |
||||
padding=0, |
||||
bias=False, ), |
||||
nn.BatchNorm2D(self.in_channels[i]), |
||||
nn.Upsample( |
||||
scale_factor=2**(j - i), mode='nearest'))) |
||||
elif j == i: |
||||
fuse_layer.append(None) |
||||
else: |
||||
conv_downsamples = [] |
||||
for k in range(i - j): |
||||
if k == i - j - 1: |
||||
conv_downsamples.append( |
||||
nn.Sequential( |
||||
Conv2d( |
||||
self.in_channels[j], |
||||
self.in_channels[j], |
||||
kernel_size=3, |
||||
stride=2, |
||||
padding=1, |
||||
groups=self.in_channels[j], |
||||
bias=False, ), |
||||
nn.BatchNorm2D(self.in_channels[j]), |
||||
Conv2d( |
||||
self.in_channels[j], |
||||
self.in_channels[i], |
||||
kernel_size=1, |
||||
stride=1, |
||||
padding=0, |
||||
bias=False, ), |
||||
nn.BatchNorm2D(self.in_channels[i]))) |
||||
else: |
||||
conv_downsamples.append( |
||||
nn.Sequential( |
||||
Conv2d( |
||||
self.in_channels[j], |
||||
self.in_channels[j], |
||||
kernel_size=3, |
||||
stride=2, |
||||
padding=1, |
||||
groups=self.in_channels[j], |
||||
bias=False, ), |
||||
nn.BatchNorm2D(self.in_channels[j]), |
||||
Conv2d( |
||||
self.in_channels[j], |
||||
self.in_channels[j], |
||||
kernel_size=1, |
||||
stride=1, |
||||
padding=0, |
||||
bias=False, ), |
||||
nn.BatchNorm2D(self.in_channels[j]), |
||||
nn.ReLU())) |
||||
|
||||
fuse_layer.append(nn.Sequential(*conv_downsamples)) |
||||
fuse_layers.append(nn.LayerList(fuse_layer)) |
||||
|
||||
return nn.LayerList(fuse_layers) |
||||
|
||||
def forward(self, x): |
||||
if self.num_branches == 1: |
||||
return [self.layers[0](x[0])] |
||||
if self.module_type == 'LITE': |
||||
out = self.layers(x) |
||||
elif self.module_type == 'NAIVE': |
||||
for i in range(self.num_branches): |
||||
x[i] = self.layers[i](x[i]) |
||||
out = x |
||||
if self.with_fuse: |
||||
out_fuse = [] |
||||
for i in range(len(self.fuse_layers)): |
||||
y = out[0] if i == 0 else self.fuse_layers[i][0](out[0]) |
||||
for j in range(self.num_branches): |
||||
if j == 0: |
||||
y += y |
||||
elif i == j: |
||||
y += out[j] |
||||
else: |
||||
y += self.fuse_layers[i][j](out[j]) |
||||
if i == 0: |
||||
out[i] = y |
||||
out_fuse.append(self.relu(y)) |
||||
out = out_fuse |
||||
elif not self.multiscale_output: |
||||
out = [out[0]] |
||||
return out |
||||
|
||||
|
||||
class LiteHRNet(nn.Layer): |
||||
""" |
||||
@inproceedings{Yulitehrnet21, |
||||
title={Lite-HRNet: A Lightweight High-Resolution Network}, |
||||
author={Yu, Changqian and Xiao, Bin and Gao, Changxin and Yuan, Lu and Zhang, Lei and Sang, Nong and Wang, Jingdong}, |
||||
booktitle={CVPR},year={2021} |
||||
} |
||||
|
||||
Args: |
||||
network_type (str): the network_type should be one of ["lite_18", "lite_30", "naive", "wider_naive"], |
||||
"naive": Simply combining the shuffle block in ShuffleNet and the highresolution design pattern in HRNet. |
||||
"wider_naive": Naive network with wider channels in each block. |
||||
"lite_18": Lite-HRNet-18, which replaces the pointwise convolution in a shuffle block by conditional channel weighting. |
||||
"lite_30": Lite-HRNet-30, with more blocks compared with Lite-HRNet-18. |
||||
in_channels (int, optional): The channels of input image. Default: 3. |
||||
freeze_at (int): the stage to freeze |
||||
freeze_norm (bool): whether to freeze norm in HRNet |
||||
norm_decay (float): weight decay for normalization layer weights |
||||
return_idx (List): the stage to return |
||||
""" |
||||
|
||||
def __init__(self, |
||||
network_type, |
||||
in_channels=3, |
||||
freeze_at=0, |
||||
freeze_norm=True, |
||||
norm_decay=0., |
||||
return_idx=[0, 1, 2, 3], |
||||
use_head=False, |
||||
pretrained=None): |
||||
super(LiteHRNet, self).__init__() |
||||
if isinstance(return_idx, Integral): |
||||
return_idx = [return_idx] |
||||
assert network_type in ["lite_18", "lite_30", "naive", "wider_naive"], \ |
||||
"the network_type should be one of [lite_18, lite_30, naive, wider_naive]" |
||||
assert len(return_idx) > 0, "need one or more return index" |
||||
self.freeze_at = freeze_at |
||||
self.freeze_norm = freeze_norm |
||||
self.norm_decay = norm_decay |
||||
self.return_idx = return_idx |
||||
self.norm_type = 'bn' |
||||
self.use_head = use_head |
||||
self.pretrained = pretrained |
||||
|
||||
self.module_configs = { |
||||
"lite_18": { |
||||
"num_modules": [2, 4, 2], |
||||
"num_branches": [2, 3, 4], |
||||
"num_blocks": [2, 2, 2], |
||||
"module_type": ["LITE", "LITE", "LITE"], |
||||
"reduce_ratios": [8, 8, 8], |
||||
"num_channels": [[40, 80], [40, 80, 160], [40, 80, 160, 320]], |
||||
}, |
||||
"lite_30": { |
||||
"num_modules": [3, 8, 3], |
||||
"num_branches": [2, 3, 4], |
||||
"num_blocks": [2, 2, 2], |
||||
"module_type": ["LITE", "LITE", "LITE"], |
||||
"reduce_ratios": [8, 8, 8], |
||||
"num_channels": [[40, 80], [40, 80, 160], [40, 80, 160, 320]], |
||||
}, |
||||
"naive": { |
||||
"num_modules": [2, 4, 2], |
||||
"num_branches": [2, 3, 4], |
||||
"num_blocks": [2, 2, 2], |
||||
"module_type": ["NAIVE", "NAIVE", "NAIVE"], |
||||
"reduce_ratios": [1, 1, 1], |
||||
"num_channels": [[30, 60], [30, 60, 120], [30, 60, 120, 240]], |
||||
}, |
||||
"wider_naive": { |
||||
"num_modules": [2, 4, 2], |
||||
"num_branches": [2, 3, 4], |
||||
"num_blocks": [2, 2, 2], |
||||
"module_type": ["NAIVE", "NAIVE", "NAIVE"], |
||||
"reduce_ratios": [1, 1, 1], |
||||
"num_channels": [[40, 80], [40, 80, 160], [40, 80, 160, 320]], |
||||
}, |
||||
} |
||||
|
||||
self.stages_config = self.module_configs[network_type] |
||||
|
||||
self.stem = Stem(in_channels, 32, 32, 1) |
||||
num_channels_pre_layer = [32] |
||||
for stage_idx in range(3): |
||||
num_channels = self.stages_config["num_channels"][stage_idx] |
||||
setattr(self, 'transition{}'.format(stage_idx), |
||||
self._make_transition_layer(num_channels_pre_layer, |
||||
num_channels, self.freeze_norm, |
||||
self.norm_decay)) |
||||
stage, num_channels_pre_layer = self._make_stage( |
||||
self.stages_config, stage_idx, num_channels, True, |
||||
self.freeze_norm, self.norm_decay) |
||||
setattr(self, 'stage{}'.format(stage_idx), stage) |
||||
|
||||
num_channels = self.stages_config["num_channels"][-1] |
||||
self.feat_channels = num_channels |
||||
|
||||
if self.use_head: |
||||
self.head_layer = IterativeHead(num_channels_pre_layer, 'bn', |
||||
self.freeze_norm, self.norm_decay) |
||||
|
||||
self.feat_channels = [num_channels[0]] |
||||
for i in range(1, len(num_channels)): |
||||
self.feat_channels.append(num_channels[i] // 2) |
||||
|
||||
self.init_weight() |
||||
|
||||
def init_weight(self): |
||||
if self.pretrained is not None: |
||||
utils.load_entire_model(self, self.pretrained) |
||||
|
||||
def _make_transition_layer(self, |
||||
num_channels_pre_layer, |
||||
num_channels_cur_layer, |
||||
freeze_norm=False, |
||||
norm_decay=0.): |
||||
num_branches_pre = len(num_channels_pre_layer) |
||||
num_branches_cur = len(num_channels_cur_layer) |
||||
transition_layers = [] |
||||
for i in range(num_branches_cur): |
||||
if i < num_branches_pre: |
||||
if num_channels_cur_layer[i] != num_channels_pre_layer[i]: |
||||
transition_layers.append( |
||||
nn.Sequential( |
||||
Conv2d( |
||||
num_channels_pre_layer[i], |
||||
num_channels_pre_layer[i], |
||||
kernel_size=3, |
||||
stride=1, |
||||
padding=1, |
||||
groups=num_channels_pre_layer[i], |
||||
bias=False), |
||||
nn.BatchNorm2D(num_channels_pre_layer[i]), |
||||
Conv2d( |
||||
num_channels_pre_layer[i], |
||||
num_channels_cur_layer[i], |
||||
kernel_size=1, |
||||
stride=1, |
||||
padding=0, |
||||
bias=False, ), |
||||
nn.BatchNorm2D(num_channels_cur_layer[i]), |
||||
nn.ReLU())) |
||||
else: |
||||
transition_layers.append(None) |
||||
else: |
||||
conv_downsamples = [] |
||||
for j in range(i + 1 - num_branches_pre): |
||||
conv_downsamples.append( |
||||
nn.Sequential( |
||||
Conv2d( |
||||
num_channels_pre_layer[-1], |
||||
num_channels_pre_layer[-1], |
||||
groups=num_channels_pre_layer[-1], |
||||
kernel_size=3, |
||||
stride=2, |
||||
padding=1, |
||||
bias=False, ), |
||||
nn.BatchNorm2D(num_channels_pre_layer[-1]), |
||||
Conv2d( |
||||
num_channels_pre_layer[-1], |
||||
num_channels_cur_layer[i] |
||||
if j == i - num_branches_pre else |
||||
num_channels_pre_layer[-1], |
||||
kernel_size=1, |
||||
stride=1, |
||||
padding=0, |
||||
bias=False, ), |
||||
nn.BatchNorm2D(num_channels_cur_layer[i] |
||||
if j == i - num_branches_pre else |
||||
num_channels_pre_layer[-1]), |
||||
nn.ReLU())) |
||||
transition_layers.append(nn.Sequential(*conv_downsamples)) |
||||
return nn.LayerList(transition_layers) |
||||
|
||||
def _make_stage(self, |
||||
stages_config, |
||||
stage_idx, |
||||
in_channels, |
||||
multiscale_output, |
||||
freeze_norm=False, |
||||
norm_decay=0.): |
||||
num_modules = stages_config["num_modules"][stage_idx] |
||||
num_branches = stages_config["num_branches"][stage_idx] |
||||
num_blocks = stages_config["num_blocks"][stage_idx] |
||||
reduce_ratio = stages_config['reduce_ratios'][stage_idx] |
||||
module_type = stages_config['module_type'][stage_idx] |
||||
|
||||
modules = [] |
||||
for i in range(num_modules): |
||||
if not multiscale_output and i == num_modules - 1: |
||||
reset_multiscale_output = False |
||||
else: |
||||
reset_multiscale_output = True |
||||
modules.append( |
||||
LiteHRNetModule( |
||||
num_branches, |
||||
num_blocks, |
||||
in_channels, |
||||
reduce_ratio, |
||||
module_type, |
||||
multiscale_output=reset_multiscale_output, |
||||
with_fuse=True, |
||||
freeze_norm=freeze_norm, |
||||
norm_decay=norm_decay)) |
||||
in_channels = modules[-1].in_channels |
||||
return nn.Sequential(*modules), in_channels |
||||
|
||||
def forward(self, x): |
||||
x = self.stem(x) |
||||
|
||||
y_list = [x] |
||||
for stage_idx in range(3): |
||||
x_list = [] |
||||
transition = getattr(self, 'transition{}'.format(stage_idx)) |
||||
for j in range(self.stages_config["num_branches"][stage_idx]): |
||||
if transition[j] is not None: |
||||
if j >= len(y_list): |
||||
x_list.append(transition[j](y_list[-1])) |
||||
else: |
||||
x_list.append(transition[j](y_list[j])) |
||||
else: |
||||
x_list.append(y_list[j]) |
||||
y_list = getattr(self, 'stage{}'.format(stage_idx))(x_list) |
||||
|
||||
if self.use_head: |
||||
y_list = self.head_layer(y_list) |
||||
|
||||
res = [] |
||||
for i, layer in enumerate(y_list): |
||||
if i == self.freeze_at: |
||||
layer.stop_gradient = True |
||||
if i in self.return_idx: |
||||
res.append(layer) |
||||
return res |
||||
|
||||
|
||||
@manager.BACKBONES.add_component |
||||
def Lite_HRNet_18(**kwargs): |
||||
model = LiteHRNet(network_type="lite_18", **kwargs) |
||||
return model |
||||
|
||||
|
||||
@manager.BACKBONES.add_component |
||||
def Lite_HRNet_30(**kwargs): |
||||
model = LiteHRNet(network_type="lite_30", **kwargs) |
||||
return model |
||||
|
||||
|
||||
@manager.BACKBONES.add_component |
||||
def Lite_HRNet_naive(**kwargs): |
||||
model = LiteHRNet(network_type="naive", **kwargs) |
||||
return model |
||||
|
||||
|
||||
@manager.BACKBONES.add_component |
||||
def Lite_HRNet_wider_naive(**kwargs): |
||||
model = LiteHRNet(network_type="wider_naive", **kwargs) |
||||
return model |
@ -0,0 +1,315 @@ |
||||
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
import paddle |
||||
from paddle import ParamAttr, reshape, transpose, concat, split |
||||
from paddle.nn import Layer, Conv2D, MaxPool2D, AdaptiveAvgPool2D, BatchNorm, Linear |
||||
from paddle.nn.initializer import KaimingNormal |
||||
from paddle.nn.functional import swish |
||||
|
||||
from paddlers.models.ppseg.cvlibs import manager |
||||
from paddlers.models.ppseg.utils import utils, logger |
||||
|
||||
__all__ = [ |
||||
'ShuffleNetV2_x0_25', 'ShuffleNetV2_x0_33', 'ShuffleNetV2_x0_5', |
||||
'ShuffleNetV2_x1_0', 'ShuffleNetV2_x1_5', 'ShuffleNetV2_x2_0', |
||||
'ShuffleNetV2_swish' |
||||
] |
||||
|
||||
|
||||
def channel_shuffle(x, groups): |
||||
x_shape = paddle.shape(x) |
||||
batch_size, height, width = x_shape[0], x_shape[2], x_shape[3] |
||||
num_channels = x.shape[1] |
||||
channels_per_group = num_channels // groups |
||||
|
||||
# reshape |
||||
x = reshape( |
||||
x=x, shape=[batch_size, groups, channels_per_group, height, width]) |
||||
|
||||
# transpose |
||||
x = transpose(x=x, perm=[0, 2, 1, 3, 4]) |
||||
|
||||
# flatten |
||||
x = reshape(x=x, shape=[batch_size, num_channels, height, width]) |
||||
|
||||
return x |
||||
|
||||
|
||||
class ConvBNLayer(Layer): |
||||
def __init__( |
||||
self, |
||||
in_channels, |
||||
out_channels, |
||||
kernel_size, |
||||
stride, |
||||
padding, |
||||
groups=1, |
||||
act=None, |
||||
name=None, ): |
||||
super(ConvBNLayer, self).__init__() |
||||
self._conv = Conv2D( |
||||
in_channels=in_channels, |
||||
out_channels=out_channels, |
||||
kernel_size=kernel_size, |
||||
stride=stride, |
||||
padding=padding, |
||||
groups=groups, |
||||
weight_attr=ParamAttr( |
||||
initializer=KaimingNormal(), name=name + "_weights"), |
||||
bias_attr=False) |
||||
|
||||
self._batch_norm = BatchNorm( |
||||
out_channels, |
||||
param_attr=ParamAttr(name=name + "_bn_scale"), |
||||
bias_attr=ParamAttr(name=name + "_bn_offset"), |
||||
act=act, |
||||
moving_mean_name=name + "_bn_mean", |
||||
moving_variance_name=name + "_bn_variance") |
||||
|
||||
def forward(self, inputs): |
||||
y = self._conv(inputs) |
||||
y = self._batch_norm(y) |
||||
return y |
||||
|
||||
|
||||
class InvertedResidual(Layer): |
||||
def __init__(self, in_channels, out_channels, stride, act="relu", |
||||
name=None): |
||||
super(InvertedResidual, self).__init__() |
||||
self._conv_pw = ConvBNLayer( |
||||
in_channels=in_channels // 2, |
||||
out_channels=out_channels // 2, |
||||
kernel_size=1, |
||||
stride=1, |
||||
padding=0, |
||||
groups=1, |
||||
act=act, |
||||
name='stage_' + name + '_conv1') |
||||
self._conv_dw = ConvBNLayer( |
||||
in_channels=out_channels // 2, |
||||
out_channels=out_channels // 2, |
||||
kernel_size=3, |
||||
stride=stride, |
||||
padding=1, |
||||
groups=out_channels // 2, |
||||
act=None, |
||||
name='stage_' + name + '_conv2') |
||||
self._conv_linear = ConvBNLayer( |
||||
in_channels=out_channels // 2, |
||||
out_channels=out_channels // 2, |
||||
kernel_size=1, |
||||
stride=1, |
||||
padding=0, |
||||
groups=1, |
||||
act=act, |
||||
name='stage_' + name + '_conv3') |
||||
|
||||
def forward(self, inputs): |
||||
x1, x2 = split( |
||||
inputs, |
||||
num_or_sections=[inputs.shape[1] // 2, inputs.shape[1] // 2], |
||||
axis=1) |
||||
x2 = self._conv_pw(x2) |
||||
x2 = self._conv_dw(x2) |
||||
x2 = self._conv_linear(x2) |
||||
out = concat([x1, x2], axis=1) |
||||
return channel_shuffle(out, 2) |
||||
|
||||
|
||||
class InvertedResidualDS(Layer): |
||||
def __init__(self, in_channels, out_channels, stride, act="relu", |
||||
name=None): |
||||
super(InvertedResidualDS, self).__init__() |
||||
|
||||
# branch1 |
||||
self._conv_dw_1 = ConvBNLayer( |
||||
in_channels=in_channels, |
||||
out_channels=in_channels, |
||||
kernel_size=3, |
||||
stride=stride, |
||||
padding=1, |
||||
groups=in_channels, |
||||
act=None, |
||||
name='stage_' + name + '_conv4') |
||||
self._conv_linear_1 = ConvBNLayer( |
||||
in_channels=in_channels, |
||||
out_channels=out_channels // 2, |
||||
kernel_size=1, |
||||
stride=1, |
||||
padding=0, |
||||
groups=1, |
||||
act=act, |
||||
name='stage_' + name + '_conv5') |
||||
# branch2 |
||||
self._conv_pw_2 = ConvBNLayer( |
||||
in_channels=in_channels, |
||||
out_channels=out_channels // 2, |
||||
kernel_size=1, |
||||
stride=1, |
||||
padding=0, |
||||
groups=1, |
||||
act=act, |
||||
name='stage_' + name + '_conv1') |
||||
self._conv_dw_2 = ConvBNLayer( |
||||
in_channels=out_channels // 2, |
||||
out_channels=out_channels // 2, |
||||
kernel_size=3, |
||||
stride=stride, |
||||
padding=1, |
||||
groups=out_channels // 2, |
||||
act=None, |
||||
name='stage_' + name + '_conv2') |
||||
self._conv_linear_2 = ConvBNLayer( |
||||
in_channels=out_channels // 2, |
||||
out_channels=out_channels // 2, |
||||
kernel_size=1, |
||||
stride=1, |
||||
padding=0, |
||||
groups=1, |
||||
act=act, |
||||
name='stage_' + name + '_conv3') |
||||
|
||||
def forward(self, inputs): |
||||
x1 = self._conv_dw_1(inputs) |
||||
x1 = self._conv_linear_1(x1) |
||||
x2 = self._conv_pw_2(inputs) |
||||
x2 = self._conv_dw_2(x2) |
||||
x2 = self._conv_linear_2(x2) |
||||
out = concat([x1, x2], axis=1) |
||||
|
||||
return channel_shuffle(out, 2) |
||||
|
||||
|
||||
class ShuffleNet(Layer): |
||||
def __init__(self, scale=1.0, act="relu", in_channels=3, pretrained=None): |
||||
super(ShuffleNet, self).__init__() |
||||
self.scale = scale |
||||
self.pretrained = pretrained |
||||
stage_repeats = [4, 8, 4] |
||||
|
||||
if scale == 0.25: |
||||
stage_out_channels = [-1, 24, 24, 48, 96, 512] |
||||
elif scale == 0.33: |
||||
stage_out_channels = [-1, 24, 32, 64, 128, 512] |
||||
elif scale == 0.5: |
||||
stage_out_channels = [-1, 24, 48, 96, 192, 1024] |
||||
elif scale == 1.0: |
||||
stage_out_channels = [-1, 24, 116, 232, 464, 1024] |
||||
elif scale == 1.5: |
||||
stage_out_channels = [-1, 24, 176, 352, 704, 1024] |
||||
elif scale == 2.0: |
||||
stage_out_channels = [-1, 24, 224, 488, 976, 2048] |
||||
else: |
||||
raise NotImplementedError("This scale size:[" + str(scale) + |
||||
"] is not implemented!") |
||||
|
||||
self.out_index = [3, 11, 15] |
||||
self.feat_channels = stage_out_channels[1:5] |
||||
|
||||
# 1. conv1 |
||||
self._conv1 = ConvBNLayer( |
||||
in_channels=in_channels, |
||||
out_channels=stage_out_channels[1], |
||||
kernel_size=3, |
||||
stride=2, |
||||
padding=1, |
||||
act=act, |
||||
name='stage1_conv') |
||||
self._max_pool = MaxPool2D(kernel_size=3, stride=2, padding=1) |
||||
|
||||
# 2. bottleneck sequences |
||||
self._block_list = [] |
||||
for stage_id, num_repeat in enumerate(stage_repeats): |
||||
for i in range(num_repeat): |
||||
if i == 0: |
||||
block = self.add_sublayer( |
||||
name=str(stage_id + 2) + '_' + str(i + 1), |
||||
sublayer=InvertedResidualDS( |
||||
in_channels=stage_out_channels[stage_id + 1], |
||||
out_channels=stage_out_channels[stage_id + 2], |
||||
stride=2, |
||||
act=act, |
||||
name=str(stage_id + 2) + '_' + str(i + 1))) |
||||
else: |
||||
block = self.add_sublayer( |
||||
name=str(stage_id + 2) + '_' + str(i + 1), |
||||
sublayer=InvertedResidual( |
||||
in_channels=stage_out_channels[stage_id + 2], |
||||
out_channels=stage_out_channels[stage_id + 2], |
||||
stride=1, |
||||
act=act, |
||||
name=str(stage_id + 2) + '_' + str(i + 1))) |
||||
self._block_list.append(block) |
||||
|
||||
self.init_weight() |
||||
|
||||
def init_weight(self): |
||||
if self.pretrained is not None: |
||||
utils.load_entire_model(self, self.pretrained) |
||||
|
||||
def forward(self, inputs): |
||||
feat_list = [] |
||||
|
||||
y = self._conv1(inputs) |
||||
y = self._max_pool(y) |
||||
feat_list.append(y) |
||||
|
||||
for idx, inv in enumerate(self._block_list): |
||||
y = inv(y) |
||||
if idx in self.out_index: |
||||
feat_list.append(y) |
||||
return feat_list |
||||
|
||||
|
||||
@manager.BACKBONES.add_component |
||||
def ShuffleNetV2_x0_25(**kwargs): |
||||
model = ShuffleNet(scale=0.25, **kwargs) |
||||
return model |
||||
|
||||
|
||||
@manager.BACKBONES.add_component |
||||
def ShuffleNetV2_x0_33(**kwargs): |
||||
model = ShuffleNet(scale=0.33, **kwargs) |
||||
return model |
||||
|
||||
|
||||
@manager.BACKBONES.add_component |
||||
def ShuffleNetV2_x0_5(**kwargs): |
||||
model = ShuffleNet(scale=0.5, **kwargs) |
||||
return model |
||||
|
||||
|
||||
@manager.BACKBONES.add_component |
||||
def ShuffleNetV2_x1_0(**kwargs): |
||||
model = ShuffleNet(scale=1.0, **kwargs) |
||||
return model |
||||
|
||||
|
||||
@manager.BACKBONES.add_component |
||||
def ShuffleNetV2_x1_5(**kwargs): |
||||
model = ShuffleNet(scale=1.5, **kwargs) |
||||
return model |
||||
|
||||
|
||||
@manager.BACKBONES.add_component |
||||
def ShuffleNetV2_x2_0(**kwargs): |
||||
model = ShuffleNet(scale=2.0, **kwargs) |
||||
return model |
||||
|
||||
|
||||
@manager.BACKBONES.add_component |
||||
def ShuffleNetV2_swish(**kwargs): |
||||
model = ShuffleNet(scale=1.0, act="swish", **kwargs) |
||||
return model |
@ -0,0 +1,716 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
""" |
||||
This file refers to https://github.com/hustvl/TopFormer and https://github.com/BR-IDL/PaddleViT |
||||
""" |
||||
|
||||
import paddle |
||||
import paddle.nn as nn |
||||
import paddle.nn.functional as F |
||||
|
||||
from paddlers.models.ppseg.cvlibs import manager |
||||
from paddlers.models.ppseg import utils |
||||
from paddlers.models.ppseg.models.backbones.transformer_utils import Identity, DropPath |
||||
|
||||
__all__ = ["TopTransformer_Base", "TopTransformer_Small", "TopTransformer_Tiny"] |
||||
|
||||
|
||||
def make_divisible(val, divisor, min_value=None): |
||||
""" |
||||
This function is taken from the original tf repo. |
||||
It ensures that all layers have a channel number that is divisible by 8 |
||||
It can be seen here: |
||||
https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py |
||||
""" |
||||
if min_value is None: |
||||
min_value = divisor |
||||
new_v = max(min_value, int(val + divisor / 2) // divisor * divisor) |
||||
# Make sure that round down does not go down by more than 10%. |
||||
if new_v < 0.9 * val: |
||||
new_v += divisor |
||||
return new_v |
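

# A hedged sanity check of make_divisible, with values worked out from the code above:
# it rounds to the nearest multiple of `divisor` and never drops below 90% of `val`.
#   make_divisible(37, 8) -> 40   # rounded to the nearest multiple of 8
#   make_divisible(32, 8) -> 32   # already divisible, unchanged
#   make_divisible(7, 8)  -> 8    # clamped up to min_value (== divisor by default)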
||||
|
||||
|
||||
class HSigmoid(nn.Layer): |
||||
def __init__(self, inplace=True): |
||||
super().__init__() |
||||
self.relu = nn.ReLU6() |
||||
|
||||
def forward(self, x): |
||||
return self.relu(x + 3) / 6 |
||||
|
||||
|
||||
class Conv2DBN(nn.Layer): |
||||
def __init__(self, |
||||
in_channels, |
||||
out_channels, |
||||
ks=1, |
||||
stride=1, |
||||
pad=0, |
||||
dilation=1, |
||||
groups=1, |
||||
bn_weight_init=1, |
||||
lr_mult=1.0): |
||||
super().__init__() |
||||
conv_weight_attr = paddle.ParamAttr(learning_rate=lr_mult) |
||||
self.c = nn.Conv2D( |
||||
in_channels=in_channels, |
||||
out_channels=out_channels, |
||||
kernel_size=ks, |
||||
stride=stride, |
||||
padding=pad, |
||||
dilation=dilation, |
||||
groups=groups, |
||||
weight_attr=conv_weight_attr, |
||||
bias_attr=False) |
||||
bn_weight_attr = paddle.ParamAttr( |
||||
initializer=nn.initializer.Constant(bn_weight_init), |
||||
learning_rate=lr_mult) |
||||
bn_bias_attr = paddle.ParamAttr( |
||||
initializer=nn.initializer.Constant(0), learning_rate=lr_mult) |
||||
self.bn = nn.BatchNorm2D( |
||||
out_channels, weight_attr=bn_weight_attr, bias_attr=bn_bias_attr) |
||||
|
||||
def forward(self, inputs): |
||||
out = self.c(inputs) |
||||
out = self.bn(out) |
||||
return out |
||||
|
||||
|
||||
class ConvBNAct(nn.Layer): |
||||
def __init__(self, |
||||
in_channels, |
||||
out_channels, |
||||
kernel_size=1, |
||||
stride=1, |
||||
padding=0, |
||||
groups=1, |
||||
norm=nn.BatchNorm2D, |
||||
act=None, |
||||
bias_attr=False, |
||||
lr_mult=1.0): |
||||
super(ConvBNAct, self).__init__() |
||||
param_attr = paddle.ParamAttr(learning_rate=lr_mult) |
||||
self.conv = nn.Conv2D( |
||||
in_channels=in_channels, |
||||
out_channels=out_channels, |
||||
kernel_size=kernel_size, |
||||
stride=stride, |
||||
padding=padding, |
||||
groups=groups, |
||||
weight_attr=param_attr, |
||||
bias_attr=param_attr if bias_attr else False) |
||||
self.act = act() if act is not None else Identity() |
||||
self.bn = norm(out_channels, weight_attr=param_attr, bias_attr=param_attr) \ |
||||
if norm is not None else Identity() |
||||
|
||||
def forward(self, x): |
||||
x = self.conv(x) |
||||
x = self.bn(x) |
||||
x = self.act(x) |
||||
return x |
||||
|
||||
|
||||
class MLP(nn.Layer): |
||||
def __init__(self, |
||||
in_features, |
||||
hidden_features=None, |
||||
out_features=None, |
||||
act_layer=nn.ReLU, |
||||
drop=0., |
||||
lr_mult=1.0): |
||||
super().__init__() |
||||
out_features = out_features or in_features |
||||
hidden_features = hidden_features or in_features |
||||
self.fc1 = Conv2DBN(in_features, hidden_features, lr_mult=lr_mult) |
||||
param_attr = paddle.ParamAttr(learning_rate=lr_mult) |
||||
self.dwconv = nn.Conv2D( |
||||
hidden_features, |
||||
hidden_features, |
||||
3, |
||||
1, |
||||
1, |
||||
groups=hidden_features, |
||||
weight_attr=param_attr, |
||||
bias_attr=param_attr) |
||||
self.act = act_layer() |
||||
self.fc2 = Conv2DBN(hidden_features, out_features, lr_mult=lr_mult) |
||||
self.drop = nn.Dropout(drop) |
||||
|
||||
def forward(self, x): |
||||
x = self.fc1(x) |
||||
x = self.dwconv(x) |
||||
x = self.act(x) |
||||
x = self.drop(x) |
||||
x = self.fc2(x) |
||||
x = self.drop(x) |
||||
return x |
||||
|
||||
|
||||
class InvertedResidual(nn.Layer): |
||||
def __init__(self, |
||||
in_channels, |
||||
out_channels, |
||||
kernel_size, |
||||
stride, |
||||
expand_ratio, |
||||
activations=None, |
||||
lr_mult=1.0): |
||||
super(InvertedResidual, self).__init__() |
||||
assert stride in [1, 2], "The stride should be 1 or 2." |
||||
|
||||
if activations is None: |
||||
activations = nn.ReLU |
||||
|
||||
hidden_dim = int(round(in_channels * expand_ratio)) |
||||
self.use_res_connect = stride == 1 and in_channels == out_channels |
||||
|
||||
layers = [] |
||||
if expand_ratio != 1: |
||||
layers.append( |
||||
Conv2DBN( |
||||
in_channels, hidden_dim, ks=1, lr_mult=lr_mult)) |
||||
layers.append(activations()) |
||||
layers.extend([ |
||||
Conv2DBN( |
||||
hidden_dim, |
||||
hidden_dim, |
||||
ks=kernel_size, |
||||
stride=stride, |
||||
pad=kernel_size // 2, |
||||
groups=hidden_dim, |
||||
lr_mult=lr_mult), activations(), Conv2DBN( |
||||
hidden_dim, out_channels, ks=1, lr_mult=lr_mult) |
||||
]) |
||||
self.conv = nn.Sequential(*layers) |
||||
self.out_channels = out_channels |
||||
|
||||
def forward(self, x): |
||||
if self.use_res_connect: |
||||
return x + self.conv(x) |
||||
else: |
||||
return self.conv(x) |
||||
|
||||
|
||||
class TokenPyramidModule(nn.Layer): |
||||
def __init__(self, |
||||
cfgs, |
||||
out_indices, |
||||
in_channels=3, |
||||
inp_channel=16, |
||||
activation=nn.ReLU, |
||||
width_mult=1., |
||||
lr_mult=1.): |
||||
super().__init__() |
||||
self.out_indices = out_indices |
||||
|
||||
self.stem = nn.Sequential( |
||||
Conv2DBN( |
||||
in_channels, inp_channel, 3, 2, 1, lr_mult=lr_mult), |
||||
activation()) |
||||
|
||||
self.layers = [] |
||||
for i, (k, t, c, s) in enumerate(cfgs): |
||||
output_channel = make_divisible(c * width_mult, 8) |
||||
exp_size = t * inp_channel |
||||
exp_size = make_divisible(exp_size * width_mult, 8) |
||||
layer_name = 'layer{}'.format(i + 1) |
||||
layer = InvertedResidual( |
||||
inp_channel, |
||||
output_channel, |
||||
kernel_size=k, |
||||
stride=s, |
||||
expand_ratio=t, |
||||
activations=activation, |
||||
lr_mult=lr_mult) |
||||
self.add_sublayer(layer_name, layer) |
||||
self.layers.append(layer_name) |
||||
inp_channel = output_channel |
||||
|
||||
def forward(self, x): |
||||
outs = [] |
||||
x = self.stem(x) |
||||
for i, layer_name in enumerate(self.layers): |
||||
layer = getattr(self, layer_name) |
||||
x = layer(x) |
||||
if i in self.out_indices: |
||||
outs.append(x) |
||||
return outs |
||||
|
||||
|
||||
class Attention(nn.Layer): |
||||
def __init__(self, |
||||
dim, |
||||
key_dim, |
||||
num_heads, |
||||
attn_ratio=4, |
||||
activation=None, |
||||
lr_mult=1.0): |
||||
super().__init__() |
||||
self.num_heads = num_heads |
||||
self.scale = key_dim**-0.5 |
||||
self.key_dim = key_dim |
||||
self.nh_kd = nh_kd = key_dim * num_heads |
||||
self.d = int(attn_ratio * key_dim) |
||||
self.dh = int(attn_ratio * key_dim) * num_heads |
||||
self.attn_ratio = attn_ratio |
||||
|
||||
self.to_q = Conv2DBN(dim, nh_kd, 1, lr_mult=lr_mult) |
||||
self.to_k = Conv2DBN(dim, nh_kd, 1, lr_mult=lr_mult) |
||||
self.to_v = Conv2DBN(dim, self.dh, 1, lr_mult=lr_mult) |
||||
|
||||
self.proj = nn.Sequential( |
||||
activation(), |
||||
Conv2DBN( |
||||
self.dh, dim, bn_weight_init=0, lr_mult=lr_mult)) |
||||
|
||||
def forward(self, x): |
||||
x_shape = paddle.shape(x) |
||||
H, W = x_shape[2], x_shape[3] |
||||
|
||||
qq = self.to_q(x).reshape( |
||||
[0, self.num_heads, self.key_dim, -1]).transpose([0, 1, 3, 2]) |
||||
kk = self.to_k(x).reshape([0, self.num_heads, self.key_dim, -1]) |
||||
vv = self.to_v(x).reshape([0, self.num_heads, self.d, -1]).transpose( |
||||
[0, 1, 3, 2]) |
||||
|
||||
attn = paddle.matmul(qq, kk) |
||||
attn = F.softmax(attn, axis=-1) |
||||
|
||||
xx = paddle.matmul(attn, vv) |
||||
|
||||
xx = xx.transpose([0, 1, 3, 2]).reshape([0, self.dh, H, W]) |
||||
xx = self.proj(xx) |
||||
return xx |
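
    # Hedged shape walk-through (illustrative values, not from the diff): with dim=384,
    # key_dim=16, num_heads=8 and attn_ratio=2, an input of [1, 384, 8, 8] gives per-head
    # q/k of 16 channels and v of 32 channels over the 64 spatial tokens; the projection
    # restores `dim`, so the output shape matches the input:
    #     attn = Attention(dim=384, key_dim=16, num_heads=8, attn_ratio=2, activation=nn.ReLU6)
    #     y = attn(paddle.randn([1, 384, 8, 8]))   # y.shape == [1, 384, 8, 8]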
||||
|
||||
|
||||
class Block(nn.Layer): |
||||
def __init__(self, |
||||
dim, |
||||
key_dim, |
||||
num_heads, |
||||
mlp_ratios=4., |
||||
attn_ratio=2., |
||||
drop=0., |
||||
drop_path=0., |
||||
act_layer=nn.ReLU, |
||||
lr_mult=1.0): |
||||
super().__init__() |
||||
self.dim = dim |
||||
self.num_heads = num_heads |
||||
self.mlp_ratios = mlp_ratios |
||||
|
||||
self.attn = Attention( |
||||
dim, |
||||
key_dim=key_dim, |
||||
num_heads=num_heads, |
||||
attn_ratio=attn_ratio, |
||||
activation=act_layer, |
||||
lr_mult=lr_mult) |
||||
|
||||
# NOTE: drop path for stochastic depth, we shall see if this is better than dropout here |
||||
self.drop_path = DropPath(drop_path) if drop_path > 0. else Identity() |
||||
mlp_hidden_dim = int(dim * mlp_ratios) |
||||
self.mlp = MLP(in_features=dim, |
||||
hidden_features=mlp_hidden_dim, |
||||
act_layer=act_layer, |
||||
drop=drop, |
||||
lr_mult=lr_mult) |
||||
|
||||
def forward(self, x): |
||||
h = x |
||||
x = self.attn(x) |
||||
x = self.drop_path(x) |
||||
x = h + x |
||||
|
||||
h = x |
||||
x = self.mlp(x) |
||||
x = self.drop_path(x) |
||||
x = x + h |
||||
return x |
||||
|
||||
|
||||
class BasicLayer(nn.Layer): |
||||
def __init__(self, |
||||
block_num, |
||||
embedding_dim, |
||||
key_dim, |
||||
num_heads, |
||||
mlp_ratios=4., |
||||
attn_ratio=2., |
||||
drop=0., |
||||
attn_drop=0., |
||||
drop_path=0., |
||||
act_layer=None, |
||||
lr_mult=1.0): |
||||
super().__init__() |
||||
self.block_num = block_num |
||||
|
||||
self.transformer_blocks = nn.LayerList() |
||||
for i in range(self.block_num): |
||||
self.transformer_blocks.append( |
||||
Block( |
||||
embedding_dim, |
||||
key_dim=key_dim, |
||||
num_heads=num_heads, |
||||
mlp_ratios=mlp_ratios, |
||||
attn_ratio=attn_ratio, |
||||
drop=drop, |
||||
drop_path=drop_path[i] |
||||
if isinstance(drop_path, list) else drop_path, |
||||
act_layer=act_layer, |
||||
lr_mult=lr_mult)) |
||||
|
||||
def forward(self, x): |
||||
# token * N |
||||
for i in range(self.block_num): |
||||
x = self.transformer_blocks[i](x) |
||||
return x |
||||
|
||||
|
||||
class PyramidPoolAgg(nn.Layer): |
||||
def __init__(self, stride): |
||||
super().__init__() |
||||
self.stride = stride |
||||
self.tmp = Identity() # avoid the error of paddle.flops |
||||
|
||||
def forward(self, inputs): |
||||
''' |
||||
# The F.adaptive_avg_pool2d does not support the (H, W) be Tensor, |
||||
# so exporting the inference model will raise error. |
||||
_, _, H, W = inputs[-1].shape |
||||
H = (H - 1) // self.stride + 1 |
||||
W = (W - 1) // self.stride + 1 |
||||
return paddle.concat( |
||||
[F.adaptive_avg_pool2d(inp, (H, W)) for inp in inputs], axis=1) |
||||
''' |
||||
out = [] |
||||
ks = 2**len(inputs) |
||||
stride = self.stride**len(inputs) |
||||
for x in inputs: |
||||
x = F.avg_pool2d(x, int(ks), int(stride)) |
||||
ks /= 2 |
||||
stride /= 2 |
||||
out.append(x) |
||||
out = paddle.concat(out, axis=1) |
||||
return out |
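
    # Worked example (hedged): with the default c2t_stride=2 and four encoder outputs at
    # 1/4, 1/8, 1/16 and 1/32 of the input, ks and stride start at 2**4 = 16 and halve per
    # branch, so every branch is pooled to 1/64 resolution before the channel-wise concat;
    # e.g. a 512x512 input yields a [N, sum(C_i), 8, 8] token map for the transformer.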
||||
|
||||
|
||||
class InjectionMultiSum(nn.Layer): |
||||
def __init__(self, in_channels, out_channels, activations=None, |
||||
lr_mult=1.0): |
||||
super(InjectionMultiSum, self).__init__() |
||||
|
||||
self.local_embedding = ConvBNAct( |
||||
in_channels, out_channels, kernel_size=1, lr_mult=lr_mult) |
||||
self.global_embedding = ConvBNAct( |
||||
in_channels, out_channels, kernel_size=1, lr_mult=lr_mult) |
||||
self.global_act = ConvBNAct( |
||||
in_channels, out_channels, kernel_size=1, lr_mult=lr_mult) |
||||
self.act = HSigmoid() |
||||
|
||||
def forward(self, x_low, x_global): |
||||
xl_hw = paddle.shape(x_low)[2:] |
||||
local_feat = self.local_embedding(x_low) |
||||
|
||||
global_act = self.global_act(x_global) |
||||
sig_act = F.interpolate( |
||||
self.act(global_act), xl_hw, mode='bilinear', align_corners=False) |
||||
|
||||
global_feat = self.global_embedding(x_global) |
||||
global_feat = F.interpolate( |
||||
global_feat, xl_hw, mode='bilinear', align_corners=False) |
||||
|
||||
out = local_feat * sig_act + global_feat |
||||
return out |
||||
|
||||
|
||||
class InjectionMultiSumCBR(nn.Layer): |
||||
def __init__(self, in_channels, out_channels, activations=None): |
||||
''' |
||||
local_embedding: conv-bn-relu |
||||
global_embedding: conv-bn-relu |
||||
global_act: conv |
||||
''' |
||||
super(InjectionMultiSumCBR, self).__init__() |
||||
|
||||
self.local_embedding = ConvBNAct( |
||||
in_channels, out_channels, kernel_size=1) |
||||
self.global_embedding = ConvBNAct( |
||||
in_channels, out_channels, kernel_size=1) |
||||
self.global_act = ConvBNAct( |
||||
in_channels, out_channels, kernel_size=1, norm=None, act=None) |
||||
self.act = HSigmoid() |
||||
|
||||
def forward(self, x_low, x_global): |
||||
        xl_hw = paddle.shape(x_low)[2:]
||||
local_feat = self.local_embedding(x_low) |
||||
# kernel |
||||
global_act = self.global_act(x_global) |
||||
global_act = F.interpolate( |
||||
self.act(global_act), xl_hw, mode='bilinear', align_corners=False) |
||||
# feat_h |
||||
global_feat = self.global_embedding(x_global) |
||||
global_feat = F.interpolate( |
||||
global_feat, xl_hw, mode='bilinear', align_corners=False) |
||||
out = local_feat * global_act + global_feat |
||||
return out |
||||
|
||||
|
||||
class FuseBlockSum(nn.Layer): |
||||
def __init__(self, in_channels, out_channels, activations=None): |
||||
super(FuseBlockSum, self).__init__() |
||||
|
||||
self.fuse1 = ConvBNAct( |
||||
in_channels, out_channels, kernel_size=1, act=None) |
||||
self.fuse2 = ConvBNAct( |
||||
in_channels, out_channels, kernel_size=1, act=None) |
||||
|
||||
def forward(self, x_low, x_high): |
||||
        xl_hw = paddle.shape(x_low)[2:]
||||
inp = self.fuse1(x_low) |
||||
kernel = self.fuse2(x_high) |
||||
feat_h = F.interpolate( |
||||
kernel, xl_hw, mode='bilinear', align_corners=False) |
||||
out = inp + feat_h |
||||
return out |
||||
|
||||
|
||||
class FuseBlockMulti(nn.Layer): |
||||
def __init__( |
||||
self, |
||||
in_channels, |
||||
out_channels, |
||||
stride=1, |
||||
activations=None, ): |
||||
super(FuseBlockMulti, self).__init__() |
||||
assert stride in [1, 2], "The stride should be 1 or 2." |
||||
|
||||
self.fuse1 = ConvBNAct( |
||||
in_channels, out_channels, kernel_size=1, act=None) |
||||
self.fuse2 = ConvBNAct( |
||||
in_channels, out_channels, kernel_size=1, act=None) |
||||
self.act = HSigmoid() |
||||
|
||||
def forward(self, x_low, x_high): |
||||
        xl_hw = paddle.shape(x_low)[2:]
||||
inp = self.fuse1(x_low) |
||||
sig_act = self.fuse2(x_high) |
||||
sig_act = F.interpolate( |
||||
self.act(sig_act), xl_hw, mode='bilinear', align_corners=False) |
||||
out = inp * sig_act |
||||
return out |
||||
|
||||
|
||||
SIM_BLOCK = { |
||||
"fuse_sum": FuseBlockSum, |
||||
"fuse_multi": FuseBlockMulti, |
||||
"multi_sum": InjectionMultiSum, |
||||
"multi_sum_cbr": InjectionMultiSumCBR, |
||||
} |
||||
|
||||
|
||||
class TopTransformer(nn.Layer): |
||||
def __init__(self, |
||||
cfgs, |
||||
injection_out_channels, |
||||
encoder_out_indices, |
||||
trans_out_indices=[1, 2, 3], |
||||
depths=4, |
||||
key_dim=16, |
||||
num_heads=8, |
||||
attn_ratios=2, |
||||
mlp_ratios=2, |
||||
c2t_stride=2, |
||||
drop_path_rate=0., |
||||
act_layer=nn.ReLU6, |
||||
injection_type="muli_sum", |
||||
injection=True, |
||||
lr_mult=1.0, |
||||
in_channels=3, |
||||
pretrained=None): |
||||
super().__init__() |
||||
self.feat_channels = [ |
||||
c[2] for i, c in enumerate(cfgs) if i in encoder_out_indices |
||||
] |
||||
self.injection_out_channels = injection_out_channels |
||||
self.injection = injection |
||||
self.embed_dim = sum(self.feat_channels) |
||||
self.trans_out_indices = trans_out_indices |
||||
|
||||
self.tpm = TokenPyramidModule( |
||||
cfgs=cfgs, |
||||
out_indices=encoder_out_indices, |
||||
in_channels=in_channels, |
||||
lr_mult=lr_mult) |
||||
self.ppa = PyramidPoolAgg(stride=c2t_stride) |
||||
|
||||
dpr = [x.item() for x in \ |
||||
paddle.linspace(0, drop_path_rate, depths)] |
||||
self.trans = BasicLayer( |
||||
block_num=depths, |
||||
embedding_dim=self.embed_dim, |
||||
key_dim=key_dim, |
||||
num_heads=num_heads, |
||||
mlp_ratios=mlp_ratios, |
||||
attn_ratio=attn_ratios, |
||||
drop=0, |
||||
attn_drop=0, |
||||
drop_path=dpr, |
||||
act_layer=act_layer, |
||||
lr_mult=lr_mult) |
||||
|
||||
self.SIM = nn.LayerList() |
||||
inj_module = SIM_BLOCK[injection_type] |
||||
if self.injection: |
||||
for i in range(len(self.feat_channels)): |
||||
if i in trans_out_indices: |
||||
self.SIM.append( |
||||
inj_module( |
||||
self.feat_channels[i], |
||||
injection_out_channels[i], |
||||
activations=act_layer, |
||||
lr_mult=lr_mult)) |
||||
else: |
||||
self.SIM.append(Identity()) |
||||
|
||||
self.pretrained = pretrained |
||||
self.init_weight() |
||||
|
||||
def init_weight(self): |
||||
if self.pretrained is not None: |
||||
utils.load_entire_model(self, self.pretrained) |
||||
|
||||
    def forward(self, x):
        outputs = self.tpm(x)
        out = self.ppa(outputs)
        out = self.trans(out)

        if self.injection:
            xx = out.split(self.feat_channels, axis=1)
            results = []
            for i in range(len(self.feat_channels)):
                if i in self.trans_out_indices:
                    local_tokens = outputs[i]
                    global_semantics = xx[i]
                    out_ = self.SIM[i](local_tokens, global_semantics)
                    results.append(out_)
            return results
        else:
            outputs.append(out)
            return outputs
||||
|
||||
|
||||
@manager.BACKBONES.add_component |
||||
def TopTransformer_Base(**kwargs): |
||||
cfgs = [ |
||||
# k, t, c, s |
||||
[3, 1, 16, 1], # 1/2 |
||||
[3, 4, 32, 2], # 1/4 1 |
||||
[3, 3, 32, 1], # |
||||
[5, 3, 64, 2], # 1/8 3 |
||||
[5, 3, 64, 1], # |
||||
[3, 3, 128, 2], # 1/16 5 |
||||
[3, 3, 128, 1], # |
||||
[5, 6, 160, 2], # 1/32 7 |
||||
[5, 6, 160, 1], # |
||||
[3, 6, 160, 1], # |
||||
] |
||||
|
||||
model = TopTransformer( |
||||
cfgs=cfgs, |
||||
injection_out_channels=[None, 256, 256, 256], |
||||
encoder_out_indices=[2, 4, 6, 9], |
||||
trans_out_indices=[1, 2, 3], |
||||
depths=4, |
||||
key_dim=16, |
||||
num_heads=8, |
||||
attn_ratios=2, |
||||
mlp_ratios=2, |
||||
c2t_stride=2, |
||||
drop_path_rate=0., |
||||
act_layer=nn.ReLU6, |
||||
injection_type="multi_sum", |
||||
injection=True, |
||||
**kwargs) |
||||
return model |
||||
|
||||
|
||||
@manager.BACKBONES.add_component |
||||
def TopTransformer_Small(**kwargs): |
||||
cfgs = [ |
||||
# k, t, c, s |
||||
[3, 1, 16, 1], # 1/2 |
||||
[3, 4, 24, 2], # 1/4 1 |
||||
[3, 3, 24, 1], # |
||||
[5, 3, 48, 2], # 1/8 3 |
||||
[5, 3, 48, 1], # |
||||
[3, 3, 96, 2], # 1/16 5 |
||||
[3, 3, 96, 1], # |
||||
[5, 6, 128, 2], # 1/32 7 |
||||
[5, 6, 128, 1], # |
||||
[3, 6, 128, 1], # |
||||
] |
||||
|
||||
model = TopTransformer( |
||||
cfgs=cfgs, |
||||
injection_out_channels=[None, 192, 192, 192], |
||||
encoder_out_indices=[2, 4, 6, 9], |
||||
trans_out_indices=[1, 2, 3], |
||||
depths=4, |
||||
key_dim=16, |
||||
num_heads=6, |
||||
attn_ratios=2, |
||||
mlp_ratios=2, |
||||
c2t_stride=2, |
||||
drop_path_rate=0., |
||||
act_layer=nn.ReLU6, |
||||
injection_type="multi_sum", |
||||
injection=True, |
||||
**kwargs) |
||||
return model |
||||
|
||||
|
||||
@manager.BACKBONES.add_component |
||||
def TopTransformer_Tiny(**kwargs): |
||||
cfgs = [ |
||||
# k, t, c, s |
||||
[3, 1, 16, 1], # 1/2 |
||||
[3, 4, 16, 2], # 1/4 1 |
||||
[3, 3, 16, 1], # |
||||
[5, 3, 32, 2], # 1/8 3 |
||||
[5, 3, 32, 1], # |
||||
[3, 3, 64, 2], # 1/16 5 |
||||
[3, 3, 64, 1], # |
||||
[5, 6, 96, 2], # 1/32 7 |
||||
[5, 6, 96, 1], # |
||||
] |
||||
|
||||
model = TopTransformer( |
||||
cfgs=cfgs, |
||||
injection_out_channels=[None, 128, 128, 128], |
||||
encoder_out_indices=[2, 4, 6, 8], |
||||
trans_out_indices=[1, 2, 3], |
||||
depths=4, |
||||
key_dim=16, |
||||
num_heads=4, |
||||
attn_ratios=2, |
||||
mlp_ratios=2, |
||||
c2t_stride=2, |
||||
drop_path_rate=0., |
||||
act_layer=nn.ReLU6, |
||||
injection_type="multi_sum", |
||||
injection=True, |
||||
**kwargs) |
||||
return model |
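
Hedged usage sketch (the import path is assumed, since the file path is not shown in this hunk):

    import paddle
    from paddlers.models.ppseg.models.backbones.top_transformer import TopTransformer_Base

    backbone = TopTransformer_Base()
    feats = backbone(paddle.randn([1, 3, 512, 512]))
    # With injection=True (the default here), three fused maps come back at strides 8/16/32
    # with 256 channels each: [1, 256, 64, 64], [1, 256, 32, 32] and [1, 256, 16, 16].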
@ -0,0 +1,174 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
import paddle |
||||
import paddle.nn as nn |
||||
import paddle.nn.functional as F |
||||
|
||||
from paddlers.models.ppseg.cvlibs import manager |
||||
from paddlers.models.ppseg.models import layers |
||||
from paddlers.models.ppseg.utils import utils |
||||
|
||||
|
||||
@manager.MODELS.add_component |
||||
class CCNet(nn.Layer): |
||||
""" |
||||
The CCNet implementation based on PaddlePaddle. |
||||
|
||||
The original article refers to |
||||
Zilong Huang, et al. "CCNet: Criss-Cross Attention for Semantic Segmentation" |
||||
(https://arxiv.org/abs/1811.11721) |
||||
|
||||
Args: |
||||
num_classes (int): The unique number of target classes. |
||||
        backbone (paddle.nn.Layer): Backbone network, currently supports ResNet18_vd/ResNet34_vd/ResNet50_vd/ResNet101_vd.
||||
backbone_indices (tuple, list, optional): Two values in the tuple indicate the indices of output of backbone. Default: (2, 3). |
||||
enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss. Default: True. |
||||
dropout_prob (float, optional): The probability of dropout. Default: 0.0. |
||||
        recurrence (int, optional): The number of recurrent operations. Default: 1.
||||
align_corners (bool, optional): An argument of F.interpolate. It should be set to False when the feature size is even, |
||||
e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False. |
||||
pretrained (str, optional): The path or url of pretrained model. Default: None. |
||||
""" |
||||
|
||||
def __init__(self, |
||||
num_classes, |
||||
backbone, |
||||
backbone_indices=(2, 3), |
||||
enable_auxiliary_loss=True, |
||||
dropout_prob=0.0, |
||||
recurrence=1, |
||||
align_corners=False, |
||||
pretrained=None): |
||||
super().__init__() |
||||
self.enable_auxiliary_loss = enable_auxiliary_loss |
||||
self.recurrence = recurrence |
||||
self.align_corners = align_corners |
||||
|
||||
self.backbone = backbone |
||||
self.backbone_indices = backbone_indices |
||||
backbone_channels = [ |
||||
backbone.feat_channels[i] for i in backbone_indices |
||||
] |
||||
|
||||
if enable_auxiliary_loss: |
||||
self.aux_head = layers.AuxLayer( |
||||
backbone_channels[0], |
||||
512, |
||||
num_classes, |
||||
dropout_prob=dropout_prob) |
||||
self.head = RCCAModule( |
||||
backbone_channels[1], |
||||
512, |
||||
num_classes, |
||||
dropout_prob=dropout_prob, |
||||
recurrence=recurrence) |
||||
        self.pretrained = pretrained
        self.init_weight()
||||
|
||||
def init_weight(self): |
||||
if self.pretrained is not None: |
||||
utils.load_entire_model(self, self.pretrained) |
||||
|
||||
def forward(self, x): |
||||
feat_list = self.backbone(x) |
||||
logit_list = [] |
||||
output = self.head(feat_list[self.backbone_indices[-1]]) |
||||
logit_list.append(output) |
||||
if self.training and self.enable_auxiliary_loss: |
||||
aux_out = self.aux_head(feat_list[self.backbone_indices[-2]]) |
||||
logit_list.append(aux_out) |
||||
return [ |
||||
F.interpolate( |
||||
logit, |
||||
paddle.shape(x)[2:], |
||||
mode='bilinear', |
||||
align_corners=self.align_corners) for logit in logit_list |
||||
] |
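
    # Hedged usage sketch (the backbone name and its output_stride argument are assumed from
    # ppseg's registered ResNet_vd backbones, not from this file):
    #     backbone = manager.BACKBONES.components_dict["ResNet101_vd"](output_stride=8)
    #     model = CCNet(num_classes=19, backbone=backbone, recurrence=2)
    #     logits = model(paddle.randn([1, 3, 769, 769]))[0]   # [1, 19, 769, 769]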
||||
|
||||
|
||||
class RCCAModule(nn.Layer): |
||||
def __init__(self, |
||||
in_channels, |
||||
out_channels, |
||||
num_classes, |
||||
dropout_prob=0.1, |
||||
recurrence=1): |
||||
super().__init__() |
||||
inter_channels = in_channels // 4 |
||||
self.recurrence = recurrence |
||||
self.conva = layers.ConvBNLeakyReLU( |
||||
in_channels, inter_channels, 3, padding=1, bias_attr=False) |
||||
self.cca = CrissCrossAttention(inter_channels) |
||||
self.convb = layers.ConvBNLeakyReLU( |
||||
inter_channels, inter_channels, 3, padding=1, bias_attr=False) |
||||
self.out = layers.AuxLayer( |
||||
in_channels + inter_channels, |
||||
out_channels, |
||||
num_classes, |
||||
dropout_prob=dropout_prob) |
||||
|
||||
def forward(self, x): |
||||
feat = self.conva(x) |
||||
for i in range(self.recurrence): |
||||
feat = self.cca(feat) |
||||
feat = self.convb(feat) |
||||
output = self.out(paddle.concat([x, feat], axis=1)) |
||||
return output |
||||
|
||||
|
||||
class CrissCrossAttention(nn.Layer): |
||||
def __init__(self, in_channels): |
||||
super().__init__() |
||||
self.q_conv = nn.Conv2D(in_channels, in_channels // 8, kernel_size=1) |
||||
self.k_conv = nn.Conv2D(in_channels, in_channels // 8, kernel_size=1) |
||||
self.v_conv = nn.Conv2D(in_channels, in_channels, kernel_size=1) |
||||
self.softmax = nn.Softmax(axis=3) |
||||
self.gamma = self.create_parameter( |
||||
shape=(1, ), default_initializer=nn.initializer.Constant(0)) |
||||
self.inf_tensor = paddle.full(shape=(1, ), fill_value=float('inf')) |
||||
|
||||
def forward(self, x): |
||||
b, c, h, w = paddle.shape(x) |
||||
proj_q = self.q_conv(x) |
||||
proj_q_h = proj_q.transpose([0, 3, 1, 2]).reshape( |
||||
[b * w, -1, h]).transpose([0, 2, 1]) |
||||
proj_q_w = proj_q.transpose([0, 2, 1, 3]).reshape( |
||||
[b * h, -1, w]).transpose([0, 2, 1]) |
||||
|
||||
proj_k = self.k_conv(x) |
||||
proj_k_h = proj_k.transpose([0, 3, 1, 2]).reshape([b * w, -1, h]) |
||||
proj_k_w = proj_k.transpose([0, 2, 1, 3]).reshape([b * h, -1, w]) |
||||
|
||||
proj_v = self.v_conv(x) |
||||
proj_v_h = proj_v.transpose([0, 3, 1, 2]).reshape([b * w, -1, h]) |
||||
proj_v_w = proj_v.transpose([0, 2, 1, 3]).reshape([b * h, -1, w]) |
||||
|
||||
energy_h = (paddle.bmm(proj_q_h, proj_k_h) + self.Inf(b, h, w)).reshape( |
||||
[b, w, h, h]).transpose([0, 2, 1, 3]) |
||||
energy_w = paddle.bmm(proj_q_w, proj_k_w).reshape([b, h, w, w]) |
||||
concate = self.softmax(paddle.concat([energy_h, energy_w], axis=3)) |
||||
|
||||
attn_h = concate[:, :, :, 0:h].transpose([0, 2, 1, 3]).reshape( |
||||
[b * w, h, h]) |
||||
attn_w = concate[:, :, :, h:h + w].reshape([b * h, w, w]) |
||||
out_h = paddle.bmm(proj_v_h, attn_h.transpose([0, 2, 1])).reshape( |
||||
[b, w, -1, h]).transpose([0, 2, 3, 1]) |
||||
out_w = paddle.bmm(proj_v_w, attn_w.transpose([0, 2, 1])).reshape( |
||||
[b, h, -1, w]).transpose([0, 2, 1, 3]) |
||||
return self.gamma * (out_h + out_w) + x |
||||
|
||||
def Inf(self, B, H, W): |
||||
return -paddle.tile( |
||||
paddle.diag(paddle.tile(self.inf_tensor, [H]), 0).unsqueeze(0), |
||||
[B * W, 1, 1]) |
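
Hedged note on the mask above (reasoning from the code, not stated in the diff): Inf builds a
[B * W, H, H] matrix with -inf on the diagonal, which is added to the column-wise energies so
that the softmax over the concatenated H + W criss-cross positions does not count a pixel's own
location twice (it is still reachable once through the row-wise branch). A quick shape check:

    import paddle
    cca = CrissCrossAttention(in_channels=64)
    y = cca(paddle.randn([2, 64, 33, 33]))   # criss-cross attention preserves shape: [2, 64, 33, 33]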
@ -0,0 +1,403 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
import paddle |
||||
import paddle.nn as nn |
||||
import paddle.nn.functional as F |
||||
|
||||
from paddlers.models.ppseg.cvlibs import manager, param_init |
||||
from paddlers.models.ppseg.models import layers |
||||
from paddlers.models.ppseg.utils import utils |
||||
|
||||
|
||||
class DualResNet(nn.Layer): |
||||
""" |
||||
The DDRNet implementation based on PaddlePaddle. |
||||
|
||||
The original article refers to |
||||
Yuanduo Hong, Huihui Pan, Weichao Sun, et al. "Deep Dual-resolution Networks for Real-time and Accurate Semantic Segmentation of Road Scenes" |
||||
(https://arxiv.org/abs/2101.06085) |
||||
|
||||
Args: |
||||
num_classes (int): The unique number of target classes. |
||||
in_channels (int, optional): Number of input channels. Default: 3. |
||||
block_layers (list, tuple): The numbers of layers in different blocks. Default: [2, 2, 2, 2]. |
||||
planes (int): Base channels in network. Default: 64. |
||||
spp_planes (int): Branch channels for DAPPM. Default: 128. |
||||
head_planes (int): Mid channels of segmentation head. Default: 128. |
||||
        enable_auxiliary_loss (bool, optional): Whether to use the auxiliary head on stage 3. Default: False.
||||
pretrained (str, optional): The path or url of pretrained model. Default: None. |
||||
""" |
||||
|
||||
def __init__(self, |
||||
num_classes, |
||||
in_channels=3, |
||||
block_layers=[2, 2, 2, 2], |
||||
planes=64, |
||||
spp_planes=128, |
||||
head_planes=128, |
||||
enable_auxiliary_loss=False, |
||||
pretrained=None): |
||||
super().__init__() |
||||
highres_planes = planes * 2 |
||||
self.enable_auxiliary_loss = enable_auxiliary_loss |
||||
self.conv1 = nn.Sequential( |
||||
layers.ConvBNReLU( |
||||
in_channels, planes, kernel_size=3, stride=2, padding=1), |
||||
layers.ConvBNReLU( |
||||
planes, planes, kernel_size=3, stride=2, padding=1), ) |
||||
self.relu = nn.ReLU() |
||||
self.layer1 = self._make_layers(BasicBlock, planes, planes, |
||||
block_layers[0]) |
||||
self.layer2 = self._make_layers( |
||||
BasicBlock, planes, planes * 2, block_layers[1], stride=2) |
||||
self.layer3 = self._make_layers( |
||||
BasicBlock, planes * 2, planes * 4, block_layers[2], stride=2) |
||||
self.layer4 = self._make_layers( |
||||
BasicBlock, planes * 4, planes * 8, block_layers[3], stride=2) |
||||
|
||||
self.compression3 = layers.ConvBN( |
||||
planes * 4, highres_planes, kernel_size=1, bias_attr=False) |
||||
|
||||
self.compression4 = layers.ConvBN( |
||||
planes * 8, highres_planes, kernel_size=1, bias_attr=False) |
||||
|
||||
self.down3 = layers.ConvBN( |
||||
highres_planes, |
||||
planes * 4, |
||||
kernel_size=3, |
||||
stride=2, |
||||
bias_attr=False) |
||||
|
||||
self.down4 = nn.Sequential( |
||||
layers.ConvBNReLU( |
||||
highres_planes, |
||||
planes * 4, |
||||
kernel_size=3, |
||||
stride=2, |
||||
padding=1, |
||||
bias_attr=False), |
||||
layers.ConvBN( |
||||
planes * 4, |
||||
planes * 8, |
||||
kernel_size=3, |
||||
stride=2, |
||||
padding=1, |
||||
bias_attr=False)) |
||||
|
||||
self.layer3_ = self._make_layers(BasicBlock, planes * 2, highres_planes, |
||||
2) |
||||
self.layer4_ = self._make_layers(BasicBlock, highres_planes, |
||||
highres_planes, 2) |
||||
self.layer5_ = self._make_layers(Bottleneck, highres_planes, |
||||
highres_planes, 1) |
||||
self.layer5 = self._make_layers( |
||||
Bottleneck, planes * 8, planes * 8, 1, stride=2) |
||||
|
||||
self.spp = DAPPM(planes * 16, spp_planes, planes * 4) |
||||
if self.enable_auxiliary_loss: |
||||
self.aux_head = DDRNetHead(highres_planes, head_planes, num_classes) |
||||
self.head = DDRNetHead(planes * 4, head_planes, num_classes) |
||||
|
||||
self.pretrained = pretrained |
||||
self.init_weight() |
||||
|
||||
def init_weight(self): |
||||
if self.pretrained is not None: |
||||
utils.load_entire_model(self, self.pretrained) |
||||
else: |
||||
for m in self.sublayers(): |
||||
if isinstance(m, nn.Conv2D): |
||||
param_init.kaiming_normal_init(m.weight) |
||||
elif isinstance(m, nn.BatchNorm2D): |
||||
param_init.constant_init(m.weight, value=1) |
||||
param_init.constant_init(m.bias, value=0) |
||||
|
||||
def _make_layers(self, block, inplanes, planes, blocks, stride=1): |
||||
downsample = None |
||||
if stride != 1 or inplanes != planes * block.expansion: |
||||
downsample = nn.Sequential( |
||||
nn.Conv2D( |
||||
inplanes, |
||||
planes * block.expansion, |
||||
kernel_size=1, |
||||
stride=stride, |
||||
bias_attr=False), |
||||
nn.BatchNorm2D(planes * block.expansion), ) |
||||
layers = [] |
||||
layers.append(block(inplanes, planes, stride, downsample)) |
||||
inplanes = planes * block.expansion |
||||
for i in range(1, blocks): |
||||
if i == (blocks - 1): |
||||
layers.append(block(inplanes, planes, stride=1, no_relu=True)) |
||||
else: |
||||
layers.append(block(inplanes, planes, stride=1, no_relu=False)) |
||||
return nn.Sequential(*layers) |
||||
|
||||
def forward(self, x): |
||||
n, c, h, w = paddle.shape(x) |
||||
width_output = w // 8 |
||||
height_output = h // 8 |
||||
|
||||
x = self.conv1(x) |
||||
stage1_out = self.layer1(x) |
||||
stage2_out = self.layer2(self.relu(stage1_out)) |
||||
stage3_out = self.layer3(self.relu(stage2_out)) |
||||
stage3_out_dual = self.layer3_(self.relu(stage2_out)) |
||||
x = stage3_out + self.down3(self.relu(stage3_out_dual)) |
||||
stage3_merge = stage3_out_dual + F.interpolate( |
||||
self.compression3(self.relu(stage3_out)), |
||||
size=[height_output, width_output], |
||||
mode='bilinear') |
||||
|
||||
stage4_out = self.layer4(self.relu(x)) |
||||
stage4_out_dual = self.layer4_(self.relu(stage3_merge)) |
||||
|
||||
x = stage4_out + self.down4(self.relu(stage4_out_dual)) |
||||
stage4_merge = stage4_out_dual + F.interpolate( |
||||
self.compression4(self.relu(stage4_out)), |
||||
size=[height_output, width_output], |
||||
mode='bilinear') |
||||
|
||||
stage5_out_dual = self.layer5_(self.relu(stage4_merge)) |
||||
x = F.interpolate( |
||||
self.spp(self.layer5(self.relu(x))), |
||||
size=[height_output, width_output], |
||||
mode='bilinear') |
||||
|
||||
output = self.head(x + stage5_out_dual) |
||||
logit_list = [] |
||||
logit_list.append(output) |
||||
|
||||
if self.enable_auxiliary_loss: |
||||
aux_out = self.aux_head(stage3_merge) |
||||
logit_list.append(aux_out) |
||||
return [ |
||||
F.interpolate( |
||||
logit, [h, w], mode='bilinear') for logit in logit_list |
||||
] |
||||
|
||||
|
||||
class BasicBlock(nn.Layer): |
||||
expansion = 1 |
||||
|
||||
def __init__(self, |
||||
inplanes, |
||||
planes, |
||||
stride=1, |
||||
downsample=None, |
||||
no_relu=False): |
||||
super().__init__() |
||||
self.conv_bn_relu = layers.ConvBNReLU( |
||||
inplanes, |
||||
planes, |
||||
kernel_size=3, |
||||
stride=stride, |
||||
padding=1, |
||||
bias_attr=False) |
||||
self.relu = nn.ReLU() |
||||
self.conv_bn = layers.ConvBN( |
||||
planes, planes, kernel_size=3, stride=1, padding=1, bias_attr=False) |
||||
self.downsample = downsample |
||||
self.stride = stride |
||||
self.no_relu = no_relu |
||||
|
||||
def forward(self, x): |
||||
residual = x |
||||
out = self.conv_bn_relu(x) |
||||
out = self.conv_bn(out) |
||||
if self.downsample is not None: |
||||
residual = self.downsample(x) |
||||
out += residual |
||||
if self.no_relu: |
||||
return out |
||||
else: |
||||
return self.relu(out) |
||||
|
||||
|
||||
class Bottleneck(nn.Layer): |
||||
expansion = 2 |
||||
|
||||
def __init__(self, |
||||
inplanes, |
||||
planes, |
||||
stride=1, |
||||
downsample=None, |
||||
no_relu=True): |
||||
super().__init__() |
||||
self.conv_bn_relu1 = layers.ConvBNReLU( |
||||
inplanes, planes, kernel_size=1, bias_attr=False) |
||||
self.conv_bn_relu2 = layers.ConvBNReLU( |
||||
planes, |
||||
planes, |
||||
kernel_size=3, |
||||
stride=stride, |
||||
padding=1, |
||||
bias_attr=False) |
||||
self.conv_bn = layers.ConvBN( |
||||
planes, planes * self.expansion, kernel_size=1, bias_attr=False) |
||||
self.relu = nn.ReLU() |
||||
self.downsample = downsample |
||||
self.stride = stride |
||||
self.no_relu = no_relu |
||||
|
||||
def forward(self, x): |
||||
residual = x |
||||
out = self.conv_bn_relu1(x) |
||||
out = self.conv_bn_relu2(out) |
||||
out = self.conv_bn(out) |
||||
if self.downsample is not None: |
||||
residual = self.downsample(x) |
||||
out += residual |
||||
if self.no_relu: |
||||
return out |
||||
else: |
||||
return self.relu(out) |
||||
|
||||
|
||||
class DAPPM(nn.Layer): |
||||
def __init__(self, inplanes, branch_planes, outplanes): |
||||
super().__init__() |
||||
self.scale1 = nn.Sequential( |
||||
nn.AvgPool2D( |
||||
kernel_size=5, stride=2, padding=2), |
||||
layers.SyncBatchNorm(inplanes), |
||||
nn.ReLU(), |
||||
nn.Conv2D( |
||||
inplanes, branch_planes, kernel_size=1, bias_attr=False), ) |
||||
self.scale2 = nn.Sequential( |
||||
nn.AvgPool2D( |
||||
kernel_size=9, stride=4, padding=4), |
||||
layers.SyncBatchNorm(inplanes), |
||||
nn.ReLU(), |
||||
nn.Conv2D( |
||||
inplanes, branch_planes, kernel_size=1, bias_attr=False), ) |
||||
self.scale3 = nn.Sequential( |
||||
nn.AvgPool2D( |
||||
kernel_size=17, stride=8, padding=8), |
||||
layers.SyncBatchNorm(inplanes), |
||||
nn.ReLU(), |
||||
nn.Conv2D( |
||||
inplanes, branch_planes, kernel_size=1, bias_attr=False), ) |
||||
self.scale4 = nn.Sequential( |
||||
nn.AdaptiveAvgPool2D((1, 1)), |
||||
layers.SyncBatchNorm(inplanes), |
||||
nn.ReLU(), |
||||
nn.Conv2D( |
||||
inplanes, branch_planes, kernel_size=1, bias_attr=False), ) |
||||
self.scale0 = nn.Sequential( |
||||
layers.SyncBatchNorm(inplanes), |
||||
nn.ReLU(), |
||||
nn.Conv2D( |
||||
inplanes, branch_planes, kernel_size=1, bias_attr=False), ) |
||||
self.process1 = nn.Sequential( |
||||
layers.SyncBatchNorm(branch_planes), |
||||
nn.ReLU(), |
||||
nn.Conv2D( |
||||
branch_planes, |
||||
branch_planes, |
||||
kernel_size=3, |
||||
padding=1, |
||||
bias_attr=False), ) |
||||
self.process2 = nn.Sequential( |
||||
layers.SyncBatchNorm(branch_planes), |
||||
nn.ReLU(), |
||||
nn.Conv2D( |
||||
branch_planes, |
||||
branch_planes, |
||||
kernel_size=3, |
||||
padding=1, |
||||
bias_attr=False), ) |
||||
self.process3 = nn.Sequential( |
||||
layers.SyncBatchNorm(branch_planes), |
||||
nn.ReLU(), |
||||
nn.Conv2D( |
||||
branch_planes, |
||||
branch_planes, |
||||
kernel_size=3, |
||||
padding=1, |
||||
bias_attr=False), ) |
||||
self.process4 = nn.Sequential( |
||||
layers.SyncBatchNorm(branch_planes), |
||||
nn.ReLU(), |
||||
nn.Conv2D( |
||||
branch_planes, |
||||
branch_planes, |
||||
kernel_size=3, |
||||
padding=1, |
||||
bias_attr=False), ) |
||||
self.compression = nn.Sequential( |
||||
layers.SyncBatchNorm(branch_planes * 5), |
||||
nn.ReLU(), |
||||
nn.Conv2D( |
||||
branch_planes * 5, outplanes, kernel_size=1, bias_attr=False)) |
||||
self.shortcut = nn.Sequential( |
||||
layers.SyncBatchNorm(inplanes), |
||||
nn.ReLU(), |
||||
nn.Conv2D( |
||||
inplanes, outplanes, kernel_size=1, bias_attr=False)) |
||||
|
||||
def forward(self, x): |
||||
n, c, h, w = paddle.shape(x) |
||||
x0 = self.scale0(x) |
||||
x1 = self.process1( |
||||
F.interpolate( |
||||
self.scale1(x), size=[h, w], mode='bilinear') + x0) |
||||
x2 = self.process2( |
||||
F.interpolate( |
||||
self.scale2(x), size=[h, w], mode='bilinear') + x1) |
||||
x3 = self.process3( |
||||
F.interpolate( |
||||
self.scale3(x), size=[h, w], mode='bilinear') + x2) |
||||
x4 = self.process4( |
||||
F.interpolate( |
||||
self.scale4(x), size=[h, w], mode='bilinear') + x3) |
||||
|
||||
out = self.compression(paddle.concat([x0, x1, x2, x3, x4], |
||||
1)) + self.shortcut(x) |
||||
return out |
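
    # Hedged shape sketch: in DDRNet_23 below this module is DAPPM(1024, 128, 256). The five
    # 128-channel branches (pooled at kernel 5/9/17 plus a global average, and the unpooled
    # scale0) are upsampled back to the input resolution, chained through the process convs,
    # concatenated to 5 * 128 channels, compressed to 256, and added to the 1x1 shortcut, so
    # [N, 1024, 8, 16] -> [N, 256, 8, 16].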
||||
|
||||
|
||||
class DDRNetHead(nn.Layer): |
||||
def __init__(self, inplanes, interplanes, outplanes, scale_factor=None): |
||||
super().__init__() |
||||
self.bn1 = nn.BatchNorm2D(inplanes) |
||||
self.relu = nn.ReLU() |
||||
self.conv_bn_relu = layers.ConvBNReLU( |
||||
inplanes, interplanes, kernel_size=3, padding=1, bias_attr=False) |
||||
self.conv = nn.Conv2D( |
||||
interplanes, outplanes, kernel_size=1, padding=0, bias_attr=True) |
||||
|
||||
self.scale_factor = scale_factor |
||||
|
||||
def forward(self, x): |
||||
x = self.bn1(x) |
||||
x = self.relu(x) |
||||
x = self.conv_bn_relu(x) |
||||
out = self.conv(x) |
||||
|
||||
if self.scale_factor is not None: |
||||
out = F.interpolate( |
||||
out, scale_factor=self.scale_factor, mode='bilinear') |
||||
return out |
||||
|
||||
|
||||
@manager.MODELS.add_component |
||||
def DDRNet_23(**kwargs): |
||||
return DualResNet( |
||||
block_layers=[2, 2, 2, 2], |
||||
planes=64, |
||||
spp_planes=128, |
||||
head_planes=128, |
||||
**kwargs) |
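
Hedged usage sketch (the component is registered under MODELS above; num_classes is the only
required argument):

    import paddle
    model = DDRNet_23(num_classes=19)
    logits = model(paddle.randn([1, 3, 1024, 1024]))
    # A list with a single logit map (a second, auxiliary map is appended when
    # enable_auxiliary_loss=True), each upsampled back to the input size:
    # logits[0].shape == [1, 19, 1024, 1024].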
@ -0,0 +1,198 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
import paddle |
||||
import paddle.nn as nn |
||||
import paddle.nn.functional as F |
||||
|
||||
from paddlers.models.ppseg.cvlibs import manager |
||||
from paddlers.models.ppseg.models import layers |
||||
from paddlers.models.ppseg.utils import utils |
||||
|
||||
|
||||
@manager.MODELS.add_component |
||||
class GloRe(nn.Layer): |
||||
""" |
||||
The GloRe implementation based on PaddlePaddle. |
||||
|
||||
The original article refers to: |
||||
Chen, Yunpeng, et al. "Graph-Based Global Reasoning Networks" |
||||
(https://arxiv.org/pdf/1811.12814.pdf) |
||||
|
||||
Args: |
||||
num_classes (int): The unique number of target classes. |
||||
        backbone (paddle.nn.Layer): Backbone network, currently supports ResNet50/101.
        backbone_indices (tuple, optional): Two values in the tuple indicate the indices of output of backbone. Default: (2, 3).
        gru_channels (int, optional): The number of input channels in GloRe Unit. Default: 512.
        gru_num_state (int, optional): The number of states in GloRe Unit. Default: 128.
        gru_num_node (int, optional): The number of nodes in GloRe Unit. Default: 64.
||||
enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss. Default: True. |
||||
align_corners (bool, optional): An argument of F.interpolate. It should be set to False when the feature size is even, |
||||
e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False. |
||||
pretrained (str, optional): The path or url of pretrained model. Default: None. |
||||
""" |
||||
|
||||
def __init__(self, |
||||
num_classes, |
||||
backbone, |
||||
backbone_indices=(2, 3), |
||||
gru_channels=512, |
||||
gru_num_state=128, |
||||
gru_num_node=64, |
||||
enable_auxiliary_loss=True, |
||||
align_corners=False, |
||||
pretrained=None): |
||||
super().__init__() |
||||
|
||||
self.backbone = backbone |
||||
backbone_channels = [ |
||||
backbone.feat_channels[i] for i in backbone_indices |
||||
] |
||||
|
||||
self.head = GloReHead(num_classes, backbone_indices, backbone_channels, |
||||
gru_channels, gru_num_state, gru_num_node, |
||||
enable_auxiliary_loss) |
||||
self.align_corners = align_corners |
||||
self.pretrained = pretrained |
||||
self.init_weight() |
||||
|
||||
def forward(self, x): |
||||
feat_list = self.backbone(x) |
||||
logit_list = self.head(feat_list) |
||||
return [ |
||||
F.interpolate( |
||||
logit, |
||||
paddle.shape(x)[2:], |
||||
mode='bilinear', |
||||
align_corners=self.align_corners) for logit in logit_list |
||||
] |
||||
|
||||
def init_weight(self): |
||||
if self.pretrained is not None: |
||||
utils.load_entire_model(self, self.pretrained) |
||||
|
||||
|
||||
class GloReHead(nn.Layer): |
||||
def __init__(self, |
||||
num_classes, |
||||
backbone_indices, |
||||
backbone_channels, |
||||
gru_channels=512, |
||||
gru_num_state=128, |
||||
gru_num_node=64, |
||||
enable_auxiliary_loss=True): |
||||
super().__init__() |
||||
|
||||
in_channels = backbone_channels[1] |
||||
self.conv_bn_relu = layers.ConvBNReLU( |
||||
in_channels, gru_channels, 1, bias_attr=False) |
||||
self.gru_module = GruModule( |
||||
num_input=gru_channels, |
||||
num_state=gru_num_state, |
||||
num_node=gru_num_node) |
||||
|
||||
self.dropout = nn.Dropout(0.1) |
||||
self.classifier = nn.Conv2D(512, num_classes, kernel_size=1) |
||||
self.auxlayer = layers.AuxLayer( |
||||
in_channels=backbone_channels[0], |
||||
inter_channels=backbone_channels[0] // 4, |
||||
out_channels=num_classes) |
||||
|
||||
self.backbone_indices = backbone_indices |
||||
self.enable_auxiliary_loss = enable_auxiliary_loss |
||||
|
||||
def forward(self, feat_list): |
||||
|
||||
logit_list = [] |
||||
x = feat_list[self.backbone_indices[1]] |
||||
|
||||
feature = self.conv_bn_relu(x) |
||||
gru_output = self.gru_module(feature) |
||||
output = self.dropout(gru_output) |
||||
logit = self.classifier(output) |
||||
logit_list.append(logit) |
||||
|
||||
if self.enable_auxiliary_loss: |
||||
low_level_feat = feat_list[self.backbone_indices[0]] |
||||
auxiliary_logit = self.auxlayer(low_level_feat) |
||||
logit_list.append(auxiliary_logit) |
||||
|
||||
return logit_list |
||||
|
||||
|
||||
class GCN(nn.Layer): |
||||
def __init__(self, num_state, num_node, bias=False): |
||||
super(GCN, self).__init__() |
||||
self.conv1 = nn.Conv1D(num_node, num_node, kernel_size=1) |
||||
self.relu = nn.ReLU() |
||||
self.conv2 = nn.Conv1D( |
||||
num_state, num_state, kernel_size=1, bias_attr=bias) |
||||
|
||||
def forward(self, x): |
||||
h = self.conv1(paddle.transpose(x, perm=(0, 2, 1))) |
||||
h = paddle.transpose(h, perm=(0, 2, 1)) |
||||
h = h + x |
||||
h = self.relu(self.conv2(h)) |
||||
return h |
||||
|
||||
|
||||
class GruModule(nn.Layer): |
||||
def __init__(self, |
||||
num_input=512, |
||||
num_state=128, |
||||
num_node=64, |
||||
normalize=False): |
||||
super(GruModule, self).__init__() |
||||
self.normalize = normalize |
||||
self.num_state = num_state |
||||
self.num_node = num_node |
||||
self.reduction_dim = nn.Conv2D(num_input, num_state, kernel_size=1) |
||||
self.projection_mat = nn.Conv2D(num_input, num_node, kernel_size=1) |
||||
self.gcn = GCN(num_state=self.num_state, num_node=self.num_node) |
||||
self.extend_dim = nn.Conv2D( |
||||
self.num_state, num_input, kernel_size=1, bias_attr=False) |
||||
self.extend_bn = layers.SyncBatchNorm(num_input, epsilon=1e-4) |
||||
|
||||
def forward(self, input): |
||||
n, c, h, w = input.shape |
||||
# B, C, H, W |
||||
reduction_dim = self.reduction_dim(input) |
||||
# B, N, H, W |
||||
mat_B = self.projection_mat(input) |
||||
# B, C, H*W |
||||
reshaped_reduction = paddle.reshape( |
||||
reduction_dim, shape=[n, self.num_state, h * w]) |
||||
# B, N, H*W |
||||
reshaped_B = paddle.reshape(mat_B, shape=[n, self.num_node, h * w]) |
||||
# B, N, H*W |
||||
reproject = reshaped_B |
||||
# B, C, N |
||||
node_state_V = paddle.matmul( |
||||
reshaped_reduction, paddle.transpose( |
||||
reshaped_B, perm=[0, 2, 1])) |
||||
|
||||
if self.normalize: |
||||
node_state_V = node_state_V * (1. / reshaped_reduction.shape[2]) |
||||
|
||||
# B, C, N |
||||
gcn_out = self.gcn(node_state_V) |
||||
# B, C, H*W |
||||
Y = paddle.matmul(gcn_out, reproject) |
||||
# B, C, H, W |
||||
Y = paddle.reshape(Y, shape=[n, self.num_state, h, w]) |
||||
Y_extend = self.extend_dim(Y) |
||||
Y_extend = self.extend_bn(Y_extend) |
||||
|
||||
out = input + Y_extend |
||||
return out |
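
    # Hedged shape walk-through for the defaults (num_input=512, num_state=128, num_node=64)
    # on an input of [N, 512, 32, 32]: reduction_dim -> [N, 128, 1024] after reshaping,
    # projection_mat B -> [N, 64, 1024], node features V = X_reduced @ B^T -> [N, 128, 64],
    # the GCN keeps [N, 128, 64], reprojection V' @ B -> [N, 128, 1024] -> [N, 128, 32, 32],
    # then extend_dim/extend_bn restore 512 channels before the residual add with the input.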
@ -0,0 +1,285 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
import paddle |
||||
import paddle.nn as nn |
||||
import paddle.nn.functional as F |
||||
from paddle import ParamAttr |
||||
from paddle.nn.initializer import Constant |
||||
from paddlers.models.ppseg.models import layers |
||||
from paddlers.models.ppseg.models.layers import tensor_fusion_helper as helper |
||||
|
||||
|
||||
class UAFM(nn.Layer): |
||||
""" |
||||
The base of Unified Attention Fusion Module. |
||||
Args: |
||||
x_ch (int): The channel of x tensor, which is the low level feature. |
||||
y_ch (int): The channel of y tensor, which is the high level feature. |
||||
out_ch (int): The channel of output tensor. |
||||
ksize (int, optional): The kernel size of the conv for x tensor. Default: 3. |
||||
        resize_mode (str, optional): The resize mode for upsampling the y tensor. Default: bilinear.
||||
""" |
||||
|
||||
def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'): |
||||
super().__init__() |
||||
|
||||
self.conv_x = layers.ConvBNReLU( |
||||
x_ch, y_ch, kernel_size=ksize, padding=ksize // 2, bias_attr=False) |
||||
self.conv_out = layers.ConvBNReLU( |
||||
y_ch, out_ch, kernel_size=3, padding=1, bias_attr=False) |
||||
self.resize_mode = resize_mode |
||||
|
||||
def check(self, x, y): |
||||
assert x.ndim == 4 and y.ndim == 4 |
||||
x_h, x_w = x.shape[2:] |
||||
y_h, y_w = y.shape[2:] |
||||
assert x_h >= y_h and x_w >= y_w |
||||
|
||||
def prepare(self, x, y): |
||||
x = self.prepare_x(x, y) |
||||
y = self.prepare_y(x, y) |
||||
return x, y |
||||
|
||||
def prepare_x(self, x, y): |
||||
x = self.conv_x(x) |
||||
return x |
||||
|
||||
def prepare_y(self, x, y): |
||||
y_up = F.interpolate(y, paddle.shape(x)[2:], mode=self.resize_mode) |
||||
return y_up |
||||
|
||||
def fuse(self, x, y): |
||||
out = x + y |
||||
out = self.conv_out(out) |
||||
return out |
||||
|
||||
def forward(self, x, y): |
||||
""" |
||||
Args: |
||||
x (Tensor): The low level feature. |
||||
y (Tensor): The high level feature. |
||||
""" |
||||
self.check(x, y) |
||||
x, y = self.prepare(x, y) |
||||
out = self.fuse(x, y) |
||||
return out |
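
    # Hedged usage sketch: fuse a low-level [2, 64, 64, 64] map with a high-level
    # [2, 128, 32, 32] map; x is first projected to y_ch channels, y is upsampled to x's
    # spatial size, and the sum is refined by conv_out:
    #     uafm = UAFM(x_ch=64, y_ch=128, out_ch=128, ksize=3)
    #     out = uafm(paddle.randn([2, 64, 64, 64]), paddle.randn([2, 128, 32, 32]))
    #     # out.shape == [2, 128, 64, 64]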
||||
|
||||
|
||||
class UAFM_ChAtten(UAFM): |
||||
""" |
||||
The UAFM with channel attention, which uses mean and max values. |
||||
Args: |
||||
x_ch (int): The channel of x tensor, which is the low level feature. |
||||
y_ch (int): The channel of y tensor, which is the high level feature. |
||||
out_ch (int): The channel of output tensor. |
||||
ksize (int, optional): The kernel size of the conv for x tensor. Default: 3. |
||||
        resize_mode (str, optional): The resize mode for upsampling the y tensor. Default: bilinear.
||||
""" |
||||
|
||||
def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'): |
||||
super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode) |
||||
|
||||
self.conv_xy_atten = nn.Sequential( |
||||
layers.ConvBNAct( |
||||
4 * y_ch, |
||||
y_ch // 2, |
||||
kernel_size=1, |
||||
bias_attr=False, |
||||
act_type="leakyrelu"), |
||||
layers.ConvBN( |
||||
y_ch // 2, y_ch, kernel_size=1, bias_attr=False)) |
||||
|
||||
def fuse(self, x, y): |
||||
""" |
||||
Args: |
||||
x (Tensor): The low level feature. |
||||
y (Tensor): The high level feature. |
||||
""" |
||||
atten = helper.avg_max_reduce_hw([x, y], self.training) |
||||
atten = F.sigmoid(self.conv_xy_atten(atten)) |
||||
|
||||
out = x * atten + y * (1 - atten) |
||||
out = self.conv_out(out) |
||||
return out |
||||
|
||||
|
||||
class UAFM_ChAtten_S(UAFM): |
||||
""" |
||||
The UAFM with channel attention, which uses mean values. |
||||
Args: |
||||
x_ch (int): The channel of x tensor, which is the low level feature. |
||||
y_ch (int): The channel of y tensor, which is the high level feature. |
||||
out_ch (int): The channel of output tensor. |
||||
ksize (int, optional): The kernel size of the conv for x tensor. Default: 3. |
||||
        resize_mode (str, optional): The resize mode for upsampling the y tensor. Default: bilinear.
||||
""" |
||||
|
||||
def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'): |
||||
super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode) |
||||
|
||||
self.conv_xy_atten = nn.Sequential( |
||||
layers.ConvBNAct( |
||||
2 * y_ch, |
||||
y_ch // 2, |
||||
kernel_size=1, |
||||
bias_attr=False, |
||||
act_type="leakyrelu"), |
||||
layers.ConvBN( |
||||
y_ch // 2, y_ch, kernel_size=1, bias_attr=False)) |
||||
|
||||
def fuse(self, x, y): |
||||
""" |
||||
Args: |
||||
x (Tensor): The low level feature. |
||||
y (Tensor): The high level feature. |
||||
""" |
||||
atten = helper.avg_reduce_hw([x, y]) |
||||
atten = F.sigmoid(self.conv_xy_atten(atten)) |
||||
|
||||
out = x * atten + y * (1 - atten) |
||||
out = self.conv_out(out) |
||||
return out |
||||
|
||||
|
||||
class UAFM_SpAtten(UAFM): |
||||
""" |
||||
The UAFM with spatial attention, which uses mean and max values. |
||||
Args: |
||||
x_ch (int): The channel of x tensor, which is the low level feature. |
||||
y_ch (int): The channel of y tensor, which is the high level feature. |
||||
out_ch (int): The channel of output tensor. |
||||
ksize (int, optional): The kernel size of the conv for x tensor. Default: 3. |
||||
        resize_mode (str, optional): The resize mode for upsampling the y tensor. Default: bilinear.
||||
""" |
||||
|
||||
def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'): |
||||
super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode) |
||||
|
||||
self.conv_xy_atten = nn.Sequential( |
||||
layers.ConvBNReLU( |
||||
4, 2, kernel_size=3, padding=1, bias_attr=False), |
||||
layers.ConvBN( |
||||
2, 1, kernel_size=3, padding=1, bias_attr=False)) |
||||
self._scale = self.create_parameter( |
||||
shape=[1], |
||||
attr=ParamAttr(initializer=Constant(value=1.)), |
||||
dtype="float32") |
||||
self._scale.stop_gradient = True |
||||
|
||||
def fuse(self, x, y): |
||||
""" |
||||
Args: |
||||
x (Tensor): The low level feature. |
||||
y (Tensor): The high level feature. |
||||
""" |
||||
atten = helper.avg_max_reduce_channel([x, y]) |
||||
atten = F.sigmoid(self.conv_xy_atten(atten)) |
||||
|
||||
out = x * atten + y * (self._scale - atten) |
||||
out = self.conv_out(out) |
||||
return out |
||||
|
||||
|
||||
class UAFM_SpAtten_S(UAFM): |
||||
""" |
||||
The UAFM with spatial attention, which uses mean values. |
||||
Args: |
||||
x_ch (int): The channel of x tensor, which is the low level feature. |
||||
y_ch (int): The channel of y tensor, which is the high level feature. |
||||
out_ch (int): The channel of output tensor. |
||||
ksize (int, optional): The kernel size of the conv for x tensor. Default: 3. |
||||
resize_mode (str, optional): The resize model in unsampling y tensor. Default: bilinear. |
||||
""" |
||||
|
||||
def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'): |
||||
super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode) |
||||
|
||||
self.conv_xy_atten = nn.Sequential( |
||||
layers.ConvBNReLU( |
||||
2, 2, kernel_size=3, padding=1, bias_attr=False), |
||||
layers.ConvBN( |
||||
2, 1, kernel_size=3, padding=1, bias_attr=False)) |
||||
|
||||
def fuse(self, x, y): |
||||
""" |
||||
Args: |
||||
x (Tensor): The low level feature. |
||||
y (Tensor): The high level feature. |
||||
""" |
||||
atten = helper.avg_reduce_channel([x, y]) |
||||
atten = F.sigmoid(self.conv_xy_atten(atten)) |
||||
|
||||
out = x * atten + y * (1 - atten) |
||||
out = self.conv_out(out) |
||||
return out |
||||
|
||||
|
||||
class UAFMMobile(UAFM): |
||||
""" |
||||
Unified Attention Fusion Module for mobile. |
||||
Args: |
||||
x_ch (int): The channel of x tensor, which is the low level feature. |
||||
y_ch (int): The channel of y tensor, which is the high level feature. |
||||
out_ch (int): The channel of output tensor. |
||||
ksize (int, optional): The kernel size of the conv for x tensor. Default: 3. |
||||
        resize_mode (str, optional): The resize mode for upsampling the y tensor. Default: bilinear.
||||
""" |
||||
|
||||
def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'): |
||||
super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode) |
||||
|
||||
self.conv_x = layers.SeparableConvBNReLU( |
||||
x_ch, y_ch, kernel_size=ksize, padding=ksize // 2, bias_attr=False) |
||||
self.conv_out = layers.SeparableConvBNReLU( |
||||
y_ch, out_ch, kernel_size=3, padding=1, bias_attr=False) |
||||
|
||||
|
||||
class UAFMMobile_SpAtten(UAFM): |
||||
""" |
||||
Unified Attention Fusion Module with spatial attention for mobile. |
||||
Args: |
||||
x_ch (int): The channel of x tensor, which is the low level feature. |
||||
y_ch (int): The channel of y tensor, which is the high level feature. |
||||
out_ch (int): The channel of output tensor. |
||||
ksize (int, optional): The kernel size of the conv for x tensor. Default: 3. |
||||
        resize_mode (str, optional): The resize mode for upsampling the y tensor. Default: bilinear.
||||
""" |
||||
|
||||
def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'): |
||||
super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode) |
||||
|
||||
self.conv_x = layers.SeparableConvBNReLU( |
||||
x_ch, y_ch, kernel_size=ksize, padding=ksize // 2, bias_attr=False) |
||||
self.conv_out = layers.SeparableConvBNReLU( |
||||
y_ch, out_ch, kernel_size=3, padding=1, bias_attr=False) |
||||
|
||||
self.conv_xy_atten = nn.Sequential( |
||||
layers.ConvBNReLU( |
||||
4, 2, kernel_size=3, padding=1, bias_attr=False), |
||||
layers.ConvBN( |
||||
2, 1, kernel_size=3, padding=1, bias_attr=False)) |
||||
|
||||
def fuse(self, x, y): |
||||
""" |
||||
Args: |
||||
x (Tensor): The low level feature. |
||||
y (Tensor): The high level feature. |
||||
""" |
||||
atten = helper.avg_max_reduce_channel([x, y]) |
||||
atten = F.sigmoid(self.conv_xy_atten(atten)) |
||||
|
||||
out = x * atten + y * (1 - atten) |
||||
out = self.conv_out(out) |
||||
return out |
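# --- Illustrative sketch (not part of the upstream uafm.py) ---------------
# All of the fuse() methods above reduce to the same idea: a sigmoid
# attention map alpha gates a per-pixel convex combination of the low-level
# feature x and the (already upsampled) high-level feature y,
# out = x * alpha + y * (1 - alpha). The shapes below are arbitrary
# assumptions chosen only to show the arithmetic.
def _uafm_fusion_sketch():
    import paddle  # local imports keep the sketch self-contained
    import paddle.nn.functional as F

    x = paddle.rand([1, 64, 32, 32])                # low-level feature
    y = paddle.rand([1, 64, 32, 32])                # high-level feature, resized to x's size
    alpha = F.sigmoid(paddle.rand([1, 1, 32, 32]))  # stands in for sigmoid(conv_xy_atten(...))
    out = x * alpha + y * (1 - alpha)               # per-pixel convex combination
    return out.shape                                # [1, 64, 32, 32]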
@ -0,0 +1,133 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
import paddle |
||||
import paddle.nn as nn |
||||
import paddle.nn.functional as F |
||||
|
||||
|
||||
def avg_reduce_hw(x): |
||||
# Reduce hw by avg |
||||
# Return cat([avg_pool_0, avg_pool_1, ...]) |
||||
if not isinstance(x, (list, tuple)): |
||||
return F.adaptive_avg_pool2d(x, 1) |
||||
elif len(x) == 1: |
||||
return F.adaptive_avg_pool2d(x[0], 1) |
||||
else: |
||||
res = [] |
||||
for xi in x: |
||||
res.append(F.adaptive_avg_pool2d(xi, 1)) |
||||
return paddle.concat(res, axis=1) |
||||
|
||||
|
||||
def avg_max_reduce_hw_helper(x, is_training, use_concat=True): |
||||
assert not isinstance(x, (list, tuple)) |
||||
avg_pool = F.adaptive_avg_pool2d(x, 1) |
||||
# TODO(pjc): when axis=[2, 3], the paddle.max API has a bug in training. |
||||
if is_training: |
||||
max_pool = F.adaptive_max_pool2d(x, 1) |
||||
else: |
||||
max_pool = paddle.max(x, axis=[2, 3], keepdim=True) |
||||
|
||||
if use_concat: |
||||
res = paddle.concat([avg_pool, max_pool], axis=1) |
||||
else: |
||||
res = [avg_pool, max_pool] |
||||
return res |
||||
|
||||
|
||||
def avg_max_reduce_hw(x, is_training): |
||||
# Reduce hw by avg and max |
||||
# Return cat([avg_pool_0, avg_pool_1, ..., max_pool_0, max_pool_1, ...]) |
||||
if not isinstance(x, (list, tuple)): |
||||
return avg_max_reduce_hw_helper(x, is_training) |
||||
elif len(x) == 1: |
||||
return avg_max_reduce_hw_helper(x[0], is_training) |
||||
else: |
||||
res_avg = [] |
||||
res_max = [] |
||||
for xi in x: |
||||
avg, max = avg_max_reduce_hw_helper(xi, is_training, False) |
||||
res_avg.append(avg) |
||||
res_max.append(max) |
||||
res = res_avg + res_max |
||||
return paddle.concat(res, axis=1) |
||||
|
||||
|
||||
def avg_reduce_channel(x): |
||||
# Reduce channel by avg |
||||
# Return cat([avg_ch_0, avg_ch_1, ...]) |
||||
if not isinstance(x, (list, tuple)): |
||||
return paddle.mean(x, axis=1, keepdim=True) |
||||
elif len(x) == 1: |
||||
return paddle.mean(x[0], axis=1, keepdim=True) |
||||
else: |
||||
res = [] |
||||
for xi in x: |
||||
res.append(paddle.mean(xi, axis=1, keepdim=True)) |
||||
return paddle.concat(res, axis=1) |
||||
|
||||
|
||||
def max_reduce_channel(x): |
||||
# Reduce channel by max |
||||
# Return cat([max_ch_0, max_ch_1, ...]) |
||||
if not isinstance(x, (list, tuple)): |
||||
return paddle.max(x, axis=1, keepdim=True) |
||||
elif len(x) == 1: |
||||
return paddle.max(x[0], axis=1, keepdim=True) |
||||
else: |
||||
res = [] |
||||
for xi in x: |
||||
res.append(paddle.max(xi, axis=1, keepdim=True)) |
||||
return paddle.concat(res, axis=1) |
||||
|
||||
|
||||
def avg_max_reduce_channel_helper(x, use_concat=True): |
||||
# Reduce channel by avg and max; only supports a single input |
||||
assert not isinstance(x, (list, tuple)) |
||||
mean_value = paddle.mean(x, axis=1, keepdim=True) |
||||
max_value = paddle.max(x, axis=1, keepdim=True) |
||||
|
||||
if use_concat: |
||||
res = paddle.concat([mean_value, max_value], axis=1) |
||||
else: |
||||
res = [mean_value, max_value] |
||||
return res |
||||
|
||||
|
||||
def avg_max_reduce_channel(x): |
||||
# Reduce channel by avg and max |
||||
# Return cat([avg_ch_0, max_ch_0, avg_ch_1, max_ch_1, ...]) |
||||
if not isinstance(x, (list, tuple)): |
||||
return avg_max_reduce_channel_helper(x) |
||||
elif len(x) == 1: |
||||
return avg_max_reduce_channel_helper(x[0]) |
||||
else: |
||||
res = [] |
||||
for xi in x: |
||||
res.extend(avg_max_reduce_channel_helper(xi, False)) |
||||
return paddle.concat(res, axis=1) |
||||
|
||||
|
||||
def cat_avg_max_reduce_channel(x): |
||||
# Reduce channel by cat+avg+max |
||||
assert isinstance(x, (list, tuple)) and len(x) > 1 |
||||
|
||||
x = paddle.concat(x, axis=1) |
||||
|
||||
mean_value = paddle.mean(x, axis=1, keepdim=True) |
||||
max_value = paddle.max(x, axis=1, keepdim=True) |
||||
res = paddle.concat([mean_value, max_value], axis=1) |
||||
|
||||
return res |
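# --- Illustrative sketch (not part of the upstream helper module) ---------
# The helpers above reduce either the spatial dims (HW) or the channel dim
# and concatenate the results along the channel axis; these reduced maps are
# what the UAFM attention branches consume. The tensor shapes are arbitrary
# assumptions used only to show the resulting layouts.
def _reduce_helpers_sketch():
    x = paddle.rand([2, 16, 8, 8])
    y = paddle.rand([2, 32, 8, 8])
    a = avg_reduce_hw([x, y])           # [2, 48, 1, 1]: one average per channel, concatenated
    b = avg_reduce_channel([x, y])      # [2, 2, 8, 8]: one channel-mean map per input
    c = avg_max_reduce_channel([x, y])  # [2, 4, 8, 8]: a (mean, max) map pair per input
    return a.shape, b.shape, c.shape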
@ -0,0 +1,162 @@ |
||||
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from functools import partial |
||||
|
||||
import paddle |
||||
import paddle.nn as nn |
||||
import paddle.nn.functional as F |
||||
|
||||
from paddlers.models.ppseg import utils |
||||
from paddlers.models.ppseg.models import layers |
||||
from paddlers.models.ppseg.cvlibs import manager |
||||
|
||||
|
||||
@manager.MODELS.add_component |
||||
class LRASPP(nn.Layer): |
||||
""" |
||||
Semantic segmentation model with a light R-ASPP head. |
||||
|
||||
The original article refers to |
||||
Howard, Andrew, et al. "Searching for mobilenetv3." |
||||
(https://arxiv.org/pdf/1909.11065.pdf) |
||||
|
||||
Args: |
||||
num_classes (int): The number of target classes. |
||||
backbone (nn.Layer): Backbone network, such as stdc1net and resnet18. The backbone must |
have feat_channels, of which the length is 5. |
||||
backbone_indices (List(int), optional): The values indicate the indices of backbone output |
||||
used as the input of the LR-ASPP head. |
||||
Default: [0, 1, 3]. |
||||
lraspp_head_inter_chs (List(int), optional): The intermediate channels of LR-ASPP head. |
||||
Default: [32, 64]. |
||||
lraspp_head_out_ch (int, optional): The output channels of each ASPP branch in the LR-ASPP head. |
||||
Default: 128 |
||||
resize_mode (str, optional): The resize mode for the upsampling operation in the LR-ASPP head. |
||||
Default: bilinear. |
||||
use_gap (bool, optional): If true, use global average pooling in the LR-ASPP head; otherwise, use |
||||
a 49x49 kernel for average pooling. |
||||
Default: True. |
||||
pretrained (str, optional): The path or url of pretrained model. Default: None. |
||||
""" |
||||
|
||||
def __init__(self, |
||||
num_classes, |
||||
backbone, |
||||
backbone_indices=[0, 1, 3], |
||||
lraspp_head_inter_chs=[32, 64], |
||||
lraspp_head_out_ch=128, |
||||
resize_mode='bilinear', |
||||
use_gap=True, |
||||
pretrained=None): |
||||
super().__init__() |
||||
|
||||
# backbone |
||||
assert hasattr(backbone, 'feat_channels'), \ |
||||
"The backbone should has feat_channels." |
||||
assert len(backbone.feat_channels) >= len(backbone_indices), \ |
||||
f"The length of input backbone_indices ({len(backbone_indices)}) should not be" \ |
||||
f"greater than the length of feat_channels ({len(backbone.feat_channels)})." |
||||
assert len(backbone.feat_channels) > max(backbone_indices), \ |
||||
f"The max value ({max(backbone_indices)}) of backbone_indices should be " \ |
||||
f"less than the length of feat_channels ({len(backbone.feat_channels)})." |
||||
self.backbone = backbone |
||||
|
||||
assert len(backbone_indices) >= 1, "The length of backbone_indices " \ |
"should not be less than 1." |
||||
|
||||
# head |
||||
assert len(backbone_indices) == len( |
||||
lraspp_head_inter_chs |
||||
) + 1, "The length of backbone_indices should be 1 greater than lraspp_head_inter_chs." |
||||
self.backbone_indices = backbone_indices |
||||
|
||||
self.lraspp_head = LRASPPHead(backbone_indices, backbone.feat_channels, |
||||
lraspp_head_inter_chs, lraspp_head_out_ch, |
||||
num_classes, resize_mode, use_gap) |
||||
|
||||
# pretrained |
||||
self.pretrained = pretrained |
||||
self.init_weight() |
||||
|
||||
def forward(self, x): |
||||
x_hw = paddle.shape(x)[2:] |
||||
|
||||
feats_backbone = self.backbone(x) |
||||
assert len(feats_backbone) >= len(self.backbone_indices), \ |
||||
f"The nums of backbone feats ({len(feats_backbone)}) should be greater or " \ |
||||
f"equal than the nums of backbone_indices ({len(self.backbone_indices)})" |
||||
|
||||
y = self.lraspp_head(feats_backbone) |
||||
y = F.interpolate(y, x_hw, mode='bilinear', align_corners=False) |
||||
logit_list = [y] |
||||
|
||||
return logit_list |
||||
|
||||
def init_weight(self): |
||||
if self.pretrained is not None: |
||||
utils.load_entire_model(self, self.pretrained) |
||||
|
||||
|
||||
class LRASPPHead(nn.Layer): |
||||
def __init__(self, |
||||
indices, |
||||
in_chs, |
||||
mid_chs, |
||||
out_ch, |
||||
n_classes, |
||||
resize_mode, |
||||
use_gap, |
||||
align_corners=False): |
||||
super().__init__() |
||||
|
||||
self.indices = indices[-2::-1] |
||||
self.in_chs = [in_chs[i] for i in indices[::-1]] |
||||
self.mid_chs = mid_chs[::-1] |
||||
self.convs = nn.LayerList() |
||||
self.conv_ups = nn.LayerList() |
||||
for in_ch, mid_ch in zip(self.in_chs[1:], self.mid_chs): |
||||
self.convs.append( |
||||
nn.Conv2D( |
||||
in_ch, mid_ch, kernel_size=1, bias_attr=False)) |
||||
self.conv_ups.append(layers.ConvBNReLU(out_ch + mid_ch, out_ch, 1)) |
||||
self.conv_w = nn.Sequential( |
||||
nn.AvgPool2D( |
||||
kernel_size=(49, 49), stride=(16, 20)) |
||||
if not use_gap else nn.AdaptiveAvgPool2D(1), |
||||
nn.Conv2D( |
||||
self.in_chs[0], out_ch, 1, bias_attr=False), |
||||
nn.Sigmoid()) |
||||
self.conv_v = layers.ConvBNReLU(self.in_chs[0], out_ch, 1) |
||||
self.conv_t = nn.Conv2D(out_ch, out_ch, kernel_size=1, bias_attr=False) |
||||
self.conv_out = nn.Conv2D( |
||||
out_ch, n_classes, kernel_size=1, bias_attr=False) |
||||
|
||||
self.interp = partial( |
||||
F.interpolate, mode=resize_mode, align_corners=align_corners) |
||||
|
||||
def forward(self, in_feat_list): |
||||
x = in_feat_list[-1] |
||||
|
||||
x = self.conv_v(x) * self.interp(self.conv_w(x), paddle.shape(x)[2:]) |
||||
y = self.conv_t(x) |
||||
|
||||
for idx, conv, conv_up in zip(self.indices, self.convs, self.conv_ups): |
||||
feat = in_feat_list[idx] |
||||
y = self.interp(y, paddle.shape(feat)[2:]) |
||||
y = paddle.concat([y, conv(feat)], axis=1) |
||||
y = conv_up(y) |
||||
|
||||
y = self.conv_out(y) |
||||
return y |
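# --- Illustrative usage sketch (not part of the upstream lraspp.py) -------
# Minimal example of the backbone contract that the asserts in
# LRASPP.__init__ check: the backbone exposes `feat_channels` and returns one
# feature map per entry. `_ToyBackbone`, the channel list and the input size
# are assumptions, not a real ppseg backbone.
class _ToyBackbone(nn.Layer):
    feat_channels = [16, 24, 40, 112]

    def __init__(self):
        super().__init__()
        self.convs = nn.LayerList([
            nn.Conv2D(3 if i == 0 else self.feat_channels[i - 1], ch, 3, stride=2, padding=1)
            for i, ch in enumerate(self.feat_channels)
        ])

    def forward(self, x):
        feats = []
        for conv in self.convs:  # strides of 2 give 1/2, 1/4, 1/8, 1/16 features
            x = conv(x)
            feats.append(x)
        return feats


def _lraspp_sketch():
    model = LRASPP(num_classes=19, backbone=_ToyBackbone(), backbone_indices=[0, 1, 3])
    model.eval()
    logits = model(paddle.rand([1, 3, 256, 256]))
    return logits[0].shape  # [1, 19, 256, 256], restored to the input resolution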
@ -0,0 +1,289 @@ |
||||
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
import paddle |
||||
import paddle.nn as nn |
||||
import paddle.nn.functional as F |
||||
|
||||
from paddlers.models.ppseg import utils |
||||
from paddlers.models.ppseg.models import layers |
||||
from paddlers.models.ppseg.cvlibs import manager |
||||
|
||||
|
||||
@manager.MODELS.add_component |
||||
class MobileSeg(nn.Layer): |
||||
""" |
||||
The semantic segmentation models for mobile devices. |
||||
|
||||
Args: |
||||
num_classes (int): The number of target classes. |
||||
backbone (nn.Layer): Backbone network, such as stdc1net and resnet18. The backbone must |
have feat_channels, of which the length is 5. |
backbone_indices (List(int), optional): The values indicate the indices of output of backbone. |
Default: [1, 2, 3]. |
cm_bin_sizes (List(int), optional): The bin size of context module. Default: [1, 2]. |
cm_out_ch (int, optional): The output channel of the last context module. Default: 64. |
arm_type (str, optional): The type of attention refinement module. Default: UAFMMobile. |
arm_out_chs (List(int), optional): The out channels of each arm module. Default: [32, 48, 64]. |
seg_head_inter_chs (List(int), optional): The intermediate channels of segmentation head. |
Default: [32, 32, 32]. |
||||
resize_mode (str, optional): The resize mode for the upsampling operation in decoder. |
||||
Default: bilinear. |
||||
use_last_fuse (bool, optional): Whether use fusion in the last. Default: False. |
||||
pretrained (str, optional): The path or url of pretrained model. Default: None. |
||||
""" |
||||
|
||||
def __init__(self, |
||||
num_classes, |
||||
backbone, |
||||
backbone_indices=[1, 2, 3], |
||||
cm_bin_sizes=[1, 2], |
||||
cm_out_ch=64, |
||||
arm_type='UAFMMobile', |
||||
arm_out_chs=[32, 48, 64], |
||||
seg_head_inter_chs=[32, 32, 32], |
||||
resize_mode='bilinear', |
||||
use_last_fuse=False, |
||||
pretrained=None): |
||||
super().__init__() |
||||
|
||||
# backbone |
||||
assert hasattr(backbone, 'feat_channels'), \ |
||||
"The backbone should has feat_channels." |
||||
assert len(backbone.feat_channels) >= len(backbone_indices), \ |
||||
f"The length of input backbone_indices ({len(backbone_indices)}) should not be" \ |
||||
f"greater than the length of feat_channels ({len(backbone.feat_channels)})." |
||||
assert len(backbone.feat_channels) > max(backbone_indices), \ |
||||
f"The max value ({max(backbone_indices)}) of backbone_indices should be " \ |
||||
f"less than the length of feat_channels ({len(backbone.feat_channels)})." |
||||
self.backbone = backbone |
||||
|
||||
assert len(backbone_indices) >= 1, "The length of backbone_indices " \ |
"should not be less than 1." |
||||
self.backbone_indices = backbone_indices # [..., x16_id, x32_id] |
||||
backbone_out_chs = [backbone.feat_channels[i] for i in backbone_indices] |
||||
|
||||
# head |
||||
if len(arm_out_chs) == 1: |
||||
arm_out_chs = arm_out_chs * len(backbone_indices) |
||||
assert len(arm_out_chs) == len(backbone_indices), "The length of " \ |
||||
"arm_out_chs and backbone_indices should be equal" |
||||
|
||||
self.ppseg_head = MobileSegHead(backbone_out_chs, arm_out_chs, |
||||
cm_bin_sizes, cm_out_ch, arm_type, |
||||
resize_mode, use_last_fuse) |
||||
|
||||
if len(seg_head_inter_chs) == 1: |
||||
seg_head_inter_chs = seg_head_inter_chs * len(backbone_indices) |
||||
assert len(seg_head_inter_chs) == len(backbone_indices), "The length of " \ |
||||
"seg_head_inter_chs and backbone_indices should be equal" |
||||
self.seg_heads = nn.LayerList() # [..., head_16, head32] |
||||
for in_ch, mid_ch in zip(arm_out_chs, seg_head_inter_chs): |
||||
self.seg_heads.append(SegHead(in_ch, mid_ch, num_classes)) |
||||
|
||||
# pretrained |
||||
self.pretrained = pretrained |
||||
self.init_weight() |
||||
|
||||
def forward(self, x): |
||||
x_hw = paddle.shape(x)[2:] |
||||
|
||||
feats_backbone = self.backbone(x) # [x4, x8, x16, x32] |
||||
assert len(feats_backbone) >= len(self.backbone_indices), \ |
||||
f"The nums of backbone feats ({len(feats_backbone)}) should be greater or " \ |
||||
f"equal than the nums of backbone_indices ({len(self.backbone_indices)})" |
||||
|
||||
feats_selected = [feats_backbone[i] for i in self.backbone_indices] |
||||
feats_head = self.ppseg_head(feats_selected) # [..., x8, x16, x32] |
||||
|
||||
if self.training: |
||||
logit_list = [] |
||||
for x, seg_head in zip(feats_head, self.seg_heads): |
||||
x = seg_head(x) |
||||
logit_list.append(x) |
||||
logit_list = [ |
||||
F.interpolate( |
||||
x, x_hw, mode='bilinear', align_corners=False) |
||||
for x in logit_list |
||||
] |
||||
else: |
||||
x = self.seg_heads[0](feats_head[0]) |
||||
x = F.interpolate(x, x_hw, mode='bilinear', align_corners=False) |
||||
logit_list = [x] |
||||
|
||||
return logit_list |
||||
|
||||
def init_weight(self): |
||||
if self.pretrained is not None: |
||||
utils.load_entire_model(self, self.pretrained) |
||||
|
||||
|
||||
class MobileSegHead(nn.Layer): |
||||
""" |
||||
The head of MobileSeg. |
||||
|
||||
Args: |
||||
backbone_out_chs (List(int)): The channels of output tensors in the backbone. |
||||
arm_out_chs (List(int)): The out channels of each arm module. |
||||
cm_bin_sizes (List(int)): The bin size of context module. |
||||
cm_out_ch (int): The output channel of the last context module. |
||||
arm_type (str): The type of attention refinement module. |
||||
resize_mode (str): The resize mode for the upsampling operation in decoder. |
||||
""" |
||||
|
||||
def __init__(self, backbone_out_chs, arm_out_chs, cm_bin_sizes, cm_out_ch, |
||||
arm_type, resize_mode, use_last_fuse): |
||||
super().__init__() |
||||
|
||||
self.cm = MobileContextModule(backbone_out_chs[-1], cm_out_ch, |
||||
cm_out_ch, cm_bin_sizes) |
||||
|
||||
assert hasattr(layers, arm_type), \ |
"Unsupported arm_type ({})".format(arm_type) |
arm_class = getattr(layers, arm_type) |
||||
|
||||
self.arm_list = nn.LayerList() # [..., arm8, arm16, arm32] |
||||
for i in range(len(backbone_out_chs)): |
||||
low_chs = backbone_out_chs[i] |
||||
high_ch = cm_out_ch if i == len( |
||||
backbone_out_chs) - 1 else arm_out_chs[i + 1] |
||||
out_ch = arm_out_chs[i] |
||||
arm = arm_class( |
||||
low_chs, high_ch, out_ch, ksize=3, resize_mode=resize_mode) |
||||
self.arm_list.append(arm) |
||||
|
||||
self.use_last_fuse = use_last_fuse |
||||
if self.use_last_fuse: |
||||
self.fuse_convs = nn.LayerList() |
||||
for i in range(1, len(arm_out_chs)): |
||||
conv = layers.SeparableConvBNReLU( |
||||
arm_out_chs[i], |
||||
arm_out_chs[0], |
||||
kernel_size=3, |
||||
bias_attr=False) |
||||
self.fuse_convs.append(conv) |
||||
self.last_conv = layers.SeparableConvBNReLU( |
||||
len(arm_out_chs) * arm_out_chs[0], |
||||
arm_out_chs[0], |
||||
kernel_size=3, |
||||
bias_attr=False) |
||||
|
||||
def forward(self, in_feat_list): |
||||
""" |
||||
Args: |
||||
in_feat_list (List(Tensor)): Such as [x2, x4, x8, x16, x32]. |
||||
x2, x4 and x8 are optional. |
||||
Returns: |
||||
out_feat_list (List(Tensor)): Such as [x2, x4, x8, x16, x32]. |
||||
x2, x4 and x8 are optional. |
||||
The length of in_feat_list and out_feat_list are the same. |
||||
""" |
||||
|
||||
high_feat = self.cm(in_feat_list[-1]) |
||||
out_feat_list = [] |
||||
|
||||
for i in reversed(range(len(in_feat_list))): |
||||
low_feat = in_feat_list[i] |
||||
arm = self.arm_list[i] |
||||
high_feat = arm(low_feat, high_feat) |
||||
out_feat_list.insert(0, high_feat) |
||||
|
||||
if self.use_last_fuse: |
||||
x_list = [out_feat_list[0]] |
||||
size = paddle.shape(out_feat_list[0])[2:] |
||||
for i, (x, conv |
||||
) in enumerate(zip(out_feat_list[1:], self.fuse_convs)): |
||||
x = conv(x) |
||||
x = F.interpolate( |
||||
x, size=size, mode='bilinear', align_corners=False) |
||||
x_list.append(x) |
||||
x = paddle.concat(x_list, axis=1) |
||||
x = self.last_conv(x) |
||||
out_feat_list[0] = x |
||||
|
||||
return out_feat_list |
||||
|
||||
|
||||
class MobileContextModule(nn.Layer): |
||||
""" |
||||
Context Module for Mobile Model. |
||||
|
||||
Args: |
||||
in_channels (int): The number of input channels to pyramid pooling module. |
||||
inter_channels (int): The number of inter channels to pyramid pooling module. |
||||
out_channels (int): The number of output channels after pyramid pooling module. |
||||
bin_sizes (tuple): The output sizes of the pooled feature maps. |
||||
align_corners (bool): An argument of F.interpolate. It should be set to False |
||||
when the output size of feature is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. |
||||
""" |
||||
|
||||
def __init__(self, |
||||
in_channels, |
||||
inter_channels, |
||||
out_channels, |
||||
bin_sizes, |
||||
align_corners=False): |
||||
super().__init__() |
||||
|
||||
self.stages = nn.LayerList([ |
||||
self._make_stage(in_channels, inter_channels, size) |
||||
for size in bin_sizes |
||||
]) |
||||
|
||||
self.conv_out = layers.SeparableConvBNReLU( |
||||
in_channels=inter_channels, |
||||
out_channels=out_channels, |
||||
kernel_size=3, |
||||
bias_attr=False) |
||||
|
||||
self.align_corners = align_corners |
||||
|
||||
def _make_stage(self, in_channels, out_channels, size): |
||||
prior = nn.AdaptiveAvgPool2D(output_size=size) |
||||
conv = layers.ConvBNReLU( |
||||
in_channels=in_channels, out_channels=out_channels, kernel_size=1) |
||||
return nn.Sequential(prior, conv) |
||||
|
||||
def forward(self, input): |
||||
out = None |
||||
input_shape = paddle.shape(input)[2:] |
||||
|
||||
for stage in self.stages: |
||||
x = stage(input) |
||||
x = F.interpolate( |
||||
x, |
||||
input_shape, |
||||
mode='bilinear', |
||||
align_corners=self.align_corners) |
||||
if out is None: |
||||
out = x |
||||
else: |
||||
out += x |
||||
|
||||
out = self.conv_out(out) |
||||
return out |
||||
|
||||
|
||||
class SegHead(nn.Layer): |
||||
def __init__(self, in_chan, mid_chan, n_classes): |
||||
super().__init__() |
||||
self.conv = layers.SeparableConvBNReLU( |
||||
in_chan, mid_chan, kernel_size=3, bias_attr=False) |
||||
self.conv_out = nn.Conv2D( |
||||
mid_chan, n_classes, kernel_size=1, bias_attr=False) |
||||
|
||||
def forward(self, x): |
||||
x = self.conv(x) |
||||
x = self.conv_out(x) |
||||
return x |
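# --- Illustrative usage sketch (not part of the upstream mobileseg.py) ----
# MobileSeg.forward returns one upsampled logit map per decoder stage during
# training (for multi-level supervision) but only the finest map at
# inference. `_TinyBackbone` and all shapes are assumptions; the sketch also
# assumes the default UAFMMobile fusion module is registered under `layers`,
# as the constructor default suggests.
class _TinyBackbone(nn.Layer):
    feat_channels = [16, 24, 32, 64]

    def __init__(self):
        super().__init__()
        self.convs = nn.LayerList([
            nn.Conv2D(3 if i == 0 else self.feat_channels[i - 1], ch, 3, stride=2, padding=1)
            for i, ch in enumerate(self.feat_channels)
        ])

    def forward(self, x):
        feats = []
        for conv in self.convs:
            x = conv(x)
            feats.append(x)
        return feats


def _mobileseg_sketch():
    model = MobileSeg(num_classes=19, backbone=_TinyBackbone())
    img = paddle.rand([2, 3, 256, 256])
    model.train()
    n_train = len(model(img))  # 3 logit maps, one per backbone_indices entry
    model.eval()
    n_eval = len(model(img))   # 1 logit map, from the finest decoder feature
    return n_train, n_eval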
@ -0,0 +1,273 @@ |
||||
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
import paddle |
||||
import paddle.nn as nn |
||||
import paddle.nn.functional as F |
||||
|
||||
from paddlers.models.ppseg import utils |
||||
from paddlers.models.ppseg.models import layers |
||||
from paddlers.models.ppseg.cvlibs import manager |
||||
from paddlers.models.ppseg.utils import utils |
||||
|
||||
|
||||
@manager.MODELS.add_component |
||||
class PPLiteSeg(nn.Layer): |
||||
""" |
||||
The PP_LiteSeg implementation based on PaddlePaddle. |
||||
|
||||
The original article refers to "Juncai Peng, Yi Liu, Shiyu Tang, Yuying Hao, Lutao Chu, |
||||
Guowei Chen, Zewu Wu, Zeyu Chen, Zhiliang Yu, Yuning Du, Qingqing Dang,Baohua Lai, |
||||
Qiwen Liu, Xiaoguang Hu, Dianhai Yu, Yanjun Ma. PP-LiteSeg: A Superior Real-Time Semantic |
||||
Segmentation Model. https://arxiv.org/abs/2204.02681". |
||||
|
||||
Args: |
||||
num_classes (int): The number of target classes. |
||||
backbone (nn.Layer): Backbone network, such as stdc1net and resnet18. The backbone must |
have feat_channels, of which the length is 5. |
||||
backbone_indices (List(int), optional): The values indicate the indices of output of backbone. |
||||
Default: [2, 3, 4]. |
||||
arm_type (str, optional): The type of attention refinement module. Default: UAFM_SpAtten. |
||||
cm_bin_sizes (List(int), optional): The bin size of context module. Default: [1,2,4]. |
||||
cm_out_ch (int, optional): The output channel of the last context module. Default: 128. |
||||
arm_out_chs (List(int), optional): The out channels of each arm module. Default: [64, 96, 128]. |
||||
seg_head_inter_chs (List(int), optional): The intermediate channels of segmentation head. |
||||
Default: [64, 64, 64]. |
||||
resize_mode (str, optional): The resize mode for the upsampling operation in decoder. |
||||
Default: bilinear. |
||||
pretrained (str, optional): The path or url of pretrained model. Default: None. |
||||
|
||||
""" |
||||
|
||||
def __init__(self, |
||||
num_classes, |
||||
backbone, |
||||
backbone_indices=[2, 3, 4], |
||||
arm_type='UAFM_SpAtten', |
||||
cm_bin_sizes=[1, 2, 4], |
||||
cm_out_ch=128, |
||||
arm_out_chs=[64, 96, 128], |
||||
seg_head_inter_chs=[64, 64, 64], |
||||
resize_mode='bilinear', |
||||
pretrained=None): |
||||
super().__init__() |
||||
|
||||
# backbone |
||||
assert hasattr(backbone, 'feat_channels'), \ |
||||
"The backbone should has feat_channels." |
||||
assert len(backbone.feat_channels) >= len(backbone_indices), \ |
||||
f"The length of input backbone_indices ({len(backbone_indices)}) should not be" \ |
||||
f"greater than the length of feat_channels ({len(backbone.feat_channels)})." |
||||
assert len(backbone.feat_channels) > max(backbone_indices), \ |
||||
f"The max value ({max(backbone_indices)}) of backbone_indices should be " \ |
||||
f"less than the length of feat_channels ({len(backbone.feat_channels)})." |
||||
self.backbone = backbone |
||||
|
||||
assert len(backbone_indices) > 1, "The length of backbone_indices " \ |
||||
"should be greater than 1" |
||||
self.backbone_indices = backbone_indices # [..., x16_id, x32_id] |
||||
backbone_out_chs = [backbone.feat_channels[i] for i in backbone_indices] |
||||
|
||||
# head |
||||
if len(arm_out_chs) == 1: |
||||
arm_out_chs = arm_out_chs * len(backbone_indices) |
||||
assert len(arm_out_chs) == len(backbone_indices), "The length of " \ |
||||
"arm_out_chs and backbone_indices should be equal" |
||||
|
||||
self.ppseg_head = PPLiteSegHead(backbone_out_chs, arm_out_chs, |
||||
cm_bin_sizes, cm_out_ch, arm_type, |
||||
resize_mode) |
||||
|
||||
if len(seg_head_inter_chs) == 1: |
||||
seg_head_inter_chs = seg_head_inter_chs * len(backbone_indices) |
||||
assert len(seg_head_inter_chs) == len(backbone_indices), "The length of " \ |
||||
"seg_head_inter_chs and backbone_indices should be equal" |
||||
self.seg_heads = nn.LayerList() # [..., head_16, head32] |
||||
for in_ch, mid_ch in zip(arm_out_chs, seg_head_inter_chs): |
||||
self.seg_heads.append(SegHead(in_ch, mid_ch, num_classes)) |
||||
|
||||
# pretrained |
||||
self.pretrained = pretrained |
||||
self.init_weight() |
||||
|
||||
def forward(self, x): |
||||
x_hw = paddle.shape(x)[2:] |
||||
|
||||
feats_backbone = self.backbone(x) # [x2, x4, x8, x16, x32] |
||||
assert len(feats_backbone) >= len(self.backbone_indices), \ |
||||
f"The nums of backbone feats ({len(feats_backbone)}) should be greater or " \ |
||||
f"equal than the nums of backbone_indices ({len(self.backbone_indices)})" |
||||
|
||||
feats_selected = [feats_backbone[i] for i in self.backbone_indices] |
||||
|
||||
feats_head = self.ppseg_head(feats_selected) # [..., x8, x16, x32] |
||||
|
||||
if self.training: |
||||
logit_list = [] |
||||
|
||||
for x, seg_head in zip(feats_head, self.seg_heads): |
||||
x = seg_head(x) |
||||
logit_list.append(x) |
||||
|
||||
logit_list = [ |
||||
F.interpolate( |
||||
x, x_hw, mode='bilinear', align_corners=False) |
||||
for x in logit_list |
||||
] |
||||
else: |
||||
x = self.seg_heads[0](feats_head[0]) |
||||
x = F.interpolate(x, x_hw, mode='bilinear', align_corners=False) |
||||
logit_list = [x] |
||||
|
||||
return logit_list |
||||
|
||||
def init_weight(self): |
||||
if self.pretrained is not None: |
||||
utils.load_entire_model(self, self.pretrained) |
||||
|
||||
|
||||
class PPLiteSegHead(nn.Layer): |
||||
""" |
||||
The head of PPLiteSeg. |
||||
|
||||
Args: |
||||
backbone_out_chs (List(int)): The channels of output tensors in the backbone. |
||||
arm_out_chs (List(int)): The out channels of each arm module. |
||||
cm_bin_sizes (List(int)): The bin size of context module. |
||||
cm_out_ch (int): The output channel of the last context module. |
||||
arm_type (str): The type of attention refinement module. |
||||
resize_mode (str): The resize mode for the upsampling operation in decoder. |
||||
""" |
||||
|
||||
def __init__(self, backbone_out_chs, arm_out_chs, cm_bin_sizes, cm_out_ch, |
||||
arm_type, resize_mode): |
||||
super().__init__() |
||||
|
||||
self.cm = PPContextModule(backbone_out_chs[-1], cm_out_ch, cm_out_ch, |
||||
cm_bin_sizes) |
||||
|
||||
assert hasattr(layers, arm_type), \ |
"Unsupported arm_type ({})".format(arm_type) |
arm_class = getattr(layers, arm_type) |
||||
|
||||
self.arm_list = nn.LayerList() # [..., arm8, arm16, arm32] |
||||
for i in range(len(backbone_out_chs)): |
||||
low_chs = backbone_out_chs[i] |
||||
high_ch = cm_out_ch if i == len( |
||||
backbone_out_chs) - 1 else arm_out_chs[i + 1] |
||||
out_ch = arm_out_chs[i] |
||||
arm = arm_class( |
||||
low_chs, high_ch, out_ch, ksize=3, resize_mode=resize_mode) |
||||
self.arm_list.append(arm) |
||||
|
||||
def forward(self, in_feat_list): |
||||
""" |
||||
Args: |
||||
in_feat_list (List(Tensor)): Such as [x2, x4, x8, x16, x32]. |
||||
x2, x4 and x8 are optional. |
||||
Returns: |
||||
out_feat_list (List(Tensor)): Such as [x2, x4, x8, x16, x32]. |
||||
x2, x4 and x8 are optional. |
||||
The length of in_feat_list and out_feat_list are the same. |
||||
""" |
||||
|
||||
high_feat = self.cm(in_feat_list[-1]) |
||||
out_feat_list = [] |
||||
|
||||
for i in reversed(range(len(in_feat_list))): |
||||
low_feat = in_feat_list[i] |
||||
arm = self.arm_list[i] |
||||
high_feat = arm(low_feat, high_feat) |
||||
out_feat_list.insert(0, high_feat) |
||||
|
||||
return out_feat_list |
||||
|
||||
|
||||
class PPContextModule(nn.Layer): |
||||
""" |
||||
Simple Context module. |
||||
|
||||
Args: |
||||
in_channels (int): The number of input channels to pyramid pooling module. |
||||
inter_channels (int): The number of inter channels to pyramid pooling module. |
||||
out_channels (int): The number of output channels after pyramid pooling module. |
||||
bin_sizes (tuple): The output sizes of the pooled feature maps. |
||||
align_corners (bool): An argument of F.interpolate. It should be set to False |
||||
when the output size of feature is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. |
||||
""" |
||||
|
||||
def __init__(self, |
||||
in_channels, |
||||
inter_channels, |
||||
out_channels, |
||||
bin_sizes, |
||||
align_corners=False): |
||||
super().__init__() |
||||
|
||||
self.stages = nn.LayerList([ |
||||
self._make_stage(in_channels, inter_channels, size) |
||||
for size in bin_sizes |
||||
]) |
||||
|
||||
self.conv_out = layers.ConvBNReLU( |
||||
in_channels=inter_channels, |
||||
out_channels=out_channels, |
||||
kernel_size=3, |
||||
padding=1) |
||||
|
||||
self.align_corners = align_corners |
||||
|
||||
def _make_stage(self, in_channels, out_channels, size): |
||||
prior = nn.AdaptiveAvgPool2D(output_size=size) |
||||
conv = layers.ConvBNReLU( |
||||
in_channels=in_channels, out_channels=out_channels, kernel_size=1) |
||||
return nn.Sequential(prior, conv) |
||||
|
||||
def forward(self, input): |
||||
out = None |
||||
input_shape = paddle.shape(input)[2:] |
||||
|
||||
for stage in self.stages: |
||||
x = stage(input) |
||||
x = F.interpolate( |
||||
x, |
||||
input_shape, |
||||
mode='bilinear', |
||||
align_corners=self.align_corners) |
||||
if out is None: |
||||
out = x |
||||
else: |
||||
out += x |
||||
|
||||
out = self.conv_out(out) |
||||
return out |
||||
|
||||
|
||||
class SegHead(nn.Layer): |
||||
def __init__(self, in_chan, mid_chan, n_classes): |
||||
super().__init__() |
||||
self.conv = layers.ConvBNReLU( |
||||
in_chan, |
||||
mid_chan, |
||||
kernel_size=3, |
||||
stride=1, |
||||
padding=1, |
||||
bias_attr=False) |
||||
self.conv_out = nn.Conv2D( |
||||
mid_chan, n_classes, kernel_size=1, bias_attr=False) |
||||
|
||||
def forward(self, x): |
||||
x = self.conv(x) |
||||
x = self.conv_out(x) |
||||
return x |
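# --- Illustrative sketch (not part of the upstream pp_liteseg.py) ---------
# PPContextModule (the SPPM of PP-LiteSeg) pools the deepest feature to each
# bin size, projects each branch with a 1x1 conv, upsamples all branches back
# to the input resolution, sums them and applies a final 3x3 conv. The shapes
# below are assumptions, roughly a 1/32 feature of a 512x1024 image.
def _sppm_sketch():
    sppm = PPContextModule(
        in_channels=512, inter_channels=128, out_channels=128, bin_sizes=(1, 2, 4))
    sppm.eval()  # use running BN statistics for a single-sample demo
    x = paddle.rand([1, 512, 16, 32])
    out = sppm(x)
    return out.shape  # [1, 128, 16, 32]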
@ -0,0 +1,449 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
# Refer to the origin implementation: https://github.com/clovaai/c3_sinet/blob/master/models/SINet.py |
||||
|
||||
import paddle |
||||
import paddle.nn as nn |
||||
import paddle.nn.functional as F |
||||
|
||||
from paddlers.models.ppseg.models import layers |
||||
from paddlers.models.ppseg.cvlibs import manager |
||||
from paddlers.models.ppseg.utils import utils |
||||
|
||||
CFG = [[[3, 1], [5, 1]], [[3, 1], [3, 1]], [[3, 1], [5, 1]], [[3, 1], [3, 1]], |
||||
[[5, 1], [3, 2]], [[5, 2], [3, 4]], [[3, 1], [3, 1]], [[5, 1], [5, 1]], |
||||
[[3, 2], [3, 4]], [[3, 1], [5, 2]]] |
||||
|
||||
|
||||
@manager.MODELS.add_component |
||||
class SINet(nn.Layer): |
||||
""" |
||||
The SINet implementation based on PaddlePaddle. |
||||
|
||||
The original article refers to |
||||
Hyojin Park, Lars Lowe Sjösund, YoungJoon Yoo, Nicolas Monet, Jihwan Bang, Nojun Kwak |
||||
"SINet: Extreme Lightweight Portrait Segmentation Networks with Spatial Squeeze Modules |
||||
and Information Blocking Decoder", (https://arxiv.org/abs/1911.09099). |
||||
|
||||
Args: |
||||
num_classes (int): The unique number of target classes. |
||||
config (List, optional): The config for SINet. Default: CFG. |
||||
stage2_blocks (int, optional): The num of blocks in stage2. Default: 2. |
||||
stage3_blocks (int, optional): The num of blocks in stage3. Default: 8. |
||||
in_channels (int, optional): The channels of input image. Default: 3. |
||||
pretrained (str, optional): The path or url of pretrained model. Default: None. |
||||
""" |
||||
|
||||
def __init__(self, |
||||
num_classes=2, |
||||
config=CFG, |
||||
stage2_blocks=2, |
||||
stage3_blocks=8, |
||||
in_channels=3, |
||||
pretrained=None): |
||||
super().__init__() |
||||
dim1 = 16 |
||||
dim2 = 48 |
||||
dim3 = 96 |
||||
|
||||
self.encoder = SINetEncoder(config, in_channels, num_classes, |
||||
stage2_blocks, stage3_blocks) |
||||
|
||||
self.up = nn.UpsamplingBilinear2D(scale_factor=2) |
||||
self.bn_3 = nn.BatchNorm(num_classes) |
||||
|
||||
self.level2_C = CBR(dim2, num_classes, 1, 1) |
||||
self.bn_2 = nn.BatchNorm(num_classes) |
||||
|
||||
self.classifier = nn.Sequential( |
||||
nn.UpsamplingBilinear2D(scale_factor=2), |
||||
nn.Conv2D( |
||||
num_classes, num_classes, 3, 1, 1, bias_attr=False)) |
||||
|
||||
self.pretrained = pretrained |
||||
self.init_weight() |
||||
|
||||
def init_weight(self): |
||||
if self.pretrained is not None: |
||||
utils.load_entire_model(self, self.pretrained) |
||||
|
||||
def forward(self, input): |
||||
output1 = self.encoder.level1(input) # x2 |
||||
|
||||
output2_0 = self.encoder.level2_0(output1) # x4 |
||||
for i, layer in enumerate(self.encoder.level2): |
||||
if i == 0: |
||||
output2 = layer(output2_0) |
||||
else: |
||||
output2 = layer(output2) |
||||
output2_cat = self.encoder.BR2(paddle.concat([output2_0, output2], 1)) |
||||
|
||||
output3_0 = self.encoder.level3_0(output2_cat) # x8 |
||||
for i, layer in enumerate(self.encoder.level3): |
||||
if i == 0: |
||||
output3 = layer(output3_0) |
||||
else: |
||||
output3 = layer(output3) |
||||
output3_cat = self.encoder.BR3(paddle.concat([output3_0, output3], 1)) |
||||
enc_final = self.encoder.classifier(output3_cat) # x8 |
||||
|
||||
dec_stage1 = self.bn_3(self.up(enc_final)) # x4 |
||||
stage1_confidence = paddle.max(F.softmax(dec_stage1), axis=1) |
||||
stage1_gate = (1 - stage1_confidence).unsqueeze(1) |
||||
|
||||
dec_stage2_0 = self.level2_C(output2) # x4 |
||||
dec_stage2 = self.bn_2( |
||||
self.up(dec_stage2_0 * stage1_gate + dec_stage1)) # x2 |
||||
|
||||
out = self.classifier(dec_stage2) # x |
||||
|
||||
return [out] |
||||
|
||||
|
||||
def channel_shuffle(x, groups): |
||||
x_shape = paddle.shape(x) |
||||
batch_size, height, width = x_shape[0], x_shape[2], x_shape[3] |
||||
num_channels = x.shape[1] |
||||
channels_per_group = num_channels // groups |
||||
|
||||
# reshape |
||||
x = paddle.reshape( |
||||
x=x, shape=[batch_size, groups, channels_per_group, height, width]) |
||||
|
||||
# transpose |
||||
x = paddle.transpose(x=x, perm=[0, 2, 1, 3, 4]) |
||||
|
||||
# flatten |
||||
x = paddle.reshape(x=x, shape=[batch_size, num_channels, height, width]) |
||||
|
||||
return x |
||||
|
||||
|
||||
class CBR(nn.Layer): |
||||
''' |
||||
This class defines the convolution layer with batch normalization and PReLU activation |
||||
''' |
||||
|
||||
def __init__(self, nIn, nOut, kSize, stride=1): |
||||
super().__init__() |
||||
padding = int((kSize - 1) / 2) |
||||
|
||||
self.conv = nn.Conv2D( |
||||
nIn, |
||||
nOut, (kSize, kSize), |
||||
stride=stride, |
||||
padding=(padding, padding), |
||||
bias_attr=False) |
||||
self.bn = nn.BatchNorm(nOut) |
||||
self.act = nn.PReLU(nOut) |
||||
|
||||
def forward(self, input): |
||||
output = self.conv(input) |
||||
output = self.bn(output) |
||||
output = self.act(output) |
||||
return output |
||||
|
||||
|
||||
class SeparableCBR(nn.Layer): |
||||
''' |
||||
This class defines the depthwise separable convolution layer with batch normalization and PReLU activation |
||||
''' |
||||
|
||||
def __init__(self, nIn, nOut, kSize, stride=1): |
||||
super().__init__() |
||||
padding = int((kSize - 1) / 2) |
||||
|
||||
self.conv = nn.Sequential( |
||||
nn.Conv2D( |
||||
nIn, |
||||
nIn, (kSize, kSize), |
||||
stride=stride, |
||||
padding=(padding, padding), |
||||
groups=nIn, |
||||
bias_attr=False), |
||||
nn.Conv2D( |
||||
nIn, nOut, kernel_size=1, stride=1, bias_attr=False), ) |
||||
self.bn = nn.BatchNorm(nOut) |
||||
self.act = nn.PReLU(nOut) |
||||
|
||||
def forward(self, input): |
||||
output = self.conv(input) |
||||
output = self.bn(output) |
||||
output = self.act(output) |
||||
return output |
||||
|
||||
|
||||
class SqueezeBlock(nn.Layer): |
||||
def __init__(self, exp_size, divide=4.0): |
||||
super(SqueezeBlock, self).__init__() |
||||
|
||||
if divide > 1: |
||||
self.dense = nn.Sequential( |
||||
nn.Linear(exp_size, int(exp_size / divide)), |
||||
nn.PReLU(int(exp_size / divide)), |
||||
nn.Linear(int(exp_size / divide), exp_size), |
||||
nn.PReLU(exp_size), ) |
||||
else: |
||||
self.dense = nn.Sequential( |
||||
nn.Linear(exp_size, exp_size), nn.PReLU(exp_size)) |
||||
|
||||
def forward(self, x): |
||||
alpha = F.adaptive_avg_pool2d(x, [1, 1]) |
||||
alpha = paddle.squeeze(alpha, axis=[2, 3]) |
||||
alpha = self.dense(alpha) |
||||
alpha = paddle.unsqueeze(alpha, axis=[2, 3]) |
||||
out = x * alpha |
||||
return out |
||||
|
||||
|
||||
class SESeparableCBR(nn.Layer): |
||||
''' |
||||
This class defines the depthwise separable convolution layer with a squeeze-and-excitation block, batch normalization and PReLU activation |
||||
''' |
||||
|
||||
def __init__(self, nIn, nOut, kSize, stride=1, divide=2.0): |
||||
super().__init__() |
||||
padding = int((kSize - 1) / 2) |
||||
|
||||
self.conv = nn.Sequential( |
||||
nn.Conv2D( |
||||
nIn, |
||||
nIn, (kSize, kSize), |
||||
stride=stride, |
||||
padding=(padding, padding), |
||||
groups=nIn, |
||||
bias_attr=False), |
||||
SqueezeBlock( |
||||
nIn, divide=divide), |
||||
nn.Conv2D( |
||||
nIn, nOut, kernel_size=1, stride=1, bias_attr=False), ) |
||||
|
||||
self.bn = nn.BatchNorm(nOut) |
||||
self.act = nn.PReLU(nOut) |
||||
|
||||
def forward(self, input): |
||||
output = self.conv(input) |
||||
output = self.bn(output) |
||||
output = self.act(output) |
||||
return output |
||||
|
||||
|
||||
class BR(nn.Layer): |
||||
''' |
||||
This class groups the batch normalization and PReLU activation |
||||
''' |
||||
|
||||
def __init__(self, nOut): |
||||
super().__init__() |
||||
self.bn = nn.BatchNorm(nOut) |
||||
self.act = nn.PReLU(nOut) |
||||
|
||||
def forward(self, input): |
||||
output = self.bn(input) |
||||
output = self.act(output) |
||||
return output |
||||
|
||||
|
||||
class CB(nn.Layer): |
||||
''' |
||||
This class groups the convolution and batch normalization |
||||
''' |
||||
|
||||
def __init__(self, nIn, nOut, kSize, stride=1): |
||||
super().__init__() |
||||
padding = int((kSize - 1) / 2) |
||||
self.conv = nn.Conv2D( |
||||
nIn, |
||||
nOut, (kSize, kSize), |
||||
stride=stride, |
||||
padding=(padding, padding), |
||||
bias_attr=False) |
||||
self.bn = nn.BatchNorm(nOut) |
||||
|
||||
def forward(self, input): |
||||
output = self.conv(input) |
||||
output = self.bn(output) |
||||
return output |
||||
|
||||
|
||||
class C(nn.Layer): |
||||
''' |
||||
This class is for a convolutional layer. |
||||
''' |
||||
|
||||
def __init__(self, nIn, nOut, kSize, stride=1, group=1): |
||||
super().__init__() |
||||
padding = int((kSize - 1) / 2) |
||||
self.conv = nn.Conv2D( |
||||
nIn, |
||||
nOut, (kSize, kSize), |
||||
stride=stride, |
||||
padding=(padding, padding), |
||||
bias_attr=False, |
||||
groups=group) |
||||
|
||||
def forward(self, input): |
||||
output = self.conv(input) |
||||
return output |
||||
|
||||
|
||||
class S2block(nn.Layer): |
||||
''' |
||||
This class defines the S2 block: an optional average-pooling downsample, a depthwise convolution and a pointwise projection, followed by bilinear upsampling back to the input size. |
||||
''' |
||||
|
||||
def __init__(self, nIn, nOut, kSize, avgsize): |
||||
super().__init__() |
||||
|
||||
self.resolution_down = False |
||||
if avgsize > 1: |
||||
self.resolution_down = True |
||||
self.down_res = nn.AvgPool2D(avgsize, avgsize) |
||||
self.up_res = nn.UpsamplingBilinear2D(scale_factor=avgsize) |
||||
self.avgsize = avgsize |
||||
|
||||
padding = int((kSize - 1) / 2) |
||||
self.conv = nn.Sequential( |
||||
nn.Conv2D( |
||||
nIn, |
||||
nIn, |
||||
kernel_size=(kSize, kSize), |
||||
stride=1, |
||||
padding=(padding, padding), |
||||
groups=nIn, |
||||
bias_attr=False), |
||||
nn.BatchNorm(nIn)) |
||||
|
||||
self.act_conv1x1 = nn.Sequential( |
||||
nn.PReLU(nIn), |
||||
nn.Conv2D( |
||||
nIn, nOut, kernel_size=1, stride=1, bias_attr=False), ) |
||||
|
||||
self.bn = nn.BatchNorm(nOut) |
||||
|
||||
def forward(self, input): |
||||
if self.resolution_down: |
||||
input = self.down_res(input) |
||||
output = self.conv(input) |
||||
|
||||
output = self.act_conv1x1(output) |
||||
if self.resolution_down: |
||||
output = self.up_res(output) |
||||
return self.bn(output) |
||||
|
||||
|
||||
class S2module(nn.Layer): |
||||
''' |
||||
This class defines the ESP block, which is based on the following principle |
||||
Reduce ---> Split ---> Transform --> Merge |
||||
''' |
||||
|
||||
def __init__(self, nIn, nOut, add=True, config=[[3, 1], [5, 1]]): |
||||
super().__init__() |
||||
|
||||
group_n = len(config) |
||||
assert group_n == 2 |
||||
n = int(nOut / group_n) |
||||
n1 = nOut - group_n * n |
||||
|
||||
self.c1 = C(nIn, n, 1, 1, group=group_n) |
||||
# self.c1 = C(nIn, n, 1, 1) |
||||
|
||||
for i in range(group_n): |
||||
if i == 0: |
||||
self.layer_0 = S2block( |
||||
n, n + n1, kSize=config[i][0], avgsize=config[i][1]) |
||||
else: |
||||
self.layer_1 = S2block( |
||||
n, n, kSize=config[i][0], avgsize=config[i][1]) |
||||
|
||||
self.BR = BR(nOut) |
||||
self.add = add |
||||
self.group_n = group_n |
||||
|
||||
def forward(self, input): |
||||
output1 = self.c1(input) |
||||
output1 = channel_shuffle(output1, self.group_n) |
||||
res_0 = self.layer_0(output1) |
||||
res_1 = self.layer_1(output1) |
||||
combine = paddle.concat([res_0, res_1], 1) |
||||
|
||||
if self.add: |
||||
combine = input + combine |
||||
output = self.BR(combine) |
||||
return output |
||||
|
||||
|
||||
class SINetEncoder(nn.Layer): |
||||
def __init__(self, |
||||
config, |
||||
in_channels=3, |
||||
num_classes=2, |
||||
stage2_blocks=2, |
||||
stage3_blocks=8): |
||||
super().__init__() |
||||
assert stage2_blocks == 2 |
||||
dim1 = 16 |
||||
dim2 = 48 |
||||
dim3 = 96 |
||||
|
||||
self.level1 = CBR(in_channels, 12, 3, 2) |
||||
|
||||
self.level2_0 = SESeparableCBR(12, dim1, 3, 2, divide=1) |
||||
|
||||
self.level2 = nn.LayerList() |
||||
for i in range(0, stage2_blocks): |
||||
if i == 0: |
||||
self.level2.append( |
||||
S2module( |
||||
dim1, dim2, config=config[i], add=False)) |
||||
else: |
||||
self.level2.append(S2module(dim2, dim2, config=config[i])) |
||||
self.BR2 = BR(dim2 + dim1) |
||||
|
||||
self.level3_0 = SESeparableCBR(dim2 + dim1, dim2, 3, 2, divide=2) |
||||
self.level3 = nn.LayerList() |
||||
for i in range(0, stage3_blocks): |
||||
if i == 0: |
||||
self.level3.append( |
||||
S2module( |
||||
dim2, dim3, config=config[2 + i], add=False)) |
||||
else: |
||||
self.level3.append(S2module(dim3, dim3, config=config[2 + i])) |
||||
self.BR3 = BR(dim3 + dim2) |
||||
|
||||
self.classifier = C(dim3 + dim2, num_classes, 1, 1) |
||||
|
||||
def forward(self, input): |
||||
output1 = self.level1(input) # x2 |
||||
|
||||
output2_0 = self.level2_0(output1) # x4 |
||||
for i, layer in enumerate(self.level2): |
||||
if i == 0: |
||||
output2 = layer(output2_0) |
||||
else: |
||||
output2 = layer(output2) |
||||
|
||||
output3_0 = self.level3_0( |
||||
self.BR2(paddle.concat([output2_0, output2], 1))) # x8 |
||||
for i, layer in enumerate(self.level3): |
||||
if i == 0: |
||||
output3 = layer(output3_0) |
||||
else: |
||||
output3 = layer(output3) |
||||
|
||||
output3_cat = self.BR3(paddle.concat([output3_0, output3], 1)) |
||||
classifier = self.classifier(output3_cat) |
||||
return classifier |
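# --- Illustrative sketch (not part of the upstream sinet.py) --------------
# channel_shuffle interleaves the channels produced by the grouped 1x1 conv
# so that each S2block branch sees information from every group: with two
# groups, channels [0, 1, 2, 3, 4, 5] become [0, 3, 1, 4, 2, 5]. (In the
# decoder above, the complementary trick is the information-blocking gate
# (1 - confidence), which lets low-level features through only where the
# coarse prediction is uncertain.)
def _channel_shuffle_sketch():
    x = paddle.arange(6, dtype='float32').reshape([1, 6, 1, 1])
    y = channel_shuffle(x, groups=2)
    return y.flatten().tolist()  # [0.0, 3.0, 1.0, 4.0, 2.0, 5.0]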
@ -0,0 +1,155 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
import warnings |
||||
|
||||
import paddle |
||||
import paddle.nn as nn |
||||
import paddle.nn.functional as F |
||||
|
||||
from paddlers.models.ppseg.cvlibs import manager |
||||
from paddlers.models.ppseg.models import layers |
||||
from paddlers.models.ppseg.utils import utils |
||||
from paddlers.models.ppseg.models.backbones.top_transformer import ConvBNAct |
||||
|
||||
|
||||
@manager.MODELS.add_component |
||||
class TopFormer(nn.Layer): |
||||
""" |
||||
The Token Pyramid Transformer (TopFormer) implementation based on PaddlePaddle. |
||||
|
||||
The original article refers to |
||||
Zhang, Wenqiang, Zilong Huang, Guozhong Luo, Tao Chen, Xinggang Wang, Wenyu Liu, Gang Yu, |
||||
and Chunhua Shen. "TopFormer: Token Pyramid Transformer for Mobile Semantic Segmentation." |
||||
In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition, |
||||
pp. 12083-12093. 2022. |
||||
|
||||
This model refers to https://github.com/hustvl/TopFormer. |
||||
|
||||
Args: |
||||
num_classes (int): The unique number of target classes. |
||||
backbone(nn.Layer): Backbone network. |
||||
head_use_dw (bool, optional): Whether the head use depthwise convolutions. Default: False. |
||||
align_corners (bool, optional): Set the align_corners in resizing. Default: False. |
||||
pretrained (str, optional): The path or url of pretrained model. Default: None. |
||||
""" |
||||
|
||||
def __init__(self, |
||||
num_classes, |
||||
backbone, |
||||
head_use_dw=False, |
||||
align_corners=False, |
||||
pretrained=None): |
||||
super().__init__() |
||||
self.backbone = backbone |
||||
|
||||
head_in_channels = [ |
||||
i for i in backbone.injection_out_channels if i is not None |
||||
] |
||||
self.decode_head = TopFormerHead( |
||||
num_classes=num_classes, |
||||
in_channels=head_in_channels, |
||||
use_dw=head_use_dw, |
||||
align_corners=align_corners) |
||||
|
||||
self.align_corners = align_corners |
||||
self.pretrained = pretrained |
||||
self.init_weight() |
||||
|
||||
def init_weight(self): |
||||
if self.pretrained is not None: |
||||
utils.load_entire_model(self, self.pretrained) |
||||
|
||||
def forward(self, x): |
||||
x_hw = paddle.shape(x)[2:] |
||||
x = self.backbone(x) # len=3, 1/8,1/16,1/32 |
||||
x = self.decode_head(x) |
||||
x = F.interpolate( |
||||
x, x_hw, mode='bilinear', align_corners=self.align_corners) |
||||
|
||||
return [x] |
||||
|
||||
|
||||
class TopFormerHead(nn.Layer): |
||||
def __init__(self, |
||||
num_classes, |
||||
in_channels, |
||||
in_index=[0, 1, 2], |
||||
in_transform='multiple_select', |
||||
use_dw=False, |
||||
dropout_ratio=0.1, |
||||
align_corners=False): |
||||
super().__init__() |
||||
|
||||
self.in_index = in_index |
||||
self.in_transform = in_transform |
||||
self.align_corners = align_corners |
||||
|
||||
self._init_inputs(in_channels, in_index, in_transform) |
||||
self.linear_fuse = ConvBNAct( |
||||
in_channels=self.last_channels, |
||||
out_channels=self.last_channels, |
||||
kernel_size=1, |
||||
stride=1, |
||||
groups=self.last_channels if use_dw else 1, |
||||
act=nn.ReLU) |
||||
self.dropout = nn.Dropout2D(dropout_ratio) |
||||
self.conv_seg = nn.Conv2D( |
||||
self.last_channels, num_classes, kernel_size=1) |
||||
|
||||
def _init_inputs(self, in_channels, in_index, in_transform): |
||||
assert in_transform in [None, 'resize_concat', 'multiple_select'] |
||||
if in_transform is not None: |
||||
assert len(in_channels) == len(in_index) |
||||
if in_transform == 'resize_concat': |
||||
self.last_channels = sum(in_channels) |
||||
else: |
||||
self.last_channels = in_channels[0] |
||||
else: |
||||
assert isinstance(in_channels, int) |
||||
assert isinstance(in_index, int) |
||||
self.last_channels = in_channels |
||||
|
||||
def _transform_inputs(self, inputs): |
||||
if self.in_transform == 'resize_concat': |
||||
inputs = [inputs[i] for i in self.in_index] |
||||
inputs = [ |
||||
F.interpolate( |
||||
input_data=x, |
||||
size=paddle.shape(inputs[0])[2:], |
||||
mode='bilinear', |
||||
align_corners=self.align_corners) for x in inputs |
||||
] |
||||
inputs = paddle.concat(inputs, axis=1) |
||||
elif self.in_transform == 'multiple_select': |
||||
inputs_tmp = [inputs[i] for i in self.in_index] |
||||
inputs = inputs_tmp[0] |
||||
for x in inputs_tmp[1:]: |
||||
x = F.interpolate( |
||||
x, |
||||
size=paddle.shape(inputs)[2:], |
||||
mode='bilinear', |
||||
align_corners=self.align_corners) |
||||
inputs += x |
||||
else: |
||||
inputs = inputs[self.in_index] |
||||
|
||||
return inputs |
||||
|
||||
def forward(self, x): |
||||
x = self._transform_inputs(x) |
||||
x = self.linear_fuse(x) |
||||
x = self.dropout(x) |
||||
x = self.conv_seg(x) |
||||
return x |
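# --- Illustrative sketch (not part of the upstream topformer.py) ----------
# With in_transform='multiple_select', TopFormerHead resizes every selected
# feature to the spatial size of the first one and sums them, so all selected
# backbone outputs must share the same channel count. The loop below replays
# that logic on dummy tensors; the shapes are assumptions.
def _multiple_select_sketch():
    feats = [
        paddle.rand([1, 128, 64, 64]),  # 1/8 scale
        paddle.rand([1, 128, 32, 32]),  # 1/16 scale
        paddle.rand([1, 128, 16, 16]),  # 1/32 scale
    ]
    fused = feats[0]
    for f in feats[1:]:
        fused = fused + F.interpolate(
            f, size=paddle.shape(feats[0])[2:], mode='bilinear', align_corners=False)
    return fused.shape  # [1, 128, 64, 64]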
@ -0,0 +1,173 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
import paddle |
||||
import paddle.nn as nn |
||||
import paddle.nn.functional as F |
||||
|
||||
from paddlers.models.ppseg import utils |
||||
from paddlers.models.ppseg.cvlibs import manager |
||||
from paddlers.models.ppseg.models import layers |
||||
|
||||
|
||||
@manager.MODELS.add_component |
||||
class UPerNet(nn.Layer): |
||||
""" |
||||
The UPerNet implementation based on PaddlePaddle. |
||||
|
||||
The original article refers to |
||||
Tete Xiao, et, al. "Unified Perceptual Parsing for Scene Understanding" |
||||
(https://arxiv.org/abs/1807.10221). |
||||
|
||||
Args: |
||||
num_classes (int): The unique number of target classes. |
||||
backbone (paddle.nn.Layer): Backbone network, currently supports ResNet50/101. |
||||
backbone_indices (tuple): Four values in the tuple indicate the indices of output of backbone. |
||||
channels (int): The channels of inter layers. Default: 512. |
||||
enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss. Default: False. |
||||
align_corners (bool, optional): An argument of F.interpolate. It should be set to False when the feature size is even, |
||||
e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False. |
||||
dropout_prob (float): Dropout ratio for upernet head. Default: 0.1. |
||||
pretrained (str, optional): The path or url of pretrained model. Default: None. |
||||
""" |
||||
|
||||
def __init__(self, |
||||
num_classes, |
||||
backbone, |
||||
backbone_indices, |
||||
channels=512, |
||||
enable_auxiliary_loss=False, |
||||
align_corners=False, |
||||
dropout_prob=0.1, |
||||
pretrained=None): |
||||
super().__init__() |
||||
self.backbone = backbone |
||||
self.backbone_indices = backbone_indices |
||||
self.in_channels = [ |
||||
self.backbone.feat_channels[i] for i in backbone_indices |
||||
] |
||||
self.align_corners = align_corners |
||||
self.pretrained = pretrained |
||||
self.enable_auxiliary_loss = enable_auxiliary_loss |
||||
|
||||
fpn_inplanes = [ |
||||
self.backbone.feat_channels[i] for i in backbone_indices |
||||
] |
||||
self.head = UPerNetHead( |
||||
num_classes=num_classes, |
||||
fpn_inplanes=fpn_inplanes, |
||||
dropout_prob=dropout_prob, |
||||
channels=channels, |
||||
enable_auxiliary_loss=self.enable_auxiliary_loss) |
||||
self.init_weight() |
||||
|
||||
def forward(self, x): |
||||
feats = self.backbone(x) |
||||
feats = [feats[i] for i in self.backbone_indices] |
||||
logit_list = self.head(feats) |
||||
logit_list = [ |
||||
F.interpolate( |
||||
logit, |
||||
paddle.shape(x)[2:], |
||||
mode='bilinear', |
||||
align_corners=self.align_corners) for logit in logit_list |
||||
] |
||||
return logit_list |
||||
|
||||
def init_weight(self): |
||||
if self.pretrained is not None: |
||||
utils.load_entire_model(self, self.pretrained) |
||||
|
||||
|
||||

class UPerNetHead(nn.Layer):
    def __init__(self,
                 num_classes,
                 fpn_inplanes,
                 channels,
                 dropout_prob=0.1,
                 enable_auxiliary_loss=False,
                 align_corners=True):
        super(UPerNetHead, self).__init__()
        self.align_corners = align_corners
        self.ppm = layers.PPModule(
            in_channels=fpn_inplanes[-1],
            out_channels=channels,
            bin_sizes=(1, 2, 3, 6),
            dim_reduction=True,
            align_corners=True)
        self.enable_auxiliary_loss = enable_auxiliary_loss
        self.lateral_convs = nn.LayerList()
        self.fpn_convs = nn.LayerList()

        for fpn_inplane in fpn_inplanes[:-1]:
            self.lateral_convs.append(
                layers.ConvBNReLU(fpn_inplane, channels, 1))
            self.fpn_convs.append(
                layers.ConvBNReLU(
                    channels, channels, 3, bias_attr=False))

        if self.enable_auxiliary_loss:
            self.aux_head = layers.AuxLayer(
                fpn_inplanes[2],
                fpn_inplanes[2],
                num_classes,
                dropout_prob=dropout_prob)

        self.fpn_bottleneck = layers.ConvBNReLU(
            len(fpn_inplanes) * channels, channels, 3, padding=1)

        self.conv_last = nn.Sequential(
            layers.ConvBNReLU(
                len(fpn_inplanes) * channels, channels, 3, bias_attr=False),
            nn.Conv2D(
                channels, num_classes, kernel_size=1))
        self.conv_seg = nn.Conv2D(channels, num_classes, kernel_size=1)

    def forward(self, inputs):
        laterals = []
        for i, lateral_conv in enumerate(self.lateral_convs):
            laterals.append(lateral_conv(inputs[i]))

        laterals.append(self.ppm(inputs[-1]))
        fpn_levels = len(laterals)
        for i in range(fpn_levels - 1, 0, -1):
            prev_shape = paddle.shape(laterals[i - 1])
            laterals[i - 1] = laterals[i - 1] + F.interpolate(
                laterals[i],
                size=prev_shape[2:],
                mode='bilinear',
                align_corners=self.align_corners)

        fpn_outs = []
        for i in range(fpn_levels - 1):
            fpn_outs.append(self.fpn_convs[i](laterals[i]))
        fpn_outs.append(laterals[-1])

        for i in range(fpn_levels - 1, 0, -1):
            fpn_outs[i] = F.interpolate(
                fpn_outs[i],
                size=paddle.shape(fpn_outs[0])[2:],
                mode='bilinear',
                align_corners=self.align_corners)
        fuse_out = paddle.concat(fpn_outs, axis=1)
        x = self.fpn_bottleneck(fuse_out)

        x = self.conv_seg(x)
        logits_list = [x]
        if self.enable_auxiliary_loss:
            aux_out = self.aux_head(inputs[2])
            logits_list.append(aux_out)
            return logits_list
        else:
            return logits_list
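

# A minimal usage sketch of the arguments documented above. It assumes a ResNet50_vd
# backbone (exposing `feat_channels` and four feature maps) is importable from
# paddlers.models.ppseg.models.backbones; the indices, class count, and input size
# below are illustrative only.
if __name__ == "__main__":
    from paddlers.models.ppseg.models.backbones import ResNet50_vd

    backbone = ResNet50_vd()
    model = UPerNet(
        num_classes=19,
        backbone=backbone,
        backbone_indices=(0, 1, 2, 3),
        channels=512,
        align_corners=False)
    dummy = paddle.rand([1, 3, 512, 1024])
    logits = model(dummy)[0]  # logits are upsampled back to the input resolution
    print(logits.shape)  # expected: [1, 19, 512, 1024]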
@ -1,59 +0,0 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np


def config_check(cfg, train_dataset=None, val_dataset=None):
    """
    To check the config.

    Args:
        cfg (paddleseg.cvlibs.Config): An object of paddleseg.cvlibs.Config.
        train_dataset (paddle.io.Dataset): Used to read and process training datasets.
        val_dataset (paddle.io.Dataset, optional): Used to read and process validation datasets.
    """

    num_classes_check(cfg, train_dataset, val_dataset)


def num_classes_check(cfg, train_dataset, val_dataset):
    """
    Check that the num_classes in model, train_dataset and val_dataset is consistent.
    """
    num_classes_set = set()
    if train_dataset and hasattr(train_dataset, 'num_classes'):
        num_classes_set.add(train_dataset.num_classes)
    if val_dataset and hasattr(val_dataset, 'num_classes'):
        num_classes_set.add(val_dataset.num_classes)
    if cfg.dic.get('model', None) and cfg.dic['model'].get('num_classes', None):
        num_classes_set.add(cfg.dic['model'].get('num_classes'))
    if (not cfg.train_dataset) and (not cfg.val_dataset):
        raise ValueError(
            'One of `train_dataset` or `val_dataset` should be given, but there are none.'
        )
    if len(num_classes_set) == 0:
        raise ValueError(
            '`num_classes` is not found. Please set it in model, train_dataset or val_dataset'
        )
    elif len(num_classes_set) > 1:
        raise ValueError(
            '`num_classes` is not consistent: {}. Please set it consistently in model or train_dataset or val_dataset'
            .format(num_classes_set))
    else:
        num_classes = num_classes_set.pop()
        if train_dataset:
            train_dataset.num_classes = num_classes
        if val_dataset:
            val_dataset.num_classes = num_classes
@ -1,442 +1,442 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on https://github.com/AgentMaker/Paddle-Image-Models
The copyright of AgentMaker/Paddle-Image-Models is as follows:
Apache License [see LICENSE for details]
"""

import paddle
import paddle.nn as nn

__all__ = ["CondenseNetV2_a", "CondenseNetV2_b", "CondenseNetV2_c"]


class SELayer(nn.Layer):
    def __init__(self, inplanes, reduction=16):
        super(SELayer, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2D(1)
        self.fc = nn.Sequential(
            nn.Linear(
                inplanes, inplanes // reduction, bias_attr=False),
            nn.ReLU(),
            nn.Linear(
                inplanes // reduction, inplanes, bias_attr=False),
            nn.Sigmoid(), )

    def forward(self, x):
        b, c, _, _ = x.shape
        y = self.avg_pool(x).reshape((b, c))
        y = self.fc(y).reshape((b, c, 1, 1))
        return x * paddle.expand(y, shape=x.shape)


class HS(nn.Layer):
    def __init__(self):
        super(HS, self).__init__()
        self.relu6 = nn.ReLU6()

    def forward(self, inputs):
        return inputs * self.relu6(inputs + 3) / 6


class Conv(nn.Sequential):
    def __init__(
            self,
            in_channels,
            out_channels,
            kernel_size,
            stride=1,
            padding=0,
            groups=1,
            activation="ReLU",
            bn_momentum=0.9, ):
        super(Conv, self).__init__()
        self.add_sublayer(
            "norm", nn.BatchNorm2D(
                in_channels, momentum=bn_momentum))
        if activation == "ReLU":
            self.add_sublayer("activation", nn.ReLU())
        elif activation == "HS":
            self.add_sublayer("activation", HS())
        else:
            raise NotImplementedError
        self.add_sublayer(
            "conv",
            nn.Conv2D(
                in_channels,
                out_channels,
                kernel_size=kernel_size,
                stride=stride,
                padding=padding,
                bias_attr=False,
                groups=groups, ), )


def ShuffleLayer(x, groups):
    batchsize, num_channels, height, width = x.shape
    channels_per_group = num_channels // groups
    # Reshape
    x = x.reshape((batchsize, groups, channels_per_group, height, width))
    # Transpose
    x = x.transpose((0, 2, 1, 3, 4))
    # Reshape
    x = x.reshape((batchsize, groups * channels_per_group, height, width))
    return x


def ShuffleLayerTrans(x, groups):
    batchsize, num_channels, height, width = x.shape
    channels_per_group = num_channels // groups
    # Reshape
    x = x.reshape((batchsize, channels_per_group, groups, height, width))
    # Transpose
    x = x.transpose((0, 2, 1, 3, 4))
    # Reshape
    x = x.reshape((batchsize, channels_per_group * groups, height, width))
    return x
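

# ShuffleLayer interleaves channels across groups and ShuffleLayerTrans applies the
# inverse permutation, so chaining the two with the same `groups` restores the input.
# A small sanity-check sketch; the helper name below is illustrative only and not part
# of the original module:
def _shuffle_roundtrip_demo(groups=2):
    x = paddle.arange(8, dtype="float32").reshape((1, 8, 1, 1))
    shuffled = ShuffleLayer(x, groups)  # channel order becomes 0, 4, 1, 5, ...
    restored = ShuffleLayerTrans(shuffled, groups)
    return bool(paddle.allclose(x, restored))  # True: the round trip is the identity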


class CondenseLGC(nn.Layer):
    def __init__(
            self,
            in_channels,
            out_channels,
            kernel_size,
            stride=1,
            padding=0,
            groups=1,
            activation="ReLU", ):
        super(CondenseLGC, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.groups = groups
        self.norm = nn.BatchNorm2D(self.in_channels)
        if activation == "ReLU":
            self.activation = nn.ReLU()
        elif activation == "HS":
            self.activation = HS()
        else:
            raise NotImplementedError
        self.conv = nn.Conv2D(
            self.in_channels,
            self.out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=self.groups,
            bias_attr=False, )
        self.register_buffer(
            "index", paddle.zeros(
                (self.in_channels, ), dtype="int64"))

    def forward(self, x):
        x = paddle.index_select(x, self.index, axis=1)
        x = self.norm(x)
        x = self.activation(x)
        x = self.conv(x)
        x = ShuffleLayer(x, self.groups)
        return x


class CondenseSFR(nn.Layer):
    def __init__(
            self,
            in_channels,
            out_channels,
            kernel_size,
            stride=1,
            padding=0,
            groups=1,
            activation="ReLU", ):
        super(CondenseSFR, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.groups = groups
        self.norm = nn.BatchNorm2D(self.in_channels)
        if activation == "ReLU":
            self.activation = nn.ReLU()
        elif activation == "HS":
            self.activation = HS()
        else:
            raise NotImplementedError
        self.conv = nn.Conv2D(
            self.in_channels,
            self.out_channels,
            kernel_size=kernel_size,
            padding=padding,
            groups=self.groups,
            bias_attr=False,
            stride=stride, )
        self.register_buffer("index",
                             paddle.zeros(
                                 (self.out_channels, self.out_channels)))

    def forward(self, x):
        x = self.norm(x)
        x = self.activation(x)
        x = ShuffleLayerTrans(x, self.groups)
        x = self.conv(x)  # SIZE: N, C, H, W
        N, C, H, W = x.shape
        x = x.reshape((N, C, H * W))
        x = x.transpose((0, 2, 1))  # SIZE: N, HW, C
        # x SIZE: N, HW, C; self.index SIZE: C, C; OUTPUT SIZE: N, HW, C
        x = paddle.matmul(x, self.index)
        x = x.transpose((0, 2, 1))  # SIZE: N, C, HW
        x = x.reshape((N, C, H, W))  # SIZE: N, C, H, W
        return x


class _SFR_DenseLayer(nn.Layer):
    def __init__(
            self,
            in_channels,
            growth_rate,
            group_1x1,
            group_3x3,
            group_trans,
            bottleneck,
            activation,
            use_se=False, ):
        super(_SFR_DenseLayer, self).__init__()
        self.group_1x1 = group_1x1
        self.group_3x3 = group_3x3
        self.group_trans = group_trans
        self.use_se = use_se
        # 1x1 conv i --> b*k
        self.conv_1 = CondenseLGC(
            in_channels,
            bottleneck * growth_rate,
            kernel_size=1,
            groups=self.group_1x1,
            activation=activation, )
        # 3x3 conv b*k --> k
        self.conv_2 = Conv(
            bottleneck * growth_rate,
            growth_rate,
            kernel_size=3,
            padding=1,
            groups=self.group_3x3,
            activation=activation, )
        # 1x1 res conv k(8-16-32)--> i (k*l)
        self.sfr = CondenseSFR(
            growth_rate,
            in_channels,
            kernel_size=1,
            groups=self.group_trans,
            activation=activation, )
        if self.use_se:
            self.se = SELayer(inplanes=growth_rate, reduction=1)

    def forward(self, x):
        x_ = x
        x = self.conv_1(x)
        x = self.conv_2(x)
        if self.use_se:
            x = self.se(x)
        sfr_feature = self.sfr(x)
        y = x_ + sfr_feature
        return paddle.concat([y, x], 1)
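

# The dense layer returns concat([x + SFR(F(x)), F(x)]), so each layer adds
# `growth_rate` channels on top of its input. A quick shape-check sketch with
# illustrative hyper-parameters; the helper name is not part of the original module:
def _sfr_dense_layer_shape_demo():
    layer = _SFR_DenseLayer(
        in_channels=16,
        growth_rate=8,
        group_1x1=8,
        group_3x3=8,
        group_trans=8,
        bottleneck=4,
        activation="ReLU")
    out = layer(paddle.rand([1, 16, 56, 56]))
    return out.shape  # expected: [1, 24, 56, 56], i.e. in_channels + growth_rate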


class _SFR_DenseBlock(nn.Sequential):
    def __init__(
            self,
            num_layers,
            in_channels,
            growth_rate,
            group_1x1,
            group_3x3,
            group_trans,
            bottleneck,
            activation,
            use_se, ):
        super(_SFR_DenseBlock, self).__init__()
        for i in range(num_layers):
            layer = _SFR_DenseLayer(
                in_channels + i * growth_rate,
                growth_rate,
                group_1x1,
                group_3x3,
                group_trans,
                bottleneck,
                activation,
                use_se, )
            self.add_sublayer("denselayer_%d" % (i + 1), layer)


class _Transition(nn.Layer):
    def __init__(self):
        super(_Transition, self).__init__()
        self.pool = nn.AvgPool2D(kernel_size=2, stride=2)

    def forward(self, x):
        x = self.pool(x)
        return x


class CondenseNetV2(nn.Layer):
    def __init__(
            self,
            stages,
            growth,
            HS_start_block,
            SE_start_block,
            fc_channel,
            group_1x1,
            group_3x3,
            group_trans,
            bottleneck,
            last_se_reduction,
            in_channels=3,
            class_num=1000, ):
        super(CondenseNetV2, self).__init__()
        self.stages = stages
        self.growth = growth
        self.in_channels = in_channels
        self.class_num = class_num
        self.last_se_reduction = last_se_reduction
        assert len(self.stages) == len(self.growth)
        self.progress = 0.0

        self.init_stride = 2
        self.pool_size = 7

        self.features = nn.Sequential()
        # Initial nChannels should be 3
        self.num_features = 2 * self.growth[0]
        # Dense-block 1 (224x224)
        self.features.add_sublayer(
            "init_conv",
            nn.Conv2D(
                in_channels,
                self.num_features,
                kernel_size=3,
                stride=self.init_stride,
                padding=1,
                bias_attr=False, ), )
        for i in range(len(self.stages)):
            activation = "HS" if i >= HS_start_block else "ReLU"
            use_se = True if i >= SE_start_block else False
            # Dense-block i
            self.add_block(i, group_1x1, group_3x3, group_trans, bottleneck,
                           activation, use_se)

        self.fc = nn.Linear(self.num_features, fc_channel)
        self.fc_act = HS()

        # Classifier layer
        if class_num > 0:
            self.classifier = nn.Linear(fc_channel, class_num)
        self._initialize()

    def add_block(self, i, group_1x1, group_3x3, group_trans, bottleneck,
                  activation, use_se):
        # Check if ith is the last one
        last = i == len(self.stages) - 1
        block = _SFR_DenseBlock(
            num_layers=self.stages[i],
            in_channels=self.num_features,
            growth_rate=self.growth[i],
            group_1x1=group_1x1,
            group_3x3=group_3x3,
            group_trans=group_trans,
            bottleneck=bottleneck,
            activation=activation,
            use_se=use_se, )
        self.features.add_sublayer("denseblock_%d" % (i + 1), block)
        self.num_features += self.stages[i] * self.growth[i]
        if not last:
            trans = _Transition()
            self.features.add_sublayer("transition_%d" % (i + 1), trans)
        else:
            self.features.add_sublayer("norm_last",
                                       nn.BatchNorm2D(self.num_features))
            self.features.add_sublayer("relu_last", nn.ReLU())
            self.features.add_sublayer("pool_last",
                                       nn.AvgPool2D(self.pool_size))
            # if useSE:
            self.features.add_sublayer(
                "se_last",
                SELayer(
                    self.num_features, reduction=self.last_se_reduction))

    def forward(self, x):
        features = self.features(x)
        out = features.reshape((features.shape[0], features.shape[1] *
                                features.shape[2] * features.shape[3]))
        out = self.fc(out)
        out = self.fc_act(out)

        if self.class_num > 0:
            out = self.classifier(out)

        return out

    def _initialize(self):
        # Initialize
        for m in self.sublayers():
            if isinstance(m, nn.Conv2D):
                nn.initializer.KaimingNormal()(m.weight)
            elif isinstance(m, nn.BatchNorm2D):
                nn.initializer.Constant(value=1.0)(m.weight)
                nn.initializer.Constant(value=0.0)(m.bias)


def CondenseNetV2_a(**kwargs):
    model = CondenseNetV2(
        stages=[1, 1, 4, 6, 8],
        growth=[8, 8, 16, 32, 64],
        HS_start_block=2,
        SE_start_block=3,
        fc_channel=828,
        group_1x1=8,
        group_3x3=8,
        group_trans=8,
        bottleneck=4,
        last_se_reduction=16,
        **kwargs)
    return model


def CondenseNetV2_b(**kwargs):
    model = CondenseNetV2(
        stages=[2, 4, 6, 8, 6],
        growth=[6, 12, 24, 48, 96],
        HS_start_block=2,
        SE_start_block=3,
        fc_channel=1024,
        group_1x1=6,
        group_3x3=6,
        group_trans=6,
        bottleneck=4,
        last_se_reduction=16,
        **kwargs)
    return model


def CondenseNetV2_c(**kwargs):
    model = CondenseNetV2(
        stages=[4, 6, 8, 10, 8],
        growth=[8, 16, 32, 64, 128],
        HS_start_block=2,
        SE_start_block=3,
        fc_channel=1024,
        group_1x1=8,
        group_3x3=8,
        group_trans=8,
        bottleneck=4,
        last_se_reduction=16,
        **kwargs)
    return model
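

# A rough usage sketch, assuming a standard 224x224 ImageNet-style input; `class_num`
# and `in_channels` are forwarded to CondenseNetV2 through **kwargs:
if __name__ == "__main__":
    net = CondenseNetV2_a(class_num=1000, in_channels=3)
    img = paddle.rand([1, 3, 224, 224])
    logits = net(img)
    print(logits.shape)  # expected: [1, 1000]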
@ -1,8 +0,0 @@
# Basic configurations of BIT

_base_: ../_base_/airchange.yaml

save_dir: ./test_tipc/output/cd/bit/

model: !Node
    type: BIT
Some files were not shown because too many files have changed in this diff.