You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
739 lines
23 KiB
739 lines
23 KiB
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. |
|
# |
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
|
# you may not use this file except in compliance with the License. |
|
# You may obtain a copy of the License at |
|
# |
|
# http://www.apache.org/licenses/LICENSE-2.0 |
|
# |
|
# Unless required by applicable law or agreed to in writing, software |
|
# distributed under the License is distributed on an "AS IS" BASIS, |
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
# See the License for the specific language governing permissions and |
|
# limitations under the License. |
|
|
|
# Code was based on https://github.com/zhanghang1989/ResNeSt |
|
|
|
from __future__ import absolute_import |
|
from __future__ import division |
|
from __future__ import print_function |
|
|
|
import numpy as np |
|
import paddle |
|
import math |
|
import paddle.nn as nn |
|
import paddle.nn.functional as F |
|
from paddle import ParamAttr |
|
from paddle.nn.initializer import KaimingNormal |
|
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout |
|
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D |
|
from paddle.regularizer import L2Decay |
|
|
|
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url |
|
|
|
# Download URLs of the pretrained weights, keyed by the name of the factory
# function in this module that builds the matching network.
MODEL_URLS = {
    "ResNeSt50_fast_1s1x64d": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt50_fast_1s1x64d_pretrained.pdparams",
    "ResNeSt50": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt50_pretrained.pdparams",
    "ResNeSt101": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt101_pretrained.pdparams",
}

# The public API is exactly the set of model names that have a URL entry.
__all__ = list(MODEL_URLS)
|
|
|
|
|
class ConvBNLayer(nn.Layer):
    """Bias-free 2-D convolution followed by batch normalization.

    Args:
        num_channels: Number of input channels of the convolution.
        num_filters: Number of output channels; also the BatchNorm width.
        filter_size: Integer kernel size of the convolution.
        stride: Convolution stride.
        dilation: Convolution dilation.
        groups: Number of convolution groups.
        act: Activation name forwarded to ``BatchNorm`` (``None`` for no
            activation).
        name: Prefix used to build explicit parameter names. When ``None``
            no explicit names are passed to the framework.
    """

    def __init__(self,
                 num_channels,
                 num_filters,
                 filter_size,
                 stride=1,
                 dilation=1,
                 groups=1,
                 act=None,
                 name=None):
        super(ConvBNLayer, self).__init__()

        def scoped(suffix):
            # ``name`` defaults to None; ``None + str`` would raise TypeError.
            return None if name is None else name + suffix

        # L2 coefficient applied to the BatchNorm scale and offset.
        bn_decay = 0.0

        # A dilated kernel spans (filter_size - 1) * dilation + 1 pixels, so
        # the padding has to scale with the dilation to keep the spatial size
        # at stride 1. For dilation == 1 this is (filter_size - 1) // 2.
        padding = (filter_size - 1) * dilation // 2

        self._conv = Conv2D(
            in_channels=num_channels,
            out_channels=num_filters,
            kernel_size=filter_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            weight_attr=ParamAttr(name=scoped("_weight")),
            bias_attr=False)
        self._batch_norm = BatchNorm(
            num_filters,
            act=act,
            param_attr=ParamAttr(
                name=scoped("_scale"), regularizer=L2Decay(bn_decay)),
            bias_attr=ParamAttr(
                name=scoped("_offset"), regularizer=L2Decay(bn_decay)),
            moving_mean_name=scoped("_mean"),
            moving_variance_name=scoped("_variance"))

    def forward(self, x):
        """Apply the convolution and then batch normalization to ``x``."""
        x = self._conv(x)
        x = self._batch_norm(x)
        return x
|
|
|
|
|
class rSoftmax(nn.Layer):
    """Normalize attention logits across the radix dimension.

    With ``radix > 1`` the input is regrouped to
    ``[batch, cardinality, radix, -]``, transposed so that radix is axis 1,
    passed through a softmax over that axis and flattened to
    ``[batch, r * h * w, 1, 1]``. Otherwise an element-wise sigmoid is
    applied and the shape is left unchanged.

    Args:
        radix: Number of radix splits.
        cardinality: Number of groups.
    """

    def __init__(self, radix, cardinality):
        super(rSoftmax, self).__init__()
        self.radix = radix
        self.cardinality = cardinality

    def forward(self, x):
        """Return the normalized attention weights for ``x``."""
        if self.radix <= 1:
            return nn.functional.sigmoid(x)

        # The radix path needs a 4-D input to regroup.
        batch, r, h, w = x.shape
        flat = r * h * w
        # Integer floor division: a reshape target must be an exact int, and
        # going through float division can lose precision for large sizes.
        per_split = flat // self.cardinality // self.radix

        x = paddle.reshape(
            x=x, shape=[batch, self.cardinality, self.radix, per_split])
        # Move radix to axis 1 so the softmax competes across the splits.
        x = paddle.transpose(x=x, perm=[0, 2, 1, 3])
        x = nn.functional.softmax(x, axis=1)
        x = paddle.reshape(x=x, shape=[batch, flat, 1, 1])
        return x
|
|
|
|
|
class SplatConv(nn.Layer):
    """Split-attention convolution.

    A grouped ``ConvBNLayer`` produces ``channels * radix`` feature maps.
    Their radix-wise sum is globally average-pooled, squeezed by a 1x1
    ``ConvBNLayer`` and expanded by a 1x1 ``Conv2D`` into attention logits,
    which ``rSoftmax`` normalizes. The output is the attention-weighted sum
    of the radix splits (or the gated input when ``radix <= 1``).

    Args:
        in_channels: Input channels of the first convolution.
        channels: Output channels per radix split.
        kernel_size: Kernel size of the first convolution.
        stride: Stride of the first convolution.
        padding: Accepted for interface compatibility; not read here.
        dilation: Accepted for interface compatibility; not read here.
        groups: Cardinality; the first convolution uses ``groups * radix``
            groups.
        bias: Accepted for interface compatibility; not read here.
        radix: Number of splits.
        reduction_factor: Divisor for the squeeze width (lower bound 32).
        rectify_avg: Accepted for interface compatibility; not read here.
        name: Prefix for parameter names; must be a string.
    """

    def __init__(self,
                 in_channels,
                 channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=True,
                 radix=2,
                 reduction_factor=4,
                 rectify_avg=False,
                 name=None):
        super(SplatConv, self).__init__()

        self.radix = radix
        expanded_channels = channels * radix

        # Feature extraction: every radix split gets its own group set.
        self.conv1 = ConvBNLayer(
            num_channels=in_channels,
            num_filters=expanded_channels,
            filter_size=kernel_size,
            stride=stride,
            groups=groups * radix,
            act="relu",
            name=name + "_1_weights")

        self.avg_pool2d = AdaptiveAvgPool2D(1)

        # Width of the squeeze layer, never below 32 channels.
        squeeze_width = max(in_channels * radix // reduction_factor, 32)
        inter_channels = int(squeeze_width)

        # Squeeze: operates on the globally pooled descriptor.
        self.conv2 = ConvBNLayer(
            num_channels=channels,
            num_filters=inter_channels,
            filter_size=1,
            stride=1,
            groups=groups,
            act="relu",
            name=name + "_2_weights")

        # Excite: one attention logit per channel of every radix split.
        self.conv3 = Conv2D(
            in_channels=inter_channels,
            out_channels=expanded_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            groups=groups,
            weight_attr=ParamAttr(
                name=name + "_weights", initializer=KaimingNormal()),
            bias_attr=False)

        self.rsoftmax = rSoftmax(radix=radix, cardinality=groups)

    def forward(self, x):
        """Return the attention-weighted combination of the radix splits."""
        x = self.conv1(x)
        multi_radix = self.radix > 1

        if multi_radix:
            branches = paddle.split(x, num_or_sections=self.radix, axis=1)
            pooled_input = paddle.add_n(branches)
        else:
            pooled_input = x

        descriptor = self.avg_pool2d(pooled_input)
        descriptor = self.conv2(descriptor)

        weights = self.conv3(descriptor)
        weights = self.rsoftmax(weights)

        if not multi_radix:
            return paddle.multiply(x, weights)

        per_branch = paddle.split(
            weights, num_or_sections=self.radix, axis=1)
        weighted = [
            paddle.multiply(branch, weight)
            for (weight, branch) in zip(per_branch, branches)
        ]
        return paddle.add_n(weighted)
|
|
|
|
|
class BottleneckBlock(nn.Layer):
    """Residual bottleneck: 1x1 conv -> 3x3 (split-attention) conv -> 1x1 conv.

    The main path is added to a shortcut and passed through ReLU. The
    shortcut is the input itself unless ``stride != 1`` or
    ``inplanes != planes * 4``, in which case it is projected by an optional
    average pool, a 1x1 convolution and batch normalization.

    Args:
        inplanes: Number of input channels.
        planes: Base width; the block outputs ``planes * 4`` channels.
        stride: Stride used by the average pools and the strided shortcut.
        radix: ``>= 1`` selects ``SplatConv`` for the middle convolution,
            anything smaller a plain ``ConvBNLayer``.
        cardinality: Number of groups of the middle convolution.
        bottleneck_width: Scales the middle width as
            ``planes * bottleneck_width / 64``.
        avd: Enable a 3x3 average pool on the main path when
            ``stride > 1`` or ``is_first``.
        avd_first: If truthy the average pool runs before the middle
            convolution, otherwise after it.
        dilation: Forwarded to the middle convolution; also selects the
            shortcut pooling variant when ``avg_down`` is set.
        is_first: Enables the ``avd`` pool even at stride 1.
        rectify_avg: Forwarded to ``SplatConv``.
        last_gamma: Stored on the instance; not otherwise used here.
        avg_down: Downsample the shortcut with an average pool followed by a
            stride-1 convolution instead of a strided convolution.
        name: Prefix for parameter names; must be a string.
    """

    def __init__(self,
                 inplanes,
                 planes,
                 stride=1,
                 radix=1,
                 cardinality=1,
                 bottleneck_width=64,
                 avd=False,
                 avd_first=False,
                 dilation=1,
                 is_first=False,
                 rectify_avg=False,
                 last_gamma=False,
                 avg_down=False,
                 name=None):
        super(BottleneckBlock, self).__init__()
        self.inplanes = inplanes
        self.planes = planes
        self.stride = stride
        self.radix = radix
        self.cardinality = cardinality
        self.avd = avd
        self.avd_first = avd_first
        self.dilation = dilation
        self.is_first = is_first
        self.rectify_avg = rectify_avg
        self.last_gamma = last_gamma
        self.avg_down = avg_down

        # Decide once which optional sublayers exist so that ``forward``
        # follows exactly what was built here.
        wants_avd = bool(avd and (stride > 1 or is_first))
        self._avd_before_conv2 = wants_avd and bool(avd_first)
        self._avd_after_conv2 = wants_avd and not avd_first
        self._project_shortcut = stride != 1 or inplanes != planes * 4

        out_channels = planes * 4
        group_width = int(planes * (bottleneck_width / 64.)) * cardinality

        self.conv1 = ConvBNLayer(
            num_channels=self.inplanes,
            num_filters=group_width,
            filter_size=1,
            stride=1,
            groups=1,
            act="relu",
            name=name + "_conv1")

        if self._avd_before_conv2:
            self.avg_pool2d_1 = AvgPool2D(
                kernel_size=3, stride=stride, padding=1)

        if radix >= 1:
            self.conv2 = SplatConv(
                in_channels=group_width,
                channels=group_width,
                kernel_size=3,
                stride=1,
                padding=dilation,
                dilation=dilation,
                groups=cardinality,
                bias=False,
                radix=radix,
                rectify_avg=rectify_avg,
                name=name + "_splat")
        else:
            self.conv2 = ConvBNLayer(
                num_channels=group_width,
                num_filters=group_width,
                filter_size=3,
                stride=1,
                dilation=dilation,
                groups=cardinality,
                act="relu",
                name=name + "_conv2")

        if self._avd_after_conv2:
            self.avg_pool2d_2 = AvgPool2D(
                kernel_size=3, stride=stride, padding=1)

        self.conv3 = ConvBNLayer(
            num_channels=group_width,
            num_filters=out_channels,
            filter_size=1,
            stride=1,
            groups=1,
            act=None,
            name=name + "_conv3")

        if self._project_shortcut:
            if avg_down:
                # Pool first, then project with a stride-1 convolution.
                if dilation == 1:
                    self.avg_pool2d_3 = AvgPool2D(
                        kernel_size=stride, stride=stride, padding=0)
                else:
                    self.avg_pool2d_3 = AvgPool2D(
                        kernel_size=1, stride=1, padding=0, ceil_mode=True)

                self.conv4 = Conv2D(
                    in_channels=self.inplanes,
                    out_channels=out_channels,
                    kernel_size=1,
                    stride=1,
                    padding=0,
                    groups=1,
                    weight_attr=ParamAttr(
                        name=name + "_weights", initializer=KaimingNormal()),
                    bias_attr=False)
            else:
                # Downsample inside the projection itself.
                self.conv4 = Conv2D(
                    in_channels=self.inplanes,
                    out_channels=out_channels,
                    kernel_size=1,
                    stride=stride,
                    padding=0,
                    groups=1,
                    weight_attr=ParamAttr(
                        name=name + "_shortcut_weights",
                        initializer=KaimingNormal()),
                    bias_attr=False)

            # L2 coefficient applied to the shortcut BatchNorm parameters.
            bn_decay = 0.0
            self._batch_norm = BatchNorm(
                out_channels,
                act=None,
                param_attr=ParamAttr(
                    name=name + "_shortcut_scale",
                    regularizer=L2Decay(bn_decay)),
                bias_attr=ParamAttr(
                    name=name + "_shortcut_offset",
                    regularizer=L2Decay(bn_decay)),
                moving_mean_name=name + "_shortcut_mean",
                moving_variance_name=name + "_shortcut_variance")

    def forward(self, x):
        """Return ``relu(shortcut(x) + main_path(x))``."""
        short = x

        x = self.conv1(x)
        if self._avd_before_conv2:
            x = self.avg_pool2d_1(x)

        x = self.conv2(x)

        if self._avd_after_conv2:
            x = self.avg_pool2d_2(x)

        x = self.conv3(x)

        if self._project_shortcut:
            if self.avg_down:
                short = self.avg_pool2d_3(short)
            short = self.conv4(short)
            short = self._batch_norm(short)

        y = paddle.add(x=short, y=x)
        y = F.relu(y)
        return y
|
|
|
|
|
class ResNeStLayer(nn.Layer):
    """One ResNeSt stage: ``blocks`` bottleneck blocks applied in sequence.

    The first block receives ``stride`` and ``is_first`` and maps
    ``inplanes`` to ``planes * 4`` channels; the remaining blocks keep
    ``planes * 4`` channels and use the default stride. The first block is
    built with dilation 1 when ``dilation`` is 1 or 2 and with dilation 2
    when ``dilation`` is 4; later blocks receive ``dilation`` unchanged.

    Args:
        inplanes: Input channels of the stage.
        planes: Base width of every block.
        blocks: Number of bottleneck blocks.
        radix, cardinality, bottleneck_width, avg_down, avd, avd_first,
        rectify_avg, last_gamma: Forwarded to every ``BottleneckBlock``.
        stride: Stride of the first block.
        dilation: Stage dilation; must be 1, 2 or 4.
        is_first: Forwarded to the first block only.
        name: Prefix for sublayer and parameter names; must be a string.

    Raises:
        RuntimeError: If ``dilation`` is not 1, 2 or 4.
    """

    def __init__(self,
                 inplanes,
                 planes,
                 blocks,
                 radix,
                 cardinality,
                 bottleneck_width,
                 avg_down,
                 avd,
                 avd_first,
                 rectify_avg,
                 last_gamma,
                 stride=1,
                 dilation=1,
                 is_first=True,
                 name=None):
        super(ResNeStLayer, self).__init__()
        self.inplanes = inplanes
        self.planes = planes
        self.blocks = blocks
        self.radix = radix
        self.cardinality = cardinality
        self.bottleneck_width = bottleneck_width
        self.avg_down = avg_down
        self.avd = avd
        self.avd_first = avd_first
        self.rectify_avg = rectify_avg
        self.last_gamma = last_gamma
        self.is_first = is_first

        # The first block uses a smaller dilation than the rest of the stage.
        if dilation in (1, 2):
            first_dilation = 1
        elif dilation == 4:
            first_dilation = 2
        else:
            raise RuntimeError("=>unknown dilation size")

        # Keyword arguments shared by every block of the stage.
        common = dict(
            planes=planes,
            radix=radix,
            cardinality=cardinality,
            bottleneck_width=bottleneck_width,
            avg_down=self.avg_down,
            avd=avd,
            avd_first=avd_first,
            rectify_avg=rectify_avg,
            last_gamma=last_gamma)

        first_name = name + "_bottleneck_0"
        first_block = self.add_sublayer(
            first_name,
            BottleneckBlock(
                inplanes=self.inplanes,
                stride=stride,
                dilation=first_dilation,
                is_first=is_first,
                name=first_name,
                **common))

        # Every following block consumes the first block's output width.
        self.inplanes = planes * 4
        self.bottleneck_block_list = [first_block]
        for index in range(1, blocks):
            block_name = name + "_bottleneck_" + str(index)
            next_block = self.add_sublayer(
                block_name,
                BottleneckBlock(
                    inplanes=self.inplanes,
                    dilation=dilation,
                    name=block_name,
                    **common))
            self.bottleneck_block_list.append(next_block)

    def forward(self, x):
        """Run ``x`` through every block of the stage in order."""
        out = x
        for block in self.bottleneck_block_list:
            out = block(out)
        return out
|
|
|
|
|
class ResNeSt(nn.Layer):
    """ResNeSt network: stem, four bottleneck stages and a linear classifier.

    Args:
        layers: Sequence of four ints, the number of bottleneck blocks in
            stages 1 to 4.
        radix: Radix forwarded to every stage.
        groups: Cardinality forwarded to every stage.
        bottleneck_width: Bottleneck width forwarded to every stage.
        dilated: If true, stages 3 and 4 are built with stride 1 and
            dilation 2 and 4 respectively.
        dilation: ``4`` behaves like ``dilated``; ``2`` builds stage 3 with
            stride 2 / dilation 1 and stage 4 with stride 1 / dilation 2;
            any other value builds stages 3 and 4 with stride 2.
        deep_stem: Use three 3x3 ``ConvBNLayer`` blocks as the stem instead
            of a single 7x7 one.
        stem_width: Output width of the single-conv stem; the deep stem
            outputs ``stem_width * 2`` channels.
        avg_down, rectify_avg, avd, avd_first, last_gamma: Forwarded
            unchanged to every stage.
        final_drop: Dropout probability applied to the pooled features
            before the classifier. Values that are falsy or ``<= 0`` disable
            dropout.
        class_num: Number of outputs of the classifier.
    """

    def __init__(self,
                 layers,
                 radix=1,
                 groups=1,
                 bottleneck_width=64,
                 dilated=False,
                 dilation=1,
                 deep_stem=False,
                 stem_width=64,
                 avg_down=False,
                 rectify_avg=False,
                 avd=False,
                 avd_first=False,
                 final_drop=0.0,
                 last_gamma=False,
                 class_num=1000):
        super(ResNeSt, self).__init__()

        self.cardinality = groups
        self.bottleneck_width = bottleneck_width
        # ResNet-D params
        self.inplanes = stem_width * 2 if deep_stem else 64
        self.avg_down = avg_down
        self.last_gamma = last_gamma
        # ResNeSt params
        self.radix = radix
        self.avd = avd
        self.avd_first = avd_first

        self.deep_stem = deep_stem
        self.stem_width = stem_width
        self.layers = layers
        self.final_drop = final_drop
        self.dilated = dilated
        self.dilation = dilation

        self.rectify_avg = rectify_avg

        if self.deep_stem:
            self.stem = nn.Sequential(
                ("conv1", ConvBNLayer(
                    num_channels=3,
                    num_filters=stem_width,
                    filter_size=3,
                    stride=2,
                    act="relu",
                    name="conv1")),
                ("conv2", ConvBNLayer(
                    num_channels=stem_width,
                    num_filters=stem_width,
                    filter_size=3,
                    stride=1,
                    act="relu",
                    name="conv2")),
                ("conv3", ConvBNLayer(
                    num_channels=stem_width,
                    num_filters=stem_width * 2,
                    filter_size=3,
                    stride=1,
                    act="relu",
                    name="conv3")))
        else:
            self.stem = ConvBNLayer(
                num_channels=3,
                num_filters=stem_width,
                filter_size=7,
                stride=2,
                act="relu",
                name="conv1")

        self.max_pool2d = MaxPool2D(kernel_size=3, stride=2, padding=1)

        stem_out = self.stem_width * 2 if self.deep_stem else self.stem_width
        self.layer1 = self._make_stage(
            0,
            inplanes=stem_out,
            planes=64,
            stride=1,
            dilation=1,
            is_first=False)
        self.layer2 = self._make_stage(1, inplanes=256, planes=128, stride=2)

        # (stride, dilation) of stages 3 and 4 for each dilation mode.
        if self.dilated or self.dilation == 4:
            stage3, stage4 = (1, 2), (1, 4)
        elif self.dilation == 2:
            stage3, stage4 = (2, 1), (1, 2)
        else:
            stage3, stage4 = (2, 1), (2, 1)

        self.layer3 = self._make_stage(
            2, inplanes=512, planes=256, stride=stage3[0], dilation=stage3[1])
        self.layer4 = self._make_stage(
            3, inplanes=1024, planes=512, stride=stage4[0], dilation=stage4[1])

        self.pool2d_avg = AdaptiveAvgPool2D(1)

        self.out_channels = 2048

        stdv = 1.0 / math.sqrt(self.out_channels * 1.0)

        self.out = Linear(
            self.out_channels,
            class_num,
            weight_attr=ParamAttr(
                initializer=nn.initializer.Uniform(-stdv, stdv),
                name="fc_weights"),
            bias_attr=ParamAttr(name="fc_offset"))

        # ``final_drop`` used to be stored but never applied. Dropout holds
        # no parameters, so adding it leaves the checkpoint layout untouched.
        self._use_final_drop = bool(final_drop) and final_drop > 0.0
        if self._use_final_drop:
            self.drop = Dropout(p=final_drop)

    def _make_stage(self,
                    index,
                    inplanes,
                    planes,
                    stride,
                    dilation=1,
                    is_first=True):
        """Build stage ``index + 1`` from the settings stored on ``self``."""
        return ResNeStLayer(
            inplanes=inplanes,
            planes=planes,
            blocks=self.layers[index],
            radix=self.radix,
            cardinality=self.cardinality,
            bottleneck_width=self.bottleneck_width,
            avg_down=self.avg_down,
            avd=self.avd,
            avd_first=self.avd_first,
            rectify_avg=self.rectify_avg,
            last_gamma=self.last_gamma,
            stride=stride,
            dilation=dilation,
            is_first=is_first,
            name="layer" + str(index + 1))

    def forward(self, x):
        """Return the classifier outputs for the input batch ``x``."""
        x = self.stem(x)
        x = self.max_pool2d(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.pool2d_avg(x)
        # Flatten the pooled map to [batch, out_channels] for the classifier.
        x = paddle.reshape(x, shape=[-1, self.out_channels])
        if self._use_final_drop:
            x = self.drop(x)
        x = self.out(x)
        return x
|
|
|
|
|
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
    """Optionally load pretrained weights into ``model``.

    Args:
        pretrained: ``False`` loads nothing; ``True`` loads from
            ``model_url`` via ``load_dygraph_pretrain_from_url``; a string is
            handed to ``load_dygraph_pretrain`` (presumably a weights path,
            confirm against that helper).
        model: The network that receives the weights.
        model_url: URL used when ``pretrained`` is ``True``.
        use_ssld: Forwarded to the URL loader.

    Raises:
        RuntimeError: If ``pretrained`` is neither a bool nor a string.
    """
    # Identity checks on purpose: only the bool singletons select these paths.
    if pretrained is False:
        return
    if pretrained is True:
        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
        return
    if isinstance(pretrained, str):
        load_dygraph_pretrain(model, pretrained)
        return
    raise RuntimeError(
        "pretrained type is not available. Please use `string` or `boolean` type."
    )
|
|
|
|
|
def ResNeSt50_fast_1s1x64d(pretrained=False, use_ssld=False, **kwargs):
    """Build the ResNeSt50_fast_1s1x64d network.

    Configuration: layers ``[3, 4, 6, 3]``, radix 1, one group, deep stem of
    width 32, average-pool downsampling and ``avd`` applied before the
    middle convolution.

    Args:
        pretrained: ``False``, ``True`` or a string; see
            ``_load_pretrained``.
        use_ssld: Forwarded to ``_load_pretrained``.
        **kwargs: Extra keyword arguments forwarded to ``ResNeSt``.

    Returns:
        The constructed ``ResNeSt`` instance.
    """
    config = dict(
        layers=[3, 4, 6, 3],
        radix=1,
        groups=1,
        bottleneck_width=64,
        deep_stem=True,
        stem_width=32,
        avg_down=True,
        avd=True,
        avd_first=True,
        final_drop=0.0)
    net = ResNeSt(**config, **kwargs)
    weights_url = MODEL_URLS["ResNeSt50_fast_1s1x64d"]
    _load_pretrained(pretrained, net, weights_url, use_ssld=use_ssld)
    return net
|
|
|
|
|
def ResNeSt50(pretrained=False, use_ssld=False, **kwargs):
    """Build the ResNeSt50 network.

    Configuration: layers ``[3, 4, 6, 3]``, radix 2, one group, deep stem of
    width 32, average-pool downsampling and ``avd`` applied after the middle
    convolution.

    Args:
        pretrained: ``False``, ``True`` or a string; see
            ``_load_pretrained``.
        use_ssld: Forwarded to ``_load_pretrained``.
        **kwargs: Extra keyword arguments forwarded to ``ResNeSt``.

    Returns:
        The constructed ``ResNeSt`` instance.
    """
    config = dict(
        layers=[3, 4, 6, 3],
        radix=2,
        groups=1,
        bottleneck_width=64,
        deep_stem=True,
        stem_width=32,
        avg_down=True,
        avd=True,
        avd_first=False,
        final_drop=0.0)
    net = ResNeSt(**config, **kwargs)
    weights_url = MODEL_URLS["ResNeSt50"]
    _load_pretrained(pretrained, net, weights_url, use_ssld=use_ssld)
    return net
|
|
|
|
|
def ResNeSt101(pretrained=False, use_ssld=False, **kwargs):
    """Build the ResNeSt101 network.

    Configuration: layers ``[3, 4, 23, 3]``, radix 2, one group, deep stem
    of width 64, average-pool downsampling and ``avd`` applied after the
    middle convolution.

    Args:
        pretrained: ``False``, ``True`` or a string; see
            ``_load_pretrained``.
        use_ssld: Forwarded to ``_load_pretrained``.
        **kwargs: Extra keyword arguments forwarded to ``ResNeSt``.

    Returns:
        The constructed ``ResNeSt`` instance.
    """
    config = dict(
        layers=[3, 4, 23, 3],
        radix=2,
        groups=1,
        bottleneck_width=64,
        deep_stem=True,
        stem_width=64,
        avg_down=True,
        avd=True,
        avd_first=False,
        final_drop=0.0)
    net = ResNeSt(**config, **kwargs)
    weights_url = MODEL_URLS["ResNeSt101"]
    _load_pretrained(pretrained, net, weights_url, use_ssld=use_ssld)
    return net
|
|
|