import math

import paddle.nn as nn
import paddle.nn.functional as F
from paddle.vision.models import resnet

from paddlers.models.ppdet.modeling import initializer as init


class FPNConvBlock(nn.Conv2D):
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 dilation=1):
        # Padding is chosen so that the spatial size is preserved at stride 1.
        super(FPNConvBlock, self).__init__(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=dilation * (kernel_size - 1) // 2,
            dilation=dilation)
        # Kaiming-uniform weights (a=1) with zero-initialized bias.
        init.kaiming_uniform_(self.weight, a=1)
        init.constant_(self.bias, value=0)
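
# A minimal shape-check sketch (illustrative, not part of the module): at
# stride 1, the padding rule `dilation * (kernel_size - 1) // 2` preserves
# the spatial size, e.g.:
#
#   import paddle
#   blk = FPNConvBlock(256, 256, 3)
#   y = blk(paddle.randn([1, 256, 64, 64]))
#   assert y.shape == [1, 256, 64, 64]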


class DefaultConvBlock(nn.Conv2D):
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 bias_attr=None):
        super(DefaultConvBlock, self).__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=padding,
            bias_attr=bias_attr)
        # PyTorch-style default initialization: Kaiming-uniform weights with
        # a=sqrt(5) and a uniform bias bounded by 1/sqrt(fan_in).
        init.kaiming_uniform_(self.weight, a=math.sqrt(5))
        if self.bias is not None:
            fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
            bound = 1 / math.sqrt(fan_in)
            init.uniform_(self.bias, -bound, bound)
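
# DefaultConvBlock only changes how a plain nn.Conv2D is initialized; a
# usage sketch (hypothetical channel sizes):
#
#   head = DefaultConvBlock(128, 16, 1)             # 1x1 projection
#   conv = DefaultConvBlock(256, 128, 3, 1, 1,      # 3x3, stride 1, pad 1
#                           bias_attr=False)        # no bias -> no bias init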


class ResNetEncoder(nn.Layer):
    def __init__(self, backbone='resnet50', in_channels=3, pretrained=True):
        super(ResNetEncoder, self).__init__()
        self.resnet = getattr(resnet, backbone)(pretrained=pretrained)
        if in_channels != 3:
            # Replace the stem conv so the backbone accepts an arbitrary
            # number of input bands.
            self.resnet.conv1 = nn.Conv2D(
                in_channels, 64, 7, stride=2, padding=3, bias_attr=False)

        # Adjust the momentum of all BN layers in the backbone.
        for layer in self.resnet.sublayers():
            if isinstance(layer, (nn.BatchNorm2D, nn.SyncBatchNorm)):
                layer._momentum = 0.1

    def forward(self, x):
        x = self.resnet.conv1(x)
        x = self.resnet.bn1(x)
        x = self.resnet.relu(x)
        x = self.resnet.maxpool(x)

        c2 = self.resnet.layer1(x)
        c3 = self.resnet.layer2(c2)
        c4 = self.resnet.layer3(c3)
        c5 = self.resnet.layer4(c4)

        return [c2, c3, c4, c5]
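
# Encoder contract sketch (assuming a 512x512 input and resnet50): the four
# returned maps have output strides 4, 8, 16 and 32, which is what FPN and
# AsymmetricDecoder below expect.
#
#   import paddle
#   enc = ResNetEncoder('resnet50', in_channels=3, pretrained=False)
#   c2, c3, c4, c5 = enc(paddle.randn([1, 3, 512, 512]))
#   # c2: [1, 256, 128, 128], c3: [1, 512, 64, 64],
#   # c4: [1, 1024, 32, 32],  c5: [1, 2048, 16, 16]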


class FPN(nn.Layer):
    """
    Module that adds FPN on top of a list of feature maps.

    The feature maps are currently supposed to be in increasing depth
    order, and must be consecutive.
    """

    def __init__(self,
                 in_channels_list,
                 out_channels,
                 conv_block=FPNConvBlock):
        super(FPN, self).__init__()

        inner_blocks = []
        layer_blocks = []
        for idx, in_channels in enumerate(in_channels_list, 1):
            if in_channels == 0:
                continue
            # 1x1 lateral conv and 3x3 output conv for each pyramid level.
            inner_blocks.append(conv_block(in_channels, out_channels, 1))
            layer_blocks.append(conv_block(out_channels, out_channels, 3, 1))
        self.inner_blocks = nn.LayerList(inner_blocks)
        self.layer_blocks = nn.LayerList(layer_blocks)

    def forward(self, x):
        last_inner = self.inner_blocks[-1](x[-1])
        results = [self.layer_blocks[-1](last_inner)]
        # Top-down pathway: upsample the coarser map by 2x and fuse it with
        # the lateral connection of the next finer level.
        for feature, inner_block, layer_block in zip(
                x[:-1][::-1], self.inner_blocks[:-1][::-1],
                self.layer_blocks[:-1][::-1]):
            inner_top_down = F.interpolate(
                last_inner, scale_factor=2, mode='nearest')
            inner_lateral = inner_block(feature)
            last_inner = inner_lateral + inner_top_down
            results.insert(0, layer_block(last_inner))
        return tuple(results)
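
# FPN sketch, continuing the (hypothetical) tensors c2..c5 from the encoder
# example above: every level is projected to a common width, so all outputs
# share `out_channels`.
#
#   fpn = FPN(in_channels_list=[256, 512, 1024, 2048], out_channels=256)
#   p2, p3, p4, p5 = fpn([c2, c3, c4, c5])
#   # p2: [1, 256, 128, 128] ... p5: [1, 256, 16, 16]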


class FSRelation(nn.Layer):
    def __init__(self,
                 in_channels,
                 channels_list,
                 out_channels,
                 scale_aware_proj=True,
                 conv_block=DefaultConvBlock):
        super(FSRelation, self).__init__()

        self.scale_aware_proj = scale_aware_proj
        if self.scale_aware_proj:
            # One scene-embedding projector per pyramid scale.
            self.scene_encoder = nn.LayerList([
                nn.Sequential(
                    conv_block(in_channels, out_channels, 1),
                    nn.ReLU(), conv_block(out_channels, out_channels, 1))
                for _ in range(len(channels_list))
            ])
        else:
            # A single two-layer projector shared across scales.
            self.scene_encoder = nn.Sequential(
                conv_block(in_channels, out_channels, 1),
                nn.ReLU(), conv_block(out_channels, out_channels, 1))

        self.content_encoders = nn.LayerList()
        self.feature_reencoders = nn.LayerList()
        for channel in channels_list:
            self.content_encoders.append(
                nn.Sequential(
                    conv_block(
                        channel, out_channels, 1, bias_attr=True),
                    nn.BatchNorm2D(
                        out_channels, momentum=0.1),
                    nn.ReLU()))
            self.feature_reencoders.append(
                nn.Sequential(
                    conv_block(
                        channel, out_channels, 1, bias_attr=True),
                    nn.BatchNorm2D(
                        out_channels, momentum=0.1),
                    nn.ReLU()))

        self.normalizer = nn.Sigmoid()

    def forward(self, scene_feature, feature_list):
        content_feats = [
            c_en(p_feat)
            for c_en, p_feat in zip(self.content_encoders, feature_list)
        ]
        if self.scale_aware_proj:
            scene_feats = [op(scene_feature) for op in self.scene_encoder]
            # Relation map per scale: sigmoid of the channel-wise inner
            # product between the scene embedding and the content embedding.
            relations = [
                self.normalizer((sf * cf).sum(axis=1, keepdim=True))
                for sf, cf in zip(scene_feats, content_feats)
            ]
        else:
            scene_feat = self.scene_encoder(scene_feature)
            relations = [
                self.normalizer((scene_feat * cf).sum(axis=1, keepdim=True))
                for cf in content_feats
            ]

        p_feats = [
            op(p_feat)
            for op, p_feat in zip(self.feature_reencoders, feature_list)
        ]

        refined_feats = [r * p for r, p in zip(relations, p_feats)]

        return refined_feats
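
# F-S relation sketch, continuing the tensors from the sketches above: the
# scene embedding is the globally pooled c5; each refined output keeps the
# shape of the corresponding FPN level, modulated by a sigmoid relation map.
#
#   fsr = FSRelation(in_channels=2048, channels_list=[256] * 4,
#                    out_channels=256)
#   scene = nn.AdaptiveAvgPool2D(1)(c5)          # [1, 2048, 1, 1]
#   refined = fsr(scene, [p2, p3, p4, p5])       # shapes unchanged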


class AsymmetricDecoder(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 in_feature_output_strides=(4, 8, 16, 32),
                 out_feature_output_stride=4,
                 conv_block=DefaultConvBlock):
        super(AsymmetricDecoder, self).__init__()

        self.blocks = nn.LayerList()
        for in_feature_output_stride in in_feature_output_strides:
            # Number of 2x upsampling steps needed to reach the output stride.
            num_upsample = int(math.log2(int(in_feature_output_stride))) - int(
                math.log2(int(out_feature_output_stride)))
            num_layers = num_upsample if num_upsample != 0 else 1
            self.blocks.append(
                nn.Sequential(*[
                    nn.Sequential(
                        conv_block(
                            in_channels if idx == 0 else out_channels,
                            out_channels,
                            3,
                            1,
                            1,
                            bias_attr=False),
                        nn.BatchNorm2D(
                            out_channels, momentum=0.1),
                        nn.ReLU(),
                        nn.UpsamplingBilinear2D(scale_factor=2)
                        if num_upsample != 0 else nn.Identity())
                    for idx in range(num_layers)
                ]))
    def forward(self, feature_list):
        inner_feature_list = []
        for idx, block in enumerate(self.blocks):
            decoder_feature = block(feature_list[idx])
            inner_feature_list.append(decoder_feature)
        # Average the per-level decoded features into one stride-4 map.
        out_feature = sum(inner_feature_list) / len(inner_feature_list)
        return out_feature
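
# Decoder sketch, where r2..r5 stand for the refined features from the
# FSRelation sketch above: each level is convolved and bilinearly upsampled
# to output stride 4, then the levels are averaged.
#
#   dec = AsymmetricDecoder(in_channels=256, out_channels=128)
#   out = dec([r2, r3, r4, r5])   # [1, 128, 128, 128] for a 512x512 input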
class FarSeg(nn.Layer): |
    """
    The FarSeg implementation based on PaddlePaddle.

    The original article refers to
    Zheng Z, Zhong Y, Wang J, et al. Foreground-aware relation network for geospatial object
    segmentation in high spatial resolution remote sensing imagery[C]//Proceedings of the IEEE/CVF
    Conference on Computer Vision and Pattern Recognition. 2020: 4096-4105.
    (https://openaccess.thecvf.com/content_CVPR_2020/papers/Zheng_Foreground-Aware_Relation_Network_for_Geospatial_Object_Segmentation_in_High_Spatial_CVPR_2020_paper.pdf)

    Args:
        in_channels (int, optional): Number of channels of the input images. Default: 3.
        num_classes (int, optional): Number of target classes. Default: 16.
        backbone (str, optional): Name of the backbone network; any ResNet model available in
            `paddle.vision.models.resnet` can be used. Default: 'resnet50'.
        backbone_pretrained (bool, optional): Whether the backbone uses ImageNet pretrained
            weights. Default: True.
        fpn_out_channels (int, optional): Number of channels output by the feature pyramid
            network. Default: 256.
        fsr_out_channels (int, optional): Number of channels output by the F-S relation module.
            Default: 256.
        scale_aware_proj (bool, optional): Whether to use scale-aware projection in the F-S
            relation module. Default: True.
        decoder_out_channels (int, optional): Number of channels output by the decoder.
            Default: 128.
    """

    def __init__(self,
                 in_channels=3,
                 num_classes=16,
                 backbone='resnet50',
                 backbone_pretrained=True,
                 fpn_out_channels=256,
                 fsr_out_channels=256,
                 scale_aware_proj=True,
                 decoder_out_channels=128):
        super(FarSeg, self).__init__()

        backbone = backbone.lower()
        self.encoder = ResNetEncoder(
            backbone=backbone,
            in_channels=in_channels,
            pretrained=backbone_pretrained)

        # The deepest encoder feature has 2048 channels for bottleneck
        # ResNets (resnet50 and deeper) and 512 for resnet18/34.
        fpn_max_in_channels = 2048
        if backbone in ['resnet18', 'resnet34']:
            fpn_max_in_channels = 512
        self.fpn = FPN(in_channels_list=[
            fpn_max_in_channels // (2**(3 - i)) for i in range(4)
        ],
                       out_channels=fpn_out_channels)
        self.gap = nn.AdaptiveAvgPool2D(1)
        self.fsr = FSRelation(
            in_channels=fpn_max_in_channels,
            channels_list=[fpn_out_channels] * 4,
            out_channels=fsr_out_channels,
            scale_aware_proj=scale_aware_proj)
        self.decoder = AsymmetricDecoder(
            in_channels=fsr_out_channels, out_channels=decoder_out_channels)
        self.cls_head = nn.Sequential(
            DefaultConvBlock(decoder_out_channels, num_classes, 1),
            nn.UpsamplingBilinear2D(scale_factor=4))

    def forward(self, x):
        feature_list = self.encoder(x)

        fpn_feature_list = self.fpn(feature_list)
        # Scene embedding: the globally pooled deepest encoder feature.
        scene_feature = self.gap(feature_list[-1])
        refined_feature_list = self.fsr(scene_feature, fpn_feature_list)

        feature = self.decoder(refined_feature_list)
        logit = self.cls_head(feature)
        return [logit]
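

# A minimal smoke-test sketch; `backbone_pretrained=False` is used here only
# so the example runs without downloading weights. The classification head
# upsamples the stride-4 decoder output by 4x, so the logits match the input
# resolution.
if __name__ == '__main__':
    import paddle

    model = FarSeg(in_channels=3, num_classes=16, backbone_pretrained=False)
    model.eval()
    logits = model(paddle.randn([1, 3, 512, 512]))
    print(logits[0].shape)  # [1, 16, 512, 512]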