# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import traceback
import random

try:
    from collections.abc import Sequence
except Exception:
    from collections import Sequence

import numpy as np
from paddle.fluid.dataloader.collate import default_collate_fn

from .operators import Transform, Resize, ResizeByShort, _Permute, interp_dict
from .box_utils import jaccard_overlap
from paddlers.utils import logging


class BatchCompose(Transform):
    """
    Apply a series of batch transforms to a list of samples and collate the
    results into batch data.

    Args:
        batch_transforms (list[Transform] | None, optional): Batch transforms
            to apply in order. Defaults to None.
        collate_batch (bool, optional): Whether to stack the samples into
            arrays with `default_collate_fn`. Defaults to True.
    """

    def __init__(self, batch_transforms=None, collate_batch=True):
        super(BatchCompose, self).__init__()
        self.batch_transforms = batch_transforms
        self.collate_batch = collate_batch

    def __call__(self, samples):
        if self.batch_transforms is not None:
            for op in self.batch_transforms:
                try:
                    samples = op(samples)
                except Exception as e:
                    stack_info = traceback.format_exc()
                    logging.warning("Failed to apply batch transform [{}] "
                                    "with error: {} and stack:\n{}".format(
                                        op, e, str(stack_info)))
                    raise e

        samples = _Permute()(samples)

        # These keys are only needed by sample-level transforms and are
        # dropped before collation.
        extra_key = ['h', 'w', 'flipped']
        for k in extra_key:
            for sample in samples:
                if k in sample:
                    sample.pop(k)

        if self.collate_batch:
            batch_data = default_collate_fn(samples)
        else:
            batch_data = {}
            for k in samples[0].keys():
                tmp_data = []
                for i in range(len(samples)):
                    tmp_data.append(samples[i][k])
                if 'gt_' not in k and 'is_crowd' not in k and 'difficult' not in k:
                    # This assumes that all elements in tmp_data have the same type.
                    if len(tmp_data) == 0 or not isinstance(tmp_data[0],
                                                            (str, bytes)):
                        tmp_data = np.stack(tmp_data, axis=0)
                batch_data[k] = tmp_data
        return batch_data
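

# Illustrative usage sketch for `BatchCompose` (not part of the library code;
# `samples` is a hypothetical list of decoded sample dicts, each holding an
# 'image' array plus detection annotations):
#
#     batch_compose = BatchCompose(
#         batch_transforms=[_BatchPad(pad_to_stride=32)], collate_batch=True)
#     batch = batch_compose(samples)
#     # With collate_batch=True, batch['image'] stacks the per-sample images
#     # into a single array; annotation keys are stacked or kept as lists
#     # depending on collate_batch.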


class BatchRandomResize(Transform):
    """
    Resize a batch of inputs to a randomly chosen size.

    Attention: If `interp` is 'RANDOM', the interpolation method will be
        chosen randomly.

    Args:
        target_sizes (list[int] | list[list|tuple] | tuple[list|tuple]):
            Multiple target sizes, each of which should be an int or a
            list/tuple of length 2.
        interp (str, optional): Interpolation method for resizing image(s). One of
            {'NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4', 'RANDOM'}.
            Defaults to 'NEAREST'.

    Raises:
        TypeError: Invalid type of `target_sizes`.
        ValueError: Invalid interpolation method.

    See Also:
        RandomResize: Resize input to random sizes.
    """

    def __init__(self, target_sizes, interp='NEAREST'):
        super(BatchRandomResize, self).__init__()
        if not (interp == "RANDOM" or interp in interp_dict):
            raise ValueError("interp should be one of {}".format(
                interp_dict.keys()))
        self.interp = interp
        assert isinstance(target_sizes, list), \
            "target_sizes must be a list."
        for i, item in enumerate(target_sizes):
            if isinstance(item, int):
                target_sizes[i] = (item, item)
        self.target_size = target_sizes

    def __call__(self, samples):
        # Pick one target size at random and resize the whole batch with it,
        # so that every sample in the batch shares the same spatial shape.
        height, width = random.choice(self.target_size)
        resizer = Resize((height, width), interp=self.interp)
        samples = resizer(samples)
        return samples
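

# Illustrative usage sketch for `BatchRandomResize` (not part of the library
# code). Each call picks one size from `target_sizes` for the whole batch:
#
#     batch_resize = BatchRandomResize(
#         target_sizes=[320, 352, 384, 416, 448], interp='RANDOM')
#     samples = batch_resize(samples)  # all samples get the same chosen size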


class BatchRandomResizeByShort(Transform):
    """
    Resize a batch of inputs to a randomly chosen size while keeping the
    aspect ratio.

    Attention: If `interp` is 'RANDOM', the interpolation method will be
        chosen randomly.

    Args:
        short_sizes (list[int] | tuple[int]): Candidate target sizes of the
            shorter side of the image(s).
        max_size (int, optional): Upper bound of the longer side of the image(s).
            If `max_size` is -1, no upper bound will be applied. Defaults to -1.
        interp (str, optional): Interpolation method for resizing image(s). One of
            {'NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4', 'RANDOM'}.
            Defaults to 'NEAREST'.

    Raises:
        TypeError: Invalid type of `short_sizes`.
        ValueError: Invalid interpolation method.

    See Also:
        RandomResizeByShort: Resize input to random sizes while keeping the
            aspect ratio.
    """

    def __init__(self, short_sizes, max_size=-1, interp='NEAREST'):
        super(BatchRandomResizeByShort, self).__init__()
        if not (interp == "RANDOM" or interp in interp_dict):
            raise ValueError("interp should be one of {}".format(
                interp_dict.keys()))
        self.interp = interp
        assert isinstance(short_sizes, list), \
            "short_sizes must be a list."
        self.short_sizes = short_sizes
        self.max_size = max_size

    def __call__(self, samples):
        # Pick one target short-side length at random and resize the whole
        # batch with it.
        short_size = random.choice(self.short_sizes)
        resizer = ResizeByShort(
            short_size=short_size, max_size=self.max_size, interp=self.interp)
        samples = resizer(samples)
        return samples
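

# Illustrative usage sketch for `BatchRandomResizeByShort` (not part of the
# library code). The shorter side is resized to a randomly chosen value while
# the longer side is capped at `max_size`:
#
#     batch_resize_short = BatchRandomResizeByShort(
#         short_sizes=[640, 672, 704, 736, 768], max_size=1333, interp='CUBIC')
#     samples = batch_resize_short(samples)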


class _BatchPad(Transform):
    """
    Pad all images in a batch to the same (maximum) spatial size, optionally
    rounded up to a multiple of `pad_to_stride`.
    """

    def __init__(self, pad_to_stride=0):
        super(_BatchPad, self).__init__()
        self.pad_to_stride = pad_to_stride

    def __call__(self, samples):
        coarsest_stride = self.pad_to_stride
        # Maximum (h, w, c) over the batch.
        max_shape = np.array([data['image'].shape for data in samples]).max(
            axis=0)
        if coarsest_stride > 0:
            # Round height and width up to a multiple of the stride.
            max_shape[0] = int(
                np.ceil(max_shape[0] / coarsest_stride) * coarsest_stride)
            max_shape[1] = int(
                np.ceil(max_shape[1] / coarsest_stride) * coarsest_stride)
        for data in samples:
            im = data['image']
            im_h, im_w, im_c = im.shape[:]
            # Zero-pad each image to the common shape on the bottom and right.
            padding_im = np.zeros(
                (max_shape[0], max_shape[1], im_c), dtype=np.float32)
            padding_im[:im_h, :im_w, :] = im
            data['image'] = padding_im
        return samples
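

# Worked example for `_BatchPad` (illustrative only): with pad_to_stride=32
# and batch image shapes (500, 375, 3) and (480, 640, 3), the per-axis maximum
# is (500, 640, 3), which rounds up to (512, 640, 3); both images are then
# zero-padded to that shape on the bottom and right.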


class _Gt2YoloTarget(Transform):
    """
    Generate YOLOv3 targets from ground truth data. This operator is only used
    in the fine-grained YOLOv3 loss mode.
    """

    def __init__(self,
                 anchors,
                 anchor_masks,
                 downsample_ratios,
                 num_classes=80,
                 iou_thresh=1.):
        super(_Gt2YoloTarget, self).__init__()
        self.anchors = anchors
        self.anchor_masks = anchor_masks
        self.downsample_ratios = downsample_ratios
        self.num_classes = num_classes
        self.iou_thresh = iou_thresh

    def __call__(self, samples, context=None):
        assert len(self.anchor_masks) == len(self.downsample_ratios), \
            "'anchor_masks' and 'downsample_ratios' should have the same length."

        h, w = samples[0]['image'].shape[:2]
        an_hw = np.array(self.anchors) / np.array([[w, h]])
        for sample in samples:
            gt_bbox = sample['gt_bbox']
            gt_class = sample['gt_class']
            if 'gt_score' not in sample:
                sample['gt_score'] = np.ones(
                    (gt_bbox.shape[0], 1), dtype=np.float32)
            gt_score = sample['gt_score']
            for i, (
                    mask, downsample_ratio
            ) in enumerate(zip(self.anchor_masks, self.downsample_ratios)):
                grid_h = int(h / downsample_ratio)
                grid_w = int(w / downsample_ratio)
                # Per-anchor target layout along axis 1:
                # [x, y, w, h, scale, score, one-hot class scores].
                target = np.zeros(
                    (len(mask), 6 + self.num_classes, grid_h, grid_w),
                    dtype=np.float32)
                for b in range(gt_bbox.shape[0]):
                    gx, gy, gw, gh = gt_bbox[b, :]
                    cls = gt_class[b]
                    score = gt_score[b]
                    if gw <= 0. or gh <= 0. or score <= 0.:
                        continue

                    # Find the best matched anchor index.
                    best_iou = 0.
                    best_idx = -1
                    for an_idx in range(an_hw.shape[0]):
                        iou = jaccard_overlap(
                            [0., 0., gw, gh],
                            [0., 0., an_hw[an_idx, 0], an_hw[an_idx, 1]])
                        if iou > best_iou:
                            best_iou = iou
                            best_idx = an_idx

                    gi = int(gx * grid_w)
                    gj = int(gy * grid_h)

                    # The gt box should be regressed in this layer if the best
                    # matched anchor index is in the anchor mask of this layer.
                    if best_idx in mask:
                        best_n = mask.index(best_idx)

                        # x, y, w, h, scale
                        target[best_n, 0, gj, gi] = gx * grid_w - gi
                        target[best_n, 1, gj, gi] = gy * grid_h - gj
                        target[best_n, 2, gj, gi] = np.log(
                            gw * w / self.anchors[best_idx][0])
                        target[best_n, 3, gj, gi] = np.log(
                            gh * h / self.anchors[best_idx][1])
                        target[best_n, 4, gj, gi] = 2.0 - gw * gh

                        # Record gt_score
                        target[best_n, 5, gj, gi] = score

                        # Do classification
                        target[best_n, 6 + cls, gj, gi] = 1.

                    # For non-matched anchors, calculate the target if the IoU
                    # between the anchor and the gt box is larger than
                    # iou_thresh.
                    if self.iou_thresh < 1:
                        for idx, mask_i in enumerate(mask):
                            if mask_i == best_idx:
                                continue
                            iou = jaccard_overlap(
                                [0., 0., gw, gh],
                                [0., 0., an_hw[mask_i, 0], an_hw[mask_i, 1]])
                            if iou > self.iou_thresh and target[idx, 5, gj,
                                                                gi] == 0.:
                                # x, y, w, h, scale
                                target[idx, 0, gj, gi] = gx * grid_w - gi
                                target[idx, 1, gj, gi] = gy * grid_h - gj
                                target[idx, 2, gj, gi] = np.log(
                                    gw * w / self.anchors[mask_i][0])
                                target[idx, 3, gj, gi] = np.log(
                                    gh * h / self.anchors[mask_i][1])
                                target[idx, 4, gj, gi] = 2.0 - gw * gh

                                # Record gt_score
                                target[idx, 5, gj, gi] = score

                                # Do classification
                                target[idx, 6 + cls, gj, gi] = 1.
                sample['target{}'.format(i)] = target

            # Remove the now-useless gt_class and gt_score items after the
            # targets have been computed.
            sample.pop('gt_class')
            sample.pop('gt_score')
        return samples
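

# Illustrative sketch of the per-level targets produced by `_Gt2YoloTarget`
# (not part of the library code; the anchors, masks, and downsample ratios
# below follow the common YOLOv3 COCO configuration and are assumptions here,
# and gt boxes are expected in normalized center-x/center-y/w/h format):
#
#     gt2yolo = _Gt2YoloTarget(
#         anchors=[[10, 13], [16, 30], [33, 23], [30, 61], [62, 45],
#                  [59, 119], [116, 90], [156, 198], [373, 326]],
#         anchor_masks=[[6, 7, 8], [3, 4, 5], [0, 1, 2]],
#         downsample_ratios=[32, 16, 8],
#         num_classes=80)
#     samples = gt2yolo(samples)
#     # Each sample now holds 'target0', 'target1', 'target2', one per level,
#     # with shape (len(mask), 6 + num_classes, grid_h, grid_w), where the
#     # second axis is [tx, ty, tw, th, scale, score, one-hot classes].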