You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
479 lines
17 KiB
479 lines
17 KiB
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
|
# |
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
|
# you may not use this file except in compliance with the License. |
|
# You may obtain a copy of the License at |
|
# |
|
# http://www.apache.org/licenses/LICENSE-2.0 |
|
# |
|
# Unless required by applicable law or agreed to in writing, software |
|
# distributed under the License is distributed on an "AS IS" BASIS, |
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
# See the License for the specific language governing permissions and |
|
# limitations under the License. |
|
|
|
from __future__ import absolute_import |
|
from __future__ import print_function |
|
from __future__ import division |
|
|
|
try: |
|
from collections.abc import Sequence |
|
except Exception: |
|
from collections import Sequence |
|
|
|
from numbers import Number, Integral |
|
|
|
import cv2 |
|
import numpy as np |
|
import math |
|
import copy |
|
|
|
from .operators import register_op, BaseOperator |
|
from paddlers.models.ppdet.modeling.rbox_utils import poly2rbox_le135_np, poly2rbox_oc_np, rbox2poly_np |
|
from paddlers.models.ppdet.utils.logger import setup_logger |
|
logger = setup_logger(__name__) |
|
|
|
|
|
@register_op
class RRotate(BaseOperator):
    """Rotate an image together with its polygon and box annotations.

    Args:
        scale (float): scale factor used when the rotation matrix is first
            built with ``cv2.getRotationMatrix2D``.
        angle (float): rotation angle; it is negated before being passed to
            ``cv2.getRotationMatrix2D``.
        fill_value (int, tuple): ``borderValue`` given to ``cv2.warpAffine``.
        auto_bound (bool): if True the output keeps the input size and the
            matrix is rebuilt with a shrink ratio so the rotated extent fits;
            if False the output canvas grows to the rotated extent.
    """

    def __init__(self, scale=1.0, angle=0., fill_value=0., auto_bound=True):
        super(RRotate, self).__init__()
        self.scale, self.angle = scale, angle
        self.fill_value, self.auto_bound = fill_value, auto_bound

    def get_rotated_matrix(self, angle, scale, h, w):
        """Compute the 2x3 affine matrix and the output image size.

        Args:
            angle (float): rotation angle (negated internally).
            scale (float): scale used for the initial matrix.
            h (int): input image height.
            w (int): input image width.

        Returns:
            tuple: ``(matrix, out_h, out_w)``.
        """
        pivot = ((w - 1) * 0.5, (h - 1) * 0.5)
        affine = cv2.getRotationMatrix2D(pivot, -angle, scale)

        # Extent of the rotated (and scaled) image, before rounding.
        abs_cos, abs_sin = np.abs(affine[0, 0]), np.abs(affine[0, 1])
        full_w = h * abs_sin + w * abs_cos
        full_h = h * abs_cos + w * abs_sin
        out_w, out_h = int(np.round(full_w)), int(np.round(full_h))

        if not self.auto_bound:
            # Enlarged canvas: shift the content by half the size growth.
            affine[0, 2] += (full_w - w) * 0.5
            affine[1, 2] += (full_h - h) * 0.5
            return affine, out_h, out_w

        # Same canvas size: rebuild the matrix with the ratio that makes the
        # rotated extent fit. Note that ``scale`` is not passed again here.
        shrink = min(w / out_w, h / out_h)
        return cv2.getRotationMatrix2D(pivot, -angle, shrink), h, w

    def get_rect_from_pts(self, pts, h, w):
        """Return the axis-aligned bounding boxes of point sets.

        Each row of ``pts`` is read as interleaved ``x, y`` values. The
        resulting boxes are clipped to ``[0, w]`` / ``[0, h]`` and returned as
        rows of ``[min_x, min_y, max_x, max_y]``.
        """
        assert pts.shape[-1] % 2 == 0, 'the dim of input [pts] is not correct'
        xs, ys = pts[:, 0::2], pts[:, 1::2]
        left = np.clip(np.min(xs, axis=1), 0, w)
        top = np.clip(np.min(ys, axis=1), 0, h)
        right = np.clip(np.max(xs, axis=1), 0, w)
        bottom = np.clip(np.max(ys, axis=1), 0, h)
        return np.stack([left, top, right, bottom], axis=-1)

    def apply_image(self, image, matrix, h, w):
        """Warp ``image`` with ``matrix`` into a ``(w, h)`` sized output."""
        out_size = (w, h)
        return cv2.warpAffine(
            image, matrix, out_size, borderValue=self.fill_value)

    def apply_pts(self, pts, matrix, h, w):
        """Apply the affine ``matrix`` to rows of interleaved ``x, y`` points.

        ``h`` and ``w`` are accepted but not used by this method.
        """
        assert pts.shape[-1] % 2 == 0, 'the dim of input [pts] is not correct'
        # The second dimension holds two values (x, y) per point.
        _, m = pts.shape
        xy = pts.reshape(-1, 2).T
        # Append a row of ones to get homogeneous coordinates.
        ones = np.ones((1, xy.shape[1]), pts.dtype)
        homogeneous = np.concatenate((xy, ones), axis=0)
        moved = np.matmul(matrix, homogeneous)
        return moved[:2, :].T.reshape(-1, m)

    def apply(self, sample, context=None):
        """Rotate ``sample['image']`` and, if present, its polygons.

        Reads ``sample['image']`` and ``sample['gt_poly']``. When
        ``gt_poly`` is non-empty, ``gt_poly`` is replaced by the transformed
        points and ``gt_bbox`` by their clipped bounding rectangles.
        """
        image = sample['image']
        src_h, src_w = image.shape[:2]
        matrix, h, w = self.get_rotated_matrix(self.angle, self.scale, src_h,
                                               src_w)
        sample['image'] = self.apply_image(image, matrix, h, w)

        polys = sample['gt_poly']
        # TODO: segment or keypoint to be processed
        if len(polys) == 0:
            return sample

        rotated = self.apply_pts(polys, matrix, h, w)
        sample['gt_poly'] = rotated
        sample['gt_bbox'] = self.get_rect_from_pts(rotated, h, w)
        return sample
|
|
|
|
|
@register_op
class RandomRRotate(BaseOperator):
    """Rotate a sample with a randomly chosen angle and scale.

    Args:
        scale (float, tuple, list): rotate scale, interpreted per
            ``scale_mode``.
        scale_mode (str): ``'range'`` draws uniformly between the two values
            of ``scale``; ``'value'`` picks one entry of ``scale``; a falsy
            value uses ``scale`` unchanged.
        angle (float, tuple, list): rotate angle, interpreted per
            ``angle_mode``.
        angle_mode (str): same choices as ``scale_mode``, applied to
            ``angle``.
        fill_value (float, tuple, list): fill value forwarded to ``RRotate``.
        rotate_prob (float): probability of rotation.
        auto_bound (bool): forwarded to ``RRotate``.
    """

    def __init__(self,
                 scale=1.0,
                 scale_mode=None,
                 angle=0.,
                 angle_mode=None,
                 fill_value=0.,
                 rotate_prob=1.0,
                 auto_bound=True):
        super(RandomRRotate, self).__init__()
        self.scale, self.scale_mode = scale, scale_mode
        self.angle, self.angle_mode = angle, angle_mode
        self.fill_value = fill_value
        self.rotate_prob = rotate_prob
        self.auto_bound = auto_bound

    def get_angle(self, angle, angle_mode):
        """Resolve the rotation angle according to ``angle_mode``."""
        assert (not angle_mode) or angle_mode in (
            'range', 'value'), 'angle mode should be in [range, value, None]'
        if not angle_mode:
            return angle
        if angle_mode == 'range':
            lower, upper = angle
            return np.random.rand() * (upper - lower) + lower
        if angle_mode == 'value':
            return np.random.choice(angle)
        return None

    def get_scale(self, scale, scale_mode):
        """Resolve the rotation scale according to ``scale_mode``."""
        assert (not scale_mode) or scale_mode in (
            'range', 'value'), 'scale mode should be in [range, value, None]'
        if not scale_mode:
            return scale
        if scale_mode == 'range':
            lower, upper = scale
            return np.random.rand() * (upper - lower) + lower
        if scale_mode == 'value':
            return np.random.choice(scale)
        return None

    def apply(self, sample, context=None):
        """Rotate ``sample`` unless the random draw exceeds ``rotate_prob``."""
        skip = np.random.rand() > self.rotate_prob
        if skip:
            return sample

        # The angle is drawn before the scale.
        chosen_angle = self.get_angle(self.angle, self.angle_mode)
        chosen_scale = self.get_scale(self.scale, self.scale_mode)
        return RRotate(chosen_scale, chosen_angle, self.fill_value,
                       self.auto_bound)(sample)
|
|
|
|
|
@register_op
class Poly2RBox(BaseOperator):
    """ Polygon to Rotated Box, using new OpenCV definition since 4.5.1

    Args:
        filter_threshold (int, float): threshold to filter annotations
        filter_mode (str): ``'area'`` drops boxes whose ``w * h`` is below the
            threshold, ``'edge'`` drops boxes whose shorter side is below it;
            any other value disables filtering.
        rbox_type (str): ``'le135'`` selects ``poly2rbox_le135_np``; any other
            value selects ``poly2rbox_oc_np``.
    """

    def __init__(self, filter_threshold=4, filter_mode=None, rbox_type='le135'):
        super(Poly2RBox, self).__init__()
        # Threshold and mode are captured by the closure, not stored on self.
        self.filter_fn = lambda wh: self.filter(wh, filter_threshold, filter_mode)
        if rbox_type == 'le135':
            self.rbox_fn = poly2rbox_le135_np
        else:
            self.rbox_fn = poly2rbox_oc_np

    def filter(self, size, threshold, mode):
        """Return True when a box of ``size`` (w, h) should be discarded."""
        if mode == 'area' and size[0] * size[1] < threshold:
            return True
        if mode == 'edge' and min(size) < threshold:
            return True
        return False

    def get_rbox(self, polys):
        """Convert polygons to rotated boxes and axis-aligned boxes.

        Returns:
            tuple: ``(rboxes, bboxes, valid_ids)`` where ``rboxes`` has five
            columns, ``bboxes`` has four, and ``valid_ids`` lists the indices
            of the polygons that were kept.
        """
        kept, rotated, aligned = [], [], []
        for idx, poly in enumerate(polys):
            cx, cy, w, h, angle = self.rbox_fn(poly)
            if self.filter_fn((w, h)):
                continue
            kept.append(idx)
            rotated.append(np.array([cx, cy, w, h, angle], dtype=np.float32))
            xs, ys = poly[0::2], poly[1::2]
            aligned.append(
                np.array(
                    [min(xs), min(ys), max(xs), max(ys)], dtype=np.float32))

        if kept:
            rboxes, bboxes = np.stack(rotated), np.stack(aligned)
        else:
            # Keep a well-defined shape even when nothing survives.
            rboxes = np.zeros((0, 5), dtype=np.float32)
            bboxes = np.zeros((0, 4), dtype=np.float32)

        return rboxes, bboxes, kept

    def apply(self, sample, context=None):
        """Write ``gt_rbox`` / ``gt_bbox`` and drop the filtered annotations."""
        rboxes, bboxes, valid_ids = self.get_rbox(sample['gt_poly'])
        sample['gt_rbox'] = rboxes
        sample['gt_bbox'] = bboxes

        # Keep the per-annotation fields aligned with the surviving boxes.
        per_box_keys = ('gt_class', 'gt_score', 'gt_poly', 'is_crowd',
                        'difficult')
        for key in per_box_keys:
            if key not in sample:
                continue
            sample[key] = sample[key][valid_ids]

        return sample
|
|
|
|
|
@register_op
class Poly2Array(BaseOperator):
    """Convert ``gt_poly`` to a float32 ``np.array`` with 8 values per row."""

    def __init__(self):
        super(Poly2Array, self).__init__()

    def apply(self, sample, context=None):
        """Replace ``sample['gt_poly']`` by its ``(-1, 8)`` float32 array.

        Samples without a ``gt_poly`` entry are returned untouched.
        """
        if 'gt_poly' not in sample:
            return sample

        polys = np.array(sample['gt_poly'], dtype=np.float32)
        sample['gt_poly'] = polys.reshape((-1, 8))
        return sample
|
|
|
|
|
@register_op
class RResize(BaseOperator):
    """Resize an image and rescale its box / polygon annotations.

    With ``keep_ratio`` enabled a single scale factor is used for both axes:
    the smaller of ``min(target_size) / short_side`` and
    ``max(target_size) / long_side``. Otherwise the image is resized to
    ``target_size`` interpreted as ``(h, w)``.
    """

    def __init__(self, target_size, keep_ratio, interp=cv2.INTER_LINEAR):
        """
        Args:
            target_size (int|list|tuple): image target size; an int is
                expanded to ``[target_size, target_size]``.
            keep_ratio (bool): whether to keep the aspect ratio. This argument
                is required; it has no default value.
            interp (int): interpolation flag passed to ``cv2.resize``.

        Raises:
            TypeError: if ``target_size`` is neither an integer nor a
                sequence.
        """
        super(RResize, self).__init__()
        self.keep_ratio = keep_ratio
        self.interp = interp
        if not isinstance(target_size, (Integral, Sequence)):
            raise TypeError(
                "Type of target_size is invalid. Must be Integer or List or Tuple, now is {}".
                format(type(target_size)))
        if isinstance(target_size, Integral):
            target_size = [target_size, target_size]
        self.target_size = target_size

    def apply_image(self, image, scale):
        """Resize ``image`` by the ``(scale_x, scale_y)`` factors."""
        im_scale_x, im_scale_y = scale

        return cv2.resize(
            image,
            None,
            None,
            fx=im_scale_x,
            fy=im_scale_y,
            interpolation=self.interp)

    def apply_pts(self, pts, scale, size):
        """Scale interleaved ``x, y`` columns and clip them to ``size``.

        ``pts`` is modified in place and also returned.
        NOTE(review): the in-place multiply presumably expects a float array;
        confirm against callers.

        Args:
            pts (np.ndarray): rows of interleaved ``x, y`` values.
            scale (sequence): ``(scale_x, scale_y)``.
            size (sequence): ``(resize_w, resize_h)`` clipping bounds.
        """
        im_scale_x, im_scale_y = scale
        resize_w, resize_h = size
        pts[:, 0::2] *= im_scale_x
        pts[:, 1::2] *= im_scale_y
        pts[:, 0::2] = np.clip(pts[:, 0::2], 0, resize_w)
        pts[:, 1::2] = np.clip(pts[:, 1::2], 0, resize_h)
        return pts

    def apply(self, sample, context=None):
        """Resize the image and update the related sample fields.

        Writes ``image`` (as float32), ``im_shape`` and ``scale_factor``
        (multiplied into an existing ``scale_factor`` when one is present),
        and rescales ``gt_bbox`` / ``gt_poly`` when they are present and
        non-empty.

        Raises:
            TypeError: if ``sample['image']`` is not a ``np.ndarray``.
            ValueError: if the image is not 3-dimensional.
        """
        im = sample['image']
        if not isinstance(im, np.ndarray):
            raise TypeError("{}: image type is not numpy.".format(self))
        if len(im.shape) != 3:
            # ``ImageError`` is neither defined nor imported in this module,
            # so raising it produced a NameError instead of a useful message.
            raise ValueError('{}: image is not 3-dimensional.'.format(self))

        # apply image
        im_shape = im.shape
        if self.keep_ratio:
            im_size_min = np.min(im_shape[0:2])
            im_size_max = np.max(im_shape[0:2])

            target_size_min = np.min(self.target_size)
            target_size_max = np.max(self.target_size)

            # Largest uniform scale that respects both target bounds.
            im_scale = min(target_size_min / im_size_min,
                           target_size_max / im_size_max)

            resize_h = im_scale * float(im_shape[0])
            resize_w = im_scale * float(im_shape[1])

            im_scale_x = im_scale
            im_scale_y = im_scale
        else:
            resize_h, resize_w = self.target_size
            im_scale_y = resize_h / im_shape[0]
            im_scale_x = resize_w / im_shape[1]

        im = self.apply_image(im, [im_scale_x, im_scale_y])
        sample['image'] = im.astype(np.float32)
        sample['im_shape'] = np.asarray([resize_h, resize_w], dtype=np.float32)
        if 'scale_factor' in sample:
            scale_factor = sample['scale_factor']
            sample['scale_factor'] = np.asarray(
                [scale_factor[0] * im_scale_y, scale_factor[1] * im_scale_x],
                dtype=np.float32)
        else:
            sample['scale_factor'] = np.asarray(
                [im_scale_y, im_scale_x], dtype=np.float32)

        # apply bbox
        if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
            sample['gt_bbox'] = self.apply_pts(sample['gt_bbox'],
                                               [im_scale_x, im_scale_y],
                                               [resize_w, resize_h])

        # apply polygon
        if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
            sample['gt_poly'] = self.apply_pts(sample['gt_poly'],
                                               [im_scale_x, im_scale_y],
                                               [resize_w, resize_h])

        return sample
|
|
|
|
|
@register_op
class RandomRFlip(BaseOperator):
    """Randomly mirror an image left-to-right with its boxes and polygons."""

    def __init__(self, prob=0.5):
        """
        Args:
            prob (float): the probability of flipping image; must be a
                ``float`` instance.

        Raises:
            TypeError: if ``prob`` is not a ``float``.
        """
        super(RandomRFlip, self).__init__()
        self.prob = prob
        if not isinstance(prob, float):
            raise TypeError("{}: input type is invalid.".format(self))

    def apply_image(self, image):
        """Return ``image`` with its second axis reversed."""
        return image[:, ::-1, :]

    def apply_pts(self, pts, width):
        """Mirror the x columns of ``pts`` in place and return ``pts``."""
        pts[:, 0::2] = width - pts[:, 0::2] - 1
        return pts

    def apply(self, sample, context=None):
        """Flip the image and its annotations with probability ``prob``.

        When the flip happens:
            1. the image is mirrored horizontally;
            2. the x coordinates of ``gt_bbox`` and ``gt_poly`` (when present
               and non-empty) are replaced by ``width - x - 1``;
            3. ``sample['flipped']`` is set to True.

        Output:
            sample: the same dict, flipped or left unchanged.
        """
        do_flip = np.random.uniform(0, 1) < self.prob
        if not do_flip:
            return sample

        _, width = sample['image'].shape[:2]
        flipped_image = self.apply_image(sample['image'])

        for key in ('gt_bbox', 'gt_poly'):
            if key in sample and len(sample[key]) > 0:
                sample[key] = self.apply_pts(sample[key], width)

        sample['flipped'] = True
        sample['image'] = flipped_image
        return sample
|
|
|
|
|
@register_op
class VisibleRBox(BaseOperator):
    """
    In debug mode, visualize images according to `gt_poly`.
    (Currently only supported when not cropping and flipping image.)

    Each sample is rendered with its polygons, class labels and, when
    present, keypoints, then saved as a JPEG in ``output_dir``.

    Args:
        output_dir (str): directory the rendered images are written to; it is
            created when it does not exist.
        is_normalized (bool): when True, ``gt_keypoint`` values are multiplied
            by ``sample['w']`` / ``sample['h']`` before drawing.
    """

    def __init__(self, output_dir='debug', is_normalized=False):
        # ``os`` is not imported at module level in this file.
        import os

        super(VisibleRBox, self).__init__()
        self.output_dir = output_dir
        # ``apply`` reads this attribute; it was previously never set.
        self.is_normalized = is_normalized
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)

    def _text_size(self, draw, text):
        """Return ``(width, height)`` of ``text`` for the given drawer."""
        # ``textsize`` is missing from newer Pillow releases; prefer
        # ``textbbox`` whenever the drawer provides it.
        if hasattr(draw, 'textbbox'):
            left, top, right, bottom = draw.textbbox((0, 0), text)
            return right - left, bottom - top
        return draw.textsize(text)

    def apply(self, sample, context=None):
        """Draw the annotations of ``sample`` and save the image.

        Reads ``image``, ``im_id``, ``gt_poly`` and ``gt_class``, plus
        ``gt_keypoint`` when present (and ``w`` / ``h`` when keypoints are
        normalized). The sample is returned unmodified.
        """
        # Neither ``os`` nor the PIL names are imported at module level.
        import os
        from PIL import Image, ImageDraw

        image = Image.fromarray(sample['image'].astype(np.uint8))
        out_file_name = '{:012d}.jpg'.format(sample['im_id'][0])
        gt_poly = sample['gt_poly']
        gt_class = sample['gt_class']
        draw = ImageDraw.Draw(image)
        for i in range(gt_poly.shape[0]):
            x1, y1, x2, y2, x3, y3, x4, y4 = (float(v) for v in gt_poly[i])
            draw.line(
                [(x1, y1), (x2, y2), (x3, y3), (x4, y4), (x1, y1)],
                width=2,
                fill='green')
            # draw label
            xmin = min(x1, x2, x3, x4)
            ymin = min(y1, y2, y3, y4)
            text = str(gt_class[i][0])
            tw, th = self._text_size(draw, text)
            draw.rectangle(
                [(xmin + 1, ymin - th), (xmin + tw + 1, ymin)], fill='green')
            draw.text((xmin + 1, ymin - th), text, fill=(255, 255, 255))

        if 'gt_keypoint' in sample:
            # Copy so that scaling for display never alters the sample.
            gt_keypoint = np.array(sample['gt_keypoint'])
            if self.is_normalized:
                # Even columns are x (scaled by width), odd columns are y.
                gt_keypoint[:, 0::2] = gt_keypoint[:, 0::2] * sample['w']
                gt_keypoint[:, 1::2] = gt_keypoint[:, 1::2] * sample['h']
            for keypoint in gt_keypoint:
                for j in range(keypoint.shape[0] // 2):
                    # Built-in round() on a NumPy scalar returns a Python int,
                    # which has no ``astype``; convert explicitly instead.
                    kx = int(round(float(keypoint[2 * j])))
                    ky = int(round(float(keypoint[2 * j + 1])))
                    draw.ellipse(
                        (kx, ky, kx + 5, ky + 5),
                        fill='green',
                        outline='green')

        save_path = os.path.join(self.output_dir, out_file_name)
        image.save(save_path, quality=95)
        return sample
|
|
|
|
|
@register_op
class Rbox2Poly(BaseOperator):
    """
    Convert rbbox format to poly format.

    Reads ``sample['gt_rbox']`` (five values per row), writes the polygons
    returned by ``rbox2poly_np`` to ``sample['gt_poly']`` and their
    axis-aligned bounding boxes to ``sample['gt_bbox']``.
    """

    def __init__(self):
        super(Rbox2Poly, self).__init__()

    def apply(self, sample, context=None):
        """Fill ``gt_poly`` and ``gt_bbox`` from ``gt_rbox``.

        Returns:
            dict: the same ``sample`` with ``gt_poly`` and ``gt_bbox`` set;
            ``gt_bbox`` rows are ``[xmin, ymin, xmax, ymax]``.
        """
        assert 'gt_rbox' in sample
        assert sample['gt_rbox'].shape[1] == 5
        rboxes = sample['gt_rbox']
        polys = rbox2poly_np(rboxes)
        sample['gt_poly'] = polys
        xmin, ymin = polys[:, 0::2].min(1), polys[:, 1::2].min(1)
        xmax, ymax = polys[:, 0::2].max(1), polys[:, 1::2].max(1)
        # The maxima must form the second corner; repeating the minima
        # collapsed every box to zero size.
        sample['gt_bbox'] = np.stack([xmin, ymin, xmax, ymax], axis=1)
        return sample
|
|
|