You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1271 lines
45 KiB
1271 lines
45 KiB
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. |
|
# |
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
|
# you may not use this file except in compliance with the License. |
|
# You may obtain a copy of the License at |
|
# |
|
# http://www.apache.org/licenses/LICENSE-2.0 |
|
# |
|
# Unless required by applicable law or agreed to in writing, software |
|
# distributed under the License is distributed on an "AS IS" BASIS, |
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
# See the License for the specific language governing permissions and |
|
# limitations under the License. |
|
|
|
import random |
|
import math |
|
|
|
import cv2 |
|
import numpy as np |
|
from PIL import Image |
|
|
|
from paddlers.models.ppseg.cvlibs import manager |
|
from paddlers.models.ppseg.transforms import functional |
|
|
|
|
|
@manager.TRANSFORMS.add_component |
|
class Compose: |
|
""" |
|
Do transformation on input data with corresponding pre-processing and augmentation operations. |
|
The shape of input data to all operations is [height, width, channels]. |
|
|
|
Args: |
|
transforms (list): A list contains data pre-processing or augmentation. Empty list means only reading images, no transformation. |
|
to_rgb (bool, optional): If converting image to RGB color space. Default: True. |
|
|
|
Raises: |
|
TypeError: When 'transforms' is not a list. |
|
ValueError: when the length of 'transforms' is less than 1. |
|
""" |
|
|
|
def __init__(self, transforms, to_rgb=True): |
|
if not isinstance(transforms, list): |
|
raise TypeError('The transforms must be a list!') |
|
self.transforms = transforms |
|
self.to_rgb = to_rgb |
|
|
|
def __call__(self, im, label=None): |
|
""" |
|
Args: |
|
im (str|np.ndarray): It is either image path or image object. |
|
label (str|np.ndarray): It is either label path or label ndarray. |
|
|
|
Returns: |
|
(tuple). A tuple including image, image info, and label after transformation. |
|
""" |
|
if isinstance(im, str): |
|
im = cv2.imread(im).astype('float32') |
|
if isinstance(label, str): |
|
label = np.asarray(Image.open(label)) |
|
if im is None: |
|
raise ValueError('Can\'t read The image file {}!'.format(im)) |
|
if self.to_rgb: |
|
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) |
|
|
|
for op in self.transforms: |
|
outputs = op(im, label) |
|
im = outputs[0] |
|
if len(outputs) == 2: |
|
label = outputs[1] |
|
im = np.transpose(im, (2, 0, 1)) |
|
return (im, label) |
|
|
|
|
|
@manager.TRANSFORMS.add_component |
|
class RandomHorizontalFlip: |
|
""" |
|
Flip an image horizontally with a certain probability. |
|
|
|
Args: |
|
prob (float, optional): A probability of horizontally flipping. Default: 0.5. |
|
""" |
|
|
|
def __init__(self, prob=0.5): |
|
self.prob = prob |
|
|
|
def __call__(self, im, label=None): |
|
if random.random() < self.prob: |
|
im = functional.horizontal_flip(im) |
|
if label is not None: |
|
label = functional.horizontal_flip(label) |
|
if label is None: |
|
return (im,) |
|
else: |
|
return (im, label) |
|
|
|
|
|
@manager.TRANSFORMS.add_component |
|
class RandomVerticalFlip: |
|
""" |
|
Flip an image vertically with a certain probability. |
|
|
|
Args: |
|
prob (float, optional): A probability of vertical flipping. Default: 0.1. |
|
""" |
|
|
|
def __init__(self, prob=0.1): |
|
self.prob = prob |
|
|
|
def __call__(self, im, label=None): |
|
if random.random() < self.prob: |
|
im = functional.vertical_flip(im) |
|
if label is not None: |
|
label = functional.vertical_flip(label) |
|
if label is None: |
|
return (im,) |
|
else: |
|
return (im, label) |
|
|
|
|
|
@manager.TRANSFORMS.add_component |
|
class Resize: |
|
""" |
|
Resize an image. |
|
|
|
Args: |
|
target_size (list|tuple, optional): The target size of image. Default: (512, 512). |
|
interp (str, optional): The interpolation mode of resize is consistent with opencv. |
|
['NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4', 'RANDOM']. Note that when it is |
|
'RANDOM', a random interpolation mode would be specified. Default: "LINEAR". |
|
|
|
Raises: |
|
TypeError: When 'target_size' type is neither list nor tuple. |
|
ValueError: When "interp" is out of pre-defined methods ('NEAREST', 'LINEAR', 'CUBIC', |
|
'AREA', 'LANCZOS4', 'RANDOM'). |
|
""" |
|
|
|
# The interpolation mode |
|
interp_dict = { |
|
'NEAREST': cv2.INTER_NEAREST, |
|
'LINEAR': cv2.INTER_LINEAR, |
|
'CUBIC': cv2.INTER_CUBIC, |
|
'AREA': cv2.INTER_AREA, |
|
'LANCZOS4': cv2.INTER_LANCZOS4 |
|
} |
|
|
|
def __init__(self, target_size=(512, 512), interp='LINEAR'): |
|
self.interp = interp |
|
if not (interp == "RANDOM" or interp in self.interp_dict): |
|
raise ValueError("`interp` should be one of {}".format( |
|
self.interp_dict.keys())) |
|
if isinstance(target_size, list) or isinstance(target_size, tuple): |
|
if len(target_size) != 2: |
|
raise ValueError( |
|
'`target_size` should include 2 elements, but it is {}'. |
|
format(target_size)) |
|
else: |
|
raise TypeError( |
|
"Type of `target_size` is invalid. It should be list or tuple, but it is {}" |
|
.format(type(target_size))) |
|
|
|
self.target_size = target_size |
|
|
|
def __call__(self, im, label=None): |
|
""" |
|
Args: |
|
im (np.ndarray): The Image data. |
|
label (np.ndarray, optional): The label data. Default: None. |
|
|
|
Returns: |
|
(tuple). When label is None, it returns (im, ), otherwise it returns (im, label), |
|
|
|
Raises: |
|
TypeError: When the 'img' type is not numpy. |
|
ValueError: When the length of "im" shape is not 3. |
|
""" |
|
|
|
if not isinstance(im, np.ndarray): |
|
raise TypeError("Resize: image type is not numpy.") |
|
if len(im.shape) != 3: |
|
raise ValueError('Resize: image is not 3-dimensional.') |
|
if self.interp == "RANDOM": |
|
interp = random.choice(list(self.interp_dict.keys())) |
|
else: |
|
interp = self.interp |
|
im = functional.resize(im, self.target_size, self.interp_dict[interp]) |
|
if label is not None: |
|
label = functional.resize(label, self.target_size, |
|
cv2.INTER_NEAREST) |
|
|
|
if label is None: |
|
return (im,) |
|
else: |
|
return (im, label) |
|
|
|
|
|
@manager.TRANSFORMS.add_component |
|
class ResizeByLong: |
|
""" |
|
Resize the long side of an image to given size, and then scale the other side proportionally. |
|
|
|
Args: |
|
long_size (int): The target size of long side. |
|
""" |
|
|
|
def __init__(self, long_size): |
|
self.long_size = long_size |
|
|
|
def __call__(self, im, label=None): |
|
""" |
|
Args: |
|
im (np.ndarray): The Image data. |
|
label (np.ndarray, optional): The label data. Default: None. |
|
|
|
Returns: |
|
(tuple). When label is None, it returns (im, ), otherwise it returns (im, label). |
|
""" |
|
|
|
im = functional.resize_long(im, self.long_size) |
|
if label is not None: |
|
label = functional.resize_long(label, self.long_size, |
|
cv2.INTER_NEAREST) |
|
|
|
if label is None: |
|
return (im,) |
|
else: |
|
return (im, label) |
|
|
|
|
|
@manager.TRANSFORMS.add_component |
|
class ResizeByShort: |
|
""" |
|
Resize the short side of an image to given size, and then scale the other side proportionally. |
|
|
|
Args: |
|
short_size (int): The target size of short side. |
|
""" |
|
|
|
def __init__(self, short_size): |
|
self.short_size = short_size |
|
|
|
def __call__(self, im, label=None): |
|
""" |
|
Args: |
|
im (np.ndarray): The Image data. |
|
label (np.ndarray, optional): The label data. Default: None. |
|
|
|
Returns: |
|
(tuple). When label is None, it returns (im, ), otherwise it returns (im, label). |
|
""" |
|
|
|
im = functional.resize_short(im, self.short_size) |
|
if label is not None: |
|
label = functional.resize_short(label, self.short_size, |
|
cv2.INTER_NEAREST) |
|
|
|
if label is None: |
|
return (im,) |
|
else: |
|
return (im, label) |
|
|
|
|
|
@manager.TRANSFORMS.add_component |
|
class LimitLong: |
|
""" |
|
Limit the long edge of image. |
|
|
|
If the long edge is larger than max_long, resize the long edge |
|
to max_long, while scale the short edge proportionally. |
|
|
|
If the long edge is smaller than min_long, resize the long edge |
|
to min_long, while scale the short edge proportionally. |
|
|
|
Args: |
|
max_long (int, optional): If the long edge of image is larger than max_long, |
|
it will be resize to max_long. Default: None. |
|
min_long (int, optional): If the long edge of image is smaller than min_long, |
|
it will be resize to min_long. Default: None. |
|
""" |
|
|
|
def __init__(self, max_long=None, min_long=None): |
|
if max_long is not None: |
|
if not isinstance(max_long, int): |
|
raise TypeError( |
|
"Type of `max_long` is invalid. It should be int, but it is {}" |
|
.format(type(max_long))) |
|
if min_long is not None: |
|
if not isinstance(min_long, int): |
|
raise TypeError( |
|
"Type of `min_long` is invalid. It should be int, but it is {}" |
|
.format(type(min_long))) |
|
if (max_long is not None) and (min_long is not None): |
|
if min_long > max_long: |
|
raise ValueError( |
|
'`max_long should not smaller than min_long, but they are {} and {}' |
|
.format(max_long, min_long)) |
|
self.max_long = max_long |
|
self.min_long = min_long |
|
|
|
def __call__(self, im, label=None): |
|
""" |
|
Args: |
|
im (np.ndarray): The Image data. |
|
label (np.ndarray, optional): The label data. Default: None. |
|
|
|
Returns: |
|
(tuple). When label is None, it returns (im, ), otherwise it returns (im, label). |
|
""" |
|
h, w = im.shape[0], im.shape[1] |
|
long_edge = max(h, w) |
|
target = long_edge |
|
if (self.max_long is not None) and (long_edge > self.max_long): |
|
target = self.max_long |
|
elif (self.min_long is not None) and (long_edge < self.min_long): |
|
target = self.min_long |
|
|
|
if target != long_edge: |
|
im = functional.resize_long(im, target) |
|
if label is not None: |
|
label = functional.resize_long(label, target, cv2.INTER_NEAREST) |
|
|
|
if label is None: |
|
return (im,) |
|
else: |
|
return (im, label) |
|
|
|
|
|
@manager.TRANSFORMS.add_component |
|
class ResizeRangeScaling: |
|
""" |
|
Resize the long side of an image into a range, and then scale the other side proportionally. |
|
|
|
Args: |
|
min_value (int, optional): The minimum value of long side after resize. Default: 400. |
|
max_value (int, optional): The maximum value of long side after resize. Default: 600. |
|
""" |
|
|
|
def __init__(self, min_value=400, max_value=600): |
|
if min_value > max_value: |
|
raise ValueError('min_value must be less than max_value, ' |
|
'but they are {} and {}.'.format( |
|
min_value, max_value)) |
|
self.min_value = min_value |
|
self.max_value = max_value |
|
|
|
def __call__(self, im, label=None): |
|
""" |
|
Args: |
|
im (np.ndarray): The Image data. |
|
label (np.ndarray, optional): The label data. Default: None. |
|
|
|
Returns: |
|
(tuple). When label is None, it returns (im, ), otherwise it returns (im, label). |
|
""" |
|
|
|
if self.min_value == self.max_value: |
|
random_size = self.max_value |
|
else: |
|
random_size = int( |
|
np.random.uniform(self.min_value, self.max_value) + 0.5) |
|
im = functional.resize_long(im, random_size, cv2.INTER_LINEAR) |
|
if label is not None: |
|
label = functional.resize_long(label, random_size, |
|
cv2.INTER_NEAREST) |
|
|
|
if label is None: |
|
return (im,) |
|
else: |
|
return (im, label) |
|
|
|
|
|
@manager.TRANSFORMS.add_component |
|
class ResizeStepScaling: |
|
""" |
|
Scale an image proportionally within a range. |
|
|
|
Args: |
|
min_scale_factor (float, optional): The minimum scale. Default: 0.75. |
|
max_scale_factor (float, optional): The maximum scale. Default: 1.25. |
|
scale_step_size (float, optional): The scale interval. Default: 0.25. |
|
|
|
Raises: |
|
ValueError: When min_scale_factor is smaller than max_scale_factor. |
|
""" |
|
|
|
def __init__(self, |
|
min_scale_factor=0.75, |
|
max_scale_factor=1.25, |
|
scale_step_size=0.25): |
|
if min_scale_factor > max_scale_factor: |
|
raise ValueError( |
|
'min_scale_factor must be less than max_scale_factor, ' |
|
'but they are {} and {}.'.format(min_scale_factor, |
|
max_scale_factor)) |
|
self.min_scale_factor = min_scale_factor |
|
self.max_scale_factor = max_scale_factor |
|
self.scale_step_size = scale_step_size |
|
|
|
def __call__(self, im, label=None): |
|
""" |
|
Args: |
|
im (np.ndarray): The Image data. |
|
label (np.ndarray, optional): The label data. Default: None. |
|
|
|
Returns: |
|
(tuple). When label is None, it returns (im, ), otherwise it returns (im, label). |
|
""" |
|
|
|
if self.min_scale_factor == self.max_scale_factor: |
|
scale_factor = self.min_scale_factor |
|
|
|
elif self.scale_step_size == 0: |
|
scale_factor = np.random.uniform(self.min_scale_factor, |
|
self.max_scale_factor) |
|
|
|
else: |
|
num_steps = int((self.max_scale_factor - self.min_scale_factor) / |
|
self.scale_step_size + 1) |
|
scale_factors = np.linspace(self.min_scale_factor, |
|
self.max_scale_factor, |
|
num_steps).tolist() |
|
np.random.shuffle(scale_factors) |
|
scale_factor = scale_factors[0] |
|
w = int(round(scale_factor * im.shape[1])) |
|
h = int(round(scale_factor * im.shape[0])) |
|
|
|
im = functional.resize(im, (w, h), cv2.INTER_LINEAR) |
|
if label is not None: |
|
label = functional.resize(label, (w, h), cv2.INTER_NEAREST) |
|
|
|
if label is None: |
|
return (im,) |
|
else: |
|
return (im, label) |
|
|
|
|
|
@manager.TRANSFORMS.add_component |
|
class Normalize: |
|
""" |
|
Normalize an image. |
|
|
|
Args: |
|
mean (list, optional): The mean value of a data set. Default: [0.5, 0.5, 0.5]. |
|
std (list, optional): The standard deviation of a data set. Default: [0.5, 0.5, 0.5]. |
|
|
|
Raises: |
|
ValueError: When mean/std is not list or any value in std is 0. |
|
""" |
|
|
|
def __init__(self, mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)): |
|
self.mean = mean |
|
self.std = std |
|
if not (isinstance(self.mean, (list, tuple)) |
|
and isinstance(self.std, (list, tuple))): |
|
raise ValueError( |
|
"{}: input type is invalid. It should be list or tuple".format( |
|
self)) |
|
from functools import reduce |
|
if reduce(lambda x, y: x * y, self.std) == 0: |
|
raise ValueError('{}: std is invalid!'.format(self)) |
|
|
|
def __call__(self, im, label=None): |
|
""" |
|
Args: |
|
im (np.ndarray): The Image data. |
|
label (np.ndarray, optional): The label data. Default: None. |
|
|
|
Returns: |
|
(tuple). When label is None, it returns (im, ), otherwise it returns (im, label). |
|
""" |
|
|
|
mean = np.array(self.mean)[np.newaxis, np.newaxis, :] |
|
std = np.array(self.std)[np.newaxis, np.newaxis, :] |
|
im = functional.normalize(im, mean, std) |
|
|
|
if label is None: |
|
return (im,) |
|
else: |
|
return (im, label) |
|
|
|
|
|
@manager.TRANSFORMS.add_component |
|
class Padding: |
|
""" |
|
Add bottom-right padding to a raw image or annotation image. |
|
|
|
Args: |
|
target_size (list|tuple): The target size after padding. |
|
im_padding_value (list, optional): The padding value of raw image. |
|
Default: [127.5, 127.5, 127.5]. |
|
label_padding_value (int, optional): The padding value of annotation image. Default: 255. |
|
|
|
Raises: |
|
TypeError: When target_size is neither list nor tuple. |
|
ValueError: When the length of target_size is not 2. |
|
""" |
|
|
|
def __init__(self, |
|
target_size, |
|
im_padding_value=(127.5, 127.5, 127.5), |
|
label_padding_value=255): |
|
if isinstance(target_size, list) or isinstance(target_size, tuple): |
|
if len(target_size) != 2: |
|
raise ValueError( |
|
'`target_size` should include 2 elements, but it is {}'. |
|
format(target_size)) |
|
else: |
|
raise TypeError( |
|
"Type of target_size is invalid. It should be list or tuple, now is {}" |
|
.format(type(target_size))) |
|
self.target_size = target_size |
|
self.im_padding_value = im_padding_value |
|
self.label_padding_value = label_padding_value |
|
|
|
def __call__(self, im, label=None): |
|
""" |
|
Args: |
|
im (np.ndarray): The Image data. |
|
label (np.ndarray, optional): The label data. Default: None. |
|
|
|
Returns: |
|
(tuple). When label is None, it returns (im, ), otherwise it returns (im, label). |
|
""" |
|
|
|
im_height, im_width = im.shape[0], im.shape[1] |
|
if isinstance(self.target_size, int): |
|
target_height = self.target_size |
|
target_width = self.target_size |
|
else: |
|
target_height = self.target_size[1] |
|
target_width = self.target_size[0] |
|
pad_height = target_height - im_height |
|
pad_width = target_width - im_width |
|
if pad_height < 0 or pad_width < 0: |
|
raise ValueError( |
|
'The size of image should be less than `target_size`, but the size of image ({}, {}) is larger than `target_size` ({}, {})' |
|
.format(im_width, im_height, target_width, target_height)) |
|
else: |
|
im = cv2.copyMakeBorder( |
|
im, |
|
0, |
|
pad_height, |
|
0, |
|
pad_width, |
|
cv2.BORDER_CONSTANT, |
|
value=self.im_padding_value) |
|
if label is not None: |
|
label = cv2.copyMakeBorder( |
|
label, |
|
0, |
|
pad_height, |
|
0, |
|
pad_width, |
|
cv2.BORDER_CONSTANT, |
|
value=self.label_padding_value) |
|
if label is None: |
|
return (im,) |
|
else: |
|
return (im, label) |
|
|
|
|
|
@manager.TRANSFORMS.add_component |
|
class PaddingByAspectRatio: |
|
""" |
|
|
|
Args: |
|
aspect_ratio (int|float, optional): The aspect ratio = width / height. Default: 1. |
|
""" |
|
|
|
def __init__(self, |
|
aspect_ratio=1, |
|
im_padding_value=(127.5, 127.5, 127.5), |
|
label_padding_value=255): |
|
self.aspect_ratio = aspect_ratio |
|
self.im_padding_value = im_padding_value |
|
self.label_padding_value = label_padding_value |
|
|
|
def __call__(self, im, label=None): |
|
""" |
|
Args: |
|
im (np.ndarray): The Image data. |
|
label (np.ndarray, optional): The label data. Default: None. |
|
|
|
Returns: |
|
(tuple). When label is None, it returns (im, ), otherwise it returns (im, label). |
|
""" |
|
|
|
img_height = im.shape[0] |
|
img_width = im.shape[1] |
|
ratio = img_width / img_height |
|
if ratio == self.aspect_ratio: |
|
if label is None: |
|
return (im,) |
|
else: |
|
return (im, label) |
|
elif ratio > self.aspect_ratio: |
|
img_height = int(img_width / self.aspect_ratio) |
|
else: |
|
img_width = int(img_height * self.aspect_ratio) |
|
padding = Padding((img_width, img_height), |
|
im_padding_value=self.im_padding_value, |
|
label_padding_value=self.label_padding_value) |
|
return padding(im, label) |
|
|
|
|
|
@manager.TRANSFORMS.add_component |
|
class RandomPaddingCrop: |
|
""" |
|
Crop a sub-image from a raw image and annotation image randomly. If the target cropping size |
|
is larger than original image, then the bottom-right padding will be added. |
|
|
|
Args: |
|
crop_size (tuple, optional): The target cropping size. Default: (512, 512). |
|
im_padding_value (list, optional): The padding value of raw image. |
|
Default: [127.5, 127.5, 127.5]. |
|
label_padding_value (int, optional): The padding value of annotation image. Default: 255. |
|
|
|
Raises: |
|
TypeError: When crop_size is neither list nor tuple. |
|
ValueError: When the length of crop_size is not 2. |
|
""" |
|
|
|
def __init__(self, |
|
crop_size=(512, 512), |
|
im_padding_value=(127.5, 127.5, 127.5), |
|
label_padding_value=255): |
|
if isinstance(crop_size, list) or isinstance(crop_size, tuple): |
|
if len(crop_size) != 2: |
|
raise ValueError( |
|
'Type of `crop_size` is list or tuple. It should include 2 elements, but it is {}' |
|
.format(crop_size)) |
|
else: |
|
raise TypeError( |
|
"The type of `crop_size` is invalid. It should be list or tuple, but it is {}" |
|
.format(type(crop_size))) |
|
self.crop_size = crop_size |
|
self.im_padding_value = im_padding_value |
|
self.label_padding_value = label_padding_value |
|
|
|
def __call__(self, im, label=None): |
|
""" |
|
Args: |
|
im (np.ndarray): The Image data. |
|
label (np.ndarray, optional): The label data. Default: None. |
|
|
|
Returns: |
|
(tuple). When label is None, it returns (im, ), otherwise it returns (im, label). |
|
""" |
|
|
|
if isinstance(self.crop_size, int): |
|
crop_width = self.crop_size |
|
crop_height = self.crop_size |
|
else: |
|
crop_width = self.crop_size[0] |
|
crop_height = self.crop_size[1] |
|
|
|
img_height = im.shape[0] |
|
img_width = im.shape[1] |
|
|
|
if img_height == crop_height and img_width == crop_width: |
|
if label is None: |
|
return (im,) |
|
else: |
|
return (im, label) |
|
else: |
|
pad_height = max(crop_height - img_height, 0) |
|
pad_width = max(crop_width - img_width, 0) |
|
if (pad_height > 0 or pad_width > 0): |
|
im = cv2.copyMakeBorder( |
|
im, |
|
0, |
|
pad_height, |
|
0, |
|
pad_width, |
|
cv2.BORDER_CONSTANT, |
|
value=self.im_padding_value) |
|
if label is not None: |
|
label = cv2.copyMakeBorder( |
|
label, |
|
0, |
|
pad_height, |
|
0, |
|
pad_width, |
|
cv2.BORDER_CONSTANT, |
|
value=self.label_padding_value) |
|
img_height = im.shape[0] |
|
img_width = im.shape[1] |
|
|
|
if crop_height > 0 and crop_width > 0: |
|
h_off = np.random.randint(img_height - crop_height + 1) |
|
w_off = np.random.randint(img_width - crop_width + 1) |
|
|
|
im = im[h_off:(crop_height + h_off), w_off:( |
|
w_off + crop_width), :] |
|
if label is not None: |
|
label = label[h_off:(crop_height + h_off), w_off:( |
|
w_off + crop_width)] |
|
if label is None: |
|
return (im,) |
|
else: |
|
return (im, label) |
|
|
|
|
|
@manager.TRANSFORMS.add_component |
|
class RandomCenterCrop: |
|
""" |
|
Crops the given the input data at the center. |
|
Args: |
|
retain_ratio (tuple or list, optional): The length of the input list or tuple must be 2. Default: (0.5, 0.5). |
|
the first value is used for width and the second is for height. |
|
In addition, the minimum size of the cropped image is [width * retain_ratio[0], height * retain_ratio[1]]. |
|
Raises: |
|
TypeError: When retain_ratio is neither list nor tuple. Default: None. |
|
ValueError: When the value of retain_ratio is not in [0-1]. |
|
""" |
|
|
|
def __init__(self, |
|
retain_ratio=(0.5, 0.5)): |
|
if isinstance(retain_ratio, list) or isinstance(retain_ratio, tuple): |
|
if len(retain_ratio) != 2: |
|
raise ValueError( |
|
'When type of `retain_ratio` is list or tuple, it shoule include 2 elements, but it is {}'.format( |
|
retain_ratio) |
|
) |
|
if retain_ratio[0] > 1 or retain_ratio[1] > 1 or retain_ratio[0] < 0 or retain_ratio[1] < 0: |
|
raise ValueError( |
|
'Value of `retain_ratio` should be in [0, 1], but it is {}'.format(retain_ratio) |
|
) |
|
else: |
|
raise TypeError( |
|
"The type of `retain_ratio` is invalid. It should be list or tuple, but it is {}" |
|
.format(type(retain_ratio))) |
|
self.retain_ratio = retain_ratio |
|
|
|
def __call__(self, im, label=None): |
|
""" |
|
Args: |
|
im (np.ndarray): The Image data. |
|
label (np.ndarray, optional): The label data. Default: None. |
|
Returns: |
|
(tuple). When label is None, it returns (im, ), otherwise it returns (im, label). |
|
""" |
|
retain_width = self.retain_ratio[0] |
|
retain_height = self.retain_ratio[1] |
|
|
|
img_height = im.shape[0] |
|
img_width = im.shape[1] |
|
|
|
if retain_width == 1. and retain_height == 1.: |
|
if label is None: |
|
return (im,) |
|
else: |
|
return (im, label) |
|
else: |
|
randw = np.random.randint(img_width * (1 - retain_width)) |
|
randh = np.random.randint(img_height * (1 - retain_height)) |
|
offsetw = 0 if randw == 0 else np.random.randint(randw) |
|
offseth = 0 if randh == 0 else np.random.randint(randh) |
|
p0, p1, p2, p3 = offseth, img_height + offseth - randh, offsetw, img_width + offsetw - randw |
|
im = im[p0:p1, p2:p3, :] |
|
if label is not None: |
|
label = label[p0:p1, p2:p3, :] |
|
|
|
if label is None: |
|
return (im,) |
|
else: |
|
return (im, label) |
|
|
|
|
|
@manager.TRANSFORMS.add_component |
|
class ScalePadding: |
|
""" |
|
Add center padding to a raw image or annotation image,then scale the |
|
image to target size. |
|
|
|
Args: |
|
target_size (list|tuple, optional): The target size of image. Default: (512, 512). |
|
im_padding_value (list, optional): The padding value of raw image. |
|
Default: [127.5, 127.5, 127.5]. |
|
label_padding_value (int, optional): The padding value of annotation image. Default: 255. |
|
|
|
Raises: |
|
TypeError: When target_size is neither list nor tuple. |
|
ValueError: When the length of target_size is not 2. |
|
""" |
|
|
|
def __init__(self, |
|
target_size=(512, 512), |
|
im_padding_value=(127.5, 127.5, 127.5), |
|
label_padding_value=255): |
|
if isinstance(target_size, list) or isinstance(target_size, tuple): |
|
if len(target_size) != 2: |
|
raise ValueError( |
|
'`target_size` should include 2 elements, but it is {}'. |
|
format(target_size)) |
|
else: |
|
raise TypeError( |
|
"Type of `target_size` is invalid. It should be list or tuple, but it is {}" |
|
.format(type(target_size))) |
|
|
|
self.target_size = target_size |
|
self.im_padding_value = im_padding_value |
|
self.label_padding_value = label_padding_value |
|
|
|
def __call__(self, im, label=None): |
|
""" |
|
Args: |
|
im (np.ndarray): The Image data. |
|
label (np.ndarray, optional): The label data. Default: None. |
|
|
|
Returns: |
|
(tuple). When label is None, it returns (im, ), otherwise it returns (im, label). |
|
""" |
|
height = im.shape[0] |
|
width = im.shape[1] |
|
|
|
new_im = np.zeros( |
|
(max(height, width), max(height, width), 3)) + self.im_padding_value |
|
if label is not None: |
|
new_label = np.zeros((max(height, width), max( |
|
height, width))) + self.label_padding_value |
|
|
|
if height > width: |
|
padding = int((height - width) / 2) |
|
new_im[:, padding:padding + width, :] = im |
|
if label is not None: |
|
new_label[:, padding:padding + width] = label |
|
else: |
|
padding = int((width - height) / 2) |
|
new_im[padding:padding + height, :, :] = im |
|
if label is not None: |
|
new_label[padding:padding + height, :] = label |
|
|
|
im = np.uint8(new_im) |
|
im = functional.resize(im, self.target_size, interp=cv2.INTER_CUBIC) |
|
if label is not None: |
|
label = np.uint8(new_label) |
|
label = functional.resize( |
|
label, self.target_size, interp=cv2.INTER_CUBIC) |
|
if label is None: |
|
return (im,) |
|
else: |
|
return (im, label) |
|
|
|
|
|
@manager.TRANSFORMS.add_component |
|
class RandomNoise: |
|
""" |
|
Superimposing noise on an image with a certain probability. |
|
|
|
Args: |
|
prob (float, optional): A probability of blurring an image. Default: 0.5. |
|
max_sigma(float, optional): The maximum value of standard deviation of the distribution. |
|
Default: 10.0. |
|
""" |
|
|
|
def __init__(self, prob=0.5, max_sigma=10.0): |
|
self.prob = prob |
|
self.max_sigma = max_sigma |
|
|
|
def __call__(self, im, label=None): |
|
""" |
|
Args: |
|
im (np.ndarray): The Image data. |
|
label (np.ndarray, optional): The label data. Default: None. |
|
|
|
Returns: |
|
(tuple). When label is None, it returns (im, ), otherwise it returns (im, label). |
|
""" |
|
if random.random() < self.prob: |
|
mu = 0 |
|
sigma = random.random() * self.max_sigma |
|
im = np.array(im, dtype=np.float32) |
|
im += np.random.normal(mu, sigma, im.shape) |
|
im[im > 255] = 255 |
|
im[im < 0] = 0 |
|
|
|
if label is None: |
|
return (im,) |
|
else: |
|
return (im, label) |
|
|
|
|
|
@manager.TRANSFORMS.add_component |
|
class RandomBlur: |
|
""" |
|
Blurring an image by a Gaussian function with a certain probability. |
|
|
|
Args: |
|
prob (float, optional): A probability of blurring an image. Default: 0.1. |
|
blur_type(str, optional): A type of blurring an image, |
|
gaussian stands for cv2.GaussianBlur, |
|
median stands for cv2.medianBlur, |
|
blur stands for cv2.blur, |
|
random represents randomly selected from above. |
|
Default: gaussian. |
|
""" |
|
|
|
def __init__(self, prob=0.1, blur_type="gaussian"): |
|
self.prob = prob |
|
self.blur_type = blur_type |
|
|
|
def __call__(self, im, label=None): |
|
""" |
|
Args: |
|
im (np.ndarray): The Image data. |
|
label (np.ndarray, optional): The label data. Default: None. |
|
|
|
Returns: |
|
(tuple). When label is None, it returns (im, ), otherwise it returns (im, label). |
|
""" |
|
|
|
if self.prob <= 0: |
|
n = 0 |
|
elif self.prob >= 1: |
|
n = 1 |
|
else: |
|
n = int(1.0 / self.prob) |
|
if n > 0: |
|
if np.random.randint(0, n) == 0: |
|
radius = np.random.randint(3, 10) |
|
if radius % 2 != 1: |
|
radius = radius + 1 |
|
if radius > 9: |
|
radius = 9 |
|
im = np.array(im, dtype='uint8') |
|
if self.blur_type == "gaussian": |
|
im = cv2.GaussianBlur(im, (radius, radius), 0, 0) |
|
elif self.blur_type == "median": |
|
im = cv2.medianBlur(im, radius) |
|
elif self.blur_type == "blur": |
|
im = cv2.blur(im, (radius, radius)) |
|
elif self.blur_type == "random": |
|
select = random.random() |
|
if select < 0.3: |
|
im = cv2.GaussianBlur(im, (radius, radius), 0) |
|
elif select < 0.6: |
|
im = cv2.medianBlur(im, radius) |
|
else: |
|
im = cv2.blur(im, (radius, radius)) |
|
else: |
|
im = cv2.GaussianBlur(im, (radius, radius), 0, 0) |
|
im = np.array(im, dtype='float32') |
|
if label is None: |
|
return (im,) |
|
else: |
|
return (im, label) |
|
|
|
|
|
@manager.TRANSFORMS.add_component |
|
class RandomRotation: |
|
""" |
|
Rotate an image randomly with padding. |
|
|
|
Args: |
|
max_rotation (float, optional): The maximum rotation degree. Default: 15. |
|
im_padding_value (list, optional): The padding value of raw image. |
|
Default: [127.5, 127.5, 127.5]. |
|
label_padding_value (int, optional): The padding value of annotation image. Default: 255. |
|
""" |
|
|
|
def __init__(self, |
|
max_rotation=15, |
|
im_padding_value=(127.5, 127.5, 127.5), |
|
label_padding_value=255): |
|
self.max_rotation = max_rotation |
|
self.im_padding_value = im_padding_value |
|
self.label_padding_value = label_padding_value |
|
|
|
def __call__(self, im, label=None): |
|
""" |
|
Args: |
|
im (np.ndarray): The Image data. |
|
label (np.ndarray, optional): The label data. Default: None. |
|
|
|
Returns: |
|
(tuple). When label is None, it returns (im, ), otherwise it returns (im, label). |
|
""" |
|
|
|
if self.max_rotation > 0: |
|
(h, w) = im.shape[:2] |
|
do_rotation = np.random.uniform(-self.max_rotation, |
|
self.max_rotation) |
|
pc = (w // 2, h // 2) |
|
r = cv2.getRotationMatrix2D(pc, do_rotation, 1.0) |
|
cos = np.abs(r[0, 0]) |
|
sin = np.abs(r[0, 1]) |
|
|
|
nw = int((h * sin) + (w * cos)) |
|
nh = int((h * cos) + (w * sin)) |
|
|
|
(cx, cy) = pc |
|
r[0, 2] += (nw / 2) - cx |
|
r[1, 2] += (nh / 2) - cy |
|
dsize = (nw, nh) |
|
im = cv2.warpAffine( |
|
im, |
|
r, |
|
dsize=dsize, |
|
flags=cv2.INTER_LINEAR, |
|
borderMode=cv2.BORDER_CONSTANT, |
|
borderValue=self.im_padding_value) |
|
if label is not None: |
|
label = cv2.warpAffine( |
|
label, |
|
r, |
|
dsize=dsize, |
|
flags=cv2.INTER_NEAREST, |
|
borderMode=cv2.BORDER_CONSTANT, |
|
borderValue=self.label_padding_value) |
|
|
|
if label is None: |
|
return (im,) |
|
else: |
|
return (im, label) |
|
|
|
|
|
@manager.TRANSFORMS.add_component |
|
class RandomScaleAspect: |
|
""" |
|
Crop a sub-image from an original image with a range of area ratio and aspect and |
|
then scale the sub-image back to the size of the original image. |
|
|
|
Args: |
|
min_scale (float, optional): The minimum area ratio of cropped image to the original image. Default: 0.5. |
|
aspect_ratio (float, optional): The minimum aspect ratio. Default: 0.33. |
|
""" |
|
|
|
def __init__(self, min_scale=0.5, aspect_ratio=0.33): |
|
self.min_scale = min_scale |
|
self.aspect_ratio = aspect_ratio |
|
|
|
def __call__(self, im, label=None): |
|
""" |
|
Args: |
|
im (np.ndarray): The Image data. |
|
label (np.ndarray, optional): The label data. Default: None. |
|
|
|
Returns: |
|
(tuple). When label is None, it returns (im, ), otherwise it returns (im, label). |
|
""" |
|
|
|
if self.min_scale != 0 and self.aspect_ratio != 0: |
|
img_height = im.shape[0] |
|
img_width = im.shape[1] |
|
for i in range(0, 10): |
|
area = img_height * img_width |
|
target_area = area * np.random.uniform(self.min_scale, 1.0) |
|
aspectRatio = np.random.uniform(self.aspect_ratio, |
|
1.0 / self.aspect_ratio) |
|
|
|
dw = int(np.sqrt(target_area * 1.0 * aspectRatio)) |
|
dh = int(np.sqrt(target_area * 1.0 / aspectRatio)) |
|
if (np.random.randint(10) < 5): |
|
tmp = dw |
|
dw = dh |
|
dh = tmp |
|
|
|
if (dh < img_height and dw < img_width): |
|
h1 = np.random.randint(0, img_height - dh) |
|
w1 = np.random.randint(0, img_width - dw) |
|
|
|
im = im[h1:(h1 + dh), w1:(w1 + dw), :] |
|
im = cv2.resize( |
|
im, (img_width, img_height), |
|
interpolation=cv2.INTER_LINEAR) |
|
if label is not None: |
|
label = label[h1:(h1 + dh), w1:(w1 + dw)] |
|
label = cv2.resize( |
|
label, (img_width, img_height), |
|
interpolation=cv2.INTER_NEAREST) |
|
break |
|
if label is None: |
|
return (im,) |
|
else: |
|
return (im, label) |
|
|
|
|
|
@manager.TRANSFORMS.add_component |
|
class RandomDistort: |
|
""" |
|
Distort an image with random configurations. |
|
|
|
Args: |
|
brightness_range (float, optional): A range of brightness. Default: 0.5. |
|
brightness_prob (float, optional): A probability of adjusting brightness. Default: 0.5. |
|
contrast_range (float, optional): A range of contrast. Default: 0.5. |
|
contrast_prob (float, optional): A probability of adjusting contrast. Default: 0.5. |
|
saturation_range (float, optional): A range of saturation. Default: 0.5. |
|
saturation_prob (float, optional): A probability of adjusting saturation. Default: 0.5. |
|
hue_range (int, optional): A range of hue. Default: 18. |
|
hue_prob (float, optional): A probability of adjusting hue. Default: 0.5. |
|
sharpness_range (float, optional): A range of sharpness. Default: 0.5. |
|
sharpness_prob (float, optional): A probability of adjusting saturation. Default: 0. |
|
""" |
|
|
|
def __init__(self, |
|
brightness_range=0.5, |
|
brightness_prob=0.5, |
|
contrast_range=0.5, |
|
contrast_prob=0.5, |
|
saturation_range=0.5, |
|
saturation_prob=0.5, |
|
hue_range=18, |
|
hue_prob=0.5, |
|
sharpness_range=0.5, |
|
sharpness_prob=0): |
|
self.brightness_range = brightness_range |
|
self.brightness_prob = brightness_prob |
|
self.contrast_range = contrast_range |
|
self.contrast_prob = contrast_prob |
|
self.saturation_range = saturation_range |
|
self.saturation_prob = saturation_prob |
|
self.hue_range = hue_range |
|
self.hue_prob = hue_prob |
|
self.sharpness_range = sharpness_range |
|
self.sharpness_prob = sharpness_prob |
|
|
|
def __call__(self, im, label=None): |
|
""" |
|
Args: |
|
im (np.ndarray): The Image data. |
|
label (np.ndarray, optional): The label data. Default: None. |
|
|
|
Returns: |
|
(tuple). When label is None, it returns (im, ), otherwise it returns (im, label). |
|
""" |
|
|
|
brightness_lower = 1 - self.brightness_range |
|
brightness_upper = 1 + self.brightness_range |
|
contrast_lower = 1 - self.contrast_range |
|
contrast_upper = 1 + self.contrast_range |
|
saturation_lower = 1 - self.saturation_range |
|
saturation_upper = 1 + self.saturation_range |
|
hue_lower = -self.hue_range |
|
hue_upper = self.hue_range |
|
sharpness_lower = 1 - self.sharpness_range |
|
sharpness_upper = 1 + self.sharpness_range |
|
ops = [ |
|
functional.brightness, functional.contrast, functional.saturation, |
|
functional.hue, functional.sharpness |
|
] |
|
random.shuffle(ops) |
|
params_dict = { |
|
'brightness': { |
|
'brightness_lower': brightness_lower, |
|
'brightness_upper': brightness_upper |
|
}, |
|
'contrast': { |
|
'contrast_lower': contrast_lower, |
|
'contrast_upper': contrast_upper |
|
}, |
|
'saturation': { |
|
'saturation_lower': saturation_lower, |
|
'saturation_upper': saturation_upper |
|
}, |
|
'hue': { |
|
'hue_lower': hue_lower, |
|
'hue_upper': hue_upper |
|
}, |
|
'sharpness': { |
|
'sharpness_lower': sharpness_lower, |
|
'sharpness_upper': sharpness_upper, |
|
} |
|
} |
|
prob_dict = { |
|
'brightness': self.brightness_prob, |
|
'contrast': self.contrast_prob, |
|
'saturation': self.saturation_prob, |
|
'hue': self.hue_prob, |
|
'sharpness': self.sharpness_prob |
|
} |
|
im = im.astype('uint8') |
|
im = Image.fromarray(im) |
|
for id in range(len(ops)): |
|
params = params_dict[ops[id].__name__] |
|
prob = prob_dict[ops[id].__name__] |
|
params['im'] = im |
|
if np.random.uniform(0, 1) < prob: |
|
im = ops[id](**params) |
|
im = np.asarray(im).astype('float32') |
|
if label is None: |
|
return (im,) |
|
else: |
|
return (im, label) |
|
|
|
|
|
@manager.TRANSFORMS.add_component |
|
class RandomAffine: |
|
""" |
|
Affine transform an image with random configurations. |
|
|
|
Args: |
|
size (tuple, optional): The target size after affine transformation. Default: (224, 224). |
|
translation_offset (float, optional): The maximum translation offset. Default: 0. |
|
max_rotation (float, optional): The maximum rotation degree. Default: 15. |
|
min_scale_factor (float, optional): The minimum scale. Default: 0.75. |
|
max_scale_factor (float, optional): The maximum scale. Default: 1.25. |
|
im_padding_value (float, optional): The padding value of raw image. Default: (128, 128, 128). |
|
label_padding_value (int, optional): The padding value of annotation image. Default: (255, 255, 255). |
|
""" |
|
|
|
def __init__(self, |
|
size=(224, 224), |
|
translation_offset=0, |
|
max_rotation=15, |
|
min_scale_factor=0.75, |
|
max_scale_factor=1.25, |
|
im_padding_value=(128, 128, 128), |
|
label_padding_value=(255, 255, 255)): |
|
self.size = size |
|
self.translation_offset = translation_offset |
|
self.max_rotation = max_rotation |
|
self.min_scale_factor = min_scale_factor |
|
self.max_scale_factor = max_scale_factor |
|
self.im_padding_value = im_padding_value |
|
self.label_padding_value = label_padding_value |
|
|
|
def __call__(self, im, label=None): |
|
""" |
|
Args: |
|
im (np.ndarray): The Image data. |
|
label (np.ndarray, optional): The label data. Default: None. |
|
|
|
Returns: |
|
(tuple). When label is None, it returns (im, ), otherwise it returns (im, label). |
|
""" |
|
|
|
w, h = self.size |
|
bbox = [0, 0, im.shape[1] - 1, im.shape[0] - 1] |
|
x_offset = (random.random() - 0.5) * 2 * self.translation_offset |
|
y_offset = (random.random() - 0.5) * 2 * self.translation_offset |
|
dx = (w - (bbox[2] + bbox[0])) / 2.0 |
|
dy = (h - (bbox[3] + bbox[1])) / 2.0 |
|
|
|
matrix_trans = np.array([[1.0, 0, dx], [0, 1.0, dy], [0, 0, 1.0]]) |
|
|
|
angle = random.random() * 2 * self.max_rotation - self.max_rotation |
|
scale = random.random() * (self.max_scale_factor - self.min_scale_factor |
|
) + self.min_scale_factor |
|
scale *= np.mean( |
|
[float(w) / (bbox[2] - bbox[0]), |
|
float(h) / (bbox[3] - bbox[1])]) |
|
alpha = scale * math.cos(angle / 180.0 * math.pi) |
|
beta = scale * math.sin(angle / 180.0 * math.pi) |
|
|
|
centerx = w / 2.0 + x_offset |
|
centery = h / 2.0 + y_offset |
|
matrix = np.array( |
|
[[alpha, beta, (1 - alpha) * centerx - beta * centery], |
|
[-beta, alpha, beta * centerx + (1 - alpha) * centery], |
|
[0, 0, 1.0]]) |
|
|
|
matrix = matrix.dot(matrix_trans)[0:2, :] |
|
im = cv2.warpAffine( |
|
np.uint8(im), |
|
matrix, |
|
tuple(self.size), |
|
flags=cv2.INTER_LINEAR, |
|
borderMode=cv2.BORDER_CONSTANT, |
|
borderValue=self.im_padding_value) |
|
if label is not None: |
|
label = cv2.warpAffine( |
|
np.uint8(label), |
|
matrix, |
|
tuple(self.size), |
|
flags=cv2.INTER_NEAREST, |
|
borderMode=cv2.BORDER_CONSTANT) |
|
if label is None: |
|
return (im,) |
|
else: |
|
return (im, label) |