[Fix] Update ppdet Version and Update QR Code (#64)
parent
6752db2de9
commit
a4957b21be
197 changed files with 14438 additions and 4071 deletions
Before Size: 280 KiB | After Size: 281 KiB |
@ -1 +1,2 @@ |
||||
ppdet ba2aad26e6bc1e5c2dad76ca96692a0d63eccfac |
||||
ppseg f6c73b478cdf00f40ae69edd35bf6bce2a1687ef |
@ -0,0 +1,479 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import print_function |
||||
from __future__ import division |
||||
|
||||
try: |
||||
from collections.abc import Sequence |
||||
except Exception: |
||||
from collections import Sequence |
||||
|
||||
from numbers import Number, Integral |
||||
|
||||
import cv2 |
||||
import numpy as np |
||||
import math |
||||
import copy |
import os |
from PIL import Image, ImageDraw |
||||
|
||||
from .operators import register_op, BaseOperator |
||||
from paddlers.models.ppdet.modeling.rbox_utils import poly2rbox_le135_np, poly2rbox_oc_np, rbox2poly_np |
||||
from paddlers.models.ppdet.utils.logger import setup_logger |
||||
logger = setup_logger(__name__) |
||||
|
||||
|
||||
@register_op |
||||
class RRotate(BaseOperator): |
||||
""" Rotate Image, Polygon, Box |
||||
|
||||
Args: |
||||
scale (float): rotate scale |
||||
angle (float): rotate angle |
||||
fill_value (int, tuple): fill color |
||||
auto_bound (bool): whether auto bound or not |
||||
""" |
||||
|
||||
def __init__(self, scale=1.0, angle=0., fill_value=0., auto_bound=True): |
||||
super(RRotate, self).__init__() |
||||
self.scale = scale |
||||
self.angle = angle |
||||
self.fill_value = fill_value |
||||
self.auto_bound = auto_bound |
||||
|
||||
def get_rotated_matrix(self, angle, scale, h, w): |
||||
center = ((w - 1) * 0.5, (h - 1) * 0.5) |
||||
matrix = cv2.getRotationMatrix2D(center, -angle, scale) |
||||
# calculate the new size |
||||
cos = np.abs(matrix[0, 0]) |
||||
sin = np.abs(matrix[0, 1]) |
||||
new_w = h * sin + w * cos |
||||
new_h = h * cos + w * sin |
||||
# calculate offset |
||||
n_w = int(np.round(new_w)) |
||||
n_h = int(np.round(new_h)) |
||||
if self.auto_bound: |
||||
ratio = min(w / n_w, h / n_h) |
||||
matrix = cv2.getRotationMatrix2D(center, -angle, ratio) |
||||
else: |
||||
matrix[0, 2] += (new_w - w) * 0.5 |
||||
matrix[1, 2] += (new_h - h) * 0.5 |
||||
w = n_w |
||||
h = n_h |
||||
return matrix, h, w |
||||
|
||||
def get_rect_from_pts(self, pts, h, w): |
||||
""" get minimum rectangle of points |
||||
""" |
||||
assert pts.shape[-1] % 2 == 0, 'the dim of input [pts] is not correct' |
||||
min_x, min_y = np.min(pts[:, 0::2], axis=1), np.min(pts[:, 1::2], |
||||
axis=1) |
||||
max_x, max_y = np.max(pts[:, 0::2], axis=1), np.max(pts[:, 1::2], |
||||
axis=1) |
||||
min_x, min_y = np.clip(min_x, 0, w), np.clip(min_y, 0, h) |
||||
max_x, max_y = np.clip(max_x, 0, w), np.clip(max_y, 0, h) |
||||
boxes = np.stack([min_x, min_y, max_x, max_y], axis=-1) |
||||
return boxes |
||||
|
||||
def apply_image(self, image, matrix, h, w): |
||||
return cv2.warpAffine( |
||||
image, matrix, (w, h), borderValue=self.fill_value) |
||||
|
||||
def apply_pts(self, pts, matrix, h, w): |
||||
assert pts.shape[-1] % 2 == 0, 'the dim of input [pts] is not correct' |
||||
# n is number of samples and m is two times the number of points due to (x, y) |
||||
_, m = pts.shape |
||||
# transpose points |
||||
pts_ = pts.reshape(-1, 2).T |
||||
# pad 1 to convert the points to homogeneous coordinates |
||||
padding = np.ones((1, pts_.shape[1]), pts.dtype) |
||||
rotated_pts = np.matmul(matrix, np.concatenate((pts_, padding), axis=0)) |
||||
return rotated_pts[:2, :].T.reshape(-1, m) |
||||
|
||||
def apply(self, sample, context=None): |
||||
image = sample['image'] |
||||
h, w = image.shape[:2] |
||||
matrix, h, w = self.get_rotated_matrix(self.angle, self.scale, h, w) |
||||
sample['image'] = self.apply_image(image, matrix, h, w) |
||||
polys = sample['gt_poly'] |
||||
# TODO: segment or keypoint to be processed |
||||
if len(polys) > 0: |
||||
pts = self.apply_pts(polys, matrix, h, w) |
||||
sample['gt_poly'] = pts |
||||
sample['gt_bbox'] = self.get_rect_from_pts(pts, h, w) |
||||
|
||||
return sample |
||||
|
||||
|
||||
@register_op |
||||
class RandomRRotate(BaseOperator): |
||||
""" Random Rotate Image |
||||
Args: |
||||
scale (float, tuple, list): rotate scale |
||||
scale_mode (str): mode of scale, [range, value, None] |
||||
angle (float, tuple, list): rotate angle |
||||
angle_mode (str): mode of angle, [range, value, None] |
||||
fill_value (float, tuple, list): fill value |
||||
rotate_prob (float): probability of rotation |
||||
auto_bound (bool): whether auto bound or not |
||||
""" |
||||
|
||||
def __init__(self, |
||||
scale=1.0, |
||||
scale_mode=None, |
||||
angle=0., |
||||
angle_mode=None, |
||||
fill_value=0., |
||||
rotate_prob=1.0, |
||||
auto_bound=True): |
||||
super(RandomRRotate, self).__init__() |
||||
self.scale = scale |
||||
self.scale_mode = scale_mode |
||||
self.angle = angle |
||||
self.angle_mode = angle_mode |
||||
self.fill_value = fill_value |
||||
self.rotate_prob = rotate_prob |
||||
self.auto_bound = auto_bound |
||||
|
||||
def get_angle(self, angle, angle_mode): |
||||
assert not angle_mode or angle_mode in [ |
||||
'range', 'value' |
||||
], 'angle mode should be in [range, value, None]' |
||||
if not angle_mode: |
||||
return angle |
||||
elif angle_mode == 'range': |
||||
low, high = angle |
||||
return np.random.rand() * (high - low) + low |
||||
elif angle_mode == 'value': |
||||
return np.random.choice(angle) |
||||
|
||||
def get_scale(self, scale, scale_mode): |
||||
assert not scale_mode or scale_mode in [ |
||||
'range', 'value' |
||||
], 'scale mode should be in [range, value, None]' |
||||
if not scale_mode: |
||||
return scale |
||||
elif scale_mode == 'range': |
||||
low, high = scale |
||||
return np.random.rand() * (high - low) + low |
||||
elif scale_mode == 'value': |
||||
return np.random.choice(scale) |
||||
|
||||
def apply(self, sample, context=None): |
||||
if np.random.rand() > self.rotate_prob: |
||||
return sample |
||||
|
||||
angle = self.get_angle(self.angle, self.angle_mode) |
||||
scale = self.get_scale(self.scale, self.scale_mode) |
||||
rotator = RRotate(scale, angle, self.fill_value, self.auto_bound) |
||||
return rotator(sample) |
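# Illustrative usage sketch (not from the original file): how RandomRRotate might
# be applied to a sample dict. The 'image' and 'gt_poly' keys follow the
# conventions of the operators above; the concrete values are made up, and calling
# the operator as op(sample) assumes BaseOperator.__call__ dispatches to apply().
#
#   import numpy as np
#   op = RandomRRotate(angle=[-30., 30.], angle_mode='range', rotate_prob=0.5)
#   sample = {
#       'image': np.zeros((512, 512, 3), dtype=np.uint8),
#       'gt_poly': np.array([[10, 10, 50, 10, 50, 50, 10, 50]], dtype=np.float32),
#   }
#   sample = op(sample)  # image rotated; gt_poly / gt_bbox updated accordingly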
||||
|
||||
|
||||
@register_op |
||||
class Poly2RBox(BaseOperator): |
||||
""" Polygon to Rotated Box, using new OpenCV definition since 4.5.1 |
||||
|
||||
Args: |
||||
filter_threshold (int, float): threshold to filter annotations |
||||
filter_mode (str): filter mode, ['area', 'edge'] |
||||
rbox_type (str): rbox type, ['le135', 'oc'] |
||||
|
||||
""" |
||||
|
||||
def __init__(self, filter_threshold=4, filter_mode=None, rbox_type='le135'): |
||||
super(Poly2RBox, self).__init__() |
||||
self.filter_fn = lambda size: self.filter(size, filter_threshold, filter_mode) |
||||
self.rbox_fn = poly2rbox_le135_np if rbox_type == 'le135' else poly2rbox_oc_np |
||||
|
||||
def filter(self, size, threshold, mode): |
||||
if mode == 'area': |
||||
if size[0] * size[1] < threshold: |
||||
return True |
||||
elif mode == 'edge': |
||||
if min(size) < threshold: |
||||
return True |
||||
return False |
||||
|
||||
def get_rbox(self, polys): |
||||
valid_ids, rboxes, bboxes = [], [], [] |
||||
for i, poly in enumerate(polys): |
||||
cx, cy, w, h, angle = self.rbox_fn(poly) |
||||
if self.filter_fn((w, h)): |
||||
continue |
||||
rboxes.append(np.array([cx, cy, w, h, angle], dtype=np.float32)) |
||||
valid_ids.append(i) |
||||
xmin, ymin = min(poly[0::2]), min(poly[1::2]) |
||||
xmax, ymax = max(poly[0::2]), max(poly[1::2]) |
||||
bboxes.append(np.array([xmin, ymin, xmax, ymax], dtype=np.float32)) |
||||
|
||||
if len(valid_ids) == 0: |
||||
rboxes = np.zeros((0, 5), dtype=np.float32) |
||||
bboxes = np.zeros((0, 4), dtype=np.float32) |
||||
else: |
||||
rboxes = np.stack(rboxes) |
||||
bboxes = np.stack(bboxes) |
||||
|
||||
return rboxes, bboxes, valid_ids |
||||
|
||||
def apply(self, sample, context=None): |
||||
rboxes, bboxes, valid_ids = self.get_rbox(sample['gt_poly']) |
||||
sample['gt_rbox'] = rboxes |
||||
sample['gt_bbox'] = bboxes |
||||
for k in ['gt_class', 'gt_score', 'gt_poly', 'is_crowd', 'difficult']: |
||||
if k in sample: |
||||
sample[k] = sample[k][valid_ids] |
||||
|
||||
return sample |
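# Illustrative usage sketch (not from the original file): Poly2RBox on a single
# axis-aligned square polygon. The sample keys mirror those read by apply() above;
# the exact rbox values depend on poly2rbox_le135_np / poly2rbox_oc_np and are not
# asserted here.
#
#   import numpy as np
#   op = Poly2RBox(filter_threshold=4, filter_mode='edge', rbox_type='le135')
#   sample = {
#       'gt_poly': np.array([[10, 10, 50, 10, 50, 50, 10, 50]], dtype=np.float32),
#       'gt_class': np.array([[0]], dtype=np.int32),
#   }
#   sample = op(sample)
#   # sample['gt_rbox']: (N, 5) [cx, cy, w, h, angle]; sample['gt_bbox']: (N, 4)
#   # boxes with min(w, h) < filter_threshold are dropped in 'edge' mode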
||||
|
||||
|
||||
@register_op |
||||
class Poly2Array(BaseOperator): |
||||
""" convert gt_poly to np.array for rotated bboxes |
||||
""" |
||||
|
||||
def __init__(self): |
||||
super(Poly2Array, self).__init__() |
||||
|
||||
def apply(self, sample, context=None): |
||||
if 'gt_poly' in sample: |
||||
sample['gt_poly'] = np.array( |
||||
sample['gt_poly'], dtype=np.float32).reshape((-1, 8)) |
||||
|
||||
return sample |
||||
|
||||
|
||||
@register_op |
||||
class RResize(BaseOperator): |
||||
def __init__(self, target_size, keep_ratio, interp=cv2.INTER_LINEAR): |
||||
""" |
||||
Resize image to target size. If keep_ratio is True, |
||||
resize the image's long side to the maximum of target_size; |
||||
if keep_ratio is False, resize the image to target_size (h, w). |
||||
Args: |
||||
target_size (int|list): image target size |
||||
keep_ratio (bool): whether keep_ratio or not, default true |
||||
interp (int): the interpolation method |
||||
""" |
||||
super(RResize, self).__init__() |
||||
self.keep_ratio = keep_ratio |
||||
self.interp = interp |
||||
if not isinstance(target_size, (Integral, Sequence)): |
||||
raise TypeError( |
||||
"Type of target_size is invalid. Must be Integer or List or Tuple, now is {}". |
||||
format(type(target_size))) |
||||
if isinstance(target_size, Integral): |
||||
target_size = [target_size, target_size] |
||||
self.target_size = target_size |
||||
|
||||
def apply_image(self, image, scale): |
||||
im_scale_x, im_scale_y = scale |
||||
|
||||
return cv2.resize( |
||||
image, |
||||
None, |
||||
None, |
||||
fx=im_scale_x, |
||||
fy=im_scale_y, |
||||
interpolation=self.interp) |
||||
|
||||
def apply_pts(self, pts, scale, size): |
||||
im_scale_x, im_scale_y = scale |
||||
resize_w, resize_h = size |
||||
pts[:, 0::2] *= im_scale_x |
||||
pts[:, 1::2] *= im_scale_y |
||||
pts[:, 0::2] = np.clip(pts[:, 0::2], 0, resize_w) |
||||
pts[:, 1::2] = np.clip(pts[:, 1::2], 0, resize_h) |
||||
return pts |
||||
|
||||
def apply(self, sample, context=None): |
||||
""" Resize the image numpy. |
||||
""" |
||||
im = sample['image'] |
||||
if not isinstance(im, np.ndarray): |
||||
raise TypeError("{}: image type is not numpy.".format(self)) |
||||
if len(im.shape) != 3: |
||||
raise ImageError('{}: image is not 3-dimensional.'.format(self)) |
||||
|
||||
# apply image |
||||
im_shape = im.shape |
||||
if self.keep_ratio: |
||||
|
||||
im_size_min = np.min(im_shape[0:2]) |
||||
im_size_max = np.max(im_shape[0:2]) |
||||
|
||||
target_size_min = np.min(self.target_size) |
||||
target_size_max = np.max(self.target_size) |
||||
|
||||
im_scale = min(target_size_min / im_size_min, |
||||
target_size_max / im_size_max) |
||||
|
||||
resize_h = im_scale * float(im_shape[0]) |
||||
resize_w = im_scale * float(im_shape[1]) |
||||
|
||||
im_scale_x = im_scale |
||||
im_scale_y = im_scale |
||||
else: |
||||
resize_h, resize_w = self.target_size |
||||
im_scale_y = resize_h / im_shape[0] |
||||
im_scale_x = resize_w / im_shape[1] |
||||
|
||||
im = self.apply_image(sample['image'], [im_scale_x, im_scale_y]) |
||||
sample['image'] = im.astype(np.float32) |
||||
sample['im_shape'] = np.asarray([resize_h, resize_w], dtype=np.float32) |
||||
if 'scale_factor' in sample: |
||||
scale_factor = sample['scale_factor'] |
||||
sample['scale_factor'] = np.asarray( |
||||
[scale_factor[0] * im_scale_y, scale_factor[1] * im_scale_x], |
||||
dtype=np.float32) |
||||
else: |
||||
sample['scale_factor'] = np.asarray( |
||||
[im_scale_y, im_scale_x], dtype=np.float32) |
||||
|
||||
# apply bbox |
||||
if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0: |
||||
sample['gt_bbox'] = self.apply_pts(sample['gt_bbox'], |
||||
[im_scale_x, im_scale_y], |
||||
[resize_w, resize_h]) |
||||
|
||||
# apply polygon |
||||
if 'gt_poly' in sample and len(sample['gt_poly']) > 0: |
||||
sample['gt_poly'] = self.apply_pts(sample['gt_poly'], |
||||
[im_scale_x, im_scale_y], |
||||
[resize_w, resize_h]) |
||||
|
||||
return sample |
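# Illustrative usage sketch (not from the original file): RResize with
# keep_ratio=True on a 500x1000 image and target_size [1024, 1024]; the numbers
# below simply trace the scaling logic implemented above.
#
#   import numpy as np
#   op = RResize(target_size=[1024, 1024], keep_ratio=True)
#   sample = {'image': np.zeros((500, 1000, 3), dtype=np.uint8)}
#   sample = op(sample)
#   # im_scale = min(1024 / 500, 1024 / 1000) = 1.024
#   # sample['im_shape'] == [512., 1024.], sample['scale_factor'] == [1.024, 1.024]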
||||
|
||||
|
||||
@register_op |
||||
class RandomRFlip(BaseOperator): |
||||
def __init__(self, prob=0.5): |
||||
""" |
||||
Args: |
||||
prob (float): the probability of flipping image |
||||
""" |
||||
super(RandomRFlip, self).__init__() |
||||
self.prob = prob |
||||
if not (isinstance(self.prob, float)): |
||||
raise TypeError("{}: input type is invalid.".format(self)) |
||||
|
||||
def apply_image(self, image): |
||||
return image[:, ::-1, :] |
||||
|
||||
def apply_pts(self, pts, width): |
||||
oldx = pts[:, 0::2].copy() |
||||
pts[:, 0::2] = width - oldx - 1 |
||||
return pts |
||||
|
||||
def apply(self, sample, context=None): |
||||
"""Filp the image and bounding box. |
||||
Operators: |
||||
1. Flip the image numpy. |
||||
2. Transform the bboxes' x coordinates. |
||||
(Must judge whether the coordinates are normalized!) |
||||
3. Transform the segmentations' x coordinates. |
||||
(Must judge whether the coordinates are normalized!) |
||||
Output: |
||||
sample: the image, bounding box and segmentation part |
||||
in sample are flipped. |
||||
""" |
||||
if np.random.uniform(0, 1) < self.prob: |
||||
im = sample['image'] |
||||
height, width = im.shape[:2] |
||||
im = self.apply_image(im) |
||||
if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0: |
||||
sample['gt_bbox'] = self.apply_pts(sample['gt_bbox'], width) |
||||
if 'gt_poly' in sample and len(sample['gt_poly']) > 0: |
||||
sample['gt_poly'] = self.apply_pts(sample['gt_poly'], width) |
||||
|
||||
sample['flipped'] = True |
||||
sample['image'] = im |
||||
return sample |
||||
|
||||
|
||||
@register_op |
||||
class VisibleRBox(BaseOperator): |
||||
""" |
||||
In debug mode, visualize images according to `gt_box`. |
||||
(Currently only supported when the image is not cropped or flipped.) |
||||
""" |
||||
|
||||
def __init__(self, output_dir='debug', is_normalized=False): |
||||
super(VisibleRBox, self).__init__() |
||||
self.output_dir = output_dir |
self.is_normalized = is_normalized |
||||
if not os.path.isdir(output_dir): |
||||
os.makedirs(output_dir) |
||||
|
||||
def apply(self, sample, context=None): |
||||
image = Image.fromarray(sample['image'].astype(np.uint8)) |
||||
out_file_name = '{:012d}.jpg'.format(sample['im_id'][0]) |
||||
width = sample['w'] |
||||
height = sample['h'] |
||||
# gt_poly = sample['gt_rbox'] |
||||
gt_poly = sample['gt_poly'] |
||||
gt_class = sample['gt_class'] |
||||
draw = ImageDraw.Draw(image) |
||||
for i in range(gt_poly.shape[0]): |
||||
x1, y1, x2, y2, x3, y3, x4, y4 = gt_poly[i] |
||||
draw.line( |
||||
[(x1, y1), (x2, y2), (x3, y3), (x4, y4), (x1, y1)], |
||||
width=2, |
||||
fill='green') |
||||
# draw label |
||||
xmin = min(x1, x2, x3, x4) |
||||
ymin = min(y1, y2, y3, y4) |
||||
text = str(gt_class[i][0]) |
||||
tw, th = draw.textsize(text) |
||||
draw.rectangle( |
||||
[(xmin + 1, ymin - th), (xmin + tw + 1, ymin)], fill='green') |
||||
draw.text((xmin + 1, ymin - th), text, fill=(255, 255, 255)) |
||||
|
||||
if 'gt_keypoint' in sample.keys(): |
||||
gt_keypoint = sample['gt_keypoint'] |
||||
if self.is_normalized: |
||||
for i in range(gt_keypoint.shape[1]): |
||||
if i % 2: |
||||
gt_keypoint[:, i] = gt_keypoint[:, i] * height |
||||
else: |
||||
gt_keypoint[:, i] = gt_keypoint[:, i] * width |
||||
for i in range(gt_keypoint.shape[0]): |
||||
keypoint = gt_keypoint[i] |
||||
for j in range(int(keypoint.shape[0] / 2)): |
||||
x1 = round(keypoint[2 * j]).astype(np.int32) |
||||
y1 = round(keypoint[2 * j + 1]).astype(np.int32) |
||||
draw.ellipse( |
||||
(x1, y1, x1 + 5, y1 + 5), fill='green', outline='green') |
||||
save_path = os.path.join(self.output_dir, out_file_name) |
||||
image.save(save_path, quality=95) |
||||
return sample |
||||
|
||||
|
||||
@register_op |
||||
class Rbox2Poly(BaseOperator): |
||||
""" |
||||
Convert rbbox format to poly format. |
||||
""" |
||||
|
||||
def __init__(self): |
||||
super(Rbox2Poly, self).__init__() |
||||
|
||||
def apply(self, sample, context=None): |
||||
assert 'gt_rbox' in sample |
||||
assert sample['gt_rbox'].shape[1] == 5 |
||||
rboxes = sample['gt_rbox'] |
||||
polys = rbox2poly_np(rboxes) |
||||
sample['gt_poly'] = polys |
||||
xmin, ymin = polys[:, 0::2].min(1), polys[:, 1::2].min(1) |
||||
xmax, ymax = polys[:, 0::2].max(1), polys[:, 1::2].max(1) |
||||
sample['gt_bbox'] = np.stack([xmin, ymin, xmax, ymax], axis=1) |
||||
return sample |
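# Illustrative composition sketch (not from the original file): one plausible
# ordering of the rotated-box operators defined above. Real configs may use a
# different transform list; this only illustrates that the polygon-based operators
# work on the (N, 8) array produced by Poly2Array before Poly2RBox converts it.
#
#   transforms = [
#       Poly2Array(),                    # gt_poly list -> (N, 8) float32 array
#       RandomRFlip(prob=0.5),           # horizontal flip of image and polygons
#       RandomRRotate(angle=[0, 90, 180, -90], angle_mode='value'),
#       Poly2RBox(filter_threshold=2, filter_mode='edge', rbox_type='le135'),
#   ]
#   for t in transforms:
#       sample = t(sample)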
@ -0,0 +1,72 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
import paddle |
||||
import numbers |
||||
import numpy as np |
||||
|
||||
try: |
||||
from collections.abc import Sequence, Mapping |
||||
except: |
||||
from collections import Sequence, Mapping |
||||
|
||||
|
||||
def default_collate_fn(batch): |
||||
""" |
||||
Default batch collating function for :code:`paddle.io.DataLoader`, |
||||
takes the input data as a list of samples, where each element in the list |
||||
is the data of one sample; each sample should be composed of lists, |
||||
dictionaries, strings, numbers and numpy arrays. This |
||||
function parses the input data recursively and stacks number, |
||||
numpy array and paddle.Tensor data into batched data, e.g. for the |
||||
following input data: |
||||
[{'image': np.array(shape=[3, 224, 224]), 'label': 1}, |
||||
{'image': np.array(shape=[3, 224, 224]), 'label': 3}, |
||||
{'image': np.array(shape=[3, 224, 224]), 'label': 4}, |
||||
{'image': np.array(shape=[3, 224, 224]), 'label': 5},] |
||||
|
||||
|
||||
This default collate function zips the number and numpy array |
||||
fields together and stacks each field into a batch field as follows: |
||||
{'image': np.array(shape=[4, 3, 224, 224]), 'label': np.array([1, 3, 4, 5])} |
||||
Args: |
||||
batch(list of sample data): batch should be a list of sample data. |
||||
|
||||
Returns: |
||||
Batched data: batched each number, numpy array and paddle.Tensor |
||||
in input data. |
||||
""" |
||||
sample = batch[0] |
||||
if isinstance(sample, np.ndarray): |
||||
batch = np.stack(batch, axis=0) |
||||
return batch |
||||
elif isinstance(sample, numbers.Number): |
||||
batch = np.array(batch) |
||||
return batch |
||||
elif isinstance(sample, (str, bytes)): |
||||
return batch |
||||
elif isinstance(sample, Mapping): |
||||
return { |
||||
key: default_collate_fn([d[key] for d in batch]) |
||||
for key in sample |
||||
} |
||||
elif isinstance(sample, Sequence): |
||||
sample_fields_num = len(sample) |
||||
if not all(len(sample) == sample_fields_num for sample in iter(batch)): |
||||
raise RuntimeError( |
||||
"fileds number not same among samples in a batch") |
||||
return [default_collate_fn(fields) for fields in zip(*batch)] |
||||
|
||||
raise TypeError("batch data con only contains: tensor, numpy.ndarray, " |
||||
"dict, list, number, but got {}".format(type(sample))) |
@ -0,0 +1,35 @@ |
||||
# Compiling the Custom OPs |
||||
The rotated-box IoU OP is implemented with reference to [Custom External Operators](https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/custom_op/new_cpp_op_cn.html). |
||||
|
||||
## 1. Requirements |
||||
- Paddle >= 2.0.1 |
||||
- gcc 8.2 |
||||
|
||||
## 2. Installation |
||||
``` |
||||
python setup.py install |
||||
``` |
||||
|
||||
Once the compilation finishes, the OPs are ready to use. Below is a usage example of `rbox_iou`: |
||||
``` |
||||
import numpy as np |
import paddle |
# import the custom op |
||||
from ext_op import rbox_iou |
||||
|
||||
paddle.set_device('gpu:0') |
||||
paddle.disable_static() |
||||
|
||||
rbox1 = np.random.rand(13000, 5) |
||||
rbox2 = np.random.rand(7, 5) |
||||
|
||||
pd_rbox1 = paddle.to_tensor(rbox1) |
||||
pd_rbox2 = paddle.to_tensor(rbox2) |
||||
|
||||
iou = rbox_iou(pd_rbox1, pd_rbox2) |
||||
print('iou', iou) |
||||
``` |
||||
|
||||
## 3. Unit Tests |
||||
You can run the unit tests to verify that the custom operators work correctly, for example: |
||||
``` |
||||
python unittest/test_matched_rbox_iou.py |
||||
``` |
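
Besides `rbox_iou`, the build also produces a `matched_rbox_iou` OP (exercised by `unittest/test_matched_rbox_iou.py` above), which computes the IoU of box pairs at matching indices, so both inputs must contain the same number of boxes. A rough usage sketch:
```
import numpy as np
import paddle

from ext_op import matched_rbox_iou

paddle.set_device('gpu:0')

rbox1 = paddle.to_tensor(np.random.rand(1000, 5))
rbox2 = paddle.to_tensor(np.random.rand(1000, 5))

# element-wise IoU of rbox1[i] vs rbox2[i]; output shape is [1000]
iou = matched_rbox_iou(rbox1, rbox2)
print('matched iou', iou)
```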
@ -0,0 +1,90 @@ |
||||
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// The code is based on
|
||||
// https://github.com/csuhan/s2anet/blob/master/mmdet/ops/box_iou_rotated
|
||||
|
||||
#include "paddle/extension.h" |
||||
#include "rbox_iou_op.h" |
||||
|
||||
template <typename T> |
||||
void matched_rbox_iou_cpu_kernel(const int rbox_num, const T *rbox1_data_ptr, |
||||
const T *rbox2_data_ptr, T *output_data_ptr) { |
||||
|
||||
int i; |
||||
for (i = 0; i < rbox_num; i++) { |
||||
output_data_ptr[i] = |
||||
rbox_iou_single<T>(rbox1_data_ptr + i * 5, rbox2_data_ptr + i * 5); |
||||
} |
||||
} |
||||
|
||||
#define CHECK_INPUT_CPU(x) \ |
||||
PD_CHECK(x.place() == paddle::PlaceType::kCPU, #x " must be a CPU Tensor.") |
||||
|
||||
std::vector<paddle::Tensor> MatchedRboxIouCPUForward(const paddle::Tensor &rbox1, |
||||
const paddle::Tensor &rbox2) { |
||||
CHECK_INPUT_CPU(rbox1); |
||||
CHECK_INPUT_CPU(rbox2); |
||||
PD_CHECK(rbox1.shape()[0] == rbox2.shape()[0], "inputs must have the same first dimension"); |
||||
|
||||
auto rbox_num = rbox1.shape()[0]; |
||||
auto output = paddle::Tensor(paddle::PlaceType::kCPU, {rbox_num}); |
||||
|
||||
PD_DISPATCH_FLOATING_TYPES(rbox1.type(), "matched_rbox_iou_cpu_kernel", ([&] { |
||||
matched_rbox_iou_cpu_kernel<data_t>( |
||||
rbox_num, rbox1.data<data_t>(), |
||||
rbox2.data<data_t>(), |
||||
output.mutable_data<data_t>()); |
||||
})); |
||||
|
||||
return {output}; |
||||
} |
||||
|
||||
#ifdef PADDLE_WITH_CUDA |
||||
std::vector<paddle::Tensor> MatchedRboxIouCUDAForward(const paddle::Tensor &rbox1, |
||||
const paddle::Tensor &rbox2); |
||||
#endif |
||||
|
||||
#define CHECK_INPUT_SAME(x1, x2) \ |
||||
PD_CHECK(x1.place() == x2.place(), "inputs must be on the same place.") |
||||
|
||||
std::vector<paddle::Tensor> MatchedRboxIouForward(const paddle::Tensor &rbox1, |
||||
const paddle::Tensor &rbox2) { |
||||
CHECK_INPUT_SAME(rbox1, rbox2); |
||||
if (rbox1.place() == paddle::PlaceType::kCPU) { |
||||
return MatchedRboxIouCPUForward(rbox1, rbox2); |
||||
#ifdef PADDLE_WITH_CUDA |
||||
} else if (rbox1.place() == paddle::PlaceType::kGPU) { |
||||
return MatchedRboxIouCUDAForward(rbox1, rbox2); |
||||
#endif |
||||
} |
||||
} |
||||
|
||||
std::vector<std::vector<int64_t>> |
||||
MatchedRboxIouInferShape(std::vector<int64_t> rbox1_shape, |
||||
std::vector<int64_t> rbox2_shape) { |
||||
return {{rbox1_shape[0]}}; |
||||
} |
||||
|
||||
std::vector<paddle::DataType> MatchedRboxIouInferDtype(paddle::DataType t1, |
||||
paddle::DataType t2) { |
||||
return {t1}; |
||||
} |
||||
|
||||
PD_BUILD_OP(matched_rbox_iou) |
||||
.Inputs({"RBOX1", "RBOX2"}) |
||||
.Outputs({"Output"}) |
||||
.SetKernelFn(PD_KERNEL(MatchedRboxIouForward)) |
||||
.SetInferShapeFn(PD_INFER_SHAPE(MatchedRboxIouInferShape)) |
||||
.SetInferDtypeFn(PD_INFER_DTYPE(MatchedRboxIouInferDtype)); |
@ -0,0 +1,63 @@ |
||||
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
// |
||||
// Licensed under the Apache License, Version 2.0 (the "License"); |
||||
// you may not use this file except in compliance with the License. |
||||
// You may obtain a copy of the License at |
||||
// |
||||
// http://www.apache.org/licenses/LICENSE-2.0 |
||||
// |
||||
// Unless required by applicable law or agreed to in writing, software |
||||
// distributed under the License is distributed on an "AS IS" BASIS, |
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
// See the License for the specific language governing permissions and |
||||
// limitations under the License. |
||||
// |
||||
// The code is based on |
||||
// https://github.com/csuhan/s2anet/blob/master/mmdet/ops/box_iou_rotated |
||||
|
||||
#include "paddle/extension.h" |
||||
#include "rbox_iou_op.h" |
||||
|
||||
/** |
||||
Computes ceil(a / b) |
||||
*/ |
||||
|
||||
static inline int CeilDiv(const int a, const int b) { return (a + b - 1) / b; } |
||||
|
||||
template <typename T> |
||||
__global__ void |
||||
matched_rbox_iou_cuda_kernel(const int rbox_num, const T *rbox1_data_ptr, |
||||
const T *rbox2_data_ptr, T *output_data_ptr) { |
||||
for (int tid = blockIdx.x * blockDim.x + threadIdx.x; tid < rbox_num; |
||||
tid += blockDim.x * gridDim.x) { |
||||
output_data_ptr[tid] = |
||||
rbox_iou_single<T>(rbox1_data_ptr + tid * 5, rbox2_data_ptr + tid * 5); |
||||
} |
||||
} |
||||
|
||||
#define CHECK_INPUT_GPU(x) \ |
||||
PD_CHECK(x.place() == paddle::PlaceType::kGPU, #x " must be a GPU Tensor.") |
||||
|
||||
std::vector<paddle::Tensor> MatchedRboxIouCUDAForward(const paddle::Tensor &rbox1, |
||||
const paddle::Tensor &rbox2) { |
||||
CHECK_INPUT_GPU(rbox1); |
||||
CHECK_INPUT_GPU(rbox2); |
||||
PD_CHECK(rbox1.shape()[0] == rbox2.shape()[0], "inputs must have the same first dimension"); |
||||
|
||||
auto rbox_num = rbox1.shape()[0]; |
||||
|
||||
auto output = paddle::Tensor(paddle::PlaceType::kGPU, {rbox_num}); |
||||
|
||||
const int thread_per_block = 512; |
||||
const int block_per_grid = CeilDiv(rbox_num, thread_per_block); |
||||
|
||||
PD_DISPATCH_FLOATING_TYPES( |
||||
rbox1.type(), "matched_rbox_iou_cuda_kernel", ([&] { |
||||
matched_rbox_iou_cuda_kernel< |
||||
data_t><<<block_per_grid, thread_per_block, 0, rbox1.stream()>>>( |
||||
rbox_num, rbox1.data<data_t>(), rbox2.data<data_t>(), |
||||
output.mutable_data<data_t>()); |
||||
})); |
||||
|
||||
return {output}; |
||||
} |
@ -0,0 +1,97 @@ |
||||
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// The code is based on https://github.com/csuhan/s2anet/blob/master/mmdet/ops/box_iou_rotated
|
||||
|
||||
#include "rbox_iou_op.h" |
||||
#include "paddle/extension.h" |
||||
|
||||
|
||||
template <typename T> |
||||
void rbox_iou_cpu_kernel( |
||||
const int rbox1_num, |
||||
const int rbox2_num, |
||||
const T* rbox1_data_ptr, |
||||
const T* rbox2_data_ptr, |
||||
T* output_data_ptr) { |
||||
|
||||
int i, j; |
||||
for (i = 0; i < rbox1_num; i++) { |
||||
for (j = 0; j < rbox2_num; j++) { |
||||
int offset = i * rbox2_num + j; |
||||
output_data_ptr[offset] = rbox_iou_single<T>(rbox1_data_ptr + i * 5, rbox2_data_ptr + j * 5); |
||||
} |
||||
} |
||||
} |
||||
|
||||
|
||||
#define CHECK_INPUT_CPU(x) PD_CHECK(x.place() == paddle::PlaceType::kCPU, #x " must be a CPU Tensor.") |
||||
|
||||
std::vector<paddle::Tensor> RboxIouCPUForward(const paddle::Tensor& rbox1, const paddle::Tensor& rbox2) { |
||||
CHECK_INPUT_CPU(rbox1); |
||||
CHECK_INPUT_CPU(rbox2); |
||||
|
||||
auto rbox1_num = rbox1.shape()[0]; |
||||
auto rbox2_num = rbox2.shape()[0]; |
||||
|
||||
auto output = paddle::Tensor(paddle::PlaceType::kCPU, {rbox1_num, rbox2_num}); |
||||
|
||||
PD_DISPATCH_FLOATING_TYPES( |
||||
rbox1.type(), |
||||
"rbox_iou_cpu_kernel", |
||||
([&] { |
||||
rbox_iou_cpu_kernel<data_t>( |
||||
rbox1_num, |
||||
rbox2_num, |
||||
rbox1.data<data_t>(), |
||||
rbox2.data<data_t>(), |
||||
output.mutable_data<data_t>()); |
||||
})); |
||||
|
||||
return {output}; |
||||
} |
||||
|
||||
|
||||
#ifdef PADDLE_WITH_CUDA |
||||
std::vector<paddle::Tensor> RboxIouCUDAForward(const paddle::Tensor& rbox1, const paddle::Tensor& rbox2); |
||||
#endif |
||||
|
||||
|
||||
#define CHECK_INPUT_SAME(x1, x2) PD_CHECK(x1.place() == x2.place(), "inputs must be on the same place.") |
||||
|
||||
std::vector<paddle::Tensor> RboxIouForward(const paddle::Tensor& rbox1, const paddle::Tensor& rbox2) { |
||||
CHECK_INPUT_SAME(rbox1, rbox2); |
||||
if (rbox1.place() == paddle::PlaceType::kCPU) { |
||||
return RboxIouCPUForward(rbox1, rbox2); |
||||
#ifdef PADDLE_WITH_CUDA |
||||
} else if (rbox1.place() == paddle::PlaceType::kGPU) { |
||||
return RboxIouCUDAForward(rbox1, rbox2); |
||||
#endif |
||||
} |
||||
} |
||||
|
||||
std::vector<std::vector<int64_t>> InferShape(std::vector<int64_t> rbox1_shape, std::vector<int64_t> rbox2_shape) { |
||||
return {{rbox1_shape[0], rbox2_shape[0]}}; |
||||
} |
||||
|
||||
std::vector<paddle::DataType> InferDtype(paddle::DataType t1, paddle::DataType t2) { |
||||
return {t1}; |
||||
} |
||||
|
||||
PD_BUILD_OP(rbox_iou) |
||||
.Inputs({"RBOX1", "RBOX2"}) |
||||
.Outputs({"Output"}) |
||||
.SetKernelFn(PD_KERNEL(RboxIouForward)) |
||||
.SetInferShapeFn(PD_INFER_SHAPE(InferShape)) |
||||
.SetInferDtypeFn(PD_INFER_DTYPE(InferDtype)); |
@ -0,0 +1,114 @@ |
||||
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
// |
||||
// Licensed under the Apache License, Version 2.0 (the "License"); |
||||
// you may not use this file except in compliance with the License. |
||||
// You may obtain a copy of the License at |
||||
// |
||||
// http://www.apache.org/licenses/LICENSE-2.0 |
||||
// |
||||
// Unless required by applicable law or agreed to in writing, software |
||||
// distributed under the License is distributed on an "AS IS" BASIS, |
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
// See the License for the specific language governing permissions and |
||||
// limitations under the License. |
||||
// |
||||
// The code is based on |
||||
// https://github.com/csuhan/s2anet/blob/master/mmdet/ops/box_iou_rotated |
||||
|
||||
#include "paddle/extension.h" |
||||
#include "rbox_iou_op.h" |
||||
|
||||
// 2D block with 32 * 16 = 512 threads per block |
||||
const int BLOCK_DIM_X = 32; |
||||
const int BLOCK_DIM_Y = 16; |
||||
|
||||
/** |
||||
Computes ceil(a / b) |
||||
*/ |
||||
|
||||
static inline int CeilDiv(const int a, const int b) { return (a + b - 1) / b; } |
||||
|
||||
template <typename T> |
||||
__global__ void rbox_iou_cuda_kernel(const int rbox1_num, const int rbox2_num, |
||||
const T *rbox1_data_ptr, |
||||
const T *rbox2_data_ptr, |
||||
T *output_data_ptr) { |
||||
|
||||
// get row_start and col_start |
||||
const int rbox1_block_idx = blockIdx.x * blockDim.x; |
||||
const int rbox2_block_idx = blockIdx.y * blockDim.y; |
||||
|
||||
const int rbox1_thread_num = min(rbox1_num - rbox1_block_idx, blockDim.x); |
||||
const int rbox2_thread_num = min(rbox2_num - rbox2_block_idx, blockDim.y); |
||||
|
||||
__shared__ T block_boxes1[BLOCK_DIM_X * 5]; |
||||
__shared__ T block_boxes2[BLOCK_DIM_Y * 5]; |
||||
|
||||
// It's safe to copy using threadIdx.x since BLOCK_DIM_X >= BLOCK_DIM_Y |
||||
if (threadIdx.x < rbox1_thread_num && threadIdx.y == 0) { |
||||
block_boxes1[threadIdx.x * 5 + 0] = |
||||
rbox1_data_ptr[(rbox1_block_idx + threadIdx.x) * 5 + 0]; |
||||
block_boxes1[threadIdx.x * 5 + 1] = |
||||
rbox1_data_ptr[(rbox1_block_idx + threadIdx.x) * 5 + 1]; |
||||
block_boxes1[threadIdx.x * 5 + 2] = |
||||
rbox1_data_ptr[(rbox1_block_idx + threadIdx.x) * 5 + 2]; |
||||
block_boxes1[threadIdx.x * 5 + 3] = |
||||
rbox1_data_ptr[(rbox1_block_idx + threadIdx.x) * 5 + 3]; |
||||
block_boxes1[threadIdx.x * 5 + 4] = |
||||
rbox1_data_ptr[(rbox1_block_idx + threadIdx.x) * 5 + 4]; |
||||
} |
||||
|
||||
// Since rbox2_thread_num <= BLOCK_DIM_Y <= BLOCK_DIM_X, threadIdx.x can also index |
||||
// rbox2; reuse the same condition as above: threadIdx.y == 0 |
||||
if (threadIdx.x < rbox2_thread_num && threadIdx.y == 0) { |
||||
block_boxes2[threadIdx.x * 5 + 0] = |
||||
rbox2_data_ptr[(rbox2_block_idx + threadIdx.x) * 5 + 0]; |
||||
block_boxes2[threadIdx.x * 5 + 1] = |
||||
rbox2_data_ptr[(rbox2_block_idx + threadIdx.x) * 5 + 1]; |
||||
block_boxes2[threadIdx.x * 5 + 2] = |
||||
rbox2_data_ptr[(rbox2_block_idx + threadIdx.x) * 5 + 2]; |
||||
block_boxes2[threadIdx.x * 5 + 3] = |
||||
rbox2_data_ptr[(rbox2_block_idx + threadIdx.x) * 5 + 3]; |
||||
block_boxes2[threadIdx.x * 5 + 4] = |
||||
rbox2_data_ptr[(rbox2_block_idx + threadIdx.x) * 5 + 4]; |
||||
} |
||||
|
||||
// sync |
||||
__syncthreads(); |
||||
|
||||
if (threadIdx.x < rbox1_thread_num && threadIdx.y < rbox2_thread_num) { |
||||
int offset = (rbox1_block_idx + threadIdx.x) * rbox2_num + rbox2_block_idx + |
||||
threadIdx.y; |
||||
output_data_ptr[offset] = rbox_iou_single<T>( |
||||
block_boxes1 + threadIdx.x * 5, block_boxes2 + threadIdx.y * 5); |
||||
} |
||||
} |
||||
|
||||
#define CHECK_INPUT_GPU(x) \ |
||||
PD_CHECK(x.place() == paddle::PlaceType::kGPU, #x " must be a GPU Tensor.") |
||||
|
||||
std::vector<paddle::Tensor> RboxIouCUDAForward(const paddle::Tensor &rbox1, |
||||
const paddle::Tensor &rbox2) { |
||||
CHECK_INPUT_GPU(rbox1); |
||||
CHECK_INPUT_GPU(rbox2); |
||||
|
||||
auto rbox1_num = rbox1.shape()[0]; |
||||
auto rbox2_num = rbox2.shape()[0]; |
||||
|
||||
auto output = paddle::Tensor(paddle::PlaceType::kGPU, {rbox1_num, rbox2_num}); |
||||
|
||||
const int blocks_x = CeilDiv(rbox1_num, BLOCK_DIM_X); |
||||
const int blocks_y = CeilDiv(rbox2_num, BLOCK_DIM_Y); |
||||
|
||||
dim3 blocks(blocks_x, blocks_y); |
||||
dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y); |
||||
|
||||
PD_DISPATCH_FLOATING_TYPES( |
||||
rbox1.type(), "rbox_iou_cuda_kernel", ([&] { |
||||
rbox_iou_cuda_kernel<data_t><<<blocks, threads, 0, rbox1.stream()>>>( |
||||
rbox1_num, rbox2_num, rbox1.data<data_t>(), rbox2.data<data_t>(), |
||||
output.mutable_data<data_t>()); |
||||
})); |
||||
|
||||
return {output}; |
||||
} |
@ -0,0 +1,348 @@ |
||||
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// The code is based on
|
||||
// https://github.com/csuhan/s2anet/blob/master/mmdet/ops/box_iou_rotated
|
||||
|
||||
#pragma once |
||||
|
||||
#include <cassert> |
||||
#include <cmath> |
||||
#include <vector> |
||||
|
||||
#ifdef __CUDACC__ |
||||
// Designates functions callable from the host (CPU) and the device (GPU)
|
||||
#define HOST_DEVICE __host__ __device__ |
||||
#define HOST_DEVICE_INLINE HOST_DEVICE __forceinline__ |
||||
#else |
||||
#include <algorithm> |
||||
#define HOST_DEVICE |
||||
#define HOST_DEVICE_INLINE HOST_DEVICE inline |
||||
#endif |
||||
|
||||
namespace { |
||||
|
||||
template <typename T> struct RotatedBox { T x_ctr, y_ctr, w, h, a; }; |
||||
|
||||
template <typename T> struct Point { |
||||
T x, y; |
||||
HOST_DEVICE_INLINE Point(const T &px = 0, const T &py = 0) : x(px), y(py) {} |
||||
HOST_DEVICE_INLINE Point operator+(const Point &p) const { |
||||
return Point(x + p.x, y + p.y); |
||||
} |
||||
HOST_DEVICE_INLINE Point &operator+=(const Point &p) { |
||||
x += p.x; |
||||
y += p.y; |
||||
return *this; |
||||
} |
||||
HOST_DEVICE_INLINE Point operator-(const Point &p) const { |
||||
return Point(x - p.x, y - p.y); |
||||
} |
||||
HOST_DEVICE_INLINE Point operator*(const T coeff) const { |
||||
return Point(x * coeff, y * coeff); |
||||
} |
||||
}; |
||||
|
||||
template <typename T> |
||||
HOST_DEVICE_INLINE T dot_2d(const Point<T> &A, const Point<T> &B) { |
||||
return A.x * B.x + A.y * B.y; |
||||
} |
||||
|
||||
template <typename T> |
||||
HOST_DEVICE_INLINE T cross_2d(const Point<T> &A, const Point<T> &B) { |
||||
return A.x * B.y - B.x * A.y; |
||||
} |
||||
|
||||
template <typename T> |
||||
HOST_DEVICE_INLINE void get_rotated_vertices(const RotatedBox<T> &box, |
||||
Point<T> (&pts)[4]) { |
||||
// M_PI / 180. == 0.01745329251
|
||||
// double theta = box.a * 0.01745329251;
|
||||
// MODIFIED
|
||||
double theta = box.a; |
||||
T cosTheta2 = (T)cos(theta) * 0.5f; |
||||
T sinTheta2 = (T)sin(theta) * 0.5f; |
||||
|
||||
// y: top --> down; x: left --> right
|
||||
pts[0].x = box.x_ctr - sinTheta2 * box.h - cosTheta2 * box.w; |
||||
pts[0].y = box.y_ctr + cosTheta2 * box.h - sinTheta2 * box.w; |
||||
pts[1].x = box.x_ctr + sinTheta2 * box.h - cosTheta2 * box.w; |
||||
pts[1].y = box.y_ctr - cosTheta2 * box.h - sinTheta2 * box.w; |
||||
pts[2].x = 2 * box.x_ctr - pts[0].x; |
||||
pts[2].y = 2 * box.y_ctr - pts[0].y; |
||||
pts[3].x = 2 * box.x_ctr - pts[1].x; |
||||
pts[3].y = 2 * box.y_ctr - pts[1].y; |
||||
} |
||||
|
||||
template <typename T> |
||||
HOST_DEVICE_INLINE int get_intersection_points(const Point<T> (&pts1)[4], |
||||
const Point<T> (&pts2)[4], |
||||
Point<T> (&intersections)[24]) { |
||||
// Line vector
|
||||
// A line from p1 to p2 is: p1 + (p2-p1)*t, t=[0,1]
|
||||
Point<T> vec1[4], vec2[4]; |
||||
for (int i = 0; i < 4; i++) { |
||||
vec1[i] = pts1[(i + 1) % 4] - pts1[i]; |
||||
vec2[i] = pts2[(i + 1) % 4] - pts2[i]; |
||||
} |
||||
|
||||
// Line test - test all line combos for intersection
|
||||
int num = 0; // number of intersections
|
||||
for (int i = 0; i < 4; i++) { |
||||
for (int j = 0; j < 4; j++) { |
||||
// Solve for 2x2 Ax=b
|
||||
T det = cross_2d<T>(vec2[j], vec1[i]); |
||||
|
||||
// This takes care of parallel lines
|
||||
if (fabs(det) <= 1e-14) { |
||||
continue; |
||||
} |
||||
|
||||
auto vec12 = pts2[j] - pts1[i]; |
||||
|
||||
T t1 = cross_2d<T>(vec2[j], vec12) / det; |
||||
T t2 = cross_2d<T>(vec1[i], vec12) / det; |
||||
|
||||
if (t1 >= 0.0f && t1 <= 1.0f && t2 >= 0.0f && t2 <= 1.0f) { |
||||
intersections[num++] = pts1[i] + vec1[i] * t1; |
||||
} |
||||
} |
||||
} |
||||
|
||||
// Check for vertices of rect1 inside rect2
|
||||
{ |
||||
const auto &AB = vec2[0]; |
||||
const auto &DA = vec2[3]; |
||||
auto ABdotAB = dot_2d<T>(AB, AB); |
||||
auto ADdotAD = dot_2d<T>(DA, DA); |
||||
for (int i = 0; i < 4; i++) { |
||||
// assume ABCD is the rectangle, and P is the point to be judged
|
||||
// P is inside ABCD iff. P's projection on AB lies within AB
|
||||
// and P's projection on AD lies within AD
|
||||
|
||||
auto AP = pts1[i] - pts2[0]; |
||||
|
||||
auto APdotAB = dot_2d<T>(AP, AB); |
||||
auto APdotAD = -dot_2d<T>(AP, DA); |
||||
|
||||
if ((APdotAB >= 0) && (APdotAD >= 0) && (APdotAB <= ABdotAB) && |
||||
(APdotAD <= ADdotAD)) { |
||||
intersections[num++] = pts1[i]; |
||||
} |
||||
} |
||||
} |
||||
|
||||
// Reverse the check - check for vertices of rect2 inside rect1
|
||||
{ |
||||
const auto &AB = vec1[0]; |
||||
const auto &DA = vec1[3]; |
||||
auto ABdotAB = dot_2d<T>(AB, AB); |
||||
auto ADdotAD = dot_2d<T>(DA, DA); |
||||
for (int i = 0; i < 4; i++) { |
||||
auto AP = pts2[i] - pts1[0]; |
||||
|
||||
auto APdotAB = dot_2d<T>(AP, AB); |
||||
auto APdotAD = -dot_2d<T>(AP, DA); |
||||
|
||||
if ((APdotAB >= 0) && (APdotAD >= 0) && (APdotAB <= ABdotAB) && |
||||
(APdotAD <= ADdotAD)) { |
||||
intersections[num++] = pts2[i]; |
||||
} |
||||
} |
||||
} |
||||
|
||||
return num; |
||||
} |
||||
|
||||
template <typename T> |
||||
HOST_DEVICE_INLINE int convex_hull_graham(const Point<T> (&p)[24], |
||||
const int &num_in, Point<T> (&q)[24], |
||||
bool shift_to_zero = false) { |
||||
assert(num_in >= 2); |
||||
|
||||
// Step 1:
|
||||
// Find point with minimum y
|
||||
// if more than 1 points have the same minimum y,
|
||||
// pick the one with the minimum x.
|
||||
int t = 0; |
||||
for (int i = 1; i < num_in; i++) { |
||||
if (p[i].y < p[t].y || (p[i].y == p[t].y && p[i].x < p[t].x)) { |
||||
t = i; |
||||
} |
||||
} |
||||
auto &start = p[t]; // starting point
|
||||
|
||||
// Step 2:
|
||||
// Subtract starting point from every points (for sorting in the next step)
|
||||
for (int i = 0; i < num_in; i++) { |
||||
q[i] = p[i] - start; |
||||
} |
||||
|
||||
// Swap the starting point to position 0
|
||||
auto tmp = q[0]; |
||||
q[0] = q[t]; |
||||
q[t] = tmp; |
||||
|
||||
// Step 3:
|
||||
// Sort point 1 ~ num_in according to their relative cross-product values
|
||||
// (essentially sorting according to angles)
|
||||
// If the angles are the same, sort according to their distance to origin
|
||||
T dist[24]; |
||||
for (int i = 0; i < num_in; i++) { |
||||
dist[i] = dot_2d<T>(q[i], q[i]); |
||||
} |
||||
|
||||
#ifdef __CUDACC__ |
||||
// CUDA version
|
||||
// In the future, we can potentially use thrust
|
||||
// for sorting here to improve speed (though not guaranteed)
|
||||
for (int i = 1; i < num_in - 1; i++) { |
||||
for (int j = i + 1; j < num_in; j++) { |
||||
T crossProduct = cross_2d<T>(q[i], q[j]); |
||||
if ((crossProduct < -1e-6) || |
||||
(fabs(crossProduct) < 1e-6 && dist[i] > dist[j])) { |
||||
auto q_tmp = q[i]; |
||||
q[i] = q[j]; |
||||
q[j] = q_tmp; |
||||
auto dist_tmp = dist[i]; |
||||
dist[i] = dist[j]; |
||||
dist[j] = dist_tmp; |
||||
} |
||||
} |
||||
} |
||||
#else |
||||
// CPU version
|
||||
std::sort(q + 1, q + num_in, |
||||
[](const Point<T> &A, const Point<T> &B) -> bool { |
||||
T temp = cross_2d<T>(A, B); |
||||
if (fabs(temp) < 1e-6) { |
||||
return dot_2d<T>(A, A) < dot_2d<T>(B, B); |
||||
} else { |
||||
return temp > 0; |
||||
} |
||||
}); |
||||
#endif |
||||
|
||||
// Step 4:
|
||||
// Make sure there are at least 2 points (that don't overlap with each other)
|
||||
// in the stack
|
||||
int k; // index of the non-overlapped second point
|
||||
for (k = 1; k < num_in; k++) { |
||||
if (dist[k] > 1e-8) { |
||||
break; |
||||
} |
||||
} |
||||
if (k == num_in) { |
||||
// We reach the end, which means the convex hull is just one point
|
||||
q[0] = p[t]; |
||||
return 1; |
||||
} |
||||
q[1] = q[k]; |
||||
int m = 2; // 2 points in the stack
|
||||
// Step 5:
|
||||
// Finally we can start the scanning process.
|
||||
// When a non-convex relationship between the 3 points is found
|
||||
// (either concave shape or duplicated points),
|
||||
// we pop the previous point from the stack
|
||||
// until the 3-point relationship is convex again, or
|
||||
// until the stack only contains two points
|
||||
for (int i = k + 1; i < num_in; i++) { |
||||
while (m > 1 && cross_2d<T>(q[i] - q[m - 2], q[m - 1] - q[m - 2]) >= 0) { |
||||
m--; |
||||
} |
||||
q[m++] = q[i]; |
||||
} |
||||
|
||||
// Step 6 (Optional):
|
||||
// In general sense we need the original coordinates, so we
|
||||
// need to shift the points back (reverting Step 2)
|
||||
// But if we're only interested in getting the area/perimeter of the shape
|
||||
// We can simply return.
|
||||
if (!shift_to_zero) { |
||||
for (int i = 0; i < m; i++) { |
||||
q[i] += start; |
||||
} |
||||
} |
||||
|
||||
return m; |
||||
} |
||||
|
||||
template <typename T> |
||||
HOST_DEVICE_INLINE T polygon_area(const Point<T> (&q)[24], const int &m) { |
||||
if (m <= 2) { |
||||
return 0; |
||||
} |
||||
|
||||
T area = 0; |
||||
for (int i = 1; i < m - 1; i++) { |
||||
area += fabs(cross_2d<T>(q[i] - q[0], q[i + 1] - q[0])); |
||||
} |
||||
|
||||
return area / 2.0; |
||||
} |
||||
|
||||
template <typename T> |
||||
HOST_DEVICE_INLINE T rboxes_intersection(const RotatedBox<T> &box1, |
||||
const RotatedBox<T> &box2) { |
||||
// There are up to 4 x 4 + 4 + 4 = 24 intersections (including dups) returned
|
||||
// from rotated_rect_intersection_pts
|
||||
Point<T> intersectPts[24], orderedPts[24]; |
||||
|
||||
Point<T> pts1[4]; |
||||
Point<T> pts2[4]; |
||||
get_rotated_vertices<T>(box1, pts1); |
||||
get_rotated_vertices<T>(box2, pts2); |
||||
|
||||
int num = get_intersection_points<T>(pts1, pts2, intersectPts); |
||||
|
||||
if (num <= 2) { |
||||
return 0.0; |
||||
} |
||||
|
||||
// Convex Hull to order the intersection points in clockwise order and find
|
||||
// the contour area.
|
||||
int num_convex = convex_hull_graham<T>(intersectPts, num, orderedPts, true); |
||||
return polygon_area<T>(orderedPts, num_convex); |
||||
} |
||||
|
||||
} // namespace
|
||||
|
||||
template <typename T> |
||||
HOST_DEVICE_INLINE T rbox_iou_single(T const *const box1_raw, |
||||
T const *const box2_raw) { |
||||
// shift center to the middle point to achieve higher precision in result
|
||||
RotatedBox<T> box1, box2; |
||||
auto center_shift_x = (box1_raw[0] + box2_raw[0]) / 2.0; |
||||
auto center_shift_y = (box1_raw[1] + box2_raw[1]) / 2.0; |
||||
box1.x_ctr = box1_raw[0] - center_shift_x; |
||||
box1.y_ctr = box1_raw[1] - center_shift_y; |
||||
box1.w = box1_raw[2]; |
||||
box1.h = box1_raw[3]; |
||||
box1.a = box1_raw[4]; |
||||
box2.x_ctr = box2_raw[0] - center_shift_x; |
||||
box2.y_ctr = box2_raw[1] - center_shift_y; |
||||
box2.w = box2_raw[2]; |
||||
box2.h = box2_raw[3]; |
||||
box2.a = box2_raw[4]; |
||||
|
||||
const T area1 = box1.w * box1.h; |
||||
const T area2 = box2.w * box2.h; |
||||
if (area1 < 1e-14 || area2 < 1e-14) { |
||||
return 0.f; |
||||
} |
||||
|
||||
const T intersection = rboxes_intersection<T>(box1, box2); |
||||
const T iou = intersection / (area1 + area2 - intersection); |
||||
return iou; |
||||
} |
@ -0,0 +1,33 @@ |
||||
import os |
||||
import glob |
||||
import paddle |
||||
from paddle.utils.cpp_extension import CppExtension, CUDAExtension, setup |
||||
|
||||
|
||||
def get_extensions(): |
||||
root_dir = os.path.dirname(os.path.abspath(__file__)) |
||||
ext_root_dir = os.path.join(root_dir, 'csrc') |
||||
sources = [] |
||||
for ext_name in os.listdir(ext_root_dir): |
||||
ext_dir = os.path.join(ext_root_dir, ext_name) |
||||
source = glob.glob(os.path.join(ext_dir, '*.cc')) |
||||
kwargs = dict() |
||||
if paddle.device.is_compiled_with_cuda(): |
||||
source += glob.glob(os.path.join(ext_dir, '*.cu')) |
||||
|
||||
if not source: |
||||
continue |
||||
|
||||
sources += source |
||||
|
||||
if paddle.device.is_compiled_with_cuda(): |
||||
extension = CUDAExtension( |
||||
sources, extra_compile_args={'cxx': ['-DPADDLE_WITH_CUDA']}) |
||||
else: |
||||
extension = CppExtension(sources) |
||||
|
||||
return extension |
||||
|
||||
|
||||
if __name__ == "__main__": |
||||
setup(name='ext_op', ext_modules=get_extensions()) |
@ -0,0 +1,149 @@ |
||||
import numpy as np |
||||
import sys |
||||
import time |
||||
from shapely.geometry import Polygon |
||||
import paddle |
||||
import unittest |
||||
|
||||
from ext_op import matched_rbox_iou |
||||
|
||||
|
||||
def rbox2poly_single(rrect, get_best_begin_point=False): |
||||
""" |
||||
rrect:[x_ctr,y_ctr,w,h,angle] |
||||
to |
||||
poly:[x0,y0,x1,y1,x2,y2,x3,y3] |
||||
""" |
||||
x_ctr, y_ctr, width, height, angle = rrect[:5] |
||||
tl_x, tl_y, br_x, br_y = -width / 2, -height / 2, width / 2, height / 2 |
||||
# rect 2x4 |
||||
rect = np.array([[tl_x, br_x, br_x, tl_x], [tl_y, tl_y, br_y, br_y]]) |
||||
R = np.array([[np.cos(angle), -np.sin(angle)], |
||||
[np.sin(angle), np.cos(angle)]]) |
||||
# poly |
||||
poly = R.dot(rect) |
||||
x0, x1, x2, x3 = poly[0, :4] + x_ctr |
||||
y0, y1, y2, y3 = poly[1, :4] + y_ctr |
||||
poly = np.array([x0, y0, x1, y1, x2, y2, x3, y3], dtype=np.float64) |
||||
return poly |
||||
|
||||
|
||||
def intersection(g, p): |
||||
""" |
||||
Compute the IoU of two quadrilaterals g and p (8-value polygons). |
||||
""" |
||||
|
||||
g = g[:8].reshape((4, 2)) |
||||
p = p[:8].reshape((4, 2)) |
||||
|
||||
a = g |
||||
b = p |
||||
|
||||
use_filter = True |
||||
if use_filter: |
||||
# step1: |
||||
inter_x1 = np.maximum(np.min(a[:, 0]), np.min(b[:, 0])) |
||||
inter_x2 = np.minimum(np.max(a[:, 0]), np.max(b[:, 0])) |
||||
inter_y1 = np.maximum(np.min(a[:, 1]), np.min(b[:, 1])) |
||||
inter_y2 = np.minimum(np.max(a[:, 1]), np.max(b[:, 1])) |
||||
if inter_x1 >= inter_x2 or inter_y1 >= inter_y2: |
||||
return 0. |
||||
x1 = np.minimum(np.min(a[:, 0]), np.min(b[:, 0])) |
||||
x2 = np.maximum(np.max(a[:, 0]), np.max(b[:, 0])) |
||||
y1 = np.minimum(np.min(a[:, 1]), np.min(b[:, 1])) |
||||
y2 = np.maximum(np.max(a[:, 1]), np.max(b[:, 1])) |
||||
if x1 >= x2 or y1 >= y2 or (x2 - x1) < 2 or (y2 - y1) < 2: |
||||
return 0. |
||||
|
||||
g = Polygon(g) |
||||
p = Polygon(p) |
||||
if not g.is_valid or not p.is_valid: |
||||
return 0 |
||||
|
||||
inter = Polygon(g).intersection(Polygon(p)).area |
||||
union = g.area + p.area - inter |
||||
if union == 0: |
||||
return 0 |
||||
else: |
||||
return inter / union |
||||
|
||||
|
||||
def matched_rbox_overlaps(anchors, gt_bboxes, use_cv2=False): |
||||
""" |
||||
|
||||
Args: |
||||
anchors: [M, 5] x_ctr, y_ctr, w, h, angle |
||||
gt_bboxes: [M, 5] x_ctr, y_ctr, w, h, angle |
||||
|
||||
Returns: |
||||
matched_iou: [M] |
||||
""" |
||||
assert anchors.shape[1] == 5 |
||||
assert gt_bboxes.shape[1] == 5 |
||||
|
||||
gt_bboxes_ploy = [rbox2poly_single(e) for e in gt_bboxes] |
||||
anchors_ploy = [rbox2poly_single(e) for e in anchors] |
||||
|
||||
num = len(anchors_ploy) |
||||
iou = np.zeros((num, ), dtype=np.float64) |
||||
|
||||
start_time = time.time() |
||||
for i in range(num): |
||||
try: |
||||
iou[i] = intersection(gt_bboxes_ploy[i], anchors_ploy[i]) |
||||
except Exception as e: |
||||
print('cur gt_bboxes_ploy[i]', gt_bboxes_ploy[i], 'anchors_ploy[i]', |
||||
anchors_ploy[i], e) |
||||
return iou |
||||
|
||||
|
||||
def gen_sample(n): |
||||
rbox = np.random.rand(n, 5) |
||||
rbox[:, 0:4] = rbox[:, 0:4] * 0.45 + 0.001 |
||||
rbox[:, 4] = rbox[:, 4] - 0.5 |
||||
return rbox |
||||
|
||||
|
||||
class MatchedRBoxIoUTest(unittest.TestCase): |
||||
def setUp(self): |
||||
self.initTestCase() |
||||
self.rbox1 = gen_sample(self.n) |
||||
self.rbox2 = gen_sample(self.n) |
||||
|
||||
def initTestCase(self): |
||||
self.n = 1000 |
||||
|
||||
def assertAllClose(self, x, y, msg, atol=5e-1, rtol=1e-2): |
||||
self.assertTrue(np.allclose(x, y, atol=atol, rtol=rtol), msg=msg) |
||||
|
||||
def get_places(self): |
||||
places = [paddle.CPUPlace()] |
||||
if paddle.device.is_compiled_with_cuda(): |
||||
places.append(paddle.CUDAPlace(0)) |
||||
|
||||
return places |
||||
|
||||
def check_output(self, place): |
||||
paddle.disable_static() |
||||
pd_rbox1 = paddle.to_tensor(self.rbox1, place=place) |
||||
pd_rbox2 = paddle.to_tensor(self.rbox2, place=place) |
||||
actual_t = matched_rbox_iou(pd_rbox1, pd_rbox2).numpy() |
||||
poly_rbox1 = self.rbox1 |
||||
poly_rbox2 = self.rbox2 |
||||
poly_rbox1[:, 0:4] = self.rbox1[:, 0:4] * 1024 |
||||
poly_rbox2[:, 0:4] = self.rbox2[:, 0:4] * 1024 |
||||
expect_t = matched_rbox_overlaps(poly_rbox1, poly_rbox2, use_cv2=False) |
||||
self.assertAllClose( |
||||
actual_t, |
||||
expect_t, |
||||
msg="rbox_iou has diff at {} \nExpect {}\nBut got {}".format( |
||||
str(place), str(expect_t), str(actual_t))) |
||||
|
||||
def test_output(self): |
||||
places = self.get_places() |
||||
for place in places: |
||||
self.check_output(place) |
||||
|
||||
|
||||
if __name__ == "__main__": |
||||
unittest.main() |
@ -0,0 +1,151 @@ |
||||
import numpy as np |
||||
import sys |
||||
import time |
||||
from shapely.geometry import Polygon |
||||
import paddle |
||||
import unittest |
||||
|
||||
from ext_op import rbox_iou |
||||
|
||||
|
||||
def rbox2poly_single(rrect, get_best_begin_point=False): |
||||
""" |
||||
rrect:[x_ctr,y_ctr,w,h,angle] |
||||
to |
||||
poly:[x0,y0,x1,y1,x2,y2,x3,y3] |
||||
""" |
||||
x_ctr, y_ctr, width, height, angle = rrect[:5] |
||||
tl_x, tl_y, br_x, br_y = -width / 2, -height / 2, width / 2, height / 2 |
||||
# rect 2x4 |
||||
rect = np.array([[tl_x, br_x, br_x, tl_x], [tl_y, tl_y, br_y, br_y]]) |
||||
R = np.array([[np.cos(angle), -np.sin(angle)], |
||||
[np.sin(angle), np.cos(angle)]]) |
||||
# poly |
||||
poly = R.dot(rect) |
||||
x0, x1, x2, x3 = poly[0, :4] + x_ctr |
||||
y0, y1, y2, y3 = poly[1, :4] + y_ctr |
||||
poly = np.array([x0, y0, x1, y1, x2, y2, x3, y3], dtype=np.float64) |
||||
return poly |
||||
|
||||
|
||||
def intersection(g, p): |
||||
""" |
||||
Compute the IoU of two quadrilaterals g and p (8-value polygons). |
||||
""" |
||||
|
||||
g = g[:8].reshape((4, 2)) |
||||
p = p[:8].reshape((4, 2)) |
||||
|
||||
a = g |
||||
b = p |
||||
|
||||
use_filter = True |
||||
if use_filter: |
||||
# step1: |
||||
inter_x1 = np.maximum(np.min(a[:, 0]), np.min(b[:, 0])) |
||||
inter_x2 = np.minimum(np.max(a[:, 0]), np.max(b[:, 0])) |
||||
inter_y1 = np.maximum(np.min(a[:, 1]), np.min(b[:, 1])) |
||||
inter_y2 = np.minimum(np.max(a[:, 1]), np.max(b[:, 1])) |
||||
if inter_x1 >= inter_x2 or inter_y1 >= inter_y2: |
||||
return 0. |
||||
x1 = np.minimum(np.min(a[:, 0]), np.min(b[:, 0])) |
||||
x2 = np.maximum(np.max(a[:, 0]), np.max(b[:, 0])) |
||||
y1 = np.minimum(np.min(a[:, 1]), np.min(b[:, 1])) |
||||
y2 = np.maximum(np.max(a[:, 1]), np.max(b[:, 1])) |
||||
if x1 >= x2 or y1 >= y2 or (x2 - x1) < 2 or (y2 - y1) < 2: |
||||
return 0. |
||||
|
||||
g = Polygon(g) |
||||
p = Polygon(p) |
||||
if not g.is_valid or not p.is_valid: |
||||
return 0 |
||||
|
||||
inter = Polygon(g).intersection(Polygon(p)).area |
||||
union = g.area + p.area - inter |
||||
if union == 0: |
||||
return 0 |
||||
else: |
||||
return inter / union |
||||
|
||||
|
||||
def rbox_overlaps(anchors, gt_bboxes, use_cv2=False): |
||||
""" |
||||
|
||||
Args: |
||||
anchors: [NA, 5] x_ctr, y_ctr, w, h, angle |
||||
gt_bboxes: [M, 5] x_ctr, y_ctr, w, h, angle |
||||
|
||||
Returns: |
||||
iou: [NA, M] |
||||
""" |
||||
assert anchors.shape[1] == 5 |
||||
assert gt_bboxes.shape[1] == 5 |
||||
|
||||
gt_bboxes_ploy = [rbox2poly_single(e) for e in gt_bboxes] |
||||
anchors_ploy = [rbox2poly_single(e) for e in anchors] |
||||
|
||||
num_gt, num_anchors = len(gt_bboxes_ploy), len(anchors_ploy) |
||||
iou = np.zeros((num_anchors, num_gt), dtype=np.float64) |
||||
|
||||
start_time = time.time() |
||||
for i in range(num_anchors): |
||||
for j in range(num_gt): |
||||
try: |
||||
iou[i, j] = intersection(anchors_ploy[i], gt_bboxes_ploy[j]) |
||||
except Exception as e: |
||||
print('cur anchors_ploy[i]', anchors_ploy[i], |
||||
'gt_bboxes_ploy[j]', gt_bboxes_ploy[j], e) |
||||
return iou |
||||
|
||||
|
||||
def gen_sample(n): |
||||
rbox = np.random.rand(n, 5) |
||||
rbox[:, 0:4] = rbox[:, 0:4] * 0.45 + 0.001 |
||||
rbox[:, 4] = rbox[:, 4] - 0.5 |
||||
return rbox |
||||
|
||||
|
||||
class RBoxIoUTest(unittest.TestCase): |
||||
def setUp(self): |
||||
self.initTestCase() |
||||
self.rbox1 = gen_sample(self.n) |
||||
self.rbox2 = gen_sample(self.m) |
||||
|
||||
def initTestCase(self): |
||||
self.n = 13000 |
||||
self.m = 7 |
||||
|
||||
def assertAllClose(self, x, y, msg, atol=5e-1, rtol=1e-2): |
||||
self.assertTrue(np.allclose(x, y, atol=atol, rtol=rtol), msg=msg) |
||||
|
||||
def get_places(self): |
||||
places = [paddle.CPUPlace()] |
||||
if paddle.device.is_compiled_with_cuda(): |
||||
places.append(paddle.CUDAPlace(0)) |
||||
|
||||
return places |
||||
|
||||
def check_output(self, place): |
||||
paddle.disable_static() |
||||
pd_rbox1 = paddle.to_tensor(self.rbox1, place=place) |
||||
pd_rbox2 = paddle.to_tensor(self.rbox2, place=place) |
||||
actual_t = rbox_iou(pd_rbox1, pd_rbox2).numpy() |
||||
poly_rbox1 = self.rbox1 |
||||
poly_rbox2 = self.rbox2 |
||||
poly_rbox1[:, 0:4] = self.rbox1[:, 0:4] * 1024 |
||||
poly_rbox2[:, 0:4] = self.rbox2[:, 0:4] * 1024 |
||||
expect_t = rbox_overlaps(poly_rbox1, poly_rbox2, use_cv2=False) |
||||
self.assertAllClose( |
||||
actual_t, |
||||
expect_t, |
||||
msg="rbox_iou has diff at {} \nExpect {}\nBut got {}".format( |
||||
str(place), str(expect_t), str(actual_t))) |
||||
|
||||
def test_output(self): |
||||
places = self.get_places() |
||||
for place in places: |
||||
self.check_output(place) |
||||
|
||||
|
||||
if __name__ == "__main__": |
||||
unittest.main() |
@ -0,0 +1 @@ |
||||
MODEL_ZOO |
@ -0,0 +1,13 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
@ -0,0 +1,48 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
import os |
||||
import paddle |
||||
import paddlers.models.ppdet as ppdet |
||||
import unittest |
||||
|
||||
# NOTE: downloading weights takes time, so we choose |
||||
# a small model for unit testing |
||||
MODEL_NAME = 'ppyolo/ppyolo_tiny_650e_coco' |
||||
|
||||
|
||||
class TestGetConfigFile(unittest.TestCase): |
||||
def test_main(self): |
||||
try: |
||||
cfg_file = ppdet.model_zoo.get_config_file(MODEL_NAME) |
||||
assert os.path.isfile(cfg_file) |
||||
except Exception: |
||||
self.assertTrue(False) |
||||
|
||||
|
||||
class TestGetModel(unittest.TestCase): |
||||
def test_main(self): |
||||
try: |
||||
model = ppdet.model_zoo.get_model(MODEL_NAME) |
||||
assert isinstance(model, paddle.nn.Layer) |
||||
except Exception: |
||||
self.assertTrue(False) |
||||
|
||||
|
||||
if __name__ == '__main__': |
||||
unittest.main() |
@ -0,0 +1,68 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
import unittest |
||||
import paddlers.models.ppdet as ppdet |
||||
|
||||
|
||||
class TestListModel(unittest.TestCase): |
||||
def setUp(self): |
||||
self._filter = [] |
||||
|
||||
def test_main(self): |
||||
try: |
||||
ppdet.model_zoo.list_model(self._filter) |
||||
self.assertTrue(True) |
||||
except Exception: |
||||
self.assertTrue(False) |
||||
|
||||
|
||||
class TestListModelYOLO(TestListModel): |
||||
def setUp(self): |
||||
self._filter = ['yolo'] |
||||
|
||||
|
||||
class TestListModelRCNN(TestListModel): |
||||
def setUp(self): |
||||
self._filter = ['rcnn'] |
||||
|
||||
|
||||
class TestListModelSSD(TestListModel): |
||||
def setUp(self): |
||||
self._filter = ['ssd'] |
||||
|
||||
|
||||
class TestListModelMultiFilter(TestListModel): |
||||
def setUp(self): |
||||
self._filter = ['yolo', 'darknet'] |
||||
|
||||
|
||||
class TestListModelError(unittest.TestCase): |
||||
def setUp(self): |
||||
self._filter = ['xxx'] |
||||
|
||||
def test_main(self): |
||||
try: |
||||
ppdet.model_zoo.list_model(self._filter) |
||||
self.assertTrue(False) |
||||
except ValueError: |
||||
self.assertTrue(True) |
||||
|
||||
|
||||
if __name__ == '__main__': |
||||
unittest.main() |
@ -0,0 +1,79 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
from paddlers.models.ppdet.core.workspace import register, create |
||||
from .meta_arch import BaseArch |
||||
|
||||
__all__ = ['ByteTrack'] |
||||
|
||||
|
||||
@register |
||||
class ByteTrack(BaseArch): |
||||
""" |
||||
ByteTrack network, see https://arxiv.org/abs/2110.06864 |
||||
|
||||
Args: |
||||
detector (object): detector model instance |
||||
reid (object): reid model instance, default None |
||||
tracker (object): tracker instance |
||||
""" |
||||
__category__ = 'architecture' |
||||
|
||||
def __init__(self, detector='YOLOX', reid=None, tracker='JDETracker'): |
||||
super(ByteTrack, self).__init__() |
||||
self.detector = detector |
||||
self.reid = reid |
||||
self.tracker = tracker |
||||
|
||||
@classmethod |
||||
def from_config(cls, cfg, *args, **kwargs): |
||||
detector = create(cfg['detector']) |
||||
|
||||
if cfg['reid'] != 'None': |
||||
reid = create(cfg['reid']) |
||||
else: |
||||
reid = None |
||||
|
||||
tracker = create(cfg['tracker']) |
||||
|
||||
return { |
||||
"detector": detector, |
||||
"reid": reid, |
||||
"tracker": tracker, |
||||
} |
||||
|
||||
def _forward(self): |
||||
det_outs = self.detector(self.inputs) |
||||
|
||||
if self.training: |
||||
return det_outs |
||||
else: |
||||
if self.reid is not None: |
||||
assert 'crops' in self.inputs |
||||
crops = self.inputs['crops'] |
||||
pred_embs = self.reid(crops) |
||||
else: |
||||
pred_embs = None |
||||
det_outs['embeddings'] = pred_embs |
||||
return det_outs |
||||
|
||||
def get_loss(self): |
||||
return self._forward() |
||||
|
||||
def get_pred(self): |
||||
return self._forward() |
@ -0,0 +1,68 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
from paddlers.models.ppdet.core.workspace import register, create |
||||
from .meta_arch import BaseArch |
||||
import paddle |
||||
|
||||
__all__ = ['RetinaNet'] |
||||
|
||||
|
||||
@register |
||||
class RetinaNet(BaseArch): |
||||
__category__ = 'architecture' |
||||
|
||||
def __init__(self, backbone, neck, head): |
||||
super(RetinaNet, self).__init__() |
||||
self.backbone = backbone |
||||
self.neck = neck |
||||
self.head = head |
||||
|
||||
@classmethod |
||||
def from_config(cls, cfg, *args, **kwargs): |
||||
backbone = create(cfg['backbone']) |
||||
|
||||
kwargs = {'input_shape': backbone.out_shape} |
||||
neck = create(cfg['neck'], **kwargs) |
||||
|
||||
kwargs = {'input_shape': neck.out_shape} |
||||
head = create(cfg['head'], **kwargs) |
||||
|
||||
return { |
||||
'backbone': backbone, |
||||
'neck': neck, |
||||
'head': head, |
||||
} |
||||
|
||||
def _forward(self): |
||||
body_feats = self.backbone(self.inputs) |
||||
neck_feats = self.neck(body_feats) |
||||
|
||||
if self.training: |
||||
return self.head(neck_feats, self.inputs) |
||||
else: |
||||
head_outs = self.head(neck_feats) |
||||
bbox, bbox_num = self.head.post_process( |
||||
head_outs, self.inputs['im_shape'], self.inputs['scale_factor']) |
||||
return {'bbox': bbox, 'bbox_num': bbox_num} |
||||
|
||||
def get_loss(self): |
||||
return self._forward() |
||||
|
||||
def get_pred(self): |
||||
return self._forward() |
@ -0,0 +1,138 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
from paddlers.models.ppdet.core.workspace import register, create |
||||
from .meta_arch import BaseArch |
||||
|
||||
import random |
||||
import paddle |
||||
import paddle.nn.functional as F |
||||
import paddle.distributed as dist |
||||
|
||||
__all__ = ['YOLOX'] |
||||
|
||||
|
||||
@register |
||||
class YOLOX(BaseArch): |
||||
""" |
||||
YOLOX network, see https://arxiv.org/abs/2107.08430 |
||||
|
||||
Args: |
||||
backbone (nn.Layer): backbone instance |
||||
neck (nn.Layer): neck instance |
||||
head (nn.Layer): head instance |
||||
for_mot (bool): whether used for MOT or not |
||||
input_size (list[int]): initial input size; the actual training size is re-sampled by self._preprocess() |
||||
size_stride (int): stride of the size range |
||||
size_range (list[int]): multi-scale range for training |
||||
random_interval (int): number of iterations between re-samples of self._input_size |
||||
""" |
||||
__category__ = 'architecture' |
||||
|
||||
def __init__(self, |
||||
backbone='CSPDarkNet', |
||||
neck='YOLOCSPPAN', |
||||
head='YOLOXHead', |
||||
for_mot=False, |
||||
input_size=[640, 640], |
||||
size_stride=32, |
||||
size_range=[15, 25], |
||||
random_interval=10): |
||||
super(YOLOX, self).__init__() |
||||
self.backbone = backbone |
||||
self.neck = neck |
||||
self.head = head |
||||
self.for_mot = for_mot |
||||
|
||||
self.input_size = input_size |
||||
self._input_size = paddle.to_tensor(input_size) |
||||
self.size_stride = size_stride |
||||
self.size_range = size_range |
||||
self.random_interval = random_interval |
||||
self._step = 0 |
||||
|
||||
@classmethod |
||||
def from_config(cls, cfg, *args, **kwargs): |
||||
# backbone |
||||
backbone = create(cfg['backbone']) |
||||
|
||||
# fpn |
||||
kwargs = {'input_shape': backbone.out_shape} |
||||
neck = create(cfg['neck'], **kwargs) |
||||
|
||||
# head |
||||
kwargs = {'input_shape': neck.out_shape} |
||||
head = create(cfg['head'], **kwargs) |
||||
|
||||
return { |
||||
'backbone': backbone, |
||||
'neck': neck, |
||||
"head": head, |
||||
} |
||||
|
||||
def _forward(self): |
||||
if self.training: |
||||
self._preprocess() |
||||
body_feats = self.backbone(self.inputs) |
||||
neck_feats = self.neck(body_feats, self.for_mot) |
||||
|
||||
if self.training: |
||||
yolox_losses = self.head(neck_feats, self.inputs) |
||||
yolox_losses.update({'size': self._input_size[0]}) |
||||
return yolox_losses |
||||
else: |
||||
head_outs = self.head(neck_feats) |
||||
bbox, bbox_num = self.head.post_process( |
||||
head_outs, self.inputs['im_shape'], self.inputs['scale_factor']) |
||||
return {'bbox': bbox, 'bbox_num': bbox_num} |
||||
|
||||
def get_loss(self): |
||||
return self._forward() |
||||
|
||||
def get_pred(self): |
||||
return self._forward() |
||||
|
||||
def _preprocess(self): |
||||
# YOLOX multi-scale training: resize the batch by interpolation before it enters the network. |
||||
self._get_size() |
||||
scale_y = self._input_size[0] / self.input_size[0] |
||||
scale_x = self._input_size[1] / self.input_size[1] |
||||
if scale_x != 1 or scale_y != 1: |
||||
self.inputs['image'] = F.interpolate( |
||||
self.inputs['image'], |
||||
size=self._input_size, |
||||
mode='bilinear', |
||||
align_corners=False) |
||||
gt_bboxes = self.inputs['gt_bbox'] |
||||
for i in range(len(gt_bboxes)): |
||||
if len(gt_bboxes[i]) > 0: |
||||
gt_bboxes[i][:, 0::2] = gt_bboxes[i][:, 0::2] * scale_x |
||||
gt_bboxes[i][:, 1::2] = gt_bboxes[i][:, 1::2] * scale_y |
||||
self.inputs['gt_bbox'] = gt_bboxes |
||||
|
||||
def _get_size(self): |
||||
# random_interval defaults to 10: re-sample self._input_size every 10 iterations |
||||
image_ratio = self.input_size[1] * 1.0 / self.input_size[0] |
||||
if self._step % self.random_interval == 0: |
||||
size_factor = random.randint(*self.size_range) |
||||
size = [ |
||||
self.size_stride * size_factor, |
||||
self.size_stride * int(size_factor * image_ratio) |
||||
] |
||||
self._input_size = paddle.to_tensor(size) |
||||
self._step += 1 |
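# A standalone sketch of the size sampling performed by _get_size() above, assuming
# the defaults size_stride=32 and size_range=[15, 25]: every `random_interval` steps
# a new target size in [480, 800] (a multiple of 32) is drawn, keeping the W/H ratio
# of the base size. `sample_input_size` is only an illustrative name.
import random

def sample_input_size(base_size=(640, 640), stride=32, size_range=(15, 25)):
    ratio = base_size[1] * 1.0 / base_size[0]   # keep the aspect ratio of the base size
    factor = random.randint(*size_range)        # e.g. 15..25
    return [stride * factor, stride * int(factor * ratio)]

print(sample_input_size())  # e.g. [608, 608]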
@ -0,0 +1,54 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
from paddlers.models.ppdet.core.workspace import register |
||||
from paddlers.models.ppdet.modeling.proposal_generator.target import label_box |
||||
|
||||
__all__ = ['MaxIoUAssigner'] |
||||
|
||||
|
||||
@register |
||||
class MaxIoUAssigner(object): |
||||
"""a standard bbox assigner based on max IoU, use ppdet's label_box |
||||
as backend. |
||||
Args: |
||||
positive_overlap (float): threshold for defining positive samples |
||||
negative_overlap (float): threshold for defining negative samples |
||||
allow_low_quality (bool): whether to lower IoU thr if a GT poorly |
||||
overlaps with candidate bboxes |
||||
""" |
||||
|
||||
def __init__(self, |
||||
positive_overlap, |
||||
negative_overlap, |
||||
allow_low_quality=True): |
||||
self.positive_overlap = positive_overlap |
||||
self.negative_overlap = negative_overlap |
||||
self.allow_low_quality = allow_low_quality |
||||
|
||||
def __call__(self, bboxes, gt_bboxes): |
||||
matches, match_labels = label_box( |
||||
bboxes, |
||||
gt_bboxes, |
||||
positive_overlap=self.positive_overlap, |
||||
negative_overlap=self.negative_overlap, |
||||
allow_low_quality=self.allow_low_quality, |
||||
ignore_thresh=-1, |
||||
is_crowd=None, |
||||
assign_on_cpu=False) |
||||
return matches, match_labels |
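# A minimal sketch of what max-IoU assignment means, independent of ppdet's
# label_box and assuming only numpy: each candidate box is matched to the GT with
# the highest IoU, labelled positive above `positive_overlap`, negative below
# `negative_overlap`, and ignored (-1) in between. `assign_by_max_iou` is an
# illustrative name, not the library API.
import numpy as np

def assign_by_max_iou(ious, positive_overlap=0.7, negative_overlap=0.3):
    matches = ious.argmax(axis=1)                       # best GT per candidate
    best = ious.max(axis=1)
    labels = np.full(best.shape, -1, dtype=np.int64)    # ignore by default
    labels[best >= positive_overlap] = 1
    labels[best < negative_overlap] = 0
    return matches, labels

ious = np.array([[0.8, 0.1], [0.2, 0.4], [0.05, 0.1]])
print(assign_by_max_iou(ious))  # (array([0, 1, 1]), array([ 1, -1,  0]))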
@ -0,0 +1,245 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
''' |
||||
Modified from https://github.com/facebookresearch/ConvNeXt |
||||
Copyright (c) Meta Platforms, Inc. and affiliates. |
||||
All rights reserved. |
||||
This source code is licensed under the license found in the |
||||
LICENSE file in the root directory of this source tree. |
||||
''' |
||||
|
||||
import paddle |
||||
import paddle.nn as nn |
||||
import paddle.nn.functional as F |
||||
from paddle import ParamAttr |
||||
from paddle.nn.initializer import Constant |
||||
|
||||
import numpy as np |
||||
|
||||
from paddlers.models.ppdet.core.workspace import register, serializable |
||||
from ..shape_spec import ShapeSpec |
||||
from .transformer_utils import DropPath, trunc_normal_, zeros_ |
||||
|
||||
__all__ = ['ConvNeXt'] |
||||
|
||||
|
||||
class Block(nn.Layer): |
||||
r""" ConvNeXt Block. There are two equivalent implementations: |
||||
(1) DwConv -> LayerNorm (channels_first) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, H, W) |
||||
(2) DwConv -> Permute to (N, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back |
||||
We use (2) as we find it slightly faster in Paddle |
||||
|
||||
Args: |
||||
dim (int): Number of input channels. |
||||
drop_path (float): Stochastic depth rate. Default: 0.0 |
||||
layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6. |
||||
""" |
||||
|
||||
def __init__(self, dim, drop_path=0., layer_scale_init_value=1e-6): |
||||
super().__init__() |
||||
self.dwconv = nn.Conv2D( |
||||
dim, dim, kernel_size=7, padding=3, groups=dim) # depthwise conv |
||||
self.norm = LayerNorm(dim, eps=1e-6) |
||||
self.pwconv1 = nn.Linear( |
||||
dim, 4 * dim) # pointwise/1x1 convs, implemented with linear layers |
||||
self.act = nn.GELU() |
||||
self.pwconv2 = nn.Linear(4 * dim, dim) |
||||
|
||||
if layer_scale_init_value > 0: |
||||
self.gamma = self.create_parameter( |
||||
shape=(dim, ), |
||||
attr=ParamAttr(initializer=Constant(layer_scale_init_value))) |
||||
else: |
||||
self.gamma = None |
||||
|
||||
self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity( |
||||
) |
||||
|
||||
def forward(self, x): |
||||
input = x |
||||
x = self.dwconv(x) |
||||
x = x.transpose([0, 2, 3, 1]) |
||||
x = self.norm(x) |
||||
x = self.pwconv1(x) |
||||
x = self.act(x) |
||||
x = self.pwconv2(x) |
||||
if self.gamma is not None: |
||||
x = self.gamma * x |
||||
x = x.transpose([0, 3, 1, 2]) |
||||
x = input + self.drop_path(x) |
||||
return x |
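# A small check of the equivalence the Block docstring above relies on, assuming
# paddle is installed: a 1x1 Conv2D over (N, C, H, W) matches a Linear layer applied
# to the permuted (N, H, W, C) tensor once their weights are tied, which is why the
# channels_last formulation (2) can use nn.Linear for the pointwise convs.
import paddle

x = paddle.randn([2, 8, 4, 4])
conv = paddle.nn.Conv2D(8, 16, kernel_size=1)
linear = paddle.nn.Linear(8, 16)
linear.weight.set_value(conv.weight.reshape([16, 8]).transpose([1, 0]))  # [in, out]
linear.bias.set_value(conv.bias)

y_conv = conv(x)                                                     # (2, 16, 4, 4)
y_lin = linear(x.transpose([0, 2, 3, 1])).transpose([0, 3, 1, 2])    # same layout
print(paddle.allclose(y_conv, y_lin, atol=1e-5).item())  # True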
||||
|
||||
|
||||
class LayerNorm(nn.Layer): |
||||
r""" LayerNorm that supports two data formats: channels_last (default) or channels_first. |
||||
The ordering of the dimensions in the inputs. channels_last corresponds to inputs with |
||||
shape (batch_size, height, width, channels) while channels_first corresponds to inputs |
||||
with shape (batch_size, channels, height, width). |
||||
""" |
||||
|
||||
def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"): |
||||
super().__init__() |
||||
|
||||
self.weight = self.create_parameter( |
||||
shape=(normalized_shape, ), |
||||
attr=ParamAttr(initializer=Constant(1.))) |
||||
self.bias = self.create_parameter( |
||||
shape=(normalized_shape, ), |
||||
attr=ParamAttr(initializer=Constant(0.))) |
||||
|
||||
self.eps = eps |
||||
self.data_format = data_format |
||||
if self.data_format not in ["channels_last", "channels_first"]: |
||||
raise NotImplementedError |
||||
self.normalized_shape = (normalized_shape, ) |
||||
|
||||
def forward(self, x): |
||||
if self.data_format == "channels_last": |
||||
return F.layer_norm(x, self.normalized_shape, self.weight, |
||||
self.bias, self.eps) |
||||
elif self.data_format == "channels_first": |
||||
u = x.mean(1, keepdim=True) |
||||
s = (x - u).pow(2).mean(1, keepdim=True) |
||||
x = (x - u) / paddle.sqrt(s + self.eps) |
||||
x = self.weight[:, None, None] * x + self.bias[:, None, None] |
||||
return x |
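# A quick equivalence check for the channels_first branch above, assuming paddle is
# installed: normalizing over axis 1 manually matches applying F.layer_norm to the
# tensor permuted to channels_last and permuting back.
import paddle
import paddle.nn.functional as F

x = paddle.randn([2, 8, 4, 4])
w = paddle.randn([8])
b = paddle.randn([8])

u = x.mean(1, keepdim=True)
s = (x - u).pow(2).mean(1, keepdim=True)
y_manual = w[:, None, None] * ((x - u) / paddle.sqrt(s + 1e-6)) + b[:, None, None]

y_ref = F.layer_norm(x.transpose([0, 2, 3, 1]), [8], w, b, 1e-6).transpose([0, 3, 1, 2])
print(paddle.allclose(y_manual, y_ref, atol=1e-5).item())  # True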
||||
|
||||
|
||||
@register |
||||
@serializable |
||||
class ConvNeXt(nn.Layer): |
||||
r""" ConvNeXt |
||||
A Paddle implementation of: `A ConvNet for the 2020s` - |
||||
https://arxiv.org/pdf/2201.03545.pdf |
||||
|
||||
Args: |
||||
in_chans (int): Number of input image channels. Default: 3 |
||||
depths (tuple(int)): Number of blocks at each stage. Default: [3, 3, 9, 3] |
||||
dims (tuple(int)): Feature dimension at each stage. Default: [96, 192, 384, 768] |
||||
drop_path_rate (float): Stochastic depth rate. Default: 0. |
||||
layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6. |
||||
""" |
||||
|
||||
arch_settings = { |
||||
'tiny': { |
||||
'depths': [3, 3, 9, 3], |
||||
'dims': [96, 192, 384, 768] |
||||
}, |
||||
'small': { |
||||
'depths': [3, 3, 27, 3], |
||||
'dims': [96, 192, 384, 768] |
||||
}, |
||||
'base': { |
||||
'depths': [3, 3, 27, 3], |
||||
'dims': [128, 256, 512, 1024] |
||||
}, |
||||
'large': { |
||||
'depths': [3, 3, 27, 3], |
||||
'dims': [192, 384, 768, 1536] |
||||
}, |
||||
'xlarge': { |
||||
'depths': [3, 3, 27, 3], |
||||
'dims': [256, 512, 1024, 2048] |
||||
}, |
||||
} |
||||
|
||||
def __init__( |
||||
self, |
||||
arch='tiny', |
||||
in_chans=3, |
||||
drop_path_rate=0., |
||||
layer_scale_init_value=1e-6, |
||||
return_idx=[1, 2, 3], |
||||
norm_output=True, |
||||
pretrained=None, ): |
||||
super().__init__() |
||||
depths = self.arch_settings[arch]['depths'] |
||||
dims = self.arch_settings[arch]['dims'] |
||||
self.downsample_layers = nn.LayerList( |
||||
) # stem and 3 intermediate downsampling conv layers |
||||
stem = nn.Sequential( |
||||
nn.Conv2D( |
||||
in_chans, dims[0], kernel_size=4, stride=4), |
||||
LayerNorm( |
||||
dims[0], eps=1e-6, data_format="channels_first")) |
||||
self.downsample_layers.append(stem) |
||||
for i in range(3): |
||||
downsample_layer = nn.Sequential( |
||||
LayerNorm( |
||||
dims[i], eps=1e-6, data_format="channels_first"), |
||||
nn.Conv2D( |
||||
dims[i], dims[i + 1], kernel_size=2, stride=2), ) |
||||
self.downsample_layers.append(downsample_layer) |
||||
|
||||
self.stages = nn.LayerList( |
||||
) # 4 feature resolution stages, each consisting of multiple residual blocks |
||||
dp_rates = [x for x in np.linspace(0, drop_path_rate, sum(depths))] |
||||
cur = 0 |
||||
for i in range(4): |
||||
stage = nn.Sequential(*[ |
||||
Block( |
||||
dim=dims[i], |
||||
drop_path=dp_rates[cur + j], |
||||
layer_scale_init_value=layer_scale_init_value) |
||||
for j in range(depths[i]) |
||||
]) |
||||
self.stages.append(stage) |
||||
cur += depths[i] |
||||
|
||||
self.return_idx = return_idx |
||||
self.dims = [dims[i] for i in return_idx] # [::-1] |
||||
|
||||
self.norm_output = norm_output |
||||
if norm_output: |
||||
self.norms = nn.LayerList([ |
||||
LayerNorm( |
||||
c, eps=1e-6, data_format="channels_first") |
||||
for c in self.dims |
||||
]) |
||||
|
||||
self.apply(self._init_weights) |
||||
|
||||
if pretrained is not None: |
||||
if 'http' in pretrained: #URL |
||||
path = paddle.utils.download.get_weights_path_from_url( |
||||
pretrained) |
||||
else: #model in local path |
||||
path = pretrained |
||||
self.set_state_dict(paddle.load(path)) |
||||
|
||||
def _init_weights(self, m): |
||||
if isinstance(m, (nn.Conv2D, nn.Linear)): |
||||
trunc_normal_(m.weight) |
||||
zeros_(m.bias) |
||||
|
||||
def forward_features(self, x): |
||||
output = [] |
||||
for i in range(4): |
||||
x = self.downsample_layers[i](x) |
||||
x = self.stages[i](x) |
||||
output.append(x) |
||||
|
||||
outputs = [output[i] for i in self.return_idx] |
||||
if self.norm_output: |
||||
outputs = [self.norms[i](out) for i, out in enumerate(outputs)] |
||||
|
||||
return outputs |
||||
|
||||
def forward(self, x): |
||||
x = self.forward_features(x['image']) |
||||
return x |
||||
|
||||
@property |
||||
def out_shape(self): |
||||
return [ShapeSpec(channels=c) for c in self.dims] |
@ -0,0 +1,404 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
import paddle |
||||
import paddle.nn as nn |
||||
import paddle.nn.functional as F |
||||
from paddle import ParamAttr |
||||
from paddle.regularizer import L2Decay |
||||
from paddlers.models.ppdet.core.workspace import register, serializable |
||||
from paddlers.models.ppdet.modeling.initializer import conv_init_ |
||||
from ..shape_spec import ShapeSpec |
||||
|
||||
__all__ = [ |
||||
'CSPDarkNet', 'BaseConv', 'DWConv', 'BottleNeck', 'SPPLayer', 'SPPFLayer' |
||||
] |
||||
|
||||
|
||||
class BaseConv(nn.Layer): |
||||
def __init__(self, |
||||
in_channels, |
||||
out_channels, |
||||
ksize, |
||||
stride, |
||||
groups=1, |
||||
bias=False, |
||||
act="silu"): |
||||
super(BaseConv, self).__init__() |
||||
self.conv = nn.Conv2D( |
||||
in_channels, |
||||
out_channels, |
||||
kernel_size=ksize, |
||||
stride=stride, |
||||
padding=(ksize - 1) // 2, |
||||
groups=groups, |
||||
bias_attr=bias) |
||||
self.bn = nn.BatchNorm2D( |
||||
out_channels, |
||||
weight_attr=ParamAttr(regularizer=L2Decay(0.0)), |
||||
bias_attr=ParamAttr(regularizer=L2Decay(0.0))) |
||||
|
||||
self._init_weights() |
||||
|
||||
def _init_weights(self): |
||||
conv_init_(self.conv) |
||||
|
||||
def forward(self, x): |
||||
# use 'x * F.sigmoid(x)' in place of the 'silu' activation |
||||
x = self.bn(self.conv(x)) |
||||
y = x * F.sigmoid(x) |
||||
return y |
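# The comment above uses x * sigmoid(x) as a drop-in for SiLU; a quick numerical
# check, assuming a Paddle version that ships paddle.nn.functional.silu:
import paddle
import paddle.nn.functional as F

x = paddle.randn([4, 8])
print(paddle.allclose(x * F.sigmoid(x), F.silu(x)).item())  # True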
||||
|
||||
|
||||
class DWConv(nn.Layer): |
||||
"""Depthwise Conv""" |
||||
|
||||
def __init__(self, |
||||
in_channels, |
||||
out_channels, |
||||
ksize, |
||||
stride=1, |
||||
bias=False, |
||||
act="silu"): |
||||
super(DWConv, self).__init__() |
||||
self.dw_conv = BaseConv( |
||||
in_channels, |
||||
in_channels, |
||||
ksize=ksize, |
||||
stride=stride, |
||||
groups=in_channels, |
||||
bias=bias, |
||||
act=act) |
||||
self.pw_conv = BaseConv( |
||||
in_channels, |
||||
out_channels, |
||||
ksize=1, |
||||
stride=1, |
||||
groups=1, |
||||
bias=bias, |
||||
act=act) |
||||
|
||||
def forward(self, x): |
||||
return self.pw_conv(self.dw_conv(x)) |
||||
|
||||
|
||||
class Focus(nn.Layer): |
||||
"""Focus width and height information into channel space, used in YOLOX.""" |
||||
|
||||
def __init__(self, |
||||
in_channels, |
||||
out_channels, |
||||
ksize=3, |
||||
stride=1, |
||||
bias=False, |
||||
act="silu"): |
||||
super(Focus, self).__init__() |
||||
self.conv = BaseConv( |
||||
in_channels * 4, |
||||
out_channels, |
||||
ksize=ksize, |
||||
stride=stride, |
||||
bias=bias, |
||||
act=act) |
||||
|
||||
def forward(self, inputs): |
||||
# inputs [bs, C, H, W] -> outputs [bs, 4C, H/2, W/2] |
||||
top_left = inputs[:, :, 0::2, 0::2] |
||||
top_right = inputs[:, :, 0::2, 1::2] |
||||
bottom_left = inputs[:, :, 1::2, 0::2] |
||||
bottom_right = inputs[:, :, 1::2, 1::2] |
||||
outputs = paddle.concat( |
||||
[top_left, bottom_left, top_right, bottom_right], 1) |
||||
return self.conv(outputs) |
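# A minimal shape check of the space-to-channel rearrangement performed by Focus
# above, assuming paddle is installed: H and W are halved and channels grow 4x
# before the following BaseConv.
import paddle

x = paddle.randn([1, 3, 8, 8])
patches = paddle.concat(
    [x[:, :, 0::2, 0::2], x[:, :, 1::2, 0::2],
     x[:, :, 0::2, 1::2], x[:, :, 1::2, 1::2]], axis=1)
print(patches.shape)  # [1, 12, 4, 4]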
||||
|
||||
|
||||
class BottleNeck(nn.Layer): |
||||
def __init__(self, |
||||
in_channels, |
||||
out_channels, |
||||
shortcut=True, |
||||
expansion=0.5, |
||||
depthwise=False, |
||||
bias=False, |
||||
act="silu"): |
||||
super(BottleNeck, self).__init__() |
||||
hidden_channels = int(out_channels * expansion) |
||||
Conv = DWConv if depthwise else BaseConv |
||||
self.conv1 = BaseConv( |
||||
in_channels, hidden_channels, ksize=1, stride=1, bias=bias, act=act) |
||||
self.conv2 = Conv( |
||||
hidden_channels, |
||||
out_channels, |
||||
ksize=3, |
||||
stride=1, |
||||
bias=bias, |
||||
act=act) |
||||
self.add_shortcut = shortcut and in_channels == out_channels |
||||
|
||||
def forward(self, x): |
||||
y = self.conv2(self.conv1(x)) |
||||
if self.add_shortcut: |
||||
y = y + x |
||||
return y |
||||
|
||||
|
||||
class SPPLayer(nn.Layer): |
||||
"""Spatial Pyramid Pooling (SPP) layer used in YOLOv3-SPP and YOLOX""" |
||||
|
||||
def __init__(self, |
||||
in_channels, |
||||
out_channels, |
||||
kernel_sizes=(5, 9, 13), |
||||
bias=False, |
||||
act="silu"): |
||||
super(SPPLayer, self).__init__() |
||||
hidden_channels = in_channels // 2 |
||||
self.conv1 = BaseConv( |
||||
in_channels, hidden_channels, ksize=1, stride=1, bias=bias, act=act) |
||||
self.maxpoolings = nn.LayerList([ |
||||
nn.MaxPool2D( |
||||
kernel_size=ks, stride=1, padding=ks // 2) |
||||
for ks in kernel_sizes |
||||
]) |
||||
conv2_channels = hidden_channels * (len(kernel_sizes) + 1) |
||||
self.conv2 = BaseConv( |
||||
conv2_channels, out_channels, ksize=1, stride=1, bias=bias, act=act) |
||||
|
||||
def forward(self, x): |
||||
x = self.conv1(x) |
||||
x = paddle.concat([x] + [mp(x) for mp in self.maxpoolings], axis=1) |
||||
x = self.conv2(x) |
||||
return x |
||||
|
||||
|
||||
class SPPFLayer(nn.Layer): |
||||
""" Spatial Pyramid Pooling - Fast (SPPF) layer used in YOLOv5 by Glenn Jocher, |
||||
equivalent to SPP(k=(5, 9, 13)) |
||||
""" |
||||
|
||||
def __init__(self, |
||||
in_channels, |
||||
out_channels, |
||||
ksize=5, |
||||
bias=False, |
||||
act='silu'): |
||||
super(SPPFLayer, self).__init__() |
||||
hidden_channels = in_channels // 2 |
||||
self.conv1 = BaseConv( |
||||
in_channels, hidden_channels, ksize=1, stride=1, bias=bias, act=act) |
||||
self.maxpooling = nn.MaxPool2D( |
||||
kernel_size=ksize, stride=1, padding=ksize // 2) |
||||
conv2_channels = hidden_channels * 4 |
||||
self.conv2 = BaseConv( |
||||
conv2_channels, out_channels, ksize=1, stride=1, bias=bias, act=act) |
||||
|
||||
def forward(self, x): |
||||
x = self.conv1(x) |
||||
y1 = self.maxpooling(x) |
||||
y2 = self.maxpooling(y1) |
||||
y3 = self.maxpooling(y2) |
||||
concats = paddle.concat([x, y1, y2, y3], axis=1) |
||||
out = self.conv2(concats) |
||||
return out |
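# The SPPF/SPP equivalence noted in the docstring above, checked numerically
# (assuming paddle is installed): two chained 5x5 max-pools equal one 9x9, and
# three equal one 13x13, so SPPF with ksize=5 reproduces SPP(k=(5, 9, 13)).
import paddle
import paddle.nn.functional as F

x = paddle.randn([1, 4, 16, 16])
p5 = F.max_pool2d(x, kernel_size=5, stride=1, padding=2)
p5x2 = F.max_pool2d(p5, kernel_size=5, stride=1, padding=2)
p9 = F.max_pool2d(x, kernel_size=9, stride=1, padding=4)
print(paddle.allclose(p5x2, p9).item())  # True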
||||
|
||||
|
||||
class CSPLayer(nn.Layer): |
||||
"""CSP (Cross Stage Partial) layer with 3 convs, named C3 in YOLOv5""" |
||||
|
||||
def __init__(self, |
||||
in_channels, |
||||
out_channels, |
||||
num_blocks=1, |
||||
shortcut=True, |
||||
expansion=0.5, |
||||
depthwise=False, |
||||
bias=False, |
||||
act="silu"): |
||||
super(CSPLayer, self).__init__() |
||||
hidden_channels = int(out_channels * expansion) |
||||
self.conv1 = BaseConv( |
||||
in_channels, hidden_channels, ksize=1, stride=1, bias=bias, act=act) |
||||
self.conv2 = BaseConv( |
||||
in_channels, hidden_channels, ksize=1, stride=1, bias=bias, act=act) |
||||
self.bottlenecks = nn.Sequential(*[ |
||||
BottleNeck( |
||||
hidden_channels, |
||||
hidden_channels, |
||||
shortcut=shortcut, |
||||
expansion=1.0, |
||||
depthwise=depthwise, |
||||
bias=bias, |
||||
act=act) for _ in range(num_blocks) |
||||
]) |
||||
self.conv3 = BaseConv( |
||||
hidden_channels * 2, |
||||
out_channels, |
||||
ksize=1, |
||||
stride=1, |
||||
bias=bias, |
||||
act=act) |
||||
|
||||
def forward(self, x): |
||||
x_1 = self.conv1(x) |
||||
x_1 = self.bottlenecks(x_1) |
||||
x_2 = self.conv2(x) |
||||
x = paddle.concat([x_1, x_2], axis=1) |
||||
x = self.conv3(x) |
||||
return x |
||||
|
||||
|
||||
@register |
||||
@serializable |
||||
class CSPDarkNet(nn.Layer): |
||||
""" |
||||
CSPDarkNet backbone. |
||||
Args: |
||||
arch (str): Architecture of CSPDarkNet, from {P5, P6, X}, default as X, |
||||
and 'X' means used in YOLOX, 'P5/P6' means used in YOLOv5. |
||||
depth_mult (float): Depth multiplier, multiplies the number of blocks in |
||||
each CSPLayer, default as 1.0. |
||||
width_mult (float): Width multiplier, multiplies the number of channels in |
||||
each layer, default as 1.0. |
||||
depthwise (bool): Whether to use depth-wise conv layer. |
||||
act (str): Activation function type, default as 'silu'. |
||||
return_idx (list): Index of stages whose feature maps are returned. |
||||
""" |
||||
|
||||
__shared__ = ['depth_mult', 'width_mult', 'act', 'trt'] |
||||
|
||||
# in_channels, out_channels, num_blocks, add_shortcut, use_spp(use_sppf) |
||||
# 'X' means setting used in YOLOX, 'P5/P6' means setting used in YOLOv5. |
||||
arch_settings = { |
||||
'X': [[64, 128, 3, True, False], [128, 256, 9, True, False], |
||||
[256, 512, 9, True, False], [512, 1024, 3, False, True]], |
||||
'P5': [[64, 128, 3, True, False], [128, 256, 6, True, False], |
||||
[256, 512, 9, True, False], [512, 1024, 3, True, True]], |
||||
'P6': [[64, 128, 3, True, False], [128, 256, 6, True, False], |
||||
[256, 512, 9, True, False], [512, 768, 3, True, False], |
||||
[768, 1024, 3, True, True]], |
||||
} |
||||
|
||||
def __init__(self, |
||||
arch='X', |
||||
depth_mult=1.0, |
||||
width_mult=1.0, |
||||
depthwise=False, |
||||
act='silu', |
||||
trt=False, |
||||
return_idx=[2, 3, 4]): |
||||
super(CSPDarkNet, self).__init__() |
||||
self.arch = arch |
||||
self.return_idx = return_idx |
||||
Conv = DWConv if depthwise else BaseConv |
||||
arch_setting = self.arch_settings[arch] |
||||
base_channels = int(arch_setting[0][0] * width_mult) |
||||
|
||||
# Note: differences between the latest YOLOv5 and the original YOLOX |
||||
# 1. stem: a Conv stem (in YOLOv5) or a Focus stem (in YOLOX) |
||||
# 2. use SPPF (in YOLOv5) or SPP (in YOLOX) |
||||
# 3. put SPPF before (YOLOv5) or SPP after (YOLOX) the last cspdark block's CSPLayer |
||||
# 4. whether the CSPLayer beside SPPF(SPP) adds a shortcut: True in YOLOv5, False in YOLOX |
||||
if arch in ['P5', 'P6']: |
||||
# in the latest YOLOv5, use a Conv stem, and SPPF (fast, only a single spp kernel size) |
||||
self.stem = Conv( |
||||
3, base_channels, ksize=6, stride=2, bias=False, act=act) |
||||
spp_kernal_sizes = 5 |
||||
elif arch in ['X']: |
||||
# in the original YOLOX, use a Focus stem, and SPP (three spp kernel sizes) |
||||
self.stem = Focus( |
||||
3, base_channels, ksize=3, stride=1, bias=False, act=act) |
||||
spp_kernal_sizes = (5, 9, 13) |
||||
else: |
||||
raise AttributeError("Unsupported arch type: {}".format(arch)) |
||||
|
||||
_out_channels = [base_channels] |
||||
layers_num = 1 |
||||
self.csp_dark_blocks = [] |
||||
|
||||
for i, (in_channels, out_channels, num_blocks, shortcut, |
||||
use_spp) in enumerate(arch_setting): |
||||
in_channels = int(in_channels * width_mult) |
||||
out_channels = int(out_channels * width_mult) |
||||
_out_channels.append(out_channels) |
||||
num_blocks = max(round(num_blocks * depth_mult), 1) |
||||
stage = [] |
||||
|
||||
conv_layer = self.add_sublayer( |
||||
'layers{}.stage{}.conv_layer'.format(layers_num, i + 1), |
||||
Conv( |
||||
in_channels, out_channels, 3, 2, bias=False, act=act)) |
||||
stage.append(conv_layer) |
||||
layers_num += 1 |
||||
|
||||
if use_spp and arch in ['X']: |
||||
# in YOLOX use SPPLayer |
||||
spp_layer = self.add_sublayer( |
||||
'layers{}.stage{}.spp_layer'.format(layers_num, i + 1), |
||||
SPPLayer( |
||||
out_channels, |
||||
out_channels, |
||||
kernel_sizes=spp_kernal_sizes, |
||||
bias=False, |
||||
act=act)) |
||||
stage.append(spp_layer) |
||||
layers_num += 1 |
||||
|
||||
csp_layer = self.add_sublayer( |
||||
'layers{}.stage{}.csp_layer'.format(layers_num, i + 1), |
||||
CSPLayer( |
||||
out_channels, |
||||
out_channels, |
||||
num_blocks=num_blocks, |
||||
shortcut=shortcut, |
||||
depthwise=depthwise, |
||||
bias=False, |
||||
act=act)) |
||||
stage.append(csp_layer) |
||||
layers_num += 1 |
||||
|
||||
if use_spp and arch in ['P5', 'P6']: |
||||
# in latest YOLOv5 use SPPFLayer instead of SPPLayer |
||||
sppf_layer = self.add_sublayer( |
||||
'layers{}.stage{}.sppf_layer'.format(layers_num, i + 1), |
||||
SPPFLayer( |
||||
out_channels, |
||||
out_channels, |
||||
ksize=5, |
||||
bias=False, |
||||
act=act)) |
||||
stage.append(sppf_layer) |
||||
layers_num += 1 |
||||
|
||||
self.csp_dark_blocks.append(nn.Sequential(*stage)) |
||||
|
||||
self._out_channels = [_out_channels[i] for i in self.return_idx] |
||||
self.strides = [[2, 4, 8, 16, 32, 64][i] for i in self.return_idx] |
||||
|
||||
def forward(self, inputs): |
||||
x = inputs['image'] |
||||
outputs = [] |
||||
x = self.stem(x) |
||||
for i, layer in enumerate(self.csp_dark_blocks): |
||||
x = layer(x) |
||||
if i + 1 in self.return_idx: |
||||
outputs.append(x) |
||||
return outputs |
||||
|
||||
@property |
||||
def out_shape(self): |
||||
return [ |
||||
ShapeSpec( |
||||
channels=c, stride=s) |
||||
for c, s in zip(self._out_channels, self.strides) |
||||
] |
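# A hedged usage sketch of the backbone defined above. It assumes the paddlers
# package is installed and that this file is importable as
# paddlers.models.ppdet.modeling.backbones.csp_darknet (the import path is an
# assumption; adjust it to wherever CSPDarkNet actually lives). With the default
# return_idx=[2, 3, 4], the backbone returns C3/C4/C5 features at strides 8/16/32.
import paddle
from paddlers.models.ppdet.modeling.backbones.csp_darknet import CSPDarkNet

model = CSPDarkNet(arch='X', depth_mult=0.33, width_mult=0.50)  # YOLOX-s scale
feats = model({'image': paddle.randn([1, 3, 640, 640])})
print([f.shape for f in feats])  # [[1, 128, 80, 80], [1, 256, 40, 40], [1, 512, 20, 20]]
print(model.out_shape)           # ShapeSpec channels/strides matching the features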
@ -0,0 +1,321 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from __future__ import absolute_import |
||||
from __future__ import division |
||||
from __future__ import print_function |
||||
|
||||
import paddle |
||||
import paddle.nn as nn |
||||
import paddle.nn.functional as F |
||||
from paddle import ParamAttr |
||||
from paddle.regularizer import L2Decay |
||||
from paddle.nn.initializer import Constant |
||||
|
||||
from paddlers.models.ppdet.modeling.ops import get_act_fn |
||||
from paddlers.models.ppdet.core.workspace import register, serializable |
||||
from ..shape_spec import ShapeSpec |
||||
|
||||
__all__ = ['CSPResNet', 'BasicBlock', 'EffectiveSELayer', 'ConvBNLayer'] |
||||
|
||||
|
||||
class ConvBNLayer(nn.Layer): |
||||
def __init__(self, |
||||
ch_in, |
||||
ch_out, |
||||
filter_size=3, |
||||
stride=1, |
||||
groups=1, |
||||
padding=0, |
||||
act=None): |
||||
super(ConvBNLayer, self).__init__() |
||||
|
||||
self.conv = nn.Conv2D( |
||||
in_channels=ch_in, |
||||
out_channels=ch_out, |
||||
kernel_size=filter_size, |
||||
stride=stride, |
||||
padding=padding, |
||||
groups=groups, |
||||
bias_attr=False) |
||||
|
||||
self.bn = nn.BatchNorm2D( |
||||
ch_out, |
||||
weight_attr=ParamAttr(regularizer=L2Decay(0.0)), |
||||
bias_attr=ParamAttr(regularizer=L2Decay(0.0))) |
||||
self.act = get_act_fn(act) if act is None or isinstance(act, ( |
||||
str, dict)) else act |
||||
|
||||
def forward(self, x): |
||||
x = self.conv(x) |
||||
x = self.bn(x) |
||||
x = self.act(x) |
||||
|
||||
return x |
||||
|
||||
|
||||
class RepVggBlock(nn.Layer): |
||||
def __init__(self, ch_in, ch_out, act='relu', alpha=False): |
||||
super(RepVggBlock, self).__init__() |
||||
self.ch_in = ch_in |
||||
self.ch_out = ch_out |
||||
self.conv1 = ConvBNLayer( |
||||
ch_in, ch_out, 3, stride=1, padding=1, act=None) |
||||
self.conv2 = ConvBNLayer( |
||||
ch_in, ch_out, 1, stride=1, padding=0, act=None) |
||||
self.act = get_act_fn(act) if act is None or isinstance(act, ( |
||||
str, dict)) else act |
||||
if alpha: |
||||
self.alpha = self.create_parameter( |
||||
shape=[1], |
||||
attr=ParamAttr(initializer=Constant(value=1.)), |
||||
dtype="float32") |
||||
else: |
||||
self.alpha = None |
||||
|
||||
def forward(self, x): |
||||
if hasattr(self, 'conv'): |
||||
y = self.conv(x) |
||||
else: |
||||
if self.alpha: |
||||
y = self.conv1(x) + self.alpha * self.conv2(x) |
||||
else: |
||||
y = self.conv1(x) + self.conv2(x) |
||||
y = self.act(y) |
||||
return y |
||||
|
||||
def convert_to_deploy(self): |
||||
if not hasattr(self, 'conv'): |
||||
self.conv = nn.Conv2D( |
||||
in_channels=self.ch_in, |
||||
out_channels=self.ch_out, |
||||
kernel_size=3, |
||||
stride=1, |
||||
padding=1, |
||||
groups=1) |
||||
kernel, bias = self.get_equivalent_kernel_bias() |
||||
self.conv.weight.set_value(kernel) |
||||
self.conv.bias.set_value(bias) |
||||
self.__delattr__('conv1') |
||||
self.__delattr__('conv2') |
||||
|
||||
def get_equivalent_kernel_bias(self): |
||||
kernel3x3, bias3x3 = self._fuse_bn_tensor(self.conv1) |
||||
kernel1x1, bias1x1 = self._fuse_bn_tensor(self.conv2) |
||||
if self.alpha: |
||||
return kernel3x3 + self.alpha * self._pad_1x1_to_3x3_tensor( |
||||
kernel1x1), bias3x3 + self.alpha * bias1x1 |
||||
else: |
||||
return kernel3x3 + self._pad_1x1_to_3x3_tensor( |
||||
kernel1x1), bias3x3 + bias1x1 |
||||
|
||||
def _pad_1x1_to_3x3_tensor(self, kernel1x1): |
||||
if kernel1x1 is None: |
||||
return 0 |
||||
else: |
||||
return nn.functional.pad(kernel1x1, [1, 1, 1, 1]) |
||||
|
||||
def _fuse_bn_tensor(self, branch): |
||||
if branch is None: |
||||
return 0, 0 |
||||
kernel = branch.conv.weight |
||||
running_mean = branch.bn._mean |
||||
running_var = branch.bn._variance |
||||
gamma = branch.bn.weight |
||||
beta = branch.bn.bias |
||||
eps = branch.bn._epsilon |
||||
std = (running_var + eps).sqrt() |
||||
t = (gamma / std).reshape((-1, 1, 1, 1)) |
||||
return kernel * t, beta - running_mean * gamma / std |
||||
|
||||
|
||||
class BasicBlock(nn.Layer): |
||||
def __init__(self, |
||||
ch_in, |
||||
ch_out, |
||||
act='relu', |
||||
shortcut=True, |
||||
use_alpha=False): |
||||
super(BasicBlock, self).__init__() |
||||
assert ch_in == ch_out |
||||
self.conv1 = ConvBNLayer(ch_in, ch_out, 3, stride=1, padding=1, act=act) |
||||
self.conv2 = RepVggBlock(ch_out, ch_out, act=act, alpha=use_alpha) |
||||
self.shortcut = shortcut |
||||
|
||||
def forward(self, x): |
||||
y = self.conv1(x) |
||||
y = self.conv2(y) |
||||
if self.shortcut: |
||||
return paddle.add(x, y) |
||||
else: |
||||
return y |
||||
|
||||
|
||||
class EffectiveSELayer(nn.Layer): |
||||
""" Effective Squeeze-Excitation |
||||
From `CenterMask : Real-Time Anchor-Free Instance Segmentation` - https://arxiv.org/abs/1911.06667 |
||||
""" |
||||
|
||||
def __init__(self, channels, act='hardsigmoid'): |
||||
super(EffectiveSELayer, self).__init__() |
||||
self.fc = nn.Conv2D(channels, channels, kernel_size=1, padding=0) |
||||
self.act = get_act_fn(act) if act is None or isinstance(act, ( |
||||
str, dict)) else act |
||||
|
||||
def forward(self, x): |
||||
x_se = x.mean((2, 3), keepdim=True) |
||||
x_se = self.fc(x_se) |
||||
return x * self.act(x_se) |
||||
|
||||
|
||||
class CSPResStage(nn.Layer): |
||||
def __init__(self, |
||||
block_fn, |
||||
ch_in, |
||||
ch_out, |
||||
n, |
||||
stride, |
||||
act='relu', |
||||
attn='eca', |
||||
use_alpha=False): |
||||
super(CSPResStage, self).__init__() |
||||
|
||||
ch_mid = (ch_in + ch_out) // 2 |
||||
if stride == 2: |
||||
self.conv_down = ConvBNLayer( |
||||
ch_in, ch_mid, 3, stride=2, padding=1, act=act) |
||||
else: |
||||
self.conv_down = None |
||||
self.conv1 = ConvBNLayer(ch_mid, ch_mid // 2, 1, act=act) |
||||
self.conv2 = ConvBNLayer(ch_mid, ch_mid // 2, 1, act=act) |
||||
self.blocks = nn.Sequential(*[ |
||||
block_fn( |
||||
ch_mid // 2, |
||||
ch_mid // 2, |
||||
act=act, |
||||
shortcut=True, |
||||
use_alpha=use_alpha) for i in range(n) |
||||
]) |
||||
if attn: |
||||
self.attn = EffectiveSELayer(ch_mid, act='hardsigmoid') |
||||
else: |
||||
self.attn = None |
||||
|
||||
self.conv3 = ConvBNLayer(ch_mid, ch_out, 1, act=act) |
||||
|
||||
def forward(self, x): |
||||
if self.conv_down is not None: |
||||
x = self.conv_down(x) |
||||
y1 = self.conv1(x) |
||||
y2 = self.blocks(self.conv2(x)) |
||||
y = paddle.concat([y1, y2], axis=1) |
||||
if self.attn is not None: |
||||
y = self.attn(y) |
||||
y = self.conv3(y) |
||||
return y |
||||
|
||||
|
||||
@register |
||||
@serializable |
||||
class CSPResNet(nn.Layer): |
||||
__shared__ = ['width_mult', 'depth_mult', 'trt'] |
||||
|
||||
def __init__(self, |
||||
layers=[3, 6, 6, 3], |
||||
channels=[64, 128, 256, 512, 1024], |
||||
act='swish', |
||||
return_idx=[1, 2, 3], |
||||
depth_wise=False, |
||||
use_large_stem=False, |
||||
width_mult=1.0, |
||||
depth_mult=1.0, |
||||
trt=False, |
||||
use_checkpoint=False, |
||||
use_alpha=False, |
||||
**args): |
||||
super(CSPResNet, self).__init__() |
||||
self.use_checkpoint = use_checkpoint |
||||
channels = [max(round(c * width_mult), 1) for c in channels] |
||||
layers = [max(round(l * depth_mult), 1) for l in layers] |
||||
act = get_act_fn( |
||||
act, trt=trt) if act is None or isinstance(act, |
||||
(str, dict)) else act |
||||
|
||||
if use_large_stem: |
||||
self.stem = nn.Sequential( |
||||
('conv1', ConvBNLayer( |
||||
3, channels[0] // 2, 3, stride=2, padding=1, act=act)), |
||||
('conv2', ConvBNLayer( |
||||
channels[0] // 2, |
||||
channels[0] // 2, |
||||
3, |
||||
stride=1, |
||||
padding=1, |
||||
act=act)), ('conv3', ConvBNLayer( |
||||
channels[0] // 2, |
||||
channels[0], |
||||
3, |
||||
stride=1, |
||||
padding=1, |
||||
act=act))) |
||||
else: |
||||
self.stem = nn.Sequential( |
||||
('conv1', ConvBNLayer( |
||||
3, channels[0] // 2, 3, stride=2, padding=1, act=act)), |
||||
('conv2', ConvBNLayer( |
||||
channels[0] // 2, |
||||
channels[0], |
||||
3, |
||||
stride=1, |
||||
padding=1, |
||||
act=act))) |
||||
|
||||
n = len(channels) - 1 |
||||
self.stages = nn.Sequential(*[(str(i), CSPResStage( |
||||
BasicBlock, |
||||
channels[i], |
||||
channels[i + 1], |
||||
layers[i], |
||||
2, |
||||
act=act, |
||||
use_alpha=use_alpha)) for i in range(n)]) |
||||
|
||||
self._out_channels = channels[1:] |
||||
self._out_strides = [4 * 2**i for i in range(n)] |
||||
self.return_idx = return_idx |
||||
if use_checkpoint: |
||||
paddle.seed(0) |
||||
|
||||
def forward(self, inputs): |
||||
x = inputs['image'] |
||||
x = self.stem(x) |
||||
outs = [] |
||||
for idx, stage in enumerate(self.stages): |
||||
if self.use_checkpoint and self.training: |
||||
x = paddle.distributed.fleet.utils.recompute( |
||||
stage, x, **{"preserve_rng_state": True}) |
||||
else: |
||||
x = stage(x) |
||||
if idx in self.return_idx: |
||||
outs.append(x) |
||||
|
||||
return outs |
||||
|
||||
@property |
||||
def out_shape(self): |
||||
return [ |
||||
ShapeSpec( |
||||
channels=self._out_channels[i], stride=self._out_strides[i]) |
||||
for i in self.return_idx |
||||
] |
@ -0,0 +1,266 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
""" |
||||
This code is the paddle implementation of MobileOne block, see: https://arxiv.org/pdf/2206.04040.pdf. |
||||
Some code is based on https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py |
||||
The copyright of microsoft/Swin-Transformer is as follows: |
||||
MIT License [see LICENSE for details] |
||||
""" |
||||
|
||||
import paddle |
||||
import paddle.nn as nn |
||||
from paddle import ParamAttr |
||||
from paddle.regularizer import L2Decay |
||||
from paddle.nn.initializer import Normal, Constant |
||||
|
||||
from paddlers.models.ppdet.modeling.ops import get_act_fn |
||||
from paddlers.models.ppdet.modeling.layers import ConvNormLayer |
||||
|
||||
|
||||
class MobileOneBlock(nn.Layer): |
||||
def __init__( |
||||
self, |
||||
ch_in, |
||||
ch_out, |
||||
stride, |
||||
kernel_size, |
||||
conv_num=1, |
||||
norm_type='bn', |
||||
norm_decay=0., |
||||
norm_groups=32, |
||||
bias_on=False, |
||||
lr_scale=1., |
||||
freeze_norm=False, |
||||
initializer=Normal( |
||||
mean=0., std=0.01), |
||||
skip_quant=False, |
||||
act='relu', ): |
||||
super(MobileOneBlock, self).__init__() |
||||
|
||||
self.ch_in = ch_in |
||||
self.ch_out = ch_out |
||||
self.kernel_size = kernel_size |
||||
self.stride = stride |
||||
self.padding = (kernel_size - 1) // 2 |
||||
self.k = conv_num |
||||
|
||||
self.depth_conv = nn.LayerList() |
||||
self.point_conv = nn.LayerList() |
||||
for _ in range(self.k): |
||||
self.depth_conv.append( |
||||
ConvNormLayer( |
||||
ch_in, |
||||
ch_in, |
||||
kernel_size, |
||||
stride=stride, |
||||
groups=ch_in, |
||||
norm_type=norm_type, |
||||
norm_decay=norm_decay, |
||||
norm_groups=norm_groups, |
||||
bias_on=bias_on, |
||||
lr_scale=lr_scale, |
||||
freeze_norm=freeze_norm, |
||||
initializer=initializer, |
||||
skip_quant=skip_quant)) |
||||
self.point_conv.append( |
||||
ConvNormLayer( |
||||
ch_in, |
||||
ch_out, |
||||
1, |
||||
stride=1, |
||||
groups=1, |
||||
norm_type=norm_type, |
||||
norm_decay=norm_decay, |
||||
norm_groups=norm_groups, |
||||
bias_on=bias_on, |
||||
lr_scale=lr_scale, |
||||
freeze_norm=freeze_norm, |
||||
initializer=initializer, |
||||
skip_quant=skip_quant)) |
||||
self.rbr_1x1 = ConvNormLayer( |
||||
ch_in, |
||||
ch_in, |
||||
1, |
||||
stride=self.stride, |
||||
groups=ch_in, |
||||
norm_type=norm_type, |
||||
norm_decay=norm_decay, |
||||
norm_groups=norm_groups, |
||||
bias_on=bias_on, |
||||
lr_scale=lr_scale, |
||||
freeze_norm=freeze_norm, |
||||
initializer=initializer, |
||||
skip_quant=skip_quant) |
||||
self.rbr_identity_st1 = nn.BatchNorm2D( |
||||
num_features=ch_in, |
||||
weight_attr=ParamAttr(regularizer=L2Decay(0.0)), |
||||
bias_attr=ParamAttr(regularizer=L2Decay( |
||||
0.0))) if ch_in == ch_out and self.stride == 1 else None |
||||
self.rbr_identity_st2 = nn.BatchNorm2D( |
||||
num_features=ch_out, |
||||
weight_attr=ParamAttr(regularizer=L2Decay(0.0)), |
||||
bias_attr=ParamAttr(regularizer=L2Decay( |
||||
0.0))) if ch_in == ch_out and self.stride == 1 else None |
||||
self.act = get_act_fn(act) if act is None or isinstance(act, ( |
||||
str, dict)) else act |
||||
|
||||
def forward(self, x): |
||||
if hasattr(self, "conv1") and hasattr(self, "conv2"): |
||||
y = self.act(self.conv2(self.act(self.conv1(x)))) |
||||
else: |
||||
if self.rbr_identity_st1 is None: |
||||
id_out_st1 = 0 |
||||
else: |
||||
id_out_st1 = self.rbr_identity_st1(x) |
||||
|
||||
x1_1 = 0 |
||||
for i in range(self.k): |
||||
x1_1 += self.depth_conv[i](x) |
||||
|
||||
x1_2 = self.rbr_1x1(x) |
||||
x1 = self.act(x1_1 + x1_2 + id_out_st1) |
||||
|
||||
if self.rbr_identity_st2 is None: |
||||
id_out_st2 = 0 |
||||
else: |
||||
id_out_st2 = self.rbr_identity_st2(x1) |
||||
|
||||
x2_1 = 0 |
||||
for i in range(self.k): |
||||
x2_1 += self.point_conv[i](x1) |
||||
y = self.act(x2_1 + id_out_st2) |
||||
|
||||
return y |
||||
|
||||
def convert_to_deploy(self): |
||||
if not hasattr(self, 'conv1'): |
||||
self.conv1 = nn.Conv2D( |
||||
in_channels=self.ch_in, |
||||
out_channels=self.ch_in, |
||||
kernel_size=self.kernel_size, |
||||
stride=self.stride, |
||||
padding=self.padding, |
||||
groups=self.ch_in, |
||||
bias_attr=ParamAttr( |
||||
initializer=Constant(value=0.), learning_rate=1.)) |
||||
if not hasattr(self, 'conv2'): |
||||
self.conv2 = nn.Conv2D( |
||||
in_channels=self.ch_in, |
||||
out_channels=self.ch_out, |
||||
kernel_size=1, |
||||
stride=1, |
||||
padding='SAME', |
||||
groups=1, |
||||
bias_attr=ParamAttr( |
||||
initializer=Constant(value=0.), learning_rate=1.)) |
||||
|
||||
conv1_kernel, conv1_bias, conv2_kernel, conv2_bias = self.get_equivalent_kernel_bias( |
||||
) |
||||
self.conv1.weight.set_value(conv1_kernel) |
||||
self.conv1.bias.set_value(conv1_bias) |
||||
self.conv2.weight.set_value(conv2_kernel) |
||||
self.conv2.bias.set_value(conv2_bias) |
||||
self.__delattr__('depth_conv') |
||||
self.__delattr__('point_conv') |
||||
self.__delattr__('rbr_1x1') |
||||
if hasattr(self, 'rbr_identity_st1'): |
||||
self.__delattr__('rbr_identity_st1') |
||||
if hasattr(self, 'rbr_identity_st2'): |
||||
self.__delattr__('rbr_identity_st2') |
||||
|
||||
def get_equivalent_kernel_bias(self): |
||||
st1_kernel3x3, st1_bias3x3 = self._fuse_bn_tensor(self.depth_conv) |
||||
st1_kernel1x1, st1_bias1x1 = self._fuse_bn_tensor(self.rbr_1x1) |
||||
st1_kernelid, st1_biasid = self._fuse_bn_tensor( |
||||
self.rbr_identity_st1, kernel_size=self.kernel_size) |
||||
|
||||
st2_kernel1x1, st2_bias1x1 = self._fuse_bn_tensor(self.point_conv) |
||||
st2_kernelid, st2_biasid = self._fuse_bn_tensor( |
||||
self.rbr_identity_st2, kernel_size=1) |
||||
|
||||
conv1_kernel = st1_kernel3x3 + self._pad_1x1_to_3x3_tensor( |
||||
st1_kernel1x1) + st1_kernelid |
||||
|
||||
conv1_bias = st1_bias3x3 + st1_bias1x1 + st1_biasid |
||||
|
||||
conv2_kernel = st2_kernel1x1 + st2_kernelid |
||||
conv2_bias = st2_bias1x1 + st2_biasid |
||||
|
||||
return conv1_kernel, conv1_bias, conv2_kernel, conv2_bias |
||||
|
||||
def _pad_1x1_to_3x3_tensor(self, kernel1x1): |
||||
if kernel1x1 is None: |
||||
return 0 |
||||
else: |
||||
padding_size = (self.kernel_size - 1) // 2 |
||||
return nn.functional.pad( |
||||
kernel1x1, |
||||
[padding_size, padding_size, padding_size, padding_size]) |
||||
|
||||
def _fuse_bn_tensor(self, branch, kernel_size=3): |
||||
if branch is None: |
||||
return 0, 0 |
||||
|
||||
if isinstance(branch, nn.LayerList): |
||||
fused_kernels = [] |
||||
fused_bias = [] |
||||
for block in branch: |
||||
kernel = block.conv.weight |
||||
running_mean = block.norm._mean |
||||
running_var = block.norm._variance |
||||
gamma = block.norm.weight |
||||
beta = block.norm.bias |
||||
eps = block.norm._epsilon |
||||
|
||||
std = (running_var + eps).sqrt() |
||||
t = (gamma / std).reshape((-1, 1, 1, 1)) |
||||
|
||||
fused_kernels.append(kernel * t) |
||||
fused_bias.append(beta - running_mean * gamma / std) |
||||
|
||||
return sum(fused_kernels), sum(fused_bias) |
||||
|
||||
elif isinstance(branch, ConvNormLayer): |
||||
kernel = branch.conv.weight |
||||
running_mean = branch.norm._mean |
||||
running_var = branch.norm._variance |
||||
gamma = branch.norm.weight |
||||
beta = branch.norm.bias |
||||
eps = branch.norm._epsilon |
||||
else: |
||||
assert isinstance(branch, nn.BatchNorm2D) |
||||
input_dim = self.ch_in if kernel_size == 1 else 1 |
||||
kernel_value = paddle.zeros( |
||||
shape=[self.ch_in, input_dim, kernel_size, kernel_size], |
||||
dtype='float32') |
||||
if kernel_size > 1: |
||||
for i in range(self.ch_in): |
||||
kernel_value[i, i % input_dim, (kernel_size - 1) // 2, ( |
||||
kernel_size - 1) // 2] = 1 |
||||
elif kernel_size == 1: |
||||
for i in range(self.ch_in): |
||||
kernel_value[i, i % input_dim, 0, 0] = 1 |
||||
else: |
||||
raise ValueError("Invalid kernel size recieved!") |
||||
kernel = paddle.to_tensor(kernel_value, place=branch.weight.place) |
||||
running_mean = branch._mean |
||||
running_var = branch._variance |
||||
gamma = branch.weight |
||||
beta = branch.bias |
||||
eps = branch._epsilon |
||||
|
||||
std = (running_var + eps).sqrt() |
||||
t = (gamma / std).reshape((-1, 1, 1, 1)) |
||||
|
||||
return kernel * t, beta - running_mean * gamma / std |
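# A quick check of the branch-merging idea behind the reparameterization above,
# assuming paddle is installed: summing the outputs of k parallel convolutions
# (same shape, same padding) equals one convolution whose kernel is the sum of the
# k kernels, which is why the k depthwise/pointwise branches can be collapsed into
# self.conv1/self.conv2 at deploy time.
import paddle
import paddle.nn.functional as F

x = paddle.randn([1, 4, 8, 8])
kernels = [paddle.randn([4, 4, 3, 3]) for _ in range(3)]
y_branches = sum(F.conv2d(x, k, padding=1) for k in kernels)
y_merged = F.conv2d(x, sum(kernels), padding=1)
print(paddle.allclose(y_branches, y_merged, atol=1e-5).item())  # True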
@ -0,0 +1,74 @@ |
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
import paddle |
||||
import paddle.nn as nn |
||||
|
||||
from paddle.nn.initializer import TruncatedNormal, Constant, Assign |
||||
|
||||
# Common initializations |
||||
ones_ = Constant(value=1.) |
||||
zeros_ = Constant(value=0.) |
||||
trunc_normal_ = TruncatedNormal(std=.02) |
||||
|
||||
|
||||
# Common Layers |
||||
def drop_path(x, drop_prob=0., training=False): |
||||
""" |
||||
Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). |
||||
the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper... |
||||
See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... |
||||
""" |
||||
if drop_prob == 0. or not training: |
||||
return x |
||||
keep_prob = paddle.to_tensor(1 - drop_prob) |
||||
shape = (paddle.shape(x)[0], ) + (1, ) * (x.ndim - 1) |
||||
random_tensor = keep_prob + paddle.rand(shape, dtype=x.dtype) |
||||
random_tensor = paddle.floor(random_tensor) # binarize |
||||
output = x.divide(keep_prob) * random_tensor |
||||
return output |
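# A quick sanity check of the stochastic-depth helper above, assuming paddle is
# installed and drop_path is already defined: with drop_prob=0.5 roughly half of
# the samples are zeroed, and the survivors are scaled by 1/keep_prob so the
# expected value is preserved.
import paddle

paddle.seed(0)
x = paddle.ones([1000, 8])
y = drop_path(x, drop_prob=0.5, training=True)
kept = (y.sum(axis=1) > 0).astype('float32').mean().item()
print(round(kept, 2))   # close to 0.5
print(y.mean().item())  # close to 1.0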
||||
|
||||
|
||||
class DropPath(nn.Layer): |
||||
def __init__(self, drop_prob=None): |
||||
super(DropPath, self).__init__() |
||||
self.drop_prob = drop_prob |
||||
|
||||
def forward(self, x): |
||||
return drop_path(x, self.drop_prob, self.training) |
||||
|
||||
|
||||
class Identity(nn.Layer): |
||||
def __init__(self): |
||||
super(Identity, self).__init__() |
||||
|
||||
def forward(self, input): |
||||
return input |
||||
|
||||
|
||||
# common funcs |
||||
|
||||
|
||||
def to_2tuple(x): |
||||
if isinstance(x, (list, tuple)): |
||||
return x |
||||
return tuple([x] * 2) |
||||
|
||||
|
||||
def add_parameter(layer, datas, name=None): |
||||
parameter = layer.create_parameter( |
||||
shape=(datas.shape), default_initializer=Assign(datas)) |
||||
if name: |
||||
layer.add_parameter(name, parameter) |
||||
return parameter |