Merge pull request #11 from juncaipeng/det3

Refine the pipeline of detection
Liu Yi committed 3 years ago via GitHub
commit a86e32f2fc
This PR flattens the detection task package (the paddlers.tasks.det subpackage is removed and its classes are re-exported from paddlers.tasks directly), makes the transform operators in paddlers/transforms/operators.py robust to single-channel and multi-channel (e.g. SAR and multispectral) imagery, and updates the object detection tutorials to match.

7 changed files:

  1. paddlers/tasks/__init__.py (2 lines changed)
  2. paddlers/tasks/det/__init__.py (15 lines changed, file deleted)
  3. paddlers/tasks/object_detector.py (4 lines changed)
  4. paddlers/transforms/operators.py (56 lines changed)
  5. tutorials/train/object_detection/faster_rcnn_sar_ship.py (10 lines changed)
  6. tutorials/train/object_detection/ppyolo.py (54 lines changed, file deleted)
  7. tutorials/train/object_detection/readme.md (0 lines changed)

paddlers/tasks/__init__.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from . import det
+from .object_detector import *
 from .segmenter import *
 from .changedetector import *
 from .classifier import *
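With paddlers/tasks/det/__init__.py deleted (next hunk) and object_detector re-exported here, detector classes move up one namespace level. A minimal before/after sketch of the user-facing change, assuming a PaddleRS checkout at this commit:

    import paddlers as pdrs

    # Before this PR, detectors lived in a subpackage:
    #   model = pdrs.tasks.det.FasterRCNN(num_classes=num_classes)
    # After this PR, `from .object_detector import *` re-exports them
    # at the top level of paddlers.tasks:
    model = pdrs.tasks.FasterRCNN(num_classes=2)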

paddlers/tasks/det/__init__.py (deleted)
@@ -1,15 +0,0 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from .detector import *

paddlers/tasks/object_detector.py
@@ -29,8 +29,8 @@
 from paddlers.transforms.operators import _NormalizeBox, _PadBox, _BboxXYXY2XYWH
 from paddlers.transforms.batch_operators import BatchCompose, BatchRandomResize, BatchRandomResizeByShort, \
     _BatchPadding, _Gt2YoloTarget
 from paddlers.transforms import arrange_transforms
-from ..base import BaseModel
-from ..utils.det_metrics import VOCMetric, COCOMetric
+from .base import BaseModel
+from .utils.det_metrics import VOCMetric, COCOMetric
 from paddlers.models.ppdet.optimizer import ModelEMA
 from paddlers.utils.checkpoint import det_pretrain_weights_dict
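The relative imports lose one level because the module itself moves up one level. A sketch of the layout, inferred from the import paths in this hunk:

    # before:  paddlers/tasks/det/detector.py    ->  from ..base import BaseModel
    # after:   paddlers/tasks/object_detector.py ->  from .base import BaseModel
    #
    # In both cases the target is paddlers/tasks/base.py; only the importing
    # module's depth inside the package changes, so `..` becomes `.`.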

paddlers/transforms/operators.py
@@ -120,7 +120,9 @@ class ImgDecoder(Transform):
             if dataset == None:
                 raise Exception('Can not open', img_path)
             im_data = dataset.ReadAsArray()
-            if im_data.ndim == 3:
+            if im_data.ndim == 2:
+                im_data = im_data[:, :, np.newaxis]
+            elif im_data.ndim == 3:
                 im_data = im_data.transpose((1, 2, 0))
             return im_data
         elif img_format in ['jpeg', 'bmp', 'png', 'jpg']:
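GDAL's ReadAsArray() returns a 2-D (rows, cols) array for single-band rasters and a 3-D (bands, rows, cols) array for multi-band ones; the old code only handled the 3-D case, so single-band (e.g. SAR) images came back without a channel axis. A standalone sketch of the new normalization, NumPy only, with a hypothetical helper name:

    import numpy as np

    def to_hwc(im_data):
        # Mirrors the ImgDecoder fix: always return an (H, W, C) array.
        if im_data.ndim == 2:                        # single band: (H, W)
            im_data = im_data[:, :, np.newaxis]      # -> (H, W, 1)
        elif im_data.ndim == 3:                      # multi band: (C, H, W)
            im_data = im_data.transpose((1, 2, 0))   # -> (H, W, C)
        return im_data

    assert to_hwc(np.zeros((100, 200))).shape == (100, 200, 1)
    assert to_hwc(np.zeros((4, 100, 200))).shape == (100, 200, 4)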
@@ -277,7 +279,10 @@ class Resize(Transform):
         self.keep_ratio = keep_ratio

     def apply_im(self, image, interp, target_size):
+        flag = image.shape[2] == 1
         image = cv2.resize(image, target_size, interpolation=interp)
+        if flag:
+            image = image[:, :, np.newaxis]
         return image

     def apply_mask(self, mask, target_size):
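The extra flag is needed because cv2.resize silently drops a trailing singleton channel: resizing an (H, W, 1) array returns (H, W). Restoring the axis keeps single-channel images in HWC layout through the rest of the pipeline. A quick demonstration (assumes opencv-python is installed):

    import cv2
    import numpy as np

    img = np.zeros((100, 200, 1), dtype=np.float32)
    out = cv2.resize(img, (50, 40))      # dsize is (width, height)
    print(out.shape)                     # (40, 50): channel axis gone

    flag = img.shape[2] == 1             # the fix from this hunk
    out = cv2.resize(img, (50, 40))
    if flag:
        out = out[:, :, np.newaxis]
    print(out.shape)                     # (40, 50, 1)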
@@ -346,7 +351,6 @@ class Resize(Transform):
             sample['scale_factor'] = np.asarray(
                 [scale_factor[0] * im_scale_y, scale_factor[1] * im_scale_x],
                 dtype=np.float32)
-
         return sample
@@ -1001,8 +1005,8 @@ class Padding(Transform):
     def apply_im(self, image, offsets, target_size):
         x, y = offsets
-        im_h, im_w, channel = image.shape[:3]
         h, w = target_size
+        im_h, im_w, channel = image.shape[:3]
         canvas = np.ones((h, w, channel), dtype=np.float32)
         canvas *= np.array(self.im_padding_value, dtype=np.float32)
         canvas[y:y + im_h, x:x + im_w, :] = image.astype(np.float32)
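Reading the channel count from the image itself keeps Padding band-agnostic: the canvas gets however many channels the input has, as long as im_padding_value broadcasts to that many values. A self-contained sketch with a hypothetical pad_to_canvas helper:

    import numpy as np

    def pad_to_canvas(image, offsets, target_size, padding_value):
        # Place `image` at `offsets` on a filled canvas of `target_size`.
        x, y = offsets
        h, w = target_size
        im_h, im_w, channel = image.shape[:3]
        canvas = np.ones((h, w, channel), dtype=np.float32)
        canvas *= np.array(padding_value, dtype=np.float32)  # one value per channel
        canvas[y:y + im_h, x:x + im_w, :] = image.astype(np.float32)
        return canvas

    out = pad_to_canvas(np.zeros((60, 80, 4)), (10, 10), (128, 128), [0.0] * 4)
    assert out.shape == (128, 128, 4)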
@@ -1204,7 +1208,6 @@ class RandomDistort(Transform):
         if np.random.uniform(0., 1.) < self.hue_prob:
             return image

-        image = image.astype(np.float32)
         # it works, but result differ from HSV version
         delta = np.random.uniform(low, high)
         u = np.cos(delta * np.pi)
@@ -1215,22 +1218,45 @@ class RandomDistort(Transform):
         ityiq = np.array([[1.0, 0.956, 0.621], [1.0, -0.272, -0.647],
                           [1.0, -1.107, 1.705]])
         t = np.dot(np.dot(ityiq, bt), tyiq).T
-        image = np.dot(image, t)
-        return image
+        res_list = []
+        channel = image.shape[2]
+        for i in range(channel // 3):
+            sub_img = image[:, :, 3 * i:3 * (i + 1)]
+            sub_img = sub_img.astype(np.float32)
+            sub_img = np.dot(sub_img, t)
+            res_list.append(sub_img)
+        if channel % 3 != 0:
+            i = channel % 3
+            res_list.append(image[:, :, -i:])
+        return np.concatenate(res_list, axis=2)

     def apply_saturation(self, image):
         low, high = self.saturation_range
-        delta = np.random.uniform(low, high)
         if np.random.uniform(0., 1.) < self.saturation_prob:
             return image
-        image = image.astype(np.float32)
-        # it works, but result differ from HSV version
-        gray = image * np.array([[[0.299, 0.587, 0.114]]], dtype=np.float32)
-        gray = gray.sum(axis=2, keepdims=True)
-        gray *= (1.0 - delta)
-        image *= delta
-        image += gray
-        return image
+        delta = np.random.uniform(low, high)
+        res_list = []
+        channel = image.shape[2]
+        for i in range(channel // 3):
+            sub_img = image[:, :, 3 * i:3 * (i + 1)]
+            sub_img = sub_img.astype(np.float32)
+            # it works, but result differ from HSV version
+            gray = sub_img * np.array([[[0.299, 0.587, 0.114]]], dtype=np.float32)
+            gray = gray.sum(axis=2, keepdims=True)
+            gray *= (1.0 - delta)
+            sub_img *= delta
+            sub_img += gray
+            res_list.append(sub_img)
+        if channel % 3 != 0:
+            i = channel % 3
+            res_list.append(image[:, :, -i:])
+        return np.concatenate(res_list, axis=2)

     def apply_contrast(self, image):
         low, high = self.contrast_range
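Both distort ops now share the same pattern: walk the bands in consecutive groups of three, run the RGB-based math on each group, and pass the leftover channel % 3 bands through unchanged, so hue and saturation jitter also work on multispectral input. A distilled sketch of the grouping with a hypothetical helper and an identity transform:

    import numpy as np

    def per_rgb_group(image, t):
        # Apply a 3x3 color transform to each group of 3 bands;
        # leftover bands are appended untouched.
        res_list = []
        channel = image.shape[2]
        for i in range(channel // 3):
            sub_img = image[:, :, 3 * i:3 * (i + 1)].astype(np.float32)
            res_list.append(np.dot(sub_img, t))
        if channel % 3 != 0:
            res_list.append(image[:, :, -(channel % 3):])
        return np.concatenate(res_list, axis=2)

    out = per_rgb_group(np.zeros((4, 4, 7), dtype=np.float32), np.eye(3))
    assert out.shape == (4, 4, 7)  # two RGB groups transformed, 1 band passed through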

tutorials/train/object_detection/faster_rcnn_sar_ship.py
@@ -11,7 +11,7 @@ if not os.path.exists(data_dir):
 # define transforms
 train_transforms = T.Compose([
     T.RandomDistort(),
-    T.RandomExpand(im_padding_value=[123.675, 116.28, 103.53]),
+    T.RandomExpand(),
     T.RandomCrop(),
     T.RandomHorizontalFlip(),
     T.BatchRandomResize(
@@ -21,9 +21,9 @@ train_transforms = T.Compose([
 ])

 eval_transforms = T.Compose([
-    T.Resize(
-        target_size=608, interp='CUBIC'), T.Normalize(
-            mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+    T.Resize(target_size=608, interp='CUBIC'),
+    T.Normalize(
+        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
 ])

 # define dataset
@@ -46,7 +46,7 @@ eval_dataset = pdrs.datasets.VOCDetection(

 # define models
 num_classes = len(train_dataset.labels)
-model = pdrs.tasks.det.FasterRCNN(num_classes=num_classes)
+model = pdrs.tasks.FasterRCNN(num_classes=num_classes)

 # train
 model.train(

tutorials/train/object_detection/ppyolo.py (deleted)
@@ -1,54 +0,0 @@
-import sys
-sys.path.append("/ssd2/pengjuncai/PaddleRS")
-
-import paddlers as pdrs
-from paddlers import transforms as T
-
-train_transforms = T.Compose([
-    T.MixupImage(mixup_epoch=-1), T.RandomDistort(),
-    T.RandomExpand(im_padding_value=[123.675, 116.28, 103.53]), T.RandomCrop(),
-    T.RandomHorizontalFlip(), T.BatchRandomResize(
-        target_sizes=[320, 352, 384, 416, 448, 480, 512, 544, 576, 608],
-        interp='RANDOM'), T.Normalize(
-            mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
-])
-
-eval_transforms = T.Compose([
-    T.Resize(
-        target_size=608, interp='CUBIC'), T.Normalize(
-            mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
-])
-
-train_dataset = pdrs.datasets.VOCDetection(
-    data_dir='insect_det',
-    file_list='insect_det/train_list.txt',
-    label_list='insect_det/labels.txt',
-    transforms=train_transforms,
-    shuffle=True)
-
-eval_dataset = pdrs.datasets.VOCDetection(
-    data_dir='insect_det',
-    file_list='insect_det/val_list.txt',
-    label_list='insect_det/labels.txt',
-    transforms=eval_transforms,
-    shuffle=False)
-
-num_classes = len(train_dataset.labels)
-model = pdrs.tasks.det.PPYOLO(num_classes=num_classes, backbone='ResNet50_vd_dcn')
-
-model.train(
-    num_epochs=200,
-    train_dataset=train_dataset,
-    train_batch_size=8,
-    eval_dataset=eval_dataset,
-    pretrain_weights='COCO',
-    learning_rate=0.005 / 12,
-    warmup_steps=500,
-    warmup_start_lr=0.0,
-    save_interval_epochs=5,
-    lr_decay_epochs=[85, 135],
-    save_dir='output/ppyolo_r50vd_dcn',
-    use_vdl=True)