Improve tests coverage and speed (#4340)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
pull/4329/head^2
Glenn Jocher 1 year ago committed by GitHub
parent d704507217
commit 9f6d48d3cf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 28
      docs/models/fast-sam.md
  2. 2
      tests/conftest.py
  3. 98
      tests/test_cli.py
  4. 8
      tests/test_engine.py
  5. 126
      tests/test_python.py
  6. 27
      ultralytics/models/fastsam/prompt.py
  7. 225
      ultralytics/models/fastsam/val.py
  8. 2
      ultralytics/models/rtdetr/model.py
  9. 8
      ultralytics/nn/modules/head.py
  10. 4
      ultralytics/nn/modules/transformer.py

@ -47,22 +47,17 @@ To perform object detection on an image, use the `predict` method as shown below
from ultralytics import FastSAM
from ultralytics.models.fastsam import FastSAMPrompt
# Define image path and inference device
IMAGE_PATH = 'ultralytics/assets/bus.jpg'
DEVICE = 'cpu'
# Define an inference source
source = 'path/to/bus.jpg'
# Create a FastSAM model
model = FastSAM('FastSAM-s.pt') # or FastSAM-x.pt
# Run inference on an image
everything_results = model(IMAGE_PATH,
device=DEVICE,
retina_masks=True,
imgsz=1024,
conf=0.4,
iou=0.9)
everything_results = model(source, device='cpu', retina_masks=True, imgsz=1024, conf=0.4, iou=0.9)
prompt_process = FastSAMPrompt(IMAGE_PATH, everything_results, device=DEVICE)
# Prepare a Prompt Process object
prompt_process = FastSAMPrompt(source, everything_results, device='cpu')
# Everything prompt
ann = prompt_process.everything_prompt()
@ -80,6 +75,12 @@ To perform object detection on an image, use the `predict` method as shown below
prompt_process.plot(annotations=ann, output='./')
```
=== "CLI"
```bash
# Load a FastSAM model and segment everything with it
yolo segment predict model=FastSAM-s.pt source=path/to/bus.jpg imgsz=640
```
This snippet demonstrates the simplicity of loading a pre-trained model and running a prediction on an image.
#### Val Usage
@ -89,7 +90,6 @@ Validation of the model on a dataset can be done as follows:
!!! example ""
=== "Python"
```python
from ultralytics import FastSAM
@ -100,6 +100,12 @@ Validation of the model on a dataset can be done as follows:
results = model.val(data='coco8-seg.yaml')
```
=== "CLI"
```bash
# Load a FastSAM model and validate it on the COCO8 example dataset at image size 640
yolo segment val model=FastSAM-s.pt data=coco8.yaml imgsz=640
```
Please note that FastSAM only supports detection and segmentation of a single class of object. This means it will recognize and segment all objects as the same class. Therefore, when preparing the dataset, you need to convert all object category IDs to 0.
### FastSAM official Usage

@ -1,3 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
import pytest

@ -8,12 +8,16 @@ import pytest
from ultralytics.utils import ONLINE, ROOT, SETTINGS
WEIGHT_DIR = Path(SETTINGS['weights_dir'])
TASK_ARGS = [ # (task, model, data)
('detect', 'yolov8n', 'coco8.yaml'), ('segment', 'yolov8n-seg', 'coco8-seg.yaml'),
('classify', 'yolov8n-cls', 'imagenet10'), ('pose', 'yolov8n-pose', 'coco8-pose.yaml')]
EXPORT_ARGS = [ # (model, format)
('yolov8n', 'torchscript'), ('yolov8n-seg', 'torchscript'), ('yolov8n-cls', 'torchscript'),
('yolov8n-pose', 'torchscript')]
TASK_ARGS = [
('detect', 'yolov8n', 'coco8.yaml'),
('segment', 'yolov8n-seg', 'coco8-seg.yaml'),
('classify', 'yolov8n-cls', 'imagenet10'),
('pose', 'yolov8n-pose', 'coco8-pose.yaml'), ] # (task, model, data)
EXPORT_ARGS = [
('yolov8n', 'torchscript'),
('yolov8n-seg', 'torchscript'),
('yolov8n-cls', 'torchscript'),
('yolov8n-pose', 'torchscript'), ] # (model, format)
def run(cmd):
@ -22,9 +26,12 @@ def run(cmd):
def test_special_modes():
run('yolo checks')
run('yolo settings')
run('yolo help')
run('yolo checks')
run('yolo version')
run('yolo settings reset')
run('yolo copy-cfg')
run('yolo cfg')
@pytest.mark.parametrize('task,model,data', TASK_ARGS)
@ -34,21 +41,82 @@ def test_train(task, model, data):
@pytest.mark.parametrize('task,model,data', TASK_ARGS)
def test_val(task, model, data):
run(f'yolo val {task} model={model}.pt data={data} imgsz=32')
run(f'yolo val {task} model={WEIGHT_DIR / model}.pt data={data} imgsz=32')
@pytest.mark.parametrize('task,model,data', TASK_ARGS)
def test_predict(task, model, data):
run(f"yolo predict model={model}.pt source={ROOT / 'assets'} imgsz=32 save save_crop save_txt")
if ONLINE:
run(f'yolo predict model={model}.pt source=https://ultralytics.com/images/bus.jpg imgsz=32')
run(f'yolo predict model={model}.pt source=https://ultralytics.com/assets/decelera_landscape_min.mov imgsz=32')
run(f'yolo predict model={model}.pt source=https://ultralytics.com/assets/decelera_portrait_min.mov imgsz=32')
run(f"yolo predict model={WEIGHT_DIR / model}.pt source={ROOT / 'assets'} imgsz=32 save save_crop save_txt")
@pytest.mark.skipif(not ONLINE, reason='environment is offline')
@pytest.mark.parametrize('task,model,data', TASK_ARGS)
def test_predict_online(task, model, data):
mode = 'track' if task in ('detect', 'segment', 'pose') else 'predict' # mode for video inference
run(f'yolo predict model={WEIGHT_DIR / model}.pt source=https://ultralytics.com/images/bus.jpg imgsz=32')
run(f'yolo {mode} model={WEIGHT_DIR / model}.pt source=https://ultralytics.com/assets/decelera_landscape_min.mov imgsz=32'
)
# Run Python YouTube tracking because CLI is broken. TODO: fix CLI YouTube
# run(f'yolo {mode} model={model}.pt source=https://youtu.be/G17sBkb38XQ imgsz=32 tracker=bytetrack.yaml')
@pytest.mark.parametrize('model,format', EXPORT_ARGS)
def test_export(model, format):
run(f'yolo export model={model}.pt format={format}')
run(f'yolo export model={WEIGHT_DIR / model}.pt format={format} imgsz=32')
# Test SAM, RTDETR Models
def test_rtdetr(task='detect', model='yolov8n-rtdetr.yaml', data='coco8.yaml'):
# Warning: MUST use imgsz=640
run(f'yolo train {task} model={model} data={data} imgsz=640 epochs=1 cache=disk')
run(f'yolo val {task} model={model} data={data} imgsz=640')
run(f"yolo predict {task} model={model} source={ROOT / 'assets/bus.jpg'} imgsz=640 save save_crop save_txt")
def test_fastsam(task='segment', model='FastSAM-s.pt', data='coco8-seg.yaml'):
source = ROOT / 'assets/bus.jpg'
run(f'yolo segment val {task} model={model} data={data} imgsz=32')
run(f'yolo segment predict model={model} source={source} imgsz=32 save save_crop save_txt')
from ultralytics import FastSAM
from ultralytics.models.fastsam import FastSAMPrompt
# Create a FastSAM model
model = FastSAM('FastSAM-s.pt') # or FastSAM-x.pt
# Run inference on an image
everything_results = model(source, device='cpu', retina_masks=True, imgsz=1024, conf=0.4, iou=0.9)
# Everything prompt
prompt_process = FastSAMPrompt(source, everything_results, device='cpu')
ann = prompt_process.everything_prompt()
# Bbox default shape [0,0,0,0] -> [x1,y1,x2,y2]
ann = prompt_process.box_prompt(bbox=[200, 200, 300, 300])
# Text prompt
ann = prompt_process.text_prompt(text='a photo of a dog')
# Point prompt
# points default [[0,0]] [[x1,y1],[x2,y2]]
# point_label default [0] [1,0] 0:background, 1:foreground
ann = prompt_process.point_prompt(points=[[200, 200]], pointlabel=[1])
prompt_process.plot(annotations=ann, output='./')
def test_mobilesam():
from ultralytics import SAM
# Load the model
model = SAM('mobile_sam.pt')
# Predict a segment based on a point prompt
model.predict(ROOT / 'assets/zidane.jpg', points=[900, 370], labels=[1])
# Predict a segment based on a box prompt
model.predict(ROOT / 'assets/zidane.jpg', bboxes=[439, 437, 524, 709])
# Slow Tests

@ -10,13 +10,13 @@ from ultralytics.utils import DEFAULT_CFG, ROOT, SETTINGS
CFG_DET = 'yolov8n.yaml'
CFG_SEG = 'yolov8n-seg.yaml'
CFG_CLS = 'squeezenet1_0'
CFG_CLS = 'yolov8n-cls.yaml' # or 'squeezenet1_0'
CFG = get_cfg(DEFAULT_CFG)
MODEL = Path(SETTINGS['weights_dir']) / 'yolov8n'
SOURCE = ROOT / 'assets'
def test_func(model=None):
def test_func(*args): # noqa
print('callback test passed')
@ -31,6 +31,7 @@ def test_export():
def test_detect():
overrides = {'data': 'coco8.yaml', 'model': CFG_DET, 'imgsz': 32, 'epochs': 1, 'save': False}
CFG.data = 'coco8.yaml'
CFG.imgsz = 32
# Trainer
trainer = detect.DetectionTrainer(overrides=overrides)
@ -65,6 +66,7 @@ def test_detect():
def test_segment():
overrides = {'data': 'coco8-seg.yaml', 'model': CFG_SEG, 'imgsz': 32, 'epochs': 1, 'save': False}
CFG.data = 'coco8-seg.yaml'
CFG.imgsz = 32
# YOLO(CFG_SEG).train(**overrides) # works
# trainer
@ -99,7 +101,7 @@ def test_segment():
def test_classify():
overrides = {'data': 'imagenet10', 'model': 'yolov8n-cls.yaml', 'imgsz': 32, 'epochs': 1, 'save': False}
overrides = {'data': 'imagenet10', 'model': CFG_CLS, 'imgsz': 32, 'epochs': 1, 'save': False}
CFG.data = 'imagenet10'
CFG.imgsz = 32
# YOLO(CFG_SEG).train(**overrides) # works

@ -10,9 +10,11 @@ from torchvision.transforms import ToTensor
from ultralytics import RTDETR, YOLO
from ultralytics.data.build import load_inference_source
from ultralytics.utils import LINUX, ONLINE, ROOT, SETTINGS
from ultralytics.utils import LINUX, MACOS, ONLINE, ROOT, SETTINGS
from ultralytics.utils.torch_utils import TORCH_1_9
MODEL = Path(SETTINGS['weights_dir']) / 'path with spaces' / 'yolov8n.pt' # test spaces in path
WEIGHTS_DIR = Path(SETTINGS['weights_dir'])
MODEL = WEIGHTS_DIR / 'path with spaces' / 'yolov8n.pt' # test spaces in path
CFG = 'yolov8n.yaml'
SOURCE = ROOT / 'assets/bus.jpg'
SOURCE_GREYSCALE = Path(f'{SOURCE.parent / SOURCE.stem}_greyscale.jpg')
@ -26,39 +28,35 @@ im.convert('RGBA').save(SOURCE_RGBA) # 4-ch PNG with alpha
def test_model_forward():
model = YOLO(CFG)
model(SOURCE)
model(SOURCE, imgsz=32)
def test_model_info():
model = YOLO(CFG)
model.info()
model = YOLO(MODEL)
model.info(verbose=True)
def test_model_fuse():
model = YOLO(CFG)
model.fuse()
model = YOLO(MODEL)
model.fuse()
def test_predict_dir():
model = YOLO(MODEL)
model(source=ROOT / 'assets')
model(source=ROOT / 'assets', imgsz=32)
def test_predict_img():
model = YOLO(MODEL)
seg_model = YOLO('yolov8n-seg.pt')
cls_model = YOLO('yolov8n-cls.pt')
pose_model = YOLO('yolov8n-pose.pt')
seg_model = YOLO(WEIGHTS_DIR / 'yolov8n-seg.pt')
cls_model = YOLO(WEIGHTS_DIR / 'yolov8n-cls.pt')
pose_model = YOLO(WEIGHTS_DIR / 'yolov8n-pose.pt')
im = cv2.imread(str(SOURCE))
assert len(model(source=Image.open(SOURCE), save=True, verbose=True)) == 1 # PIL
assert len(model(source=im, save=True, save_txt=True)) == 1 # ndarray
assert len(model(source=[im, im], save=True, save_txt=True)) == 2 # batch
assert len(list(model(source=[im, im], save=True, stream=True))) == 2 # stream
assert len(model(torch.zeros(320, 640, 3).numpy())) == 1 # tensor to numpy
assert len(model(source=Image.open(SOURCE), save=True, verbose=True, imgsz=32)) == 1 # PIL
assert len(model(source=im, save=True, save_txt=True, imgsz=32)) == 1 # ndarray
assert len(model(source=[im, im], save=True, save_txt=True, imgsz=32)) == 2 # batch
assert len(list(model(source=[im, im], save=True, stream=True, imgsz=32))) == 2 # stream
assert len(model(torch.zeros(320, 640, 3).numpy(), imgsz=32)) == 1 # tensor to numpy
batch = [
str(SOURCE), # filename
Path(SOURCE), # Path
@ -66,20 +64,20 @@ def test_predict_img():
cv2.imread(str(SOURCE)), # OpenCV
Image.open(SOURCE), # PIL
np.zeros((320, 640, 3))] # numpy
assert len(model(batch, visualize=True)) == len(batch) # multiple sources in a batch
assert len(model(batch, imgsz=32)) == len(batch) # multiple sources in a batch
# Test tensor inference
im = cv2.imread(str(SOURCE)) # OpenCV
t = cv2.resize(im, (32, 32))
t = ToTensor()(t)
t = torch.stack([t, t, t, t])
results = model(t, visualize=True)
results = model(t, imgsz=32)
assert len(results) == t.shape[0]
results = seg_model(t, visualize=True)
results = seg_model(t, imgsz=32)
assert len(results) == t.shape[0]
results = cls_model(t, visualize=True)
results = cls_model(t, imgsz=32)
assert len(results) == t.shape[0]
results = pose_model(t, visualize=True)
results = pose_model(t, imgsz=32)
assert len(results) == t.shape[0]
@ -87,16 +85,17 @@ def test_predict_grey_and_4ch():
model = YOLO(MODEL)
for f in SOURCE_RGBA, SOURCE_GREYSCALE:
for source in Image.open(f), cv2.imread(str(f)), f:
model(source, save=True, verbose=True)
model(source, save=True, verbose=True, imgsz=32)
def test_val():
def test_track_stream():
# Test YouTube streaming inference (short 10 frame video) with non-default ByteTrack tracker
model = YOLO(MODEL)
model.val(data='coco8.yaml', imgsz=32)
model.track('https://youtu.be/G17sBkb38XQ', imgsz=32, tracker='bytetrack.yaml')
def test_val_scratch():
model = YOLO(CFG)
def test_val():
model = YOLO(MODEL)
model.val(data='coco8.yaml', imgsz=32)
@ -109,7 +108,7 @@ def test_amp():
def test_train_scratch():
model = YOLO(CFG)
model.train(data='coco8.yaml', epochs=1, imgsz=32, cache='disk') # test disk caching
model.train(data='coco8.yaml', epochs=1, imgsz=32, cache='disk', batch=-1) # test disk caching with AutoBatch
model(SOURCE)
@ -125,12 +124,6 @@ def test_export_torchscript():
YOLO(f)(SOURCE) # exported model inference
def test_export_torchscript_scratch():
model = YOLO(CFG)
f = model.export(format='torchscript')
YOLO(f)(SOURCE) # exported model inference
def test_export_onnx():
model = YOLO(MODEL)
f = model.export(format='onnx')
@ -138,14 +131,15 @@ def test_export_onnx():
def test_export_openvino():
model = YOLO(MODEL)
f = model.export(format='openvino')
YOLO(f)(SOURCE) # exported model inference
if not MACOS:
model = YOLO(MODEL)
f = model.export(format='openvino')
YOLO(f)(SOURCE) # exported model inference
def test_export_coreml(): # sourcery skip: move-assign
model = YOLO(MODEL)
model.export(format='coreml')
model.export(format='coreml', nms=True)
# if MACOS:
# YOLO(f)(SOURCE) # model prediction only supported on macOS
@ -174,9 +168,10 @@ def test_export_paddle(enabled=False):
def test_all_model_yamls():
for m in list((ROOT / 'models').rglob('yolo*.yaml')):
if m.name == 'yolov8-rtdetr.yaml': # except the rtdetr model
RTDETR(m.name)
for m in (ROOT / 'cfg' / 'models').rglob('*.yaml'):
if 'rtdetr' in m.name:
if TORCH_1_9: # torch<=1.8 issue - TypeError: __init__() got an unexpected keyword argument 'batch_first'
RTDETR(m.name)
else:
YOLO(m.name)
@ -190,10 +185,9 @@ def test_workflow():
def test_predict_callback_and_setup():
# test callback addition for prediction
# Test callback addition for prediction
def on_predict_batch_end(predictor): # results -> List[batch_size]
path, im0s, _, _ = predictor.batch
# print('on_predict_batch_end', im0s[0].shape)
im0s = im0s if isinstance(im0s, list) else [im0s]
bs = [predictor.dataset.bs for _ in range(len(path))]
predictor.results = zip(predictor.results, im0s, bs)
@ -204,42 +198,26 @@ def test_predict_callback_and_setup():
dataset = load_inference_source(source=SOURCE)
bs = dataset.bs # noqa access predictor properties
results = model.predict(dataset, stream=True) # source already setup
for _, (result, im0, bs) in enumerate(results):
for r, im0, bs in results:
print('test_callback', im0.shape)
print('test_callback', bs)
boxes = result.boxes # Boxes object for bbox outputs
boxes = r.boxes # Boxes object for bbox outputs
print(boxes)
def _test_results_api(res):
# General apis except plot
res = res.cpu().numpy()
# res = res.cuda()
res = res.to(device='cpu', dtype=torch.float32)
res.save_txt('label.txt', save_conf=False)
res.save_txt('label.txt', save_conf=True)
res.save_crop('crops/')
res.tojson(normalize=False)
res.tojson(normalize=True)
res.plot(pil=True)
res.plot(conf=True, boxes=False)
res.plot()
print(res)
print(res.path)
for k in res.keys:
print(getattr(res, k))
def test_results():
for m in ['yolov8n-pose.pt', 'yolov8n-seg.pt', 'yolov8n.pt', 'yolov8n-cls.pt']:
model = YOLO(m)
res = model([SOURCE, SOURCE])
_test_results_api(res[0])
def test_track():
im = cv2.imread(str(SOURCE))
for m in ['yolov8n-pose.pt', 'yolov8n-seg.pt', 'yolov8n.pt']:
for m in 'yolov8n-pose.pt', 'yolov8n-seg.pt', 'yolov8n.pt', 'yolov8n-cls.pt':
model = YOLO(m)
res = model.track(source=im)
_test_results_api(res[0])
results = model([SOURCE, SOURCE])
for r in results:
r = r.cpu().numpy()
r = r.to(device='cpu', dtype=torch.float32)
r.save_txt(txt_file='label.txt', save_conf=True)
r.save_crop(save_dir='crops/')
r.tojson(normalize=True)
r.plot(pil=True)
r.plot(conf=True, boxes=True)
print(r)
print(r.path)
for k in r.keys:
print(getattr(r, k))

@ -1,6 +1,7 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
import os
from pathlib import Path
import cv2
import matplotlib.pyplot as plt
@ -8,6 +9,8 @@ import numpy as np
import torch
from PIL import Image
from ultralytics.utils import LOGGER
class FastSAMPrompt:
@ -15,8 +18,8 @@ class FastSAMPrompt:
# self.img_path = img_path
self.device = device
self.results = results
self.img_path = img_path
self.ori_img = cv2.imread(img_path)
self.img_path = str(img_path)
self.ori_img = cv2.imread(self.img_path)
# Import and assign clip
try:
@ -111,7 +114,7 @@ class FastSAMPrompt:
original_w = image.shape[1]
# for macOS only
# plt.switch_backend('TkAgg')
plt.figure(figsize=(original_w / 100, original_h / 100))
fig = plt.figure(figsize=(original_w / 100, original_h / 100))
# Add subplot with no margin.
plt.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0)
plt.margins(0, 0)
@ -174,21 +177,11 @@ class FastSAMPrompt:
contour_mask = temp / 255 * color.reshape(1, 1, -1)
plt.imshow(contour_mask)
save_path = output
if not os.path.exists(save_path):
os.makedirs(save_path)
save_path = Path(output) / result_name
save_path.parent.mkdir(exist_ok=True, parents=True)
plt.axis('off')
fig = plt.gcf()
plt.draw()
try:
buf = fig.canvas.tostring_rgb()
except AttributeError:
fig.canvas.draw()
buf = fig.canvas.tostring_rgb()
cols, rows = fig.canvas.get_width_height()
img_array = np.frombuffer(buf, dtype=np.uint8).reshape(rows, cols, 3)
cv2.imwrite(os.path.join(save_path, result_name), cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR))
fig.savefig(save_path)
LOGGER.info(f'Saved to {save_path.absolute()}')
# CPU post process
def fast_show_mask(

@ -1,231 +1,14 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
from multiprocessing.pool import ThreadPool
from pathlib import Path
from ultralytics.models.yolo.segment import SegmentationValidator
from ultralytics.utils.metrics import SegmentMetrics
import numpy as np
import torch
import torch.nn.functional as F
from ultralytics.models.yolo.detect import DetectionValidator
from ultralytics.utils import LOGGER, NUM_THREADS, ops
from ultralytics.utils.checks import check_requirements
from ultralytics.utils.metrics import SegmentMetrics, box_iou, mask_iou
from ultralytics.utils.plotting import output_to_target, plot_images
class FastSAMValidator(DetectionValidator):
class FastSAMValidator(SegmentationValidator):
def __init__(self, dataloader=None, save_dir=None, pbar=None, args=None, _callbacks=None):
"""Initialize SegmentationValidator and set task to 'segment', metrics to SegmentMetrics."""
super().__init__(dataloader, save_dir, pbar, args, _callbacks)
self.args.task = 'segment'
self.args.plots = False # disable ConfusionMatrix and other plots to avoid errors
self.metrics = SegmentMetrics(save_dir=self.save_dir, on_plot=self.on_plot)
def preprocess(self, batch):
"""Preprocesses batch by converting masks to float and sending to device."""
batch = super().preprocess(batch)
batch['masks'] = batch['masks'].to(self.device).float()
return batch
def init_metrics(self, model):
"""Initialize metrics and select mask processing function based on save_json flag."""
super().init_metrics(model)
self.plot_masks = []
if self.args.save_json:
check_requirements('pycocotools>=2.0.6')
self.process = ops.process_mask_upsample # more accurate
else:
self.process = ops.process_mask # faster
def get_desc(self):
"""Return a formatted description of evaluation metrics."""
return ('%22s' + '%11s' * 10) % ('Class', 'Images', 'Instances', 'Box(P', 'R', 'mAP50', 'mAP50-95)', 'Mask(P',
'R', 'mAP50', 'mAP50-95)')
def postprocess(self, preds):
"""Post-processes YOLO predictions and returns output detections with proto."""
p = ops.non_max_suppression(preds[0],
self.args.conf,
self.args.iou,
labels=self.lb,
multi_label=True,
agnostic=self.args.single_cls,
max_det=self.args.max_det,
nc=self.nc)
proto = preds[1][-1] if len(preds[1]) == 3 else preds[1] # second output is len 3 if pt, but only 1 if exported
return p, proto
def update_metrics(self, preds, batch):
"""Metrics."""
for si, (pred, proto) in enumerate(zip(preds[0], preds[1])):
idx = batch['batch_idx'] == si
cls = batch['cls'][idx]
bbox = batch['bboxes'][idx]
nl, npr = cls.shape[0], pred.shape[0] # number of labels, predictions
shape = batch['ori_shape'][si]
correct_masks = torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device) # init
correct_bboxes = torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device) # init
self.seen += 1
if npr == 0:
if nl:
self.stats.append((correct_bboxes, correct_masks, *torch.zeros(
(2, 0), device=self.device), cls.squeeze(-1)))
if self.args.plots:
self.confusion_matrix.process_batch(detections=None, labels=cls.squeeze(-1))
continue
# Masks
midx = [si] if self.args.overlap_mask else idx
gt_masks = batch['masks'][midx]
pred_masks = self.process(proto, pred[:, 6:], pred[:, :4], shape=batch['img'][si].shape[1:])
# Predictions
if self.args.single_cls:
pred[:, 5] = 0
predn = pred.clone()
ops.scale_boxes(batch['img'][si].shape[1:], predn[:, :4], shape,
ratio_pad=batch['ratio_pad'][si]) # native-space pred
# Evaluate
if nl:
height, width = batch['img'].shape[2:]
tbox = ops.xywh2xyxy(bbox) * torch.tensor(
(width, height, width, height), device=self.device) # target boxes
ops.scale_boxes(batch['img'][si].shape[1:], tbox, shape,
ratio_pad=batch['ratio_pad'][si]) # native-space labels
labelsn = torch.cat((cls, tbox), 1) # native-space labels
correct_bboxes = self._process_batch(predn, labelsn)
# TODO: maybe remove these `self.` arguments as they already are member variable
correct_masks = self._process_batch(predn,
labelsn,
pred_masks,
gt_masks,
overlap=self.args.overlap_mask,
masks=True)
if self.args.plots:
self.confusion_matrix.process_batch(predn, labelsn)
# Append correct_masks, correct_boxes, pconf, pcls, tcls
self.stats.append((correct_bboxes, correct_masks, pred[:, 4], pred[:, 5], cls.squeeze(-1)))
pred_masks = torch.as_tensor(pred_masks, dtype=torch.uint8)
if self.args.plots and self.batch_i < 3:
self.plot_masks.append(pred_masks[:15].cpu()) # filter top 15 to plot
# Save
if self.args.save_json:
pred_masks = ops.scale_image(pred_masks.permute(1, 2, 0).contiguous().cpu().numpy(),
shape,
ratio_pad=batch['ratio_pad'][si])
self.pred_to_json(predn, batch['im_file'][si], pred_masks)
# if self.args.save_txt:
# save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / f'{path.stem}.txt')
def finalize_metrics(self, *args, **kwargs):
"""Sets speed and confusion matrix for evaluation metrics."""
self.metrics.speed = self.speed
self.metrics.confusion_matrix = self.confusion_matrix
def _process_batch(self, detections, labels, pred_masks=None, gt_masks=None, overlap=False, masks=False):
"""
Return correct prediction matrix
Arguments:
detections (array[N, 6]), x1, y1, x2, y2, conf, class
labels (array[M, 5]), class, x1, y1, x2, y2
Returns:
correct (array[N, 10]), for 10 IoU levels
"""
if masks:
if overlap:
nl = len(labels)
index = torch.arange(nl, device=gt_masks.device).view(nl, 1, 1) + 1
gt_masks = gt_masks.repeat(nl, 1, 1) # shape(1,640,640) -> (n,640,640)
gt_masks = torch.where(gt_masks == index, 1.0, 0.0)
if gt_masks.shape[1:] != pred_masks.shape[1:]:
gt_masks = F.interpolate(gt_masks[None], pred_masks.shape[1:], mode='bilinear', align_corners=False)[0]
gt_masks = gt_masks.gt_(0.5)
iou = mask_iou(gt_masks.view(gt_masks.shape[0], -1), pred_masks.view(pred_masks.shape[0], -1))
else: # boxes
iou = box_iou(labels[:, 1:], detections[:, :4])
return self.match_predictions(detections[:, 5], labels[:, 0], iou)
def plot_val_samples(self, batch, ni):
"""Plots validation samples with bounding box labels."""
plot_images(batch['img'],
batch['batch_idx'],
batch['cls'].squeeze(-1),
batch['bboxes'],
batch['masks'],
paths=batch['im_file'],
fname=self.save_dir / f'val_batch{ni}_labels.jpg',
names=self.names,
on_plot=self.on_plot)
def plot_predictions(self, batch, preds, ni):
"""Plots batch predictions with masks and bounding boxes."""
plot_images(
batch['img'],
*output_to_target(preds[0], max_det=15), # not set to self.args.max_det due to slow plotting speed
torch.cat(self.plot_masks, dim=0) if len(self.plot_masks) else self.plot_masks,
paths=batch['im_file'],
fname=self.save_dir / f'val_batch{ni}_pred.jpg',
names=self.names,
on_plot=self.on_plot) # pred
self.plot_masks.clear()
def pred_to_json(self, predn, filename, pred_masks):
"""Save one JSON result."""
# Example result = {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}
from pycocotools.mask import encode # noqa
def single_encode(x):
"""Encode predicted masks as RLE and append results to jdict."""
rle = encode(np.asarray(x[:, :, None], order='F', dtype='uint8'))[0]
rle['counts'] = rle['counts'].decode('utf-8')
return rle
stem = Path(filename).stem
image_id = int(stem) if stem.isnumeric() else stem
box = ops.xyxy2xywh(predn[:, :4]) # xywh
box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner
pred_masks = np.transpose(pred_masks, (2, 0, 1))
with ThreadPool(NUM_THREADS) as pool:
rles = pool.map(single_encode, pred_masks)
for i, (p, b) in enumerate(zip(predn.tolist(), box.tolist())):
self.jdict.append({
'image_id': image_id,
'category_id': self.class_map[int(p[5])],
'bbox': [round(x, 3) for x in b],
'score': round(p[4], 5),
'segmentation': rles[i]})
def eval_json(self, stats):
"""Return COCO-style object detection evaluation metrics."""
if self.args.save_json and self.is_coco and len(self.jdict):
anno_json = self.data['path'] / 'annotations/instances_val2017.json' # annotations
pred_json = self.save_dir / 'predictions.json' # predictions
LOGGER.info(f'\nEvaluating pycocotools mAP using {pred_json} and {anno_json}...')
try: # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
check_requirements('pycocotools>=2.0.6')
from pycocotools.coco import COCO # noqa
from pycocotools.cocoeval import COCOeval # noqa
for x in anno_json, pred_json:
assert x.is_file(), f'{x} file not found'
anno = COCO(str(anno_json)) # init annotations api
pred = anno.loadRes(str(pred_json)) # init predictions api (must pass string, not Path)
for i, eval in enumerate([COCOeval(anno, pred, 'bbox'), COCOeval(anno, pred, 'segm')]):
if self.is_coco:
eval.params.imgIds = [int(Path(x).stem) for x in self.dataloader.dataset.im_files] # im to eval
eval.evaluate()
eval.accumulate()
eval.summarize()
idx = i * 4 + 2
stats[self.metrics.keys[idx + 1]], stats[
self.metrics.keys[idx]] = eval.stats[:2] # update mAP50-95 and mAP50
except Exception as e:
LOGGER.warning(f'pycocotools unable to run: {e}')
return stats

@ -16,7 +16,7 @@ class RTDETR(Model):
"""
def __init__(self, model='rtdetr-l.pt') -> None:
if model and not model.split('.')[-1] in ('pt', 'yaml', 'yml'):
if model and model.split('.')[-1] not in ('pt', 'yaml', 'yml'):
raise NotImplementedError('RT-DETR only supports creating from *.pt file or *.yaml file.')
super().__init__(model=model, task='detect')

@ -9,7 +9,7 @@ import torch
import torch.nn as nn
from torch.nn.init import constant_, xavier_uniform_
from ultralytics.utils.tal import dist2bbox, make_anchors
from ultralytics.utils.tal import TORCH_1_10, dist2bbox, make_anchors
from .block import DFL, Proto
from .conv import Conv
@ -267,9 +267,9 @@ class RTDETRDecoder(nn.Module):
def _generate_anchors(self, shapes, grid_size=0.05, dtype=torch.float32, device='cpu', eps=1e-2):
anchors = []
for i, (h, w) in enumerate(shapes):
grid_y, grid_x = torch.meshgrid(torch.arange(end=h, dtype=dtype, device=device),
torch.arange(end=w, dtype=dtype, device=device),
indexing='ij')
sy = torch.arange(end=h, dtype=dtype, device=device)
sx = torch.arange(end=w, dtype=dtype, device=device)
grid_y, grid_x = torch.meshgrid(sy, sx, indexing='ij') if TORCH_1_10 else torch.meshgrid(sy, sx)
grid_xy = torch.stack([grid_x, grid_y], -1) # (h, w, 2)
valid_WH = torch.tensor([h, w], dtype=dtype, device=device)

@ -22,6 +22,10 @@ class TransformerEncoderLayer(nn.Module):
def __init__(self, c1, cm=2048, num_heads=8, dropout=0.0, act=nn.GELU(), normalize_before=False):
super().__init__()
from ...utils.torch_utils import TORCH_1_9
if not TORCH_1_9:
raise ModuleNotFoundError(
'TransformerEncoderLayer() requires torch>=1.9 to use nn.MultiheadAttention(batch_first=True).')
self.ma = nn.MultiheadAttention(c1, num_heads, dropout=dropout, batch_first=True)
# Implementation of Feedforward model
self.fc1 = nn.Linear(c1, cm)

Loading…
Cancel
Save