Improve tests coverage and speed (#4340)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
pull/4329/head^2
Glenn Jocher 1 year ago committed by GitHub
parent d704507217
commit 9f6d48d3cf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 28
      docs/models/fast-sam.md
  2. 2
      tests/conftest.py
  3. 98
      tests/test_cli.py
  4. 8
      tests/test_engine.py
  5. 126
      tests/test_python.py
  6. 27
      ultralytics/models/fastsam/prompt.py
  7. 225
      ultralytics/models/fastsam/val.py
  8. 2
      ultralytics/models/rtdetr/model.py
  9. 8
      ultralytics/nn/modules/head.py
  10. 4
      ultralytics/nn/modules/transformer.py

@ -47,22 +47,17 @@ To perform object detection on an image, use the `predict` method as shown below
from ultralytics import FastSAM from ultralytics import FastSAM
from ultralytics.models.fastsam import FastSAMPrompt from ultralytics.models.fastsam import FastSAMPrompt
# Define image path and inference device # Define an inference source
IMAGE_PATH = 'ultralytics/assets/bus.jpg' source = 'path/to/bus.jpg'
DEVICE = 'cpu'
# Create a FastSAM model # Create a FastSAM model
model = FastSAM('FastSAM-s.pt') # or FastSAM-x.pt model = FastSAM('FastSAM-s.pt') # or FastSAM-x.pt
# Run inference on an image # Run inference on an image
everything_results = model(IMAGE_PATH, everything_results = model(source, device='cpu', retina_masks=True, imgsz=1024, conf=0.4, iou=0.9)
device=DEVICE,
retina_masks=True,
imgsz=1024,
conf=0.4,
iou=0.9)
prompt_process = FastSAMPrompt(IMAGE_PATH, everything_results, device=DEVICE) # Prepare a Prompt Process object
prompt_process = FastSAMPrompt(source, everything_results, device='cpu')
# Everything prompt # Everything prompt
ann = prompt_process.everything_prompt() ann = prompt_process.everything_prompt()
@ -80,6 +75,12 @@ To perform object detection on an image, use the `predict` method as shown below
prompt_process.plot(annotations=ann, output='./') prompt_process.plot(annotations=ann, output='./')
``` ```
=== "CLI"
```bash
# Load a FastSAM model and segment everything with it
yolo segment predict model=FastSAM-s.pt source=path/to/bus.jpg imgsz=640
```
This snippet demonstrates the simplicity of loading a pre-trained model and running a prediction on an image. This snippet demonstrates the simplicity of loading a pre-trained model and running a prediction on an image.
#### Val Usage #### Val Usage
@ -89,7 +90,6 @@ Validation of the model on a dataset can be done as follows:
!!! example "" !!! example ""
=== "Python" === "Python"
```python ```python
from ultralytics import FastSAM from ultralytics import FastSAM
@ -100,6 +100,12 @@ Validation of the model on a dataset can be done as follows:
results = model.val(data='coco8-seg.yaml') results = model.val(data='coco8-seg.yaml')
``` ```
=== "CLI"
```bash
# Load a FastSAM model and validate it on the COCO8-seg example dataset at image size 640
yolo segment val model=FastSAM-s.pt data=coco8-seg.yaml imgsz=640
```
Please note that FastSAM only supports detection and segmentation of a single class of object. This means it will recognize and segment all objects as the same class. Therefore, when preparing the dataset, you need to convert all object category IDs to 0. Please note that FastSAM only supports detection and segmentation of a single class of object. This means it will recognize and segment all objects as the same class. Therefore, when preparing the dataset, you need to convert all object category IDs to 0.
### FastSAM official Usage ### FastSAM official Usage

@ -1,3 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
import pytest import pytest

@ -8,12 +8,16 @@ import pytest
from ultralytics.utils import ONLINE, ROOT, SETTINGS from ultralytics.utils import ONLINE, ROOT, SETTINGS
WEIGHT_DIR = Path(SETTINGS['weights_dir']) WEIGHT_DIR = Path(SETTINGS['weights_dir'])
TASK_ARGS = [ # (task, model, data) TASK_ARGS = [
('detect', 'yolov8n', 'coco8.yaml'), ('segment', 'yolov8n-seg', 'coco8-seg.yaml'), ('detect', 'yolov8n', 'coco8.yaml'),
('classify', 'yolov8n-cls', 'imagenet10'), ('pose', 'yolov8n-pose', 'coco8-pose.yaml')] ('segment', 'yolov8n-seg', 'coco8-seg.yaml'),
EXPORT_ARGS = [ # (model, format) ('classify', 'yolov8n-cls', 'imagenet10'),
('yolov8n', 'torchscript'), ('yolov8n-seg', 'torchscript'), ('yolov8n-cls', 'torchscript'), ('pose', 'yolov8n-pose', 'coco8-pose.yaml'), ] # (task, model, data)
('yolov8n-pose', 'torchscript')] EXPORT_ARGS = [
('yolov8n', 'torchscript'),
('yolov8n-seg', 'torchscript'),
('yolov8n-cls', 'torchscript'),
('yolov8n-pose', 'torchscript'), ] # (model, format)
def run(cmd): def run(cmd):
@ -22,9 +26,12 @@ def run(cmd):
def test_special_modes(): def test_special_modes():
run('yolo checks')
run('yolo settings')
run('yolo help') run('yolo help')
run('yolo checks')
run('yolo version')
run('yolo settings reset')
run('yolo copy-cfg')
run('yolo cfg')
@pytest.mark.parametrize('task,model,data', TASK_ARGS) @pytest.mark.parametrize('task,model,data', TASK_ARGS)
@ -34,21 +41,82 @@ def test_train(task, model, data):
@pytest.mark.parametrize('task,model,data', TASK_ARGS) @pytest.mark.parametrize('task,model,data', TASK_ARGS)
def test_val(task, model, data): def test_val(task, model, data):
run(f'yolo val {task} model={model}.pt data={data} imgsz=32') run(f'yolo val {task} model={WEIGHT_DIR / model}.pt data={data} imgsz=32')
@pytest.mark.parametrize('task,model,data', TASK_ARGS) @pytest.mark.parametrize('task,model,data', TASK_ARGS)
def test_predict(task, model, data): def test_predict(task, model, data):
run(f"yolo predict model={model}.pt source={ROOT / 'assets'} imgsz=32 save save_crop save_txt") run(f"yolo predict model={WEIGHT_DIR / model}.pt source={ROOT / 'assets'} imgsz=32 save save_crop save_txt")
if ONLINE:
run(f'yolo predict model={model}.pt source=https://ultralytics.com/images/bus.jpg imgsz=32')
run(f'yolo predict model={model}.pt source=https://ultralytics.com/assets/decelera_landscape_min.mov imgsz=32') @pytest.mark.skipif(not ONLINE, reason='environment is offline')
run(f'yolo predict model={model}.pt source=https://ultralytics.com/assets/decelera_portrait_min.mov imgsz=32') @pytest.mark.parametrize('task,model,data', TASK_ARGS)
def test_predict_online(task, model, data):
mode = 'track' if task in ('detect', 'segment', 'pose') else 'predict' # mode for video inference
run(f'yolo predict model={WEIGHT_DIR / model}.pt source=https://ultralytics.com/images/bus.jpg imgsz=32')
run(f'yolo {mode} model={WEIGHT_DIR / model}.pt source=https://ultralytics.com/assets/decelera_landscape_min.mov imgsz=32'
)
# Run Python YouTube tracking because CLI is broken. TODO: fix CLI YouTube
# run(f'yolo {mode} model={model}.pt source=https://youtu.be/G17sBkb38XQ imgsz=32 tracker=bytetrack.yaml')
@pytest.mark.parametrize('model,format', EXPORT_ARGS) @pytest.mark.parametrize('model,format', EXPORT_ARGS)
def test_export(model, format): def test_export(model, format):
run(f'yolo export model={model}.pt format={format}') run(f'yolo export model={WEIGHT_DIR / model}.pt format={format} imgsz=32')
# Test SAM, RTDETR Models
def test_rtdetr(task='detect', model='yolov8n-rtdetr.yaml', data='coco8.yaml'):
# Warning: MUST use imgsz=640
run(f'yolo train {task} model={model} data={data} imgsz=640 epochs=1 cache=disk')
run(f'yolo val {task} model={model} data={data} imgsz=640')
run(f"yolo predict {task} model={model} source={ROOT / 'assets/bus.jpg'} imgsz=640 save save_crop save_txt")
def test_fastsam(task='segment', model='FastSAM-s.pt', data='coco8-seg.yaml'):
source = ROOT / 'assets/bus.jpg'
run(f'yolo segment val {task} model={model} data={data} imgsz=32')
run(f'yolo segment predict model={model} source={source} imgsz=32 save save_crop save_txt')
from ultralytics import FastSAM
from ultralytics.models.fastsam import FastSAMPrompt
# Create a FastSAM model
model = FastSAM('FastSAM-s.pt') # or FastSAM-x.pt
# Run inference on an image
everything_results = model(source, device='cpu', retina_masks=True, imgsz=1024, conf=0.4, iou=0.9)
# Everything prompt
prompt_process = FastSAMPrompt(source, everything_results, device='cpu')
ann = prompt_process.everything_prompt()
# Bbox default shape [0,0,0,0] -> [x1,y1,x2,y2]
ann = prompt_process.box_prompt(bbox=[200, 200, 300, 300])
# Text prompt
ann = prompt_process.text_prompt(text='a photo of a dog')
# Point prompt
# points default [[0,0]] [[x1,y1],[x2,y2]]
# point_label default [0] [1,0] 0:background, 1:foreground
ann = prompt_process.point_prompt(points=[[200, 200]], pointlabel=[1])
prompt_process.plot(annotations=ann, output='./')
def test_mobilesam():
from ultralytics import SAM
# Load the model
model = SAM('mobile_sam.pt')
# Predict a segment based on a point prompt
model.predict(ROOT / 'assets/zidane.jpg', points=[900, 370], labels=[1])
# Predict a segment based on a box prompt
model.predict(ROOT / 'assets/zidane.jpg', bboxes=[439, 437, 524, 709])
# Slow Tests # Slow Tests

@ -10,13 +10,13 @@ from ultralytics.utils import DEFAULT_CFG, ROOT, SETTINGS
CFG_DET = 'yolov8n.yaml' CFG_DET = 'yolov8n.yaml'
CFG_SEG = 'yolov8n-seg.yaml' CFG_SEG = 'yolov8n-seg.yaml'
CFG_CLS = 'squeezenet1_0' CFG_CLS = 'yolov8n-cls.yaml' # or 'squeezenet1_0'
CFG = get_cfg(DEFAULT_CFG) CFG = get_cfg(DEFAULT_CFG)
MODEL = Path(SETTINGS['weights_dir']) / 'yolov8n' MODEL = Path(SETTINGS['weights_dir']) / 'yolov8n'
SOURCE = ROOT / 'assets' SOURCE = ROOT / 'assets'
def test_func(model=None): def test_func(*args): # noqa
print('callback test passed') print('callback test passed')
@ -31,6 +31,7 @@ def test_export():
def test_detect(): def test_detect():
overrides = {'data': 'coco8.yaml', 'model': CFG_DET, 'imgsz': 32, 'epochs': 1, 'save': False} overrides = {'data': 'coco8.yaml', 'model': CFG_DET, 'imgsz': 32, 'epochs': 1, 'save': False}
CFG.data = 'coco8.yaml' CFG.data = 'coco8.yaml'
CFG.imgsz = 32
# Trainer # Trainer
trainer = detect.DetectionTrainer(overrides=overrides) trainer = detect.DetectionTrainer(overrides=overrides)
@ -65,6 +66,7 @@ def test_detect():
def test_segment(): def test_segment():
overrides = {'data': 'coco8-seg.yaml', 'model': CFG_SEG, 'imgsz': 32, 'epochs': 1, 'save': False} overrides = {'data': 'coco8-seg.yaml', 'model': CFG_SEG, 'imgsz': 32, 'epochs': 1, 'save': False}
CFG.data = 'coco8-seg.yaml' CFG.data = 'coco8-seg.yaml'
CFG.imgsz = 32
# YOLO(CFG_SEG).train(**overrides) # works # YOLO(CFG_SEG).train(**overrides) # works
# trainer # trainer
@ -99,7 +101,7 @@ def test_segment():
def test_classify(): def test_classify():
overrides = {'data': 'imagenet10', 'model': 'yolov8n-cls.yaml', 'imgsz': 32, 'epochs': 1, 'save': False} overrides = {'data': 'imagenet10', 'model': CFG_CLS, 'imgsz': 32, 'epochs': 1, 'save': False}
CFG.data = 'imagenet10' CFG.data = 'imagenet10'
CFG.imgsz = 32 CFG.imgsz = 32
# YOLO(CFG_SEG).train(**overrides) # works # YOLO(CFG_SEG).train(**overrides) # works

@ -10,9 +10,11 @@ from torchvision.transforms import ToTensor
from ultralytics import RTDETR, YOLO from ultralytics import RTDETR, YOLO
from ultralytics.data.build import load_inference_source from ultralytics.data.build import load_inference_source
from ultralytics.utils import LINUX, ONLINE, ROOT, SETTINGS from ultralytics.utils import LINUX, MACOS, ONLINE, ROOT, SETTINGS
from ultralytics.utils.torch_utils import TORCH_1_9
MODEL = Path(SETTINGS['weights_dir']) / 'path with spaces' / 'yolov8n.pt' # test spaces in path WEIGHTS_DIR = Path(SETTINGS['weights_dir'])
MODEL = WEIGHTS_DIR / 'path with spaces' / 'yolov8n.pt' # test spaces in path
CFG = 'yolov8n.yaml' CFG = 'yolov8n.yaml'
SOURCE = ROOT / 'assets/bus.jpg' SOURCE = ROOT / 'assets/bus.jpg'
SOURCE_GREYSCALE = Path(f'{SOURCE.parent / SOURCE.stem}_greyscale.jpg') SOURCE_GREYSCALE = Path(f'{SOURCE.parent / SOURCE.stem}_greyscale.jpg')
@ -26,39 +28,35 @@ im.convert('RGBA').save(SOURCE_RGBA) # 4-ch PNG with alpha
def test_model_forward(): def test_model_forward():
model = YOLO(CFG) model = YOLO(CFG)
model(SOURCE) model(SOURCE, imgsz=32)
def test_model_info(): def test_model_info():
model = YOLO(CFG)
model.info()
model = YOLO(MODEL) model = YOLO(MODEL)
model.info(verbose=True) model.info(verbose=True)
def test_model_fuse(): def test_model_fuse():
model = YOLO(CFG)
model.fuse()
model = YOLO(MODEL) model = YOLO(MODEL)
model.fuse() model.fuse()
def test_predict_dir(): def test_predict_dir():
model = YOLO(MODEL) model = YOLO(MODEL)
model(source=ROOT / 'assets') model(source=ROOT / 'assets', imgsz=32)
def test_predict_img(): def test_predict_img():
model = YOLO(MODEL) model = YOLO(MODEL)
seg_model = YOLO('yolov8n-seg.pt') seg_model = YOLO(WEIGHTS_DIR / 'yolov8n-seg.pt')
cls_model = YOLO('yolov8n-cls.pt') cls_model = YOLO(WEIGHTS_DIR / 'yolov8n-cls.pt')
pose_model = YOLO('yolov8n-pose.pt') pose_model = YOLO(WEIGHTS_DIR / 'yolov8n-pose.pt')
im = cv2.imread(str(SOURCE)) im = cv2.imread(str(SOURCE))
assert len(model(source=Image.open(SOURCE), save=True, verbose=True)) == 1 # PIL assert len(model(source=Image.open(SOURCE), save=True, verbose=True, imgsz=32)) == 1 # PIL
assert len(model(source=im, save=True, save_txt=True)) == 1 # ndarray assert len(model(source=im, save=True, save_txt=True, imgsz=32)) == 1 # ndarray
assert len(model(source=[im, im], save=True, save_txt=True)) == 2 # batch assert len(model(source=[im, im], save=True, save_txt=True, imgsz=32)) == 2 # batch
assert len(list(model(source=[im, im], save=True, stream=True))) == 2 # stream assert len(list(model(source=[im, im], save=True, stream=True, imgsz=32))) == 2 # stream
assert len(model(torch.zeros(320, 640, 3).numpy())) == 1 # tensor to numpy assert len(model(torch.zeros(320, 640, 3).numpy(), imgsz=32)) == 1 # tensor to numpy
batch = [ batch = [
str(SOURCE), # filename str(SOURCE), # filename
Path(SOURCE), # Path Path(SOURCE), # Path
@ -66,20 +64,20 @@ def test_predict_img():
cv2.imread(str(SOURCE)), # OpenCV cv2.imread(str(SOURCE)), # OpenCV
Image.open(SOURCE), # PIL Image.open(SOURCE), # PIL
np.zeros((320, 640, 3))] # numpy np.zeros((320, 640, 3))] # numpy
assert len(model(batch, visualize=True)) == len(batch) # multiple sources in a batch assert len(model(batch, imgsz=32)) == len(batch) # multiple sources in a batch
# Test tensor inference # Test tensor inference
im = cv2.imread(str(SOURCE)) # OpenCV im = cv2.imread(str(SOURCE)) # OpenCV
t = cv2.resize(im, (32, 32)) t = cv2.resize(im, (32, 32))
t = ToTensor()(t) t = ToTensor()(t)
t = torch.stack([t, t, t, t]) t = torch.stack([t, t, t, t])
results = model(t, visualize=True) results = model(t, imgsz=32)
assert len(results) == t.shape[0] assert len(results) == t.shape[0]
results = seg_model(t, visualize=True) results = seg_model(t, imgsz=32)
assert len(results) == t.shape[0] assert len(results) == t.shape[0]
results = cls_model(t, visualize=True) results = cls_model(t, imgsz=32)
assert len(results) == t.shape[0] assert len(results) == t.shape[0]
results = pose_model(t, visualize=True) results = pose_model(t, imgsz=32)
assert len(results) == t.shape[0] assert len(results) == t.shape[0]
@ -87,16 +85,17 @@ def test_predict_grey_and_4ch():
model = YOLO(MODEL) model = YOLO(MODEL)
for f in SOURCE_RGBA, SOURCE_GREYSCALE: for f in SOURCE_RGBA, SOURCE_GREYSCALE:
for source in Image.open(f), cv2.imread(str(f)), f: for source in Image.open(f), cv2.imread(str(f)), f:
model(source, save=True, verbose=True) model(source, save=True, verbose=True, imgsz=32)
def test_val(): def test_track_stream():
# Test YouTube streaming inference (short 10 frame video) with non-default ByteTrack tracker
model = YOLO(MODEL) model = YOLO(MODEL)
model.val(data='coco8.yaml', imgsz=32) model.track('https://youtu.be/G17sBkb38XQ', imgsz=32, tracker='bytetrack.yaml')
def test_val_scratch(): def test_val():
model = YOLO(CFG) model = YOLO(MODEL)
model.val(data='coco8.yaml', imgsz=32) model.val(data='coco8.yaml', imgsz=32)
@ -109,7 +108,7 @@ def test_amp():
def test_train_scratch(): def test_train_scratch():
model = YOLO(CFG) model = YOLO(CFG)
model.train(data='coco8.yaml', epochs=1, imgsz=32, cache='disk') # test disk caching model.train(data='coco8.yaml', epochs=1, imgsz=32, cache='disk', batch=-1) # test disk caching with AutoBatch
model(SOURCE) model(SOURCE)
@ -125,12 +124,6 @@ def test_export_torchscript():
YOLO(f)(SOURCE) # exported model inference YOLO(f)(SOURCE) # exported model inference
def test_export_torchscript_scratch():
model = YOLO(CFG)
f = model.export(format='torchscript')
YOLO(f)(SOURCE) # exported model inference
def test_export_onnx(): def test_export_onnx():
model = YOLO(MODEL) model = YOLO(MODEL)
f = model.export(format='onnx') f = model.export(format='onnx')
@ -138,14 +131,15 @@ def test_export_onnx():
def test_export_openvino(): def test_export_openvino():
model = YOLO(MODEL) if not MACOS:
f = model.export(format='openvino') model = YOLO(MODEL)
YOLO(f)(SOURCE) # exported model inference f = model.export(format='openvino')
YOLO(f)(SOURCE) # exported model inference
def test_export_coreml(): # sourcery skip: move-assign def test_export_coreml(): # sourcery skip: move-assign
model = YOLO(MODEL) model = YOLO(MODEL)
model.export(format='coreml') model.export(format='coreml', nms=True)
# if MACOS: # if MACOS:
# YOLO(f)(SOURCE) # model prediction only supported on macOS # YOLO(f)(SOURCE) # model prediction only supported on macOS
@ -174,9 +168,10 @@ def test_export_paddle(enabled=False):
def test_all_model_yamls(): def test_all_model_yamls():
for m in list((ROOT / 'models').rglob('yolo*.yaml')): for m in (ROOT / 'cfg' / 'models').rglob('*.yaml'):
if m.name == 'yolov8-rtdetr.yaml': # except the rtdetr model if 'rtdetr' in m.name:
RTDETR(m.name) if TORCH_1_9: # torch<=1.8 issue - TypeError: __init__() got an unexpected keyword argument 'batch_first'
RTDETR(m.name)
else: else:
YOLO(m.name) YOLO(m.name)
@ -190,10 +185,9 @@ def test_workflow():
def test_predict_callback_and_setup(): def test_predict_callback_and_setup():
# test callback addition for prediction # Test callback addition for prediction
def on_predict_batch_end(predictor): # results -> List[batch_size] def on_predict_batch_end(predictor): # results -> List[batch_size]
path, im0s, _, _ = predictor.batch path, im0s, _, _ = predictor.batch
# print('on_predict_batch_end', im0s[0].shape)
im0s = im0s if isinstance(im0s, list) else [im0s] im0s = im0s if isinstance(im0s, list) else [im0s]
bs = [predictor.dataset.bs for _ in range(len(path))] bs = [predictor.dataset.bs for _ in range(len(path))]
predictor.results = zip(predictor.results, im0s, bs) predictor.results = zip(predictor.results, im0s, bs)
@ -204,42 +198,26 @@ def test_predict_callback_and_setup():
dataset = load_inference_source(source=SOURCE) dataset = load_inference_source(source=SOURCE)
bs = dataset.bs # noqa access predictor properties bs = dataset.bs # noqa access predictor properties
results = model.predict(dataset, stream=True) # source already setup results = model.predict(dataset, stream=True) # source already setup
for _, (result, im0, bs) in enumerate(results): for r, im0, bs in results:
print('test_callback', im0.shape) print('test_callback', im0.shape)
print('test_callback', bs) print('test_callback', bs)
boxes = result.boxes # Boxes object for bbox outputs boxes = r.boxes # Boxes object for bbox outputs
print(boxes) print(boxes)
def _test_results_api(res):
# General apis except plot
res = res.cpu().numpy()
# res = res.cuda()
res = res.to(device='cpu', dtype=torch.float32)
res.save_txt('label.txt', save_conf=False)
res.save_txt('label.txt', save_conf=True)
res.save_crop('crops/')
res.tojson(normalize=False)
res.tojson(normalize=True)
res.plot(pil=True)
res.plot(conf=True, boxes=False)
res.plot()
print(res)
print(res.path)
for k in res.keys:
print(getattr(res, k))
def test_results(): def test_results():
for m in ['yolov8n-pose.pt', 'yolov8n-seg.pt', 'yolov8n.pt', 'yolov8n-cls.pt']: for m in 'yolov8n-pose.pt', 'yolov8n-seg.pt', 'yolov8n.pt', 'yolov8n-cls.pt':
model = YOLO(m)
res = model([SOURCE, SOURCE])
_test_results_api(res[0])
def test_track():
im = cv2.imread(str(SOURCE))
for m in ['yolov8n-pose.pt', 'yolov8n-seg.pt', 'yolov8n.pt']:
model = YOLO(m) model = YOLO(m)
res = model.track(source=im) results = model([SOURCE, SOURCE])
_test_results_api(res[0]) for r in results:
r = r.cpu().numpy()
r = r.to(device='cpu', dtype=torch.float32)
r.save_txt(txt_file='label.txt', save_conf=True)
r.save_crop(save_dir='crops/')
r.tojson(normalize=True)
r.plot(pil=True)
r.plot(conf=True, boxes=True)
print(r)
print(r.path)
for k in r.keys:
print(getattr(r, k))

@ -1,6 +1,7 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license # Ultralytics YOLO 🚀, AGPL-3.0 license
import os import os
from pathlib import Path
import cv2 import cv2
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
@ -8,6 +9,8 @@ import numpy as np
import torch import torch
from PIL import Image from PIL import Image
from ultralytics.utils import LOGGER
class FastSAMPrompt: class FastSAMPrompt:
@ -15,8 +18,8 @@ class FastSAMPrompt:
# self.img_path = img_path # self.img_path = img_path
self.device = device self.device = device
self.results = results self.results = results
self.img_path = img_path self.img_path = str(img_path)
self.ori_img = cv2.imread(img_path) self.ori_img = cv2.imread(self.img_path)
# Import and assign clip # Import and assign clip
try: try:
@ -111,7 +114,7 @@ class FastSAMPrompt:
original_w = image.shape[1] original_w = image.shape[1]
# for macOS only # for macOS only
# plt.switch_backend('TkAgg') # plt.switch_backend('TkAgg')
plt.figure(figsize=(original_w / 100, original_h / 100)) fig = plt.figure(figsize=(original_w / 100, original_h / 100))
# Add subplot with no margin. # Add subplot with no margin.
plt.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0) plt.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0)
plt.margins(0, 0) plt.margins(0, 0)
@ -174,21 +177,11 @@ class FastSAMPrompt:
contour_mask = temp / 255 * color.reshape(1, 1, -1) contour_mask = temp / 255 * color.reshape(1, 1, -1)
plt.imshow(contour_mask) plt.imshow(contour_mask)
save_path = output save_path = Path(output) / result_name
if not os.path.exists(save_path): save_path.parent.mkdir(exist_ok=True, parents=True)
os.makedirs(save_path)
plt.axis('off') plt.axis('off')
fig = plt.gcf() fig.savefig(save_path)
plt.draw() LOGGER.info(f'Saved to {save_path.absolute()}')
try:
buf = fig.canvas.tostring_rgb()
except AttributeError:
fig.canvas.draw()
buf = fig.canvas.tostring_rgb()
cols, rows = fig.canvas.get_width_height()
img_array = np.frombuffer(buf, dtype=np.uint8).reshape(rows, cols, 3)
cv2.imwrite(os.path.join(save_path, result_name), cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR))
# CPU post process # CPU post process
def fast_show_mask( def fast_show_mask(

@ -1,231 +1,14 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license # Ultralytics YOLO 🚀, AGPL-3.0 license
from multiprocessing.pool import ThreadPool from ultralytics.models.yolo.segment import SegmentationValidator
from pathlib import Path from ultralytics.utils.metrics import SegmentMetrics
import numpy as np
import torch
import torch.nn.functional as F
from ultralytics.models.yolo.detect import DetectionValidator class FastSAMValidator(SegmentationValidator):
from ultralytics.utils import LOGGER, NUM_THREADS, ops
from ultralytics.utils.checks import check_requirements
from ultralytics.utils.metrics import SegmentMetrics, box_iou, mask_iou
from ultralytics.utils.plotting import output_to_target, plot_images
class FastSAMValidator(DetectionValidator):
def __init__(self, dataloader=None, save_dir=None, pbar=None, args=None, _callbacks=None): def __init__(self, dataloader=None, save_dir=None, pbar=None, args=None, _callbacks=None):
"""Initialize SegmentationValidator and set task to 'segment', metrics to SegmentMetrics.""" """Initialize SegmentationValidator and set task to 'segment', metrics to SegmentMetrics."""
super().__init__(dataloader, save_dir, pbar, args, _callbacks) super().__init__(dataloader, save_dir, pbar, args, _callbacks)
self.args.task = 'segment' self.args.task = 'segment'
self.args.plots = False # disable ConfusionMatrix and other plots to avoid errors
self.metrics = SegmentMetrics(save_dir=self.save_dir, on_plot=self.on_plot) self.metrics = SegmentMetrics(save_dir=self.save_dir, on_plot=self.on_plot)
def preprocess(self, batch):
"""Preprocesses batch by converting masks to float and sending to device."""
batch = super().preprocess(batch)
batch['masks'] = batch['masks'].to(self.device).float()
return batch
def init_metrics(self, model):
"""Initialize metrics and select mask processing function based on save_json flag."""
super().init_metrics(model)
self.plot_masks = []
if self.args.save_json:
check_requirements('pycocotools>=2.0.6')
self.process = ops.process_mask_upsample # more accurate
else:
self.process = ops.process_mask # faster
def get_desc(self):
"""Return a formatted description of evaluation metrics."""
return ('%22s' + '%11s' * 10) % ('Class', 'Images', 'Instances', 'Box(P', 'R', 'mAP50', 'mAP50-95)', 'Mask(P',
'R', 'mAP50', 'mAP50-95)')
def postprocess(self, preds):
"""Post-processes YOLO predictions and returns output detections with proto."""
p = ops.non_max_suppression(preds[0],
self.args.conf,
self.args.iou,
labels=self.lb,
multi_label=True,
agnostic=self.args.single_cls,
max_det=self.args.max_det,
nc=self.nc)
proto = preds[1][-1] if len(preds[1]) == 3 else preds[1] # second output is len 3 if pt, but only 1 if exported
return p, proto
def update_metrics(self, preds, batch):
"""Metrics."""
for si, (pred, proto) in enumerate(zip(preds[0], preds[1])):
idx = batch['batch_idx'] == si
cls = batch['cls'][idx]
bbox = batch['bboxes'][idx]
nl, npr = cls.shape[0], pred.shape[0] # number of labels, predictions
shape = batch['ori_shape'][si]
correct_masks = torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device) # init
correct_bboxes = torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device) # init
self.seen += 1
if npr == 0:
if nl:
self.stats.append((correct_bboxes, correct_masks, *torch.zeros(
(2, 0), device=self.device), cls.squeeze(-1)))
if self.args.plots:
self.confusion_matrix.process_batch(detections=None, labels=cls.squeeze(-1))
continue
# Masks
midx = [si] if self.args.overlap_mask else idx
gt_masks = batch['masks'][midx]
pred_masks = self.process(proto, pred[:, 6:], pred[:, :4], shape=batch['img'][si].shape[1:])
# Predictions
if self.args.single_cls:
pred[:, 5] = 0
predn = pred.clone()
ops.scale_boxes(batch['img'][si].shape[1:], predn[:, :4], shape,
ratio_pad=batch['ratio_pad'][si]) # native-space pred
# Evaluate
if nl:
height, width = batch['img'].shape[2:]
tbox = ops.xywh2xyxy(bbox) * torch.tensor(
(width, height, width, height), device=self.device) # target boxes
ops.scale_boxes(batch['img'][si].shape[1:], tbox, shape,
ratio_pad=batch['ratio_pad'][si]) # native-space labels
labelsn = torch.cat((cls, tbox), 1) # native-space labels
correct_bboxes = self._process_batch(predn, labelsn)
# TODO: maybe remove these `self.` arguments as they already are member variable
correct_masks = self._process_batch(predn,
labelsn,
pred_masks,
gt_masks,
overlap=self.args.overlap_mask,
masks=True)
if self.args.plots:
self.confusion_matrix.process_batch(predn, labelsn)
# Append correct_masks, correct_boxes, pconf, pcls, tcls
self.stats.append((correct_bboxes, correct_masks, pred[:, 4], pred[:, 5], cls.squeeze(-1)))
pred_masks = torch.as_tensor(pred_masks, dtype=torch.uint8)
if self.args.plots and self.batch_i < 3:
self.plot_masks.append(pred_masks[:15].cpu()) # filter top 15 to plot
# Save
if self.args.save_json:
pred_masks = ops.scale_image(pred_masks.permute(1, 2, 0).contiguous().cpu().numpy(),
shape,
ratio_pad=batch['ratio_pad'][si])
self.pred_to_json(predn, batch['im_file'][si], pred_masks)
# if self.args.save_txt:
# save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / f'{path.stem}.txt')
def finalize_metrics(self, *args, **kwargs):
"""Sets speed and confusion matrix for evaluation metrics."""
self.metrics.speed = self.speed
self.metrics.confusion_matrix = self.confusion_matrix
def _process_batch(self, detections, labels, pred_masks=None, gt_masks=None, overlap=False, masks=False):
"""
Return correct prediction matrix
Arguments:
detections (array[N, 6]), x1, y1, x2, y2, conf, class
labels (array[M, 5]), class, x1, y1, x2, y2
Returns:
correct (array[N, 10]), for 10 IoU levels
"""
if masks:
if overlap:
nl = len(labels)
index = torch.arange(nl, device=gt_masks.device).view(nl, 1, 1) + 1
gt_masks = gt_masks.repeat(nl, 1, 1) # shape(1,640,640) -> (n,640,640)
gt_masks = torch.where(gt_masks == index, 1.0, 0.0)
if gt_masks.shape[1:] != pred_masks.shape[1:]:
gt_masks = F.interpolate(gt_masks[None], pred_masks.shape[1:], mode='bilinear', align_corners=False)[0]
gt_masks = gt_masks.gt_(0.5)
iou = mask_iou(gt_masks.view(gt_masks.shape[0], -1), pred_masks.view(pred_masks.shape[0], -1))
else: # boxes
iou = box_iou(labels[:, 1:], detections[:, :4])
return self.match_predictions(detections[:, 5], labels[:, 0], iou)
def plot_val_samples(self, batch, ni):
"""Plots validation samples with bounding box labels."""
plot_images(batch['img'],
batch['batch_idx'],
batch['cls'].squeeze(-1),
batch['bboxes'],
batch['masks'],
paths=batch['im_file'],
fname=self.save_dir / f'val_batch{ni}_labels.jpg',
names=self.names,
on_plot=self.on_plot)
def plot_predictions(self, batch, preds, ni):
"""Plots batch predictions with masks and bounding boxes."""
plot_images(
batch['img'],
*output_to_target(preds[0], max_det=15), # not set to self.args.max_det due to slow plotting speed
torch.cat(self.plot_masks, dim=0) if len(self.plot_masks) else self.plot_masks,
paths=batch['im_file'],
fname=self.save_dir / f'val_batch{ni}_pred.jpg',
names=self.names,
on_plot=self.on_plot) # pred
self.plot_masks.clear()
def pred_to_json(self, predn, filename, pred_masks):
"""Save one JSON result."""
# Example result = {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}
from pycocotools.mask import encode # noqa
def single_encode(x):
"""Encode predicted masks as RLE and append results to jdict."""
rle = encode(np.asarray(x[:, :, None], order='F', dtype='uint8'))[0]
rle['counts'] = rle['counts'].decode('utf-8')
return rle
stem = Path(filename).stem
image_id = int(stem) if stem.isnumeric() else stem
box = ops.xyxy2xywh(predn[:, :4]) # xywh
box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner
pred_masks = np.transpose(pred_masks, (2, 0, 1))
with ThreadPool(NUM_THREADS) as pool:
rles = pool.map(single_encode, pred_masks)
for i, (p, b) in enumerate(zip(predn.tolist(), box.tolist())):
self.jdict.append({
'image_id': image_id,
'category_id': self.class_map[int(p[5])],
'bbox': [round(x, 3) for x in b],
'score': round(p[4], 5),
'segmentation': rles[i]})
def eval_json(self, stats):
"""Return COCO-style object detection evaluation metrics."""
if self.args.save_json and self.is_coco and len(self.jdict):
anno_json = self.data['path'] / 'annotations/instances_val2017.json' # annotations
pred_json = self.save_dir / 'predictions.json' # predictions
LOGGER.info(f'\nEvaluating pycocotools mAP using {pred_json} and {anno_json}...')
try: # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
check_requirements('pycocotools>=2.0.6')
from pycocotools.coco import COCO # noqa
from pycocotools.cocoeval import COCOeval # noqa
for x in anno_json, pred_json:
assert x.is_file(), f'{x} file not found'
anno = COCO(str(anno_json)) # init annotations api
pred = anno.loadRes(str(pred_json)) # init predictions api (must pass string, not Path)
for i, eval in enumerate([COCOeval(anno, pred, 'bbox'), COCOeval(anno, pred, 'segm')]):
if self.is_coco:
eval.params.imgIds = [int(Path(x).stem) for x in self.dataloader.dataset.im_files] # im to eval
eval.evaluate()
eval.accumulate()
eval.summarize()
idx = i * 4 + 2
stats[self.metrics.keys[idx + 1]], stats[
self.metrics.keys[idx]] = eval.stats[:2] # update mAP50-95 and mAP50
except Exception as e:
LOGGER.warning(f'pycocotools unable to run: {e}')
return stats

@ -16,7 +16,7 @@ class RTDETR(Model):
""" """
def __init__(self, model='rtdetr-l.pt') -> None: def __init__(self, model='rtdetr-l.pt') -> None:
if model and not model.split('.')[-1] in ('pt', 'yaml', 'yml'): if model and model.split('.')[-1] not in ('pt', 'yaml', 'yml'):
raise NotImplementedError('RT-DETR only supports creating from *.pt file or *.yaml file.') raise NotImplementedError('RT-DETR only supports creating from *.pt file or *.yaml file.')
super().__init__(model=model, task='detect') super().__init__(model=model, task='detect')

@ -9,7 +9,7 @@ import torch
import torch.nn as nn import torch.nn as nn
from torch.nn.init import constant_, xavier_uniform_ from torch.nn.init import constant_, xavier_uniform_
from ultralytics.utils.tal import dist2bbox, make_anchors from ultralytics.utils.tal import TORCH_1_10, dist2bbox, make_anchors
from .block import DFL, Proto from .block import DFL, Proto
from .conv import Conv from .conv import Conv
@ -267,9 +267,9 @@ class RTDETRDecoder(nn.Module):
def _generate_anchors(self, shapes, grid_size=0.05, dtype=torch.float32, device='cpu', eps=1e-2): def _generate_anchors(self, shapes, grid_size=0.05, dtype=torch.float32, device='cpu', eps=1e-2):
anchors = [] anchors = []
for i, (h, w) in enumerate(shapes): for i, (h, w) in enumerate(shapes):
grid_y, grid_x = torch.meshgrid(torch.arange(end=h, dtype=dtype, device=device), sy = torch.arange(end=h, dtype=dtype, device=device)
torch.arange(end=w, dtype=dtype, device=device), sx = torch.arange(end=w, dtype=dtype, device=device)
indexing='ij') grid_y, grid_x = torch.meshgrid(sy, sx, indexing='ij') if TORCH_1_10 else torch.meshgrid(sy, sx)
grid_xy = torch.stack([grid_x, grid_y], -1) # (h, w, 2) grid_xy = torch.stack([grid_x, grid_y], -1) # (h, w, 2)
valid_WH = torch.tensor([h, w], dtype=dtype, device=device) valid_WH = torch.tensor([h, w], dtype=dtype, device=device)

@ -22,6 +22,10 @@ class TransformerEncoderLayer(nn.Module):
def __init__(self, c1, cm=2048, num_heads=8, dropout=0.0, act=nn.GELU(), normalize_before=False): def __init__(self, c1, cm=2048, num_heads=8, dropout=0.0, act=nn.GELU(), normalize_before=False):
super().__init__() super().__init__()
from ...utils.torch_utils import TORCH_1_9
if not TORCH_1_9:
raise ModuleNotFoundError(
'TransformerEncoderLayer() requires torch>=1.9 to use nn.MultiheadAttention(batch_first=True).')
self.ma = nn.MultiheadAttention(c1, num_heads, dropout=dropout, batch_first=True) self.ma = nn.MultiheadAttention(c1, num_heads, dropout=dropout, batch_first=True)
# Implementation of Feedforward model # Implementation of Feedforward model
self.fc1 = nn.Linear(c1, cm) self.fc1 = nn.Linear(c1, cm)

Loading…
Cancel
Save