Improve tests coverage and speed (#4340)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Glenn Jocher 1 year ago committed by GitHub
parent d704507217
commit 9f6d48d3cf
10 changed files (changed line counts in parentheses):

 1. docs/models/fast-sam.md (28)
 2. tests/conftest.py (2)
 3. tests/test_cli.py (98)
 4. tests/test_engine.py (8)
 5. tests/test_python.py (118)
 6. ultralytics/models/fastsam/prompt.py (27)
 7. ultralytics/models/fastsam/val.py (225)
 8. ultralytics/models/rtdetr/model.py (2)
 9. ultralytics/nn/modules/head.py (8)
10. ultralytics/nn/modules/transformer.py (4)

--- a/docs/models/fast-sam.md
+++ b/docs/models/fast-sam.md
@@ -47,22 +47,17 @@ To perform object detection on an image, use the `predict` method as shown below:
         from ultralytics import FastSAM
         from ultralytics.models.fastsam import FastSAMPrompt

-        # Define image path and inference device
-        IMAGE_PATH = 'ultralytics/assets/bus.jpg'
-        DEVICE = 'cpu'
+        # Define an inference source
+        source = 'path/to/bus.jpg'

         # Create a FastSAM model
         model = FastSAM('FastSAM-s.pt')  # or FastSAM-x.pt

         # Run inference on an image
-        everything_results = model(IMAGE_PATH,
-                                   device=DEVICE,
-                                   retina_masks=True,
-                                   imgsz=1024,
-                                   conf=0.4,
-                                   iou=0.9)
+        everything_results = model(source, device='cpu', retina_masks=True, imgsz=1024, conf=0.4, iou=0.9)

-        prompt_process = FastSAMPrompt(IMAGE_PATH, everything_results, device=DEVICE)
+        # Prepare a Prompt Process object
+        prompt_process = FastSAMPrompt(source, everything_results, device='cpu')

         # Everything prompt
         ann = prompt_process.everything_prompt()
@@ -80,6 +75,12 @@ To perform object detection on an image, use the `predict` method as shown below:
         prompt_process.plot(annotations=ann, output='./')
         ```

+    === "CLI"
+
+        ```bash
+        # Load a FastSAM model and segment everything with it
+        yolo segment predict model=FastSAM-s.pt source=path/to/bus.jpg imgsz=640
+        ```

 This snippet demonstrates the simplicity of loading a pre-trained model and running a prediction on an image.

 #### Val Usage
@@ -89,7 +90,6 @@ Validation of the model on a dataset can be done as follows:
 !!! example ""

     === "Python"
-
         ```python
         from ultralytics import FastSAM
@@ -100,6 +100,12 @@ Validation of the model on a dataset can be done as follows:
         results = model.val(data='coco8-seg.yaml')
         ```

+    === "CLI"
+
+        ```bash
+        # Load a FastSAM model and validate it on the COCO8 example dataset at image size 640
+        yolo segment val model=FastSAM-s.pt data=coco8.yaml imgsz=640
+        ```

 Please note that FastSAM only supports detection and segmentation of a single class of object. This means it will recognize and segment all objects as the same class. Therefore, when preparing the dataset, you need to convert all object category IDs to 0.
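Since FastSAM maps every object to class 0, the dataset conversion mentioned in the note above amounts to zeroing the class column of each YOLO-format label file. A minimal sketch (the dataset path is hypothetical):

```python
# Zero the class ID of every YOLO-format label before using FastSAM
# (hypothetical dataset location).
from pathlib import Path

for txt_file in Path('datasets/my-seg-data/labels/train').glob('*.txt'):
    lines = []
    for line in txt_file.read_text().splitlines():
        parts = line.split()
        if parts:
            parts[0] = '0'  # class ID -> 0; coordinates are left untouched
            lines.append(' '.join(parts))
    txt_file.write_text('\n'.join(lines) + '\n')
```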
 ### FastSAM official Usage

--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,3 +1,5 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
+
 import pytest

--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -8,12 +8,16 @@ import pytest
 from ultralytics.utils import ONLINE, ROOT, SETTINGS

 WEIGHT_DIR = Path(SETTINGS['weights_dir'])

-TASK_ARGS = [  # (task, model, data)
-    ('detect', 'yolov8n', 'coco8.yaml'), ('segment', 'yolov8n-seg', 'coco8-seg.yaml'),
-    ('classify', 'yolov8n-cls', 'imagenet10'), ('pose', 'yolov8n-pose', 'coco8-pose.yaml')]
-EXPORT_ARGS = [  # (model, format)
-    ('yolov8n', 'torchscript'), ('yolov8n-seg', 'torchscript'), ('yolov8n-cls', 'torchscript'),
-    ('yolov8n-pose', 'torchscript')]
+TASK_ARGS = [
+    ('detect', 'yolov8n', 'coco8.yaml'),
+    ('segment', 'yolov8n-seg', 'coco8-seg.yaml'),
+    ('classify', 'yolov8n-cls', 'imagenet10'),
+    ('pose', 'yolov8n-pose', 'coco8-pose.yaml'), ]  # (task, model, data)
+EXPORT_ARGS = [
+    ('yolov8n', 'torchscript'),
+    ('yolov8n-seg', 'torchscript'),
+    ('yolov8n-cls', 'torchscript'),
+    ('yolov8n-pose', 'torchscript'), ]  # (model, format)


 def run(cmd):
@@ -22,9 +26,12 @@ def run(cmd):

 def test_special_modes():
-    run('yolo checks')
-    run('yolo settings')
     run('yolo help')
+    run('yolo checks')
+    run('yolo version')
+    run('yolo settings reset')
+    run('yolo copy-cfg')
+    run('yolo cfg')


 @pytest.mark.parametrize('task,model,data', TASK_ARGS)
@@ -34,21 +41,82 @@ def test_train(task, model, data):

 @pytest.mark.parametrize('task,model,data', TASK_ARGS)
 def test_val(task, model, data):
-    run(f'yolo val {task} model={model}.pt data={data} imgsz=32')
+    run(f'yolo val {task} model={WEIGHT_DIR / model}.pt data={data} imgsz=32')


 @pytest.mark.parametrize('task,model,data', TASK_ARGS)
 def test_predict(task, model, data):
-    run(f"yolo predict model={model}.pt source={ROOT / 'assets'} imgsz=32 save save_crop save_txt")
-    if ONLINE:
-        run(f'yolo predict model={model}.pt source=https://ultralytics.com/images/bus.jpg imgsz=32')
-        run(f'yolo predict model={model}.pt source=https://ultralytics.com/assets/decelera_landscape_min.mov imgsz=32')
-        run(f'yolo predict model={model}.pt source=https://ultralytics.com/assets/decelera_portrait_min.mov imgsz=32')
+    run(f"yolo predict model={WEIGHT_DIR / model}.pt source={ROOT / 'assets'} imgsz=32 save save_crop save_txt")
+
+
+@pytest.mark.skipif(not ONLINE, reason='environment is offline')
+@pytest.mark.parametrize('task,model,data', TASK_ARGS)
+def test_predict_online(task, model, data):
+    mode = 'track' if task in ('detect', 'segment', 'pose') else 'predict'  # mode for video inference
+    run(f'yolo predict model={WEIGHT_DIR / model}.pt source=https://ultralytics.com/images/bus.jpg imgsz=32')
+    run(f'yolo {mode} model={WEIGHT_DIR / model}.pt source=https://ultralytics.com/assets/decelera_landscape_min.mov imgsz=32')
+
+    # Run Python YouTube tracking because CLI is broken. TODO: fix CLI YouTube
+    # run(f'yolo {mode} model={model}.pt source=https://youtu.be/G17sBkb38XQ imgsz=32 tracker=bytetrack.yaml')


 @pytest.mark.parametrize('model,format', EXPORT_ARGS)
 def test_export(model, format):
-    run(f'yolo export model={model}.pt format={format}')
+    run(f'yolo export model={WEIGHT_DIR / model}.pt format={format} imgsz=32')
+
+
+# Test SAM, RTDETR Models
+def test_rtdetr(task='detect', model='yolov8n-rtdetr.yaml', data='coco8.yaml'):
+    # Warning: MUST use imgsz=640
+    run(f'yolo train {task} model={model} data={data} imgsz=640 epochs=1 cache=disk')
+    run(f'yolo val {task} model={model} data={data} imgsz=640')
+    run(f"yolo predict {task} model={model} source={ROOT / 'assets/bus.jpg'} imgsz=640 save save_crop save_txt")
+
+
+def test_fastsam(task='segment', model='FastSAM-s.pt', data='coco8-seg.yaml'):
+    source = ROOT / 'assets/bus.jpg'
+
+    run(f'yolo segment val {task} model={model} data={data} imgsz=32')
+    run(f'yolo segment predict model={model} source={source} imgsz=32 save save_crop save_txt')
+
+    from ultralytics import FastSAM
+    from ultralytics.models.fastsam import FastSAMPrompt
+
+    # Create a FastSAM model
+    model = FastSAM('FastSAM-s.pt')  # or FastSAM-x.pt
+
+    # Run inference on an image
+    everything_results = model(source, device='cpu', retina_masks=True, imgsz=1024, conf=0.4, iou=0.9)
+
+    # Everything prompt
+    prompt_process = FastSAMPrompt(source, everything_results, device='cpu')
+    ann = prompt_process.everything_prompt()
+
+    # Bbox default shape [0,0,0,0] -> [x1,y1,x2,y2]
+    ann = prompt_process.box_prompt(bbox=[200, 200, 300, 300])
+
+    # Text prompt
+    ann = prompt_process.text_prompt(text='a photo of a dog')
+
+    # Point prompt
+    # points default [[0,0]] [[x1,y1],[x2,y2]]
+    # point_label default [0] [1,0] 0:background, 1:foreground
+    ann = prompt_process.point_prompt(points=[[200, 200]], pointlabel=[1])
+    prompt_process.plot(annotations=ann, output='./')
+
+
+def test_mobilesam():
+    from ultralytics import SAM
+
+    # Load the model
+    model = SAM('mobile_sam.pt')
+
+    # Predict a segment based on a point prompt
+    model.predict(ROOT / 'assets/zidane.jpg', points=[900, 370], labels=[1])
+
+    # Predict a segment based on a box prompt
+    model.predict(ROOT / 'assets/zidane.jpg', bboxes=[439, 437, 524, 709])


 # Slow Tests

--- a/tests/test_engine.py
+++ b/tests/test_engine.py
@@ -10,13 +10,13 @@ from ultralytics.utils import DEFAULT_CFG, ROOT, SETTINGS

 CFG_DET = 'yolov8n.yaml'
 CFG_SEG = 'yolov8n-seg.yaml'
-CFG_CLS = 'squeezenet1_0'
+CFG_CLS = 'yolov8n-cls.yaml'  # or 'squeezenet1_0'
 CFG = get_cfg(DEFAULT_CFG)
 MODEL = Path(SETTINGS['weights_dir']) / 'yolov8n'
 SOURCE = ROOT / 'assets'


-def test_func(model=None):
+def test_func(*args):  # noqa
     print('callback test passed')
@@ -31,6 +31,7 @@ def test_export():
 def test_detect():
     overrides = {'data': 'coco8.yaml', 'model': CFG_DET, 'imgsz': 32, 'epochs': 1, 'save': False}
     CFG.data = 'coco8.yaml'
+    CFG.imgsz = 32

     # Trainer
     trainer = detect.DetectionTrainer(overrides=overrides)
@@ -65,6 +66,7 @@ def test_detect():
 def test_segment():
     overrides = {'data': 'coco8-seg.yaml', 'model': CFG_SEG, 'imgsz': 32, 'epochs': 1, 'save': False}
     CFG.data = 'coco8-seg.yaml'
+    CFG.imgsz = 32

     # YOLO(CFG_SEG).train(**overrides)  # works
     # trainer
@@ -99,7 +101,7 @@ def test_segment():
 def test_classify():
-    overrides = {'data': 'imagenet10', 'model': 'yolov8n-cls.yaml', 'imgsz': 32, 'epochs': 1, 'save': False}
+    overrides = {'data': 'imagenet10', 'model': CFG_CLS, 'imgsz': 32, 'epochs': 1, 'save': False}
     CFG.data = 'imagenet10'
     CFG.imgsz = 32

     # YOLO(CFG_SEG).train(**overrides)  # works

--- a/tests/test_python.py
+++ b/tests/test_python.py
@@ -10,9 +10,11 @@ from torchvision.transforms import ToTensor

 from ultralytics import RTDETR, YOLO
 from ultralytics.data.build import load_inference_source
-from ultralytics.utils import LINUX, ONLINE, ROOT, SETTINGS
+from ultralytics.utils import LINUX, MACOS, ONLINE, ROOT, SETTINGS
+from ultralytics.utils.torch_utils import TORCH_1_9

-MODEL = Path(SETTINGS['weights_dir']) / 'path with spaces' / 'yolov8n.pt'  # test spaces in path
+WEIGHTS_DIR = Path(SETTINGS['weights_dir'])
+MODEL = WEIGHTS_DIR / 'path with spaces' / 'yolov8n.pt'  # test spaces in path
 CFG = 'yolov8n.yaml'
 SOURCE = ROOT / 'assets/bus.jpg'
 SOURCE_GREYSCALE = Path(f'{SOURCE.parent / SOURCE.stem}_greyscale.jpg')
@@ -26,39 +28,35 @@ im.convert('RGBA').save(SOURCE_RGBA)  # 4-ch PNG with alpha

 def test_model_forward():
     model = YOLO(CFG)
-    model(SOURCE)
+    model(SOURCE, imgsz=32)


 def test_model_info():
-    model = YOLO(CFG)
-    model.info()
     model = YOLO(MODEL)
     model.info(verbose=True)


 def test_model_fuse():
-    model = YOLO(CFG)
-    model.fuse()
     model = YOLO(MODEL)
     model.fuse()


 def test_predict_dir():
     model = YOLO(MODEL)
-    model(source=ROOT / 'assets')
+    model(source=ROOT / 'assets', imgsz=32)


 def test_predict_img():
     model = YOLO(MODEL)
-    seg_model = YOLO('yolov8n-seg.pt')
-    cls_model = YOLO('yolov8n-cls.pt')
-    pose_model = YOLO('yolov8n-pose.pt')
+    seg_model = YOLO(WEIGHTS_DIR / 'yolov8n-seg.pt')
+    cls_model = YOLO(WEIGHTS_DIR / 'yolov8n-cls.pt')
+    pose_model = YOLO(WEIGHTS_DIR / 'yolov8n-pose.pt')
     im = cv2.imread(str(SOURCE))
-    assert len(model(source=Image.open(SOURCE), save=True, verbose=True)) == 1  # PIL
-    assert len(model(source=im, save=True, save_txt=True)) == 1  # ndarray
-    assert len(model(source=[im, im], save=True, save_txt=True)) == 2  # batch
-    assert len(list(model(source=[im, im], save=True, stream=True))) == 2  # stream
-    assert len(model(torch.zeros(320, 640, 3).numpy())) == 1  # tensor to numpy
+    assert len(model(source=Image.open(SOURCE), save=True, verbose=True, imgsz=32)) == 1  # PIL
+    assert len(model(source=im, save=True, save_txt=True, imgsz=32)) == 1  # ndarray
+    assert len(model(source=[im, im], save=True, save_txt=True, imgsz=32)) == 2  # batch
+    assert len(list(model(source=[im, im], save=True, stream=True, imgsz=32))) == 2  # stream
+    assert len(model(torch.zeros(320, 640, 3).numpy(), imgsz=32)) == 1  # tensor to numpy
     batch = [
         str(SOURCE),  # filename
         Path(SOURCE),  # Path
@@ -66,20 +64,20 @@ def test_predict_img():
         cv2.imread(str(SOURCE)),  # OpenCV
         Image.open(SOURCE),  # PIL
         np.zeros((320, 640, 3))]  # numpy
-    assert len(model(batch, visualize=True)) == len(batch)  # multiple sources in a batch
+    assert len(model(batch, imgsz=32)) == len(batch)  # multiple sources in a batch

     # Test tensor inference
     im = cv2.imread(str(SOURCE))  # OpenCV
     t = cv2.resize(im, (32, 32))
     t = ToTensor()(t)
     t = torch.stack([t, t, t, t])
-    results = model(t, visualize=True)
+    results = model(t, imgsz=32)
     assert len(results) == t.shape[0]
-    results = seg_model(t, visualize=True)
+    results = seg_model(t, imgsz=32)
     assert len(results) == t.shape[0]
-    results = cls_model(t, visualize=True)
+    results = cls_model(t, imgsz=32)
     assert len(results) == t.shape[0]
-    results = pose_model(t, visualize=True)
+    results = pose_model(t, imgsz=32)
     assert len(results) == t.shape[0]
@@ -87,16 +85,17 @@ def test_predict_grey_and_4ch():
     model = YOLO(MODEL)
     for f in SOURCE_RGBA, SOURCE_GREYSCALE:
         for source in Image.open(f), cv2.imread(str(f)), f:
-            model(source, save=True, verbose=True)
+            model(source, save=True, verbose=True, imgsz=32)


-def test_val():
+def test_track_stream():
+    # Test YouTube streaming inference (short 10 frame video) with non-default ByteTrack tracker
     model = YOLO(MODEL)
-    model.val(data='coco8.yaml', imgsz=32)
+    model.track('https://youtu.be/G17sBkb38XQ', imgsz=32, tracker='bytetrack.yaml')


-def test_val_scratch():
-    model = YOLO(CFG)
+def test_val():
+    model = YOLO(MODEL)
     model.val(data='coco8.yaml', imgsz=32)
@@ -109,7 +108,7 @@ def test_amp():
 def test_train_scratch():
     model = YOLO(CFG)
-    model.train(data='coco8.yaml', epochs=1, imgsz=32, cache='disk')  # test disk caching
+    model.train(data='coco8.yaml', epochs=1, imgsz=32, cache='disk', batch=-1)  # test disk caching with AutoBatch
     model(SOURCE)
@@ -125,12 +124,6 @@ def test_export_torchscript():
     YOLO(f)(SOURCE)  # exported model inference


-def test_export_torchscript_scratch():
-    model = YOLO(CFG)
-    f = model.export(format='torchscript')
-    YOLO(f)(SOURCE)  # exported model inference
-
-
 def test_export_onnx():
     model = YOLO(MODEL)
     f = model.export(format='onnx')
@@ -138,6 +131,7 @@ def test_export_onnx():
 def test_export_openvino():
+    if not MACOS:
         model = YOLO(MODEL)
         f = model.export(format='openvino')
         YOLO(f)(SOURCE)  # exported model inference
@@ -145,7 +139,7 @@ def test_export_openvino():
 def test_export_coreml():  # sourcery skip: move-assign
     model = YOLO(MODEL)
-    model.export(format='coreml')
+    model.export(format='coreml', nms=True)
     # if MACOS:
     #     YOLO(f)(SOURCE)  # model prediction only supported on macOS
@@ -174,8 +168,9 @@ def test_export_paddle(enabled=False):
 def test_all_model_yamls():
-    for m in list((ROOT / 'models').rglob('yolo*.yaml')):
-        if m.name == 'yolov8-rtdetr.yaml':  # except the rtdetr model
-            RTDETR(m.name)
+    for m in (ROOT / 'cfg' / 'models').rglob('*.yaml'):
+        if 'rtdetr' in m.name:
+            if TORCH_1_9:  # torch<=1.8 issue - TypeError: __init__() got an unexpected keyword argument 'batch_first'
+                RTDETR(m.name)
         else:
             YOLO(m.name)
@@ -190,10 +185,9 @@ def test_workflow():
 def test_predict_callback_and_setup():
-    # test callback addition for prediction
+    # Test callback addition for prediction
     def on_predict_batch_end(predictor):  # results -> List[batch_size]
         path, im0s, _, _ = predictor.batch
-        # print('on_predict_batch_end', im0s[0].shape)
         im0s = im0s if isinstance(im0s, list) else [im0s]
         bs = [predictor.dataset.bs for _ in range(len(path))]
         predictor.results = zip(predictor.results, im0s, bs)
@@ -204,42 +198,26 @@ def test_predict_callback_and_setup():
     dataset = load_inference_source(source=SOURCE)
     bs = dataset.bs  # noqa access predictor properties
     results = model.predict(dataset, stream=True)  # source already setup
-    for _, (result, im0, bs) in enumerate(results):
+    for r, im0, bs in results:
         print('test_callback', im0.shape)
         print('test_callback', bs)
-        boxes = result.boxes  # Boxes object for bbox outputs
+        boxes = r.boxes  # Boxes object for bbox outputs
         print(boxes)


-def _test_results_api(res):
-    # General apis except plot
-    res = res.cpu().numpy()
-    # res = res.cuda()
-    res = res.to(device='cpu', dtype=torch.float32)
-    res.save_txt('label.txt', save_conf=False)
-    res.save_txt('label.txt', save_conf=True)
-    res.save_crop('crops/')
-    res.tojson(normalize=False)
-    res.tojson(normalize=True)
-    res.plot(pil=True)
-    res.plot(conf=True, boxes=False)
-    res.plot()
-    print(res)
-    print(res.path)
-    for k in res.keys:
-        print(getattr(res, k))
-
-
 def test_results():
-    for m in ['yolov8n-pose.pt', 'yolov8n-seg.pt', 'yolov8n.pt', 'yolov8n-cls.pt']:
-        model = YOLO(m)
-        res = model([SOURCE, SOURCE])
-        _test_results_api(res[0])
-
-
-def test_track():
-    im = cv2.imread(str(SOURCE))
-    for m in ['yolov8n-pose.pt', 'yolov8n-seg.pt', 'yolov8n.pt']:
+    for m in 'yolov8n-pose.pt', 'yolov8n-seg.pt', 'yolov8n.pt', 'yolov8n-cls.pt':
         model = YOLO(m)
-        res = model.track(source=im)
-        _test_results_api(res[0])
+        results = model([SOURCE, SOURCE])
+        for r in results:
+            r = r.cpu().numpy()
+            r = r.to(device='cpu', dtype=torch.float32)
+            r.save_txt(txt_file='label.txt', save_conf=True)
+            r.save_crop(save_dir='crops/')
+            r.tojson(normalize=True)
+            r.plot(pil=True)
+            r.plot(conf=True, boxes=True)
+            print(r)
+            print(r.path)
+            for k in r.keys:
+                print(getattr(r, k))

--- a/ultralytics/models/fastsam/prompt.py
+++ b/ultralytics/models/fastsam/prompt.py
@@ -1,6 +1,7 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license

 import os
+from pathlib import Path

 import cv2
 import matplotlib.pyplot as plt
@@ -8,6 +9,8 @@ import numpy as np
 import torch
 from PIL import Image

+from ultralytics.utils import LOGGER
+

 class FastSAMPrompt:
@@ -15,8 +18,8 @@ class FastSAMPrompt:
         # self.img_path = img_path
         self.device = device
         self.results = results
-        self.img_path = img_path
-        self.ori_img = cv2.imread(img_path)
+        self.img_path = str(img_path)
+        self.ori_img = cv2.imread(self.img_path)

         # Import and assign clip
         try:
@@ -111,7 +114,7 @@ class FastSAMPrompt:
         original_w = image.shape[1]
         # for macOS only
         # plt.switch_backend('TkAgg')
-        plt.figure(figsize=(original_w / 100, original_h / 100))
+        fig = plt.figure(figsize=(original_w / 100, original_h / 100))
         # Add subplot with no margin.
         plt.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0)
         plt.margins(0, 0)
@@ -174,21 +177,11 @@ class FastSAMPrompt:
             contour_mask = temp / 255 * color.reshape(1, 1, -1)
             plt.imshow(contour_mask)

-        save_path = output
-        if not os.path.exists(save_path):
-            os.makedirs(save_path)
+        save_path = Path(output) / result_name
+        save_path.parent.mkdir(exist_ok=True, parents=True)
         plt.axis('off')
-        fig = plt.gcf()
-        plt.draw()
-
-        try:
-            buf = fig.canvas.tostring_rgb()
-        except AttributeError:
-            fig.canvas.draw()
-            buf = fig.canvas.tostring_rgb()
-        cols, rows = fig.canvas.get_width_height()
-        img_array = np.frombuffer(buf, dtype=np.uint8).reshape(rows, cols, 3)
-        cv2.imwrite(os.path.join(save_path, result_name), cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR))
+        fig.savefig(save_path)
+        LOGGER.info(f'Saved to {save_path.absolute()}')

     # CPU post process
     def fast_show_mask(
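The `plot()` rewrite above drops the canvas-buffer round trip (`tostring_rgb()` plus `cv2.imwrite()`) in favor of saving the figure directly. A standalone sketch of the same margin-free save pattern, with a synthetic image and a hypothetical output path:

```python
# Margin-free matplotlib save, as in the rewritten FastSAMPrompt.plot()
# (synthetic image, hypothetical output path).
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np

image = np.random.rand(480, 640, 3)  # stand-in for an annotated image
fig = plt.figure(figsize=(640 / 100, 480 / 100))
plt.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0)
plt.margins(0, 0)
plt.imshow(image)
plt.axis('off')

save_path = Path('output') / 'result.jpg'
save_path.parent.mkdir(exist_ok=True, parents=True)
fig.savefig(save_path)  # writes the figure directly; no tostring_rgb()/cv2 round trip
```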

--- a/ultralytics/models/fastsam/val.py
+++ b/ultralytics/models/fastsam/val.py
@@ -1,231 +1,14 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license

-from multiprocessing.pool import ThreadPool
-from pathlib import Path
-
-import numpy as np
-import torch
-import torch.nn.functional as F
-
-from ultralytics.models.yolo.detect import DetectionValidator
-from ultralytics.utils import LOGGER, NUM_THREADS, ops
-from ultralytics.utils.checks import check_requirements
-from ultralytics.utils.metrics import SegmentMetrics, box_iou, mask_iou
-from ultralytics.utils.plotting import output_to_target, plot_images
+from ultralytics.models.yolo.segment import SegmentationValidator
+from ultralytics.utils.metrics import SegmentMetrics


-class FastSAMValidator(DetectionValidator):
+class FastSAMValidator(SegmentationValidator):

     def __init__(self, dataloader=None, save_dir=None, pbar=None, args=None, _callbacks=None):
         """Initialize SegmentationValidator and set task to 'segment', metrics to SegmentMetrics."""
         super().__init__(dataloader, save_dir, pbar, args, _callbacks)
         self.args.task = 'segment'
+        self.args.plots = False  # disable ConfusionMatrix and other plots to avoid errors
         self.metrics = SegmentMetrics(save_dir=self.save_dir, on_plot=self.on_plot)
-
-    def preprocess(self, batch):
-        """Preprocesses batch by converting masks to float and sending to device."""
-        batch = super().preprocess(batch)
-        batch['masks'] = batch['masks'].to(self.device).float()
-        return batch
-
-    def init_metrics(self, model):
-        """Initialize metrics and select mask processing function based on save_json flag."""
-        super().init_metrics(model)
-        self.plot_masks = []
-        if self.args.save_json:
-            check_requirements('pycocotools>=2.0.6')
-            self.process = ops.process_mask_upsample  # more accurate
-        else:
-            self.process = ops.process_mask  # faster
-
-    def get_desc(self):
-        """Return a formatted description of evaluation metrics."""
-        return ('%22s' + '%11s' * 10) % ('Class', 'Images', 'Instances', 'Box(P', 'R', 'mAP50', 'mAP50-95)', 'Mask(P',
-                                         'R', 'mAP50', 'mAP50-95)')
-
-    def postprocess(self, preds):
-        """Post-processes YOLO predictions and returns output detections with proto."""
-        p = ops.non_max_suppression(preds[0],
-                                    self.args.conf,
-                                    self.args.iou,
-                                    labels=self.lb,
-                                    multi_label=True,
-                                    agnostic=self.args.single_cls,
-                                    max_det=self.args.max_det,
-                                    nc=self.nc)
-        proto = preds[1][-1] if len(preds[1]) == 3 else preds[1]  # second output is len 3 if pt, but only 1 if exported
-        return p, proto
-
-    def update_metrics(self, preds, batch):
-        """Metrics."""
-        for si, (pred, proto) in enumerate(zip(preds[0], preds[1])):
-            idx = batch['batch_idx'] == si
-            cls = batch['cls'][idx]
-            bbox = batch['bboxes'][idx]
-            nl, npr = cls.shape[0], pred.shape[0]  # number of labels, predictions
-            shape = batch['ori_shape'][si]
-            correct_masks = torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device)  # init
-            correct_bboxes = torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device)  # init
-            self.seen += 1
-
-            if npr == 0:
-                if nl:
-                    self.stats.append((correct_bboxes, correct_masks, *torch.zeros(
-                        (2, 0), device=self.device), cls.squeeze(-1)))
-                    if self.args.plots:
-                        self.confusion_matrix.process_batch(detections=None, labels=cls.squeeze(-1))
-                continue
-
-            # Masks
-            midx = [si] if self.args.overlap_mask else idx
-            gt_masks = batch['masks'][midx]
-            pred_masks = self.process(proto, pred[:, 6:], pred[:, :4], shape=batch['img'][si].shape[1:])
-
-            # Predictions
-            if self.args.single_cls:
-                pred[:, 5] = 0
-            predn = pred.clone()
-            ops.scale_boxes(batch['img'][si].shape[1:], predn[:, :4], shape,
-                            ratio_pad=batch['ratio_pad'][si])  # native-space pred
-
-            # Evaluate
-            if nl:
-                height, width = batch['img'].shape[2:]
-                tbox = ops.xywh2xyxy(bbox) * torch.tensor(
-                    (width, height, width, height), device=self.device)  # target boxes
-                ops.scale_boxes(batch['img'][si].shape[1:], tbox, shape,
-                                ratio_pad=batch['ratio_pad'][si])  # native-space labels
-                labelsn = torch.cat((cls, tbox), 1)  # native-space labels
-                correct_bboxes = self._process_batch(predn, labelsn)
-                # TODO: maybe remove these `self.` arguments as they already are member variable
-                correct_masks = self._process_batch(predn,
-                                                    labelsn,
-                                                    pred_masks,
-                                                    gt_masks,
-                                                    overlap=self.args.overlap_mask,
-                                                    masks=True)
-                if self.args.plots:
-                    self.confusion_matrix.process_batch(predn, labelsn)
-
-            # Append correct_masks, correct_boxes, pconf, pcls, tcls
-            self.stats.append((correct_bboxes, correct_masks, pred[:, 4], pred[:, 5], cls.squeeze(-1)))
-
-            pred_masks = torch.as_tensor(pred_masks, dtype=torch.uint8)
-            if self.args.plots and self.batch_i < 3:
-                self.plot_masks.append(pred_masks[:15].cpu())  # filter top 15 to plot
-
-            # Save
-            if self.args.save_json:
-                pred_masks = ops.scale_image(pred_masks.permute(1, 2, 0).contiguous().cpu().numpy(),
-                                             shape,
-                                             ratio_pad=batch['ratio_pad'][si])
-                self.pred_to_json(predn, batch['im_file'][si], pred_masks)
-            # if self.args.save_txt:
-            #    save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / f'{path.stem}.txt')
-
-    def finalize_metrics(self, *args, **kwargs):
-        """Sets speed and confusion matrix for evaluation metrics."""
-        self.metrics.speed = self.speed
-        self.metrics.confusion_matrix = self.confusion_matrix
-
-    def _process_batch(self, detections, labels, pred_masks=None, gt_masks=None, overlap=False, masks=False):
-        """
-        Return correct prediction matrix
-        Arguments:
-            detections (array[N, 6]), x1, y1, x2, y2, conf, class
-            labels (array[M, 5]), class, x1, y1, x2, y2
-        Returns:
-            correct (array[N, 10]), for 10 IoU levels
-        """
-        if masks:
-            if overlap:
-                nl = len(labels)
-                index = torch.arange(nl, device=gt_masks.device).view(nl, 1, 1) + 1
-                gt_masks = gt_masks.repeat(nl, 1, 1)  # shape(1,640,640) -> (n,640,640)
-                gt_masks = torch.where(gt_masks == index, 1.0, 0.0)
-            if gt_masks.shape[1:] != pred_masks.shape[1:]:
-                gt_masks = F.interpolate(gt_masks[None], pred_masks.shape[1:], mode='bilinear', align_corners=False)[0]
-                gt_masks = gt_masks.gt_(0.5)
-            iou = mask_iou(gt_masks.view(gt_masks.shape[0], -1), pred_masks.view(pred_masks.shape[0], -1))
-        else:  # boxes
-            iou = box_iou(labels[:, 1:], detections[:, :4])
-        return self.match_predictions(detections[:, 5], labels[:, 0], iou)
-
-    def plot_val_samples(self, batch, ni):
-        """Plots validation samples with bounding box labels."""
-        plot_images(batch['img'],
-                    batch['batch_idx'],
-                    batch['cls'].squeeze(-1),
-                    batch['bboxes'],
-                    batch['masks'],
-                    paths=batch['im_file'],
-                    fname=self.save_dir / f'val_batch{ni}_labels.jpg',
-                    names=self.names,
-                    on_plot=self.on_plot)
-
-    def plot_predictions(self, batch, preds, ni):
-        """Plots batch predictions with masks and bounding boxes."""
-        plot_images(
-            batch['img'],
-            *output_to_target(preds[0], max_det=15),  # not set to self.args.max_det due to slow plotting speed
-            torch.cat(self.plot_masks, dim=0) if len(self.plot_masks) else self.plot_masks,
-            paths=batch['im_file'],
-            fname=self.save_dir / f'val_batch{ni}_pred.jpg',
-            names=self.names,
-            on_plot=self.on_plot)  # pred
-        self.plot_masks.clear()
-
-    def pred_to_json(self, predn, filename, pred_masks):
-        """Save one JSON result."""
-        # Example result = {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}
-        from pycocotools.mask import encode  # noqa
-
-        def single_encode(x):
-            """Encode predicted masks as RLE and append results to jdict."""
-            rle = encode(np.asarray(x[:, :, None], order='F', dtype='uint8'))[0]
-            rle['counts'] = rle['counts'].decode('utf-8')
-            return rle
-
-        stem = Path(filename).stem
-        image_id = int(stem) if stem.isnumeric() else stem
-        box = ops.xyxy2xywh(predn[:, :4])  # xywh
-        box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
-        pred_masks = np.transpose(pred_masks, (2, 0, 1))
-        with ThreadPool(NUM_THREADS) as pool:
-            rles = pool.map(single_encode, pred_masks)
-        for i, (p, b) in enumerate(zip(predn.tolist(), box.tolist())):
-            self.jdict.append({
-                'image_id': image_id,
-                'category_id': self.class_map[int(p[5])],
-                'bbox': [round(x, 3) for x in b],
-                'score': round(p[4], 5),
-                'segmentation': rles[i]})
-
-    def eval_json(self, stats):
-        """Return COCO-style object detection evaluation metrics."""
-        if self.args.save_json and self.is_coco and len(self.jdict):
-            anno_json = self.data['path'] / 'annotations/instances_val2017.json'  # annotations
-            pred_json = self.save_dir / 'predictions.json'  # predictions
-            LOGGER.info(f'\nEvaluating pycocotools mAP using {pred_json} and {anno_json}...')
-            try:  # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
-                check_requirements('pycocotools>=2.0.6')
-                from pycocotools.coco import COCO  # noqa
-                from pycocotools.cocoeval import COCOeval  # noqa
-
-                for x in anno_json, pred_json:
-                    assert x.is_file(), f'{x} file not found'
-                anno = COCO(str(anno_json))  # init annotations api
-                pred = anno.loadRes(str(pred_json))  # init predictions api (must pass string, not Path)
-                for i, eval in enumerate([COCOeval(anno, pred, 'bbox'), COCOeval(anno, pred, 'segm')]):
-                    if self.is_coco:
-                        eval.params.imgIds = [int(Path(x).stem) for x in self.dataloader.dataset.im_files]  # im to eval
-                    eval.evaluate()
-                    eval.accumulate()
-                    eval.summarize()
-                    idx = i * 4 + 2
-                    stats[self.metrics.keys[idx + 1]], stats[
-                        self.metrics.keys[idx]] = eval.stats[:2]  # update mAP50-95 and mAP50
-            except Exception as e:
-                LOGGER.warning(f'pycocotools unable to run: {e}')
-        return stats
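With the per-metric plumbing now inherited from `SegmentationValidator`, the subclass only switches the task, disables plotting, and swaps in `SegmentMetrics`. A minimal usage sketch (assuming `FastSAM-s.pt` is available locally; `model.val()` dispatches to `FastSAMValidator` for FastSAM models):

```python
# Validate FastSAM through the simplified validator (weights assumed local).
from ultralytics import FastSAM

model = FastSAM('FastSAM-s.pt')
metrics = model.val(data='coco8-seg.yaml', imgsz=640)
print(metrics.box.map, metrics.seg.map)  # box and mask mAP50-95
```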

--- a/ultralytics/models/rtdetr/model.py
+++ b/ultralytics/models/rtdetr/model.py
@@ -16,7 +16,7 @@ class RTDETR(Model):
     """

     def __init__(self, model='rtdetr-l.pt') -> None:
-        if model and not model.split('.')[-1] in ('pt', 'yaml', 'yml'):
+        if model and model.split('.')[-1] not in ('pt', 'yaml', 'yml'):
             raise NotImplementedError('RT-DETR only supports creating from *.pt file or *.yaml file.')
         super().__init__(model=model, task='detect')
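The suffix-check rewrite is purely stylistic: Python parses `not x in y` as `not (x in y)`, so the two spellings are equivalent and `not in` is simply the idiomatic form:

```python
# Both forms evaluate identically for any suffix.
model = 'rtdetr-l.onnx'
old_style = not model.split('.')[-1] in ('pt', 'yaml', 'yml')
new_style = model.split('.')[-1] not in ('pt', 'yaml', 'yml')
assert old_style == new_style
```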

--- a/ultralytics/nn/modules/head.py
+++ b/ultralytics/nn/modules/head.py
@@ -9,7 +9,7 @@ import torch
 import torch.nn as nn
 from torch.nn.init import constant_, xavier_uniform_

-from ultralytics.utils.tal import dist2bbox, make_anchors
+from ultralytics.utils.tal import TORCH_1_10, dist2bbox, make_anchors

 from .block import DFL, Proto
 from .conv import Conv
@@ -267,9 +267,9 @@ class RTDETRDecoder(nn.Module):
     def _generate_anchors(self, shapes, grid_size=0.05, dtype=torch.float32, device='cpu', eps=1e-2):
         anchors = []
         for i, (h, w) in enumerate(shapes):
-            grid_y, grid_x = torch.meshgrid(torch.arange(end=h, dtype=dtype, device=device),
-                                            torch.arange(end=w, dtype=dtype, device=device),
-                                            indexing='ij')
+            sy = torch.arange(end=h, dtype=dtype, device=device)
+            sx = torch.arange(end=w, dtype=dtype, device=device)
+            grid_y, grid_x = torch.meshgrid(sy, sx, indexing='ij') if TORCH_1_10 else torch.meshgrid(sy, sx)
             grid_xy = torch.stack([grid_x, grid_y], -1)  # (h, w, 2)
             valid_WH = torch.tensor([h, w], dtype=dtype, device=device)
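The `TORCH_1_10` guard is needed because `torch.meshgrid()` only accepts the `indexing` argument from torch 1.10 onward; earlier releases always used the equivalent 'ij' behavior. A standalone sketch, using `packaging` for the version check where ultralytics derives `TORCH_1_10` from its own `check_version` helper:

```python
# Version-guarded meshgrid call (ultralytics computes TORCH_1_10 via check_version).
import torch
from packaging import version

TORCH_1_10 = version.parse(torch.__version__.split('+')[0]) >= version.parse('1.10.0')

sy = torch.arange(4, dtype=torch.float32)
sx = torch.arange(6, dtype=torch.float32)
grid_y, grid_x = torch.meshgrid(sy, sx, indexing='ij') if TORCH_1_10 else torch.meshgrid(sy, sx)
print(grid_y.shape, grid_x.shape)  # torch.Size([4, 6]) torch.Size([4, 6])
```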

--- a/ultralytics/nn/modules/transformer.py
+++ b/ultralytics/nn/modules/transformer.py
@@ -22,6 +22,10 @@ class TransformerEncoderLayer(nn.Module):

     def __init__(self, c1, cm=2048, num_heads=8, dropout=0.0, act=nn.GELU(), normalize_before=False):
         super().__init__()
+        from ...utils.torch_utils import TORCH_1_9
+        if not TORCH_1_9:
+            raise ModuleNotFoundError(
+                'TransformerEncoderLayer() requires torch>=1.9 to use nn.MultiheadAttention(batch_first=True).')
         self.ma = nn.MultiheadAttention(c1, num_heads, dropout=dropout, batch_first=True)
         # Implementation of Feedforward model
         self.fc1 = nn.Linear(c1, cm)
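The new guard exists because `batch_first` was only added to `nn.MultiheadAttention` in torch 1.9; with it, inputs stay in `(batch, seq, channels)` layout instead of the legacy `(seq, batch, channels)`. A minimal sketch:

```python
# batch_first=True keeps tensors in (batch, sequence, channels) layout;
# on torch<=1.8 this constructor raises a TypeError.
import torch
import torch.nn as nn

ma = nn.MultiheadAttention(embed_dim=256, num_heads=8, batch_first=True)
x = torch.randn(2, 100, 256)  # (batch, sequence, channels)
out, attn_weights = ma(x, x, x)  # self-attention
print(out.shape)  # torch.Size([2, 100, 256])
```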
