Merge branch 'main' into replace-thop

replace-thop
Ultralytics Assistant committed 7 months ago via GitHub
commit 4f5869fd59
16 changed files:

  1. .github/workflows/format.yml (5 changes)
  2. .github/workflows/links.yml (2 changes)
  3. docs/build_docs.py (6 changes)
  4. docs/en/datasets/obb/index.md (2 changes)
  5. docs/en/reference/models/fastsam/utils.md (4 changes)
  6. docs/mkdocs_github_authors.yaml (1 change)
  7. tests/test_python.py (2 changes)
  8. ultralytics/__init__.py (2 changes)
  9. ultralytics/models/fastsam/predict.py (89 changes)
  10. ultralytics/models/fastsam/utils.py (42 changes)
  11. ultralytics/models/nas/model.py (3 changes)
  12. ultralytics/models/yolo/classify/predict.py (4 changes)
  13. ultralytics/models/yolo/detect/predict.py (4 changes)
  14. ultralytics/models/yolo/pose/predict.py (4 changes)
  15. ultralytics/models/yolo/segment/predict.py (4 changes)
  16. ultralytics/utils/downloads.py (2 changes)

@@ -5,6 +5,8 @@
name: Ultralytics Actions
on:
issues:
types: [opened, edited]
pull_request_target:
branches: [main]
types: [opened, closed, synchronize]
@@ -16,7 +18,8 @@ jobs:
- name: Run Ultralytics Formatting
uses: ultralytics/actions@main
with:
token: ${{ secrets.GITHUB_TOKEN }} # automatically generated, do not modify
token: ${{ secrets.PERSONAL_ACCESS_TOKEN || secrets.GITHUB_TOKEN }} # note GITHUB_TOKEN automatically generated
labels: true # autolabel issues and PRs
python: true # format Python code and docstrings
markdown: true # format Markdown
prettier: true # format YAML

@@ -52,6 +52,7 @@ jobs:
--exclude-path docs/hi \
--exclude-path docs/ar \
--github-token ${{ secrets.GITHUB_TOKEN }} \
--header "User-Agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.6478.183 Safari/537.36" \
'./**/*.md' \
'./**/*.html'
@@ -82,6 +83,7 @@ jobs:
--exclude-path docs/hi \
--exclude-path docs/ar \
--github-token ${{ secrets.GITHUB_TOKEN }} \
--header "User-Agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.6478.183 Safari/537.36" \
'./**/*.md' \
'./**/*.html' \
'./**/*.yml' \

@@ -192,7 +192,11 @@ def convert_plaintext_links_to_html(content):
for paragraph in main_content.find_all(["p", "li"]): # Focus on paragraphs and list items
for text_node in paragraph.find_all(string=True, recursive=False):
if text_node.parent.name not in {"a", "code"}: # Ignore links and code blocks
new_text = re.sub(r"(https?://\S+?)(?=[,.!?;:]?\s|[,.!?;:]?$)", r'<a href="\1">\1</a>', str(text_node))
new_text = re.sub(
r'(https?://[^\s()<>]+(?:\.[^\s()<>]+)+)(?<![.,:;\'"])',
r'<a href="\1">\1</a>',
str(text_node),
)
if "<a" in new_text:
new_soup = BeautifulSoup(new_text, "html.parser")
text_node.replace_with(new_soup)
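
For context, the new pattern requires at least one dotted segment in the URL and refuses to end on trailing punctuation, so a link followed by a comma or period no longer swallows the punctuation into the href. A minimal sketch of the behavior (the sample text is illustrative):

```python
import re

# Pattern from the hunk above: at least one dot in the URL body, and the match
# may not end on [.,:;'"], so sentence punctuation stays outside the anchor tag.
URL = re.compile(r'(https?://[^\s()<>]+(?:\.[^\s()<>]+)+)(?<![.,:;\'"])')

text = "Docs: https://docs.ultralytics.com/modes/train, home: https://ultralytics.com."
print(URL.sub(r'<a href="\1">\1</a>', text))
# Both URLs are wrapped in <a> tags; the trailing "," and "." remain plain text.
```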

@@ -15,7 +15,7 @@ Training a precise object detection model with oriented bounding boxes (OBB) req
The YOLO OBB format designates bounding boxes by their four corner points with coordinates normalized between 0 and 1. It follows this format:
```bash
class_index, x1, y1, x2, y2, x3, y3, x4, y4
class_index x1 y1 x2 y2 x3 y3 x4 y4
```
Internally, YOLO processes losses and outputs in the `xywhr` format, which represents the bounding box's center point (xy), width, height, and rotation.
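
As a rough illustration of how the two representations relate, the sketch below parses one label line and converts its four normalized corners into pixel-space center/width/height/rotation via `cv2.minAreaRect`. The label values and image size are made up, and this is not the library's internal conversion routine:

```python
import cv2
import numpy as np

# Hypothetical OBB label: class_index followed by four normalized corner points
label = "0 0.10 0.20 0.40 0.20 0.40 0.30 0.10 0.30"
img_w, img_h = 640, 480

values = label.split()
cls = int(values[0])
corners = np.array(values[1:], dtype=np.float32).reshape(4, 2)
corners *= np.array([img_w, img_h], dtype=np.float32)  # denormalize to pixels

# minAreaRect recovers the center (xy), width, height, and rotation (r)
(cx, cy), (w, h), angle = cv2.minAreaRect(corners)
print(cls, (cx, cy), (w, h), angle)  # xywhr-style values; angle is in degrees here
```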

@@ -13,8 +13,4 @@ keywords: FastSAM, bounding boxes, IoU, Ultralytics, image processing, computer
## ::: ultralytics.models.fastsam.utils.adjust_bboxes_to_image_border
<br><br><hr><br>
## ::: ultralytics.models.fastsam.utils.bbox_iou
<br><br>

@@ -36,6 +36,7 @@ plashchynski@gmail.com: plashchynski
priytosh.revolution@live.com: priytosh-tripathi
rulosanti@gmail.com: null
shuizhuyuanluo@126.com: null
sometimesocrazy@gmail.com: null
stormsson@users.noreply.github.com: stormsson
waxmann.sergiu@me.com: sergiuwaxmann
web@ultralytics.com: UltralyticsAssistant

@@ -95,7 +95,7 @@ def test_predict_img(model_name):
Image.open(SOURCE), # PIL
np.zeros((320, 640, 3), dtype=np.uint8), # numpy
]
assert len(model(batch, imgsz=32, augment=True)) == len(batch) # multiple sources in a batch
assert len(model(batch, imgsz=32)) == len(batch) # multiple sources in a batch
@pytest.mark.parametrize("model", MODELS)
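
The updated test exercises batched inference over mixed source types without the augment flag; a usage sketch along the same lines (weights and image path are illustrative):

```python
import numpy as np
from PIL import Image
from ultralytics import YOLO

model = YOLO("yolov8n.pt")  # fetched automatically if not present locally
batch = [
    Image.open("bus.jpg"),                    # PIL image
    np.zeros((320, 640, 3), dtype=np.uint8),  # raw numpy array
]
results = model(batch, imgsz=32)
assert len(results) == len(batch)  # one Results object per source
```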

@@ -1,6 +1,6 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
__version__ = "8.2.62"
__version__ = "8.2.63"
import os

@@ -1,84 +1,31 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
import torch
from ultralytics.engine.results import Results
from ultralytics.models.fastsam.utils import bbox_iou
from ultralytics.models.yolo.detect.predict import DetectionPredictor
from ultralytics.utils import DEFAULT_CFG, ops
from ultralytics.models.yolo.segment import SegmentationPredictor
from ultralytics.utils.metrics import box_iou
from .utils import adjust_bboxes_to_image_border
class FastSAMPredictor(DetectionPredictor):
class FastSAMPredictor(SegmentationPredictor):
"""
FastSAMPredictor is specialized for fast SAM (Segment Anything Model) segmentation prediction tasks in Ultralytics
YOLO framework.
This class extends the DetectionPredictor, customizing the prediction pipeline specifically for fast SAM.
It adjusts post-processing steps to incorporate mask prediction and non-max suppression while optimizing
for single-class segmentation.
Attributes:
cfg (dict): Configuration parameters for prediction.
overrides (dict, optional): Optional parameter overrides for custom behavior.
_callbacks (dict, optional): Optional list of callback functions to be invoked during prediction.
This class extends the SegmentationPredictor, customizing the prediction pipeline specifically for fast SAM. It
adjusts post-processing steps to incorporate mask prediction and non-max suppression while optimizing for single-
class segmentation.
"""
def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
"""
Initializes the FastSAMPredictor class, inheriting from DetectionPredictor and setting the task to 'segment'.
Args:
cfg (dict): Configuration parameters for prediction.
overrides (dict, optional): Optional parameter overrides for custom behavior.
_callbacks (dict, optional): Optional list of callback functions to be invoked during prediction.
"""
super().__init__(cfg, overrides, _callbacks)
self.args.task = "segment"
def postprocess(self, preds, img, orig_imgs):
"""
Perform post-processing steps on predictions, including non-max suppression and scaling boxes to original image
size, and returns the final results.
Args:
preds (list): The raw output predictions from the model.
img (torch.Tensor): The processed image tensor.
orig_imgs (list | torch.Tensor): The original image or list of images.
Returns:
(list): A list of Results objects, each containing processed boxes, masks, and other metadata.
"""
p = ops.non_max_suppression(
preds[0],
self.args.conf,
self.args.iou,
agnostic=self.args.agnostic_nms,
max_det=self.args.max_det,
nc=1, # set to 1 class since SAM has no class predictions
classes=self.args.classes,
)
full_box = torch.zeros(p[0].shape[1], device=p[0].device)
full_box[2], full_box[3], full_box[4], full_box[6:] = img.shape[3], img.shape[2], 1.0, 1.0
full_box = full_box.view(1, -1)
critical_iou_index = bbox_iou(full_box[0][:4], p[0][:, :4], iou_thres=0.9, image_shape=img.shape[2:])
if critical_iou_index.numel() != 0:
full_box[0][4] = p[0][critical_iou_index][:, 4]
full_box[0][6:] = p[0][critical_iou_index][:, 6:]
p[0][critical_iou_index] = full_box
if not isinstance(orig_imgs, list): # input images are a torch.Tensor, not a list
orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)
results = []
proto = preds[1][-1] if len(preds[1]) == 3 else preds[1] # second output is len 3 if pt, but only 1 if exported
for i, (pred, orig_img, img_path) in enumerate(zip(p, orig_imgs, self.batch[0])):
if not len(pred): # save empty boxes
masks = None
elif self.args.retina_masks:
pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
masks = ops.process_mask_native(proto[i], pred[:, 6:], pred[:, :4], orig_img.shape[:2]) # HWC
else:
masks = ops.process_mask(proto[i], pred[:, 6:], pred[:, :4], img.shape[2:], upsample=True) # HWC
pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
results.append(Results(orig_img, path=img_path, names=self.model.names, boxes=pred[:, :6], masks=masks))
"""Applies box postprocess for FastSAM predictions."""
results = super().postprocess(preds, img, orig_imgs)
for result in results:
full_box = torch.tensor(
[0, 0, result.orig_shape[1], result.orig_shape[0]], device=preds[0].device, dtype=torch.float32
)
boxes = adjust_bboxes_to_image_border(result.boxes.xyxy, result.orig_shape)
idx = torch.nonzero(box_iou(full_box[None], boxes) > 0.9).flatten()
if idx.numel() != 0:
result.boxes.xyxy[idx] = full_box
return results
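
After this refactor, FastSAMPredictor inherits the full mask pipeline from SegmentationPredictor and only adds the full-frame box snap shown above. A usage sketch (weights and image path are illustrative):

```python
from ultralytics import FastSAM

model = FastSAM("FastSAM-s.pt")
results = model("bus.jpg", imgsz=640, conf=0.4, iou=0.9)
for r in results:
    # Boxes overlapping the full frame with IoU > 0.9 have been snapped to the
    # exact image border by the postprocess step above.
    print(r.boxes.xyxy.shape, None if r.masks is None else r.masks.data.shape)
```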

@@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
import torch
def adjust_bboxes_to_image_border(boxes, image_shape, threshold=20):
"""
@@ -25,43 +23,3 @@ def adjust_bboxes_to_image_border(boxes, image_shape, threshold=20):
boxes[boxes[:, 2] > w - threshold, 2] = w # x2
boxes[boxes[:, 3] > h - threshold, 3] = h # y2
return boxes
def bbox_iou(box1, boxes, iou_thres=0.9, image_shape=(640, 640), raw_output=False):
"""
Compute the Intersection-Over-Union of a bounding box with respect to an array of other bounding boxes.
Args:
box1 (torch.Tensor): (4, )
boxes (torch.Tensor): (n, 4)
iou_thres (float): IoU threshold
image_shape (tuple): (height, width)
raw_output (bool): If True, return the raw IoU values instead of the indices
Returns:
high_iou_indices (torch.Tensor): Indices of boxes with IoU > thres
"""
boxes = adjust_bboxes_to_image_border(boxes, image_shape)
# Obtain coordinates for intersections
x1 = torch.max(box1[0], boxes[:, 0])
y1 = torch.max(box1[1], boxes[:, 1])
x2 = torch.min(box1[2], boxes[:, 2])
y2 = torch.min(box1[3], boxes[:, 3])
# Compute the area of intersection
intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)
# Compute the area of both individual boxes
box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
box2_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
# Compute the area of union
union = box1_area + box2_area - intersection
# Compute the IoU
iou = intersection / union # Should be shape (n, )
if raw_output:
return 0 if iou.numel() == 0 else iou
# return indices of boxes with IoU > thres
return torch.nonzero(iou > iou_thres).flatten()
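
The deleted helper is superseded by the generic `box_iou` from `ultralytics.utils.metrics`, which takes `(N, 4)` and `(M, 4)` `xyxy` tensors and returns an `(N, M)` IoU matrix; the border clipping the old helper performed is now done separately by `adjust_bboxes_to_image_border`, as in `predict.py` above. A sketch of the equivalent call (values are illustrative):

```python
import torch
from ultralytics.utils.metrics import box_iou

box1 = torch.tensor([0.0, 0.0, 640.0, 640.0])    # single reference box, xyxy
boxes = torch.tensor([[5.0, 5.0, 635.0, 635.0],  # candidate boxes, (n, 4)
                      [0.0, 0.0, 100.0, 100.0]])

# old: bbox_iou(box1, boxes, iou_thres=0.9)  ->  new equivalent:
iou = box_iou(box1[None], boxes).flatten()   # shape (n,)
idx = torch.nonzero(iou > 0.9).flatten()     # indices of boxes above the threshold
```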

@@ -16,6 +16,7 @@ from pathlib import Path
import torch
from ultralytics.engine.model import Model
from ultralytics.utils.downloads import attempt_download_asset
from ultralytics.utils.torch_utils import model_info, smart_inference_mode
from .predict import NASPredictor
@@ -56,7 +57,7 @@ class NAS(Model):
suffix = Path(weights).suffix
if suffix == ".pt":
self.model = torch.load(weights)
self.model = torch.load(attempt_download_asset(weights))
elif suffix == "":
self.model = super_gradients.training.models.get(weights, pretrained_weights="coco")
# Standardize model
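
With `attempt_download_asset` in the load path, a weights file that is missing locally is fetched from the Ultralytics assets release before `torch.load` runs. A usage sketch (weights name is the standard asset name; requires the super-gradients package):

```python
from ultralytics import NAS

# Previously this call required yolo_nas_s.pt to already exist on disk; now the
# asset is downloaded on demand before torch.load is invoked.
model = NAS("yolo_nas_s.pt")
results = model("bus.jpg")
```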

@@ -54,8 +54,6 @@ class ClassificationPredictor(BasePredictor):
orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)
results = []
for i, pred in enumerate(preds):
orig_img = orig_imgs[i]
img_path = self.batch[0][i]
for pred, orig_img, img_path in zip(preds, orig_imgs, self.batch[0]):
results.append(Results(orig_img, path=img_path, names=self.model.names, probs=pred))
return results

@@ -35,9 +35,7 @@ class DetectionPredictor(BasePredictor):
orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)
results = []
for i, pred in enumerate(preds):
orig_img = orig_imgs[i]
for pred, orig_img, img_path in zip(preds, orig_imgs, self.batch[0]):
pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
img_path = self.batch[0][i]
results.append(Results(orig_img, path=img_path, names=self.model.names, boxes=pred))
return results

@@ -46,12 +46,10 @@ class PosePredictor(DetectionPredictor):
orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)
results = []
for i, pred in enumerate(preds):
orig_img = orig_imgs[i]
for pred, orig_img, img_path in zip(preds, orig_imgs, self.batch[0]):
pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape).round()
pred_kpts = pred[:, 6:].view(len(pred), *self.model.kpt_shape) if len(pred) else pred[:, 6:]
pred_kpts = ops.scale_coords(img.shape[2:], pred_kpts, orig_img.shape)
img_path = self.batch[0][i]
results.append(
Results(orig_img, path=img_path, names=self.model.names, boxes=pred[:, :6], keypoints=pred_kpts)
)

@@ -42,9 +42,7 @@ class SegmentationPredictor(DetectionPredictor):
results = []
proto = preds[1][-1] if isinstance(preds[1], tuple) else preds[1] # tuple if PyTorch model or array if exported
for i, pred in enumerate(p):
orig_img = orig_imgs[i]
img_path = self.batch[0][i]
for i, (pred, orig_img, img_path) in enumerate(zip(p, orig_imgs, self.batch[0])):
if not len(pred): # save empty boxes
masks = None
elif self.args.retina_masks:
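
The four predictor hunks above apply the same mechanical refactor: indexed lookups into parallel sequences are replaced by a single `zip`. A generic before/after sketch:

```python
preds, orig_imgs, paths = ["p0", "p1"], ["i0", "i1"], ["a.jpg", "b.jpg"]

# Before: index into each parallel sequence by hand
for i, pred in enumerate(preds):
    orig_img, img_path = orig_imgs[i], paths[i]

# After: zip the sequences and unpack directly
for pred, orig_img, img_path in zip(preds, orig_imgs, paths):
    pass  # build Results(orig_img, path=img_path, ...) as in the diffs above
```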

@@ -199,7 +199,7 @@ def check_disk_space(url="https://ultralytics.com/assets/coco8.zip", path=Path.c
Check if there is sufficient disk space to download and store a file.
Args:
url (str, optional): The URL to the file. Defaults to 'https://github.com/ultralytics/assets/releases/download/v0.0.0/coco8.zip'.
url (str, optional): The URL to the file. Defaults to 'https://ultralytics.com/assets/coco8.zip'.
path (str | Path, optional): The path or drive to check the available free space on.
sf (float, optional): Safety factor, the multiplier for the required free space. Defaults to 2.0.
hard (bool, optional): Whether to throw an error or not on insufficient disk space. Defaults to True.
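
For reference, a usage sketch of the corrected function based on the signature shown in its docstring (the call values are illustrative):

```python
from ultralytics.utils.downloads import check_disk_space

# With hard=False the function is expected to return False rather than raise
# when less than sf x the remote file size is free on the target drive.
ok = check_disk_space(url="https://ultralytics.com/assets/coco8.zip", sf=2.0, hard=False)
print("enough space" if ok else "insufficient space")
```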
