diff --git a/docs/reference/data/utils.md b/docs/reference/data/utils.md index 467c482711..39c8e149d0 100644 --- a/docs/reference/data/utils.md +++ b/docs/reference/data/utils.md @@ -45,6 +45,10 @@ keywords: Ultralytics, data utils, YOLO, img2label_paths, exif_size, polygon2mas ## ::: ultralytics.data.utils.polygons2masks_overlap

+--- +## ::: ultralytics.data.utils.find_dataset_yaml +

+ --- ## ::: ultralytics.data.utils.check_det_dataset

diff --git a/docs/reference/utils/__init__.md b/docs/reference/utils/__init__.md index 6369acbadf..31c22ad18b 100644 --- a/docs/reference/utils/__init__.md +++ b/docs/reference/utils/__init__.md @@ -9,6 +9,10 @@ keywords: Ultralytics, Utils, utilitarian functions, colorstr, yaml_save, set_lo Full source code for this file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/__init__.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/__init__.py). Help us fix any issues you see by submitting a [Pull Request](https://docs.ultralytics.com/help/contributing/) 🛠️. Thank you 🙏! +--- +## ::: ultralytics.utils.TQDM +

+ --- ## ::: ultralytics.utils.SimpleClass

diff --git a/docs/reference/utils/ops.md b/docs/reference/utils/ops.md index 9595f5ee07..eaa7978f9a 100644 --- a/docs/reference/utils/ops.md +++ b/docs/reference/utils/ops.md @@ -117,6 +117,10 @@ keywords: Ultralytics YOLO, Utility Operations, segment2box, make_divisible, cli ## ::: ultralytics.utils.ops.masks2segments

+--- +## ::: ultralytics.utils.ops.convert_torch2numpy_batch +

+ --- ## ::: ultralytics.utils.ops.clean_str

diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py index d89edac321..752d7f2bea 100644 --- a/ultralytics/__init__.py +++ b/ultralytics/__init__.py @@ -1,6 +1,6 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license -__version__ = '8.0.169' +__version__ = '8.0.170' from ultralytics.models import RTDETR, SAM, YOLO from ultralytics.models.fastsam import FastSAM diff --git a/ultralytics/engine/results.py b/ultralytics/engine/results.py index eee46909fd..d6763ffba2 100644 --- a/ultralytics/engine/results.py +++ b/ultralytics/engine/results.py @@ -205,7 +205,7 @@ class Results(SimpleClass): ``` """ if img is None and isinstance(self.orig_img, torch.Tensor): - img = (self.orig_img[0].detach().permute(1, 2, 0).cpu().contiguous() * 255).to(torch.uint8).numpy() + img = (self.orig_img[0].detach().permute(1, 2, 0).contiguous() * 255).to(torch.uint8).cpu().numpy() # Deprecation warn TODO: remove in 8.2 if 'show_conf' in kwargs: diff --git a/ultralytics/models/fastsam/predict.py b/ultralytics/models/fastsam/predict.py index a1c2712e18..f94a173816 100644 --- a/ultralytics/models/fastsam/predict.py +++ b/ultralytics/models/fastsam/predict.py @@ -30,21 +30,22 @@ class FastSAMPredictor(DetectionPredictor): full_box[0][4] = p[0][critical_iou_index][:, 4] full_box[0][6:] = p[0][critical_iou_index][:, 6:] p[0][critical_iou_index] = full_box + + if not isinstance(orig_imgs, list): # input images are a torch.Tensor, not a list + orig_imgs = ops.convert_torch2numpy_batch(orig_imgs) + results = [] - is_list = isinstance(orig_imgs, list) # input images are a list, not a torch.Tensor proto = preds[1][-1] if len(preds[1]) == 3 else preds[1] # second output is len 3 if pt, but only 1 if exported for i, pred in enumerate(p): - orig_img = orig_imgs[i] if is_list else orig_imgs + orig_img = orig_imgs[i] img_path = self.batch[0][i] if not len(pred): # save empty boxes masks = None elif self.args.retina_masks: - if is_list: - pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape) + pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape) masks = ops.process_mask_native(proto[i], pred[:, 6:], pred[:, :4], orig_img.shape[:2]) # HWC else: masks = ops.process_mask(proto[i], pred[:, 6:], pred[:, :4], img.shape[2:], upsample=True) # HWC - if is_list: - pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape) + pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape) results.append(Results(orig_img, path=img_path, names=self.model.names, boxes=pred[:, :6], masks=masks)) return results diff --git a/ultralytics/models/nas/predict.py b/ultralytics/models/nas/predict.py index b29d2dc244..fe06c2981f 100644 --- a/ultralytics/models/nas/predict.py +++ b/ultralytics/models/nas/predict.py @@ -23,12 +23,13 @@ class NASPredictor(BasePredictor): max_det=self.args.max_det, classes=self.args.classes) + if not isinstance(orig_imgs, list): # input images are a torch.Tensor, not a list + orig_imgs = ops.convert_torch2numpy_batch(orig_imgs) + results = [] - is_list = isinstance(orig_imgs, list) # input images are a list, not a torch.Tensor for i, pred in enumerate(preds): - orig_img = orig_imgs[i] if is_list else orig_imgs - if is_list: - pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape) + orig_img = orig_imgs[i] + pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape) img_path = self.batch[0][i] results.append(Results(orig_img, path=img_path, names=self.model.names, boxes=pred)) return results diff --git a/ultralytics/models/rtdetr/predict.py b/ultralytics/models/rtdetr/predict.py index b81e97eb4f..d79d370e27 100644 --- a/ultralytics/models/rtdetr/predict.py +++ b/ultralytics/models/rtdetr/predict.py @@ -27,8 +27,11 @@ class RTDETRPredictor(BasePredictor): """Postprocess predictions and returns a list of Results objects.""" nd = preds[0].shape[-1] bboxes, scores = preds[0].split((4, nd - 4), dim=-1) + + if not isinstance(orig_imgs, list): # input images are a torch.Tensor, not a list + orig_imgs = ops.convert_torch2numpy_batch(orig_imgs) + results = [] - is_list = isinstance(orig_imgs, list) # input images are a list, not a torch.Tensor for i, bbox in enumerate(bboxes): # (300, 4) bbox = ops.xywh2xyxy(bbox) score, cls = scores[i].max(-1, keepdim=True) # (300, 1) @@ -36,11 +39,10 @@ class RTDETRPredictor(BasePredictor): if self.args.classes is not None: idx = (cls == torch.tensor(self.args.classes, device=cls.device)).any(1) & idx pred = torch.cat([bbox, score, cls], dim=-1)[idx] # filter - orig_img = orig_imgs[i] if is_list else orig_imgs + orig_img = orig_imgs[i] oh, ow = orig_img.shape[:2] - if is_list: - pred[..., [0, 2]] *= ow - pred[..., [1, 3]] *= oh + pred[..., [0, 2]] *= ow + pred[..., [1, 3]] *= oh img_path = self.batch[0][i] results.append(Results(orig_img, path=img_path, names=self.model.names, boxes=pred)) return results diff --git a/ultralytics/models/sam/predict.py b/ultralytics/models/sam/predict.py index e1c3b481d8..fbab9e5913 100644 --- a/ultralytics/models/sam/predict.py +++ b/ultralytics/models/sam/predict.py @@ -312,10 +312,13 @@ class Predictor(BasePredictor): pred_masks, pred_scores = preds[:2] pred_bboxes = preds[2] if self.segment_all else None names = dict(enumerate(str(i) for i in range(len(pred_masks)))) + + if not isinstance(orig_imgs, list): # input images are a torch.Tensor, not a list + orig_imgs = ops.convert_torch2numpy_batch(orig_imgs) + results = [] - is_list = isinstance(orig_imgs, list) # input images are a list, not a torch.Tensor for i, masks in enumerate([pred_masks]): - orig_img = orig_imgs[i] if is_list else orig_imgs + orig_img = orig_imgs[i] if pred_bboxes is not None: pred_bboxes = ops.scale_boxes(img.shape[2:], pred_bboxes.float(), orig_img.shape, padding=False) cls = torch.arange(len(pred_masks), dtype=torch.int32, device=pred_masks.device) diff --git a/ultralytics/models/yolo/classify/predict.py b/ultralytics/models/yolo/classify/predict.py index 18815137ce..a22616e574 100644 --- a/ultralytics/models/yolo/classify/predict.py +++ b/ultralytics/models/yolo/classify/predict.py @@ -4,7 +4,7 @@ import torch from ultralytics.engine.predictor import BasePredictor from ultralytics.engine.results import Results -from ultralytics.utils import DEFAULT_CFG +from ultralytics.utils import DEFAULT_CFG, ops class ClassificationPredictor(BasePredictor): @@ -38,10 +38,12 @@ class ClassificationPredictor(BasePredictor): def postprocess(self, preds, img, orig_imgs): """Post-processes predictions to return Results objects.""" + if not isinstance(orig_imgs, list): # input images are a torch.Tensor, not a list + orig_imgs = ops.convert_torch2numpy_batch(orig_imgs) + results = [] - is_list = isinstance(orig_imgs, list) # input images are a list, not a torch.Tensor for i, pred in enumerate(preds): - orig_img = orig_imgs[i] if is_list else orig_imgs + orig_img = orig_imgs[i] img_path = self.batch[0][i] results.append(Results(orig_img, path=img_path, names=self.model.names, probs=pred)) return results diff --git a/ultralytics/models/yolo/detect/predict.py b/ultralytics/models/yolo/detect/predict.py index fdf3e176c5..28cbd7cecf 100644 --- a/ultralytics/models/yolo/detect/predict.py +++ b/ultralytics/models/yolo/detect/predict.py @@ -29,12 +29,13 @@ class DetectionPredictor(BasePredictor): max_det=self.args.max_det, classes=self.args.classes) + if not isinstance(orig_imgs, list): # input images are a torch.Tensor, not a list + orig_imgs = ops.convert_torch2numpy_batch(orig_imgs) + results = [] - is_list = isinstance(orig_imgs, list) # input images are a list, not a torch.Tensor for i, pred in enumerate(preds): - orig_img = orig_imgs[i] if is_list else orig_imgs - if is_list: - pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape) + orig_img = orig_imgs[i] + pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape) img_path = self.batch[0][i] results.append(Results(orig_img, path=img_path, names=self.model.names, boxes=pred)) return results diff --git a/ultralytics/models/yolo/pose/predict.py b/ultralytics/models/yolo/pose/predict.py index bf89fb1a9d..14ae40b118 100644 --- a/ultralytics/models/yolo/pose/predict.py +++ b/ultralytics/models/yolo/pose/predict.py @@ -37,10 +37,12 @@ class PosePredictor(DetectionPredictor): classes=self.args.classes, nc=len(self.model.names)) + if not isinstance(orig_imgs, list): # input images are a torch.Tensor, not a list + orig_imgs = ops.convert_torch2numpy_batch(orig_imgs) + results = [] - is_list = isinstance(orig_imgs, list) # input images are a list, not a torch.Tensor for i, pred in enumerate(preds): - orig_img = orig_imgs[i] if is_list else orig_imgs + orig_img = orig_imgs[i] pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape).round() pred_kpts = pred[:, 6:].view(len(pred), *self.model.kpt_shape) if len(pred) else pred[:, 6:] pred_kpts = ops.scale_coords(img.shape[2:], pred_kpts, orig_img.shape) diff --git a/ultralytics/models/yolo/segment/predict.py b/ultralytics/models/yolo/segment/predict.py index 3d650a1a3c..7d51f7d42a 100644 --- a/ultralytics/models/yolo/segment/predict.py +++ b/ultralytics/models/yolo/segment/predict.py @@ -32,21 +32,22 @@ class SegmentationPredictor(DetectionPredictor): max_det=self.args.max_det, nc=len(self.model.names), classes=self.args.classes) + + if not isinstance(orig_imgs, list): # input images are a torch.Tensor, not a list + orig_imgs = ops.convert_torch2numpy_batch(orig_imgs) + results = [] - is_list = isinstance(orig_imgs, list) # input images are a list, not a torch.Tensor proto = preds[1][-1] if len(preds[1]) == 3 else preds[1] # second output is len 3 if pt, but only 1 if exported for i, pred in enumerate(p): - orig_img = orig_imgs[i] if is_list else orig_imgs + orig_img = orig_imgs[i] img_path = self.batch[0][i] if not len(pred): # save empty boxes masks = None elif self.args.retina_masks: - if is_list: - pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape) + pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape) masks = ops.process_mask_native(proto[i], pred[:, 6:], pred[:, :4], orig_img.shape[:2]) # HWC else: masks = ops.process_mask(proto[i], pred[:, 6:], pred[:, :4], img.shape[2:], upsample=True) # HWC - if is_list: - pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape) + pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape) results.append(Results(orig_img, path=img_path, names=self.model.names, boxes=pred[:, :6], masks=masks)) return results diff --git a/ultralytics/utils/__init__.py b/ultralytics/utils/__init__.py index e842294e14..872ce5fd60 100644 --- a/ultralytics/utils/__init__.py +++ b/ultralytics/utils/__init__.py @@ -112,8 +112,8 @@ class TQDM(tqdm_original): Custom Ultralytics tqdm class with different default arguments. Args: - (*args): Positional arguments passed to original tqdm. - (**kwargs): Keyword arguments, with custom defaults applied. + *args (list): Positional arguments passed to original tqdm. + **kwargs (dict): Keyword arguments, with custom defaults applied. """ def __init__(self, *args, **kwargs): diff --git a/ultralytics/utils/ops.py b/ultralytics/utils/ops.py index 7c90906cee..9089d0fa0f 100644 --- a/ultralytics/utils/ops.py +++ b/ultralytics/utils/ops.py @@ -771,6 +771,19 @@ def masks2segments(masks, strategy='largest'): return segments +def convert_torch2numpy_batch(batch: torch.Tensor) -> np.ndarray: + """ + Convert a batch of FP32 torch tensors (0.0-1.0) to a NumPy uint8 array (0-255), changing from BCHW to BHWC layout. + + Args: + batch (torch.Tensor): Input tensor batch of shape (Batch, Channels, Height, Width) and dtype torch.float32. + + Returns: + (np.ndarray): Output NumPy array batch of shape (Batch, Height, Width, Channels) and dtype uint8. + """ + return (batch.permute(0, 2, 3, 1).contiguous() * 255).clamp(0, 255).to(torch.uint8).cpu().numpy() + + def clean_str(s): """ Cleans a string by replacing special characters with underscore _