From a76f5293b4c87216879e0a5a71f7afe5cba22201 Mon Sep 17 00:00:00 2001 From: Muhammad Rizwan Munawar Date: Fri, 25 Oct 2024 04:53:56 +0500 Subject: [PATCH 01/12] Enable default cfg for similar args in solutions. (#17112) Co-authored-by: UltralyticsAssistant Co-authored-by: Ultralytics Assistant <135830346+UltralyticsAssistant@users.noreply.github.com> Co-authored-by: Glenn Jocher --- ultralytics/cfg/solutions/default.yaml | 15 ++++++++------- ultralytics/solutions/parking_management.py | 1 - ultralytics/solutions/solutions.py | 18 +++++++++--------- ultralytics/utils/__init__.py | 2 ++ 4 files changed, 19 insertions(+), 17 deletions(-) diff --git a/ultralytics/cfg/solutions/default.yaml b/ultralytics/cfg/solutions/default.yaml index a353fd2a21..69e430b8c3 100644 --- a/ultralytics/cfg/solutions/default.yaml +++ b/ultralytics/cfg/solutions/default.yaml @@ -1,18 +1,19 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license - # Configuration for Ultralytics Solutions -model: "yolo11n.pt" # The Ultralytics YOLO11 model to be used (e.g., yolo11n.pt for YOLO11 nano version and yolov8n.pt for YOLOv8 nano version) - +# Object counting settings region: # Object counting, queue or speed estimation region points. Default region points are [(20, 400), (1080, 404), (1080, 360), (20, 360)] -line_width: 2 # Width of the annotator used to draw regions on the image/video frames + bounding boxes and tracks drawing. Default value is 2. -show: True # Flag to control whether to display output image or not, you can set this as False i.e. when deploying it on some embedded devices. show_in: True # Flag to display objects moving *into* the defined region show_out: True # Flag to display objects moving *out of* the defined region -classes: # To count specific classes. i.e, if you want to detect, track and count the person with COCO model, you can use classes=0, Default its None + +# Heatmaps settings +colormap: # Colormap for heatmap, Only OPENCV supported colormaps can be used. By default COLORMAP_PARULA will be used for visualization. + +# Workouts monitoring settings up_angle: 145.0 # Workouts up_angle for counts, 145.0 is default value. You can adjust it for different workouts, based on position of keypoints. down_angle: 90 # Workouts down_angle for counts, 90 is default value. You can change it for different workouts, based on position of keypoints. kpts: [6, 8, 10] # Keypoints for workouts monitoring, i.e. If you want to consider keypoints for pushups that have mostly values of [6, 8, 10]. -colormap: # Colormap for heatmap, Only OPENCV supported colormaps can be used. By default COLORMAP_PARULA will be used for visualization. + +# Analytics settings analytics_type: "line" # Analytics type i.e "line", "pie", "bar" or "area" charts. By default, "line" analytics will be used for processing. json_file: # parking system regions file path. 
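The net effect of the hunk above is that every solution now reads shared defaults from `cfg/solutions/default.yaml`, with user kwargs layered on top. A minimal usage sketch under those assumptions (the video path and region points are placeholders, and `ObjectCounter` with its `count()` method is used as the representative solutions API):

```python
# Minimal sketch of the unified solutions config introduced by this patch.
# Assumes ultralytics with this patch applied; "path/to/video.mp4" is a placeholder.
import cv2

from ultralytics import solutions

cap = cv2.VideoCapture("path/to/video.mp4")

# Only `region` is passed; model, line_width, show, classes, etc. now fall back
# to cfg/solutions/default.yaml instead of per-solution hard-coded values.
counter = solutions.ObjectCounter(region=[(20, 400), (1080, 404), (1080, 360), (20, 360)])

while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break
    frame = counter.count(frame)  # returns the annotated frame; in/out counts update internally

cap.release()
```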
diff --git a/ultralytics/solutions/parking_management.py b/ultralytics/solutions/parking_management.py index fa815938ab..a62de99524 100644 --- a/ultralytics/solutions/parking_management.py +++ b/ultralytics/solutions/parking_management.py @@ -168,7 +168,6 @@ class ParkingManagement(BaseSolution): Examples: >>> from ultralytics.solutions import ParkingManagement >>> parking_manager = ParkingManagement(model="yolov8n.pt", json_file="parking_regions.json") - >>> results = parking_manager(source="parking_lot_video.mp4") >>> print(f"Occupied spaces: {parking_manager.pr_info['Occupancy']}") >>> print(f"Available spaces: {parking_manager.pr_info['Available']}") """ diff --git a/ultralytics/solutions/solutions.py b/ultralytics/solutions/solutions.py index 1af0c0ba09..e43aba6441 100644 --- a/ultralytics/solutions/solutions.py +++ b/ultralytics/solutions/solutions.py @@ -1,16 +1,13 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license from collections import defaultdict -from pathlib import Path import cv2 from ultralytics import YOLO -from ultralytics.utils import LOGGER, yaml_load +from ultralytics.utils import DEFAULT_CFG_DICT, DEFAULT_SOL_DICT, LOGGER from ultralytics.utils.checks import check_imshow, check_requirements -DEFAULT_SOL_CFG_PATH = Path(__file__).resolve().parents[1] / "cfg/solutions/default.yaml" - class BaseSolution: """ @@ -55,15 +52,18 @@ class BaseSolution: self.Point = Point # Load config and update with args - self.CFG = yaml_load(DEFAULT_SOL_CFG_PATH) - self.CFG.update(kwargs) - LOGGER.info(f"Ultralytics Solutions: ✅ {self.CFG}") + DEFAULT_SOL_DICT.update(kwargs) + DEFAULT_CFG_DICT.update(kwargs) + self.CFG = {**DEFAULT_SOL_DICT, **DEFAULT_CFG_DICT} + LOGGER.info(f"Ultralytics Solutions: ✅ {DEFAULT_SOL_DICT}") self.region = self.CFG["region"] # Store region data for other classes usage - self.line_width = self.CFG["line_width"] # Store line_width for usage + self.line_width = ( + self.CFG["line_width"] if self.CFG["line_width"] is not None else 2 + ) # Store line_width for usage # Load Model and store classes names - self.model = YOLO(self.CFG["model"]) + self.model = YOLO(self.CFG["model"] if self.CFG["model"] else "yolov8n.pt") self.names = self.model.names # Initialize environment and region setup diff --git a/ultralytics/utils/__init__.py b/ultralytics/utils/__init__.py index 05a4f464b7..50ef7e5ca1 100644 --- a/ultralytics/utils/__init__.py +++ b/ultralytics/utils/__init__.py @@ -38,6 +38,7 @@ FILE = Path(__file__).resolve() ROOT = FILE.parents[1] # YOLO ASSETS = ROOT / "assets" # default images DEFAULT_CFG_PATH = ROOT / "cfg/default.yaml" +DEFAULT_SOL_CFG_PATH = ROOT / "cfg/solutions/default.yaml" # Ultralytics solutions yaml path NUM_THREADS = min(8, max(1, os.cpu_count() - 1)) # number of YOLO multiprocessing threads AUTOINSTALL = str(os.getenv("YOLO_AUTOINSTALL", True)).lower() == "true" # global auto-install mode VERBOSE = str(os.getenv("YOLO_VERBOSE", True)).lower() == "true" # global verbose mode @@ -508,6 +509,7 @@ def yaml_print(yaml_file: Union[str, Path, dict]) -> None: # Default configuration DEFAULT_CFG_DICT = yaml_load(DEFAULT_CFG_PATH) +DEFAULT_SOL_DICT = yaml_load(DEFAULT_SOL_CFG_PATH) # Ultralytics solutions configuration for k, v in DEFAULT_CFG_DICT.items(): if isinstance(v, str) and v.lower() == "none": DEFAULT_CFG_DICT[k] = None From 55eec8347f783153478fcee46b49e29a036a3ba8 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Fri, 25 Oct 2024 01:54:48 +0200 Subject: [PATCH 02/12] Add Binder Notebook badge (#17074) Co-authored-by: Ultralytics Assistant 
<135830346+UltralyticsAssistant@users.noreply.github.com> --- README.md | 1 + README.zh-CN.md | 1 + 2 files changed, 2 insertions(+) diff --git a/README.md b/README.md index c70ed6a41b..51f13230ed 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,7 @@ Run Ultralytics on Gradient Open Ultralytics In Colab Open Ultralytics In Kaggle + Open Ultralytics In Binder
diff --git a/README.zh-CN.md b/README.zh-CN.md index 53cb7e05d6..d7665f166d 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -17,6 +17,7 @@ Run Ultralytics on Gradient Open Ultralytics In Colab Open Ultralytics In Kaggle + Open Ultralytics In Binder
From f6c378835b0904b785cf19cdc492bdcfbd35bbf2 Mon Sep 17 00:00:00 2001
From: Laughing <61612323+Laughing-q@users.noreply.github.com>
Date: Fri, 25 Oct 2024 08:17:46 +0800
Subject: [PATCH 03/12] `ultralytics 8.3.22` SAM2.1 integration (#17131)

Co-authored-by: UltralyticsAssistant
Co-authored-by: Glenn Jocher
---
 docs/en/models/sam-2.md               |  28 +++--
 tests/test_cuda.py                    |   2 +-
 ultralytics/__init__.py               |   2 +-
 ultralytics/cfg/__init__.py           |   2 +-
 ultralytics/models/sam/build.py       |   8 ++
 ultralytics/models/sam/modules/sam.py | 147 ++++++++++++++++++++------
 6 files changed, 142 insertions(+), 47 deletions(-)

diff --git a/docs/en/models/sam-2.md b/docs/en/models/sam-2.md
index 5120498e24..d5e8888e29 100644
--- a/docs/en/models/sam-2.md
+++ b/docs/en/models/sam-2.md
@@ -1,9 +1,13 @@
 ---
 comments: true
 description: Discover SAM 2, the next generation of Meta's Segment Anything Model, supporting real-time promptable segmentation in both images and videos with state-of-the-art performance. Learn about its key features, datasets, and how to use it.
-keywords: SAM 2, Segment Anything, video segmentation, image segmentation, promptable segmentation, zero-shot performance, SA-V dataset, Ultralytics, real-time segmentation, AI, machine learning
+keywords: SAM 2, SAM 2.1, Segment Anything, video segmentation, image segmentation, promptable segmentation, zero-shot performance, SA-V dataset, Ultralytics, real-time segmentation, AI, machine learning
 ---
 
+!!! tip "SAM 2.1"
+
+    We now support the more accurate SAM 2.1 model. Please give it a try!
+
 # SAM 2: Segment Anything Model 2
 
 SAM 2, the successor to Meta's [Segment Anything Model (SAM)](sam.md), is a cutting-edge tool designed for comprehensive object segmentation in both images and videos. It excels in handling complex visual data through a unified, promptable model architecture that supports real-time processing and zero-shot generalization.
@@ -114,12 +118,16 @@ pip install ultralytics
 ```
 
 The following table details the available SAM 2 models, their pre-trained weights, supported tasks, and compatibility with different operating modes like [Inference](../modes/predict.md), [Validation](../modes/val.md), [Training](../modes/train.md), and [Export](../modes/export.md).
-| Model Type | Pre-trained Weights | Tasks Supported | Inference | Validation | Training | Export | -| ----------- | ------------------------------------------------------------------------------------- | -------------------------------------------- | --------- | ---------- | -------- | ------ | -| SAM 2 tiny | [sam2_t.pt](https://github.com/ultralytics/assets/releases/download/v8.2.0/sam2_t.pt) | [Instance Segmentation](../tasks/segment.md) | ✅ | ❌ | ❌ | ❌ | -| SAM 2 small | [sam2_s.pt](https://github.com/ultralytics/assets/releases/download/v8.2.0/sam2_s.pt) | [Instance Segmentation](../tasks/segment.md) | ✅ | ❌ | ❌ | ❌ | -| SAM 2 base | [sam2_b.pt](https://github.com/ultralytics/assets/releases/download/v8.2.0/sam2_b.pt) | [Instance Segmentation](../tasks/segment.md) | ✅ | ❌ | ❌ | ❌ | -| SAM 2 large | [sam2_l.pt](https://github.com/ultralytics/assets/releases/download/v8.2.0/sam2_l.pt) | [Instance Segmentation](../tasks/segment.md) | ✅ | ❌ | ❌ | ❌ | +| Model Type | Pre-trained Weights | Tasks Supported | Inference | Validation | Training | Export | +| ------------- | ----------------------------------------------------------------------------------------- | -------------------------------------------- | --------- | ---------- | -------- | ------ | +| SAM 2 tiny | [sam2_t.pt](https://github.com/ultralytics/assets/releases/download/v8.3.0/sam2_t.pt) | [Instance Segmentation](../tasks/segment.md) | ✅ | ❌ | ❌ | ❌ | +| SAM 2 small | [sam2_s.pt](https://github.com/ultralytics/assets/releases/download/v8.3.0/sam2_s.pt) | [Instance Segmentation](../tasks/segment.md) | ✅ | ❌ | ❌ | ❌ | +| SAM 2 base | [sam2_b.pt](https://github.com/ultralytics/assets/releases/download/v8.3.0/sam2_b.pt) | [Instance Segmentation](../tasks/segment.md) | ✅ | ❌ | ❌ | ❌ | +| SAM 2 large | [sam2_l.pt](https://github.com/ultralytics/assets/releases/download/v8.3.0/sam2_l.pt) | [Instance Segmentation](../tasks/segment.md) | ✅ | ❌ | ❌ | ❌ | +| SAM 2.1 tiny | [sam2.1_t.pt](https://github.com/ultralytics/assets/releases/download/v8.3.0/sam2.1_t.pt) | [Instance Segmentation](../tasks/segment.md) | ✅ | ❌ | ❌ | ❌ | +| SAM 2.1 small | [sam2.1_s.pt](https://github.com/ultralytics/assets/releases/download/v8.3.0/sam2.1_s.pt) | [Instance Segmentation](../tasks/segment.md) | ✅ | ❌ | ❌ | ❌ | +| SAM 2.1 base | [sam2.1_b.pt](https://github.com/ultralytics/assets/releases/download/v8.3.0/sam2.1_b.pt) | [Instance Segmentation](../tasks/segment.md) | ✅ | ❌ | ❌ | ❌ | +| SAM 2.1 large | [sam2.1_l.pt](https://github.com/ultralytics/assets/releases/download/v8.3.0/sam2.1_l.pt) | [Instance Segmentation](../tasks/segment.md) | ✅ | ❌ | ❌ | ❌ | ### SAM 2 Prediction Examples @@ -137,7 +145,7 @@ SAM 2 can be utilized across a broad spectrum of tasks, including real-time vide from ultralytics import SAM # Load a model - model = SAM("sam2_b.pt") + model = SAM("sam2.1_b.pt") # Display model information (optional) model.info() @@ -170,7 +178,7 @@ SAM 2 can be utilized across a broad spectrum of tasks, including real-time vide from ultralytics import SAM # Load a model - model = SAM("sam2_b.pt") + model = SAM("sam2.1_b.pt") # Display model information (optional) model.info() @@ -183,7 +191,7 @@ SAM 2 can be utilized across a broad spectrum of tasks, including real-time vide ```bash # Run inference with a SAM 2 model - yolo predict model=sam2_b.pt source=path/to/video.mp4 + yolo predict model=sam2.1_b.pt source=path/to/video.mp4 ``` - This example demonstrates how SAM 2 can be used to segment the entire content of an image or video if no prompts 
(bboxes/points/masks) are provided. diff --git a/tests/test_cuda.py b/tests/test_cuda.py index 89f8c39b25..4fd1a7aee3 100644 --- a/tests/test_cuda.py +++ b/tests/test_cuda.py @@ -116,7 +116,7 @@ def test_predict_sam(): from ultralytics.models.sam import Predictor as SAMPredictor # Load a model - model = SAM(WEIGHTS_DIR / "sam_b.pt") + model = SAM(WEIGHTS_DIR / "sam2.1_b.pt") # Display model information (optional) model.info() diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py index ac22fe8620..b9f92151a6 100644 --- a/ultralytics/__init__.py +++ b/ultralytics/__init__.py @@ -1,6 +1,6 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license -__version__ = "8.3.21" +__version__ = "8.3.22" import os diff --git a/ultralytics/cfg/__init__.py b/ultralytics/cfg/__init__.py index 153ab27e38..7c36a3b918 100644 --- a/ultralytics/cfg/__init__.py +++ b/ultralytics/cfg/__init__.py @@ -787,7 +787,7 @@ def entrypoint(debug=""): from ultralytics import FastSAM model = FastSAM(model) - elif "sam_" in stem or "sam2_" in stem: + elif "sam_" in stem or "sam2_" in stem or "sam2.1_" in stem: from ultralytics import SAM model = SAM(model) diff --git a/ultralytics/models/sam/build.py b/ultralytics/models/sam/build.py index e110531244..cee5133a09 100644 --- a/ultralytics/models/sam/build.py +++ b/ultralytics/models/sam/build.py @@ -263,6 +263,7 @@ def _build_sam2( memory_attention = MemoryAttention(d_model=256, pos_enc_at_input=True, num_layers=4, layer=MemoryAttentionLayer()) memory_encoder = MemoryEncoder(out_dim=64) + is_sam2_1 = checkpoint is not None and "sam2.1" in checkpoint sam2 = SAM2Model( image_encoder=image_encoder, memory_attention=memory_attention, @@ -288,6 +289,9 @@ def _build_sam2( multimask_max_pt_num=1, use_mlp_for_obj_ptr_proj=True, compile_image_encoder=False, + no_obj_embed_spatial=is_sam2_1, + proj_tpos_enc_in_obj_ptrs=is_sam2_1, + use_signed_tpos_enc_to_obj_ptrs=is_sam2_1, sam_mask_decoder_extra_args=dict( dynamic_multimask_via_stability=True, dynamic_multimask_stability_delta=0.05, @@ -313,6 +317,10 @@ sam_model_map = { "sam2_s.pt": build_sam2_s, "sam2_b.pt": build_sam2_b, "sam2_l.pt": build_sam2_l, + "sam2.1_t.pt": build_sam2_t, + "sam2.1_s.pt": build_sam2_s, + "sam2.1_b.pt": build_sam2_b, + "sam2.1_l.pt": build_sam2_l, } diff --git a/ultralytics/models/sam/modules/sam.py b/ultralytics/models/sam/modules/sam.py index 2728b0b481..562314b2b9 100644 --- a/ultralytics/models/sam/modules/sam.py +++ b/ultralytics/models/sam/modules/sam.py @@ -161,18 +161,19 @@ class SAM2Model(torch.nn.Module): use_multimask_token_for_obj_ptr: bool = False, iou_prediction_use_sigmoid=False, memory_temporal_stride_for_eval=1, - add_all_frames_to_correct_as_cond=False, non_overlap_masks_for_mem_enc=False, use_obj_ptrs_in_encoder=False, max_obj_ptrs_in_encoder=16, add_tpos_enc_to_obj_ptrs=True, proj_tpos_enc_in_obj_ptrs=False, + use_signed_tpos_enc_to_obj_ptrs=False, only_obj_ptrs_in_the_past_for_eval=False, pred_obj_scores: bool = False, pred_obj_scores_mlp: bool = False, fixed_no_obj_ptr: bool = False, soft_no_obj_ptr: bool = False, use_mlp_for_obj_ptr_proj: bool = False, + no_obj_embed_spatial: bool = False, sam_mask_decoder_extra_args=None, compile_image_encoder: bool = False, ): @@ -205,8 +206,6 @@ class SAM2Model(torch.nn.Module): use_multimask_token_for_obj_ptr (bool): Whether to use multimask tokens for object pointers. iou_prediction_use_sigmoid (bool): Whether to use sigmoid to restrict IoU prediction to [0-1]. memory_temporal_stride_for_eval (int): Memory bank's temporal stride during evaluation. 
- add_all_frames_to_correct_as_cond (bool): Whether to append frames with correction clicks to conditioning - frame list. non_overlap_masks_for_mem_enc (bool): Whether to apply non-overlapping constraints on object masks in memory encoder during evaluation. use_obj_ptrs_in_encoder (bool): Whether to cross-attend to object pointers from other frames in the encoder. @@ -216,6 +215,9 @@ class SAM2Model(torch.nn.Module): the encoder. proj_tpos_enc_in_obj_ptrs (bool): Whether to add an extra linear projection layer for temporal positional encoding in object pointers. + use_signed_tpos_enc_to_obj_ptrs (bool): whether to use signed distance (instead of unsigned absolute distance) + in the temporal positional encoding in the object pointers, only relevant when both `use_obj_ptrs_in_encoder=True` + and `add_tpos_enc_to_obj_ptrs=True`. only_obj_ptrs_in_the_past_for_eval (bool): Whether to only attend to object pointers in the past during evaluation. pred_obj_scores (bool): Whether to predict if there is an object in the frame. @@ -223,6 +225,7 @@ class SAM2Model(torch.nn.Module): fixed_no_obj_ptr (bool): Whether to have a fixed no-object pointer when there is no object present. soft_no_obj_ptr (bool): Whether to mix in no-object pointer softly for easier recovery and error mitigation. use_mlp_for_obj_ptr_proj (bool): Whether to use MLP for object pointer projection. + no_obj_embed_spatial (bool): Whether add no obj embedding to spatial frames. sam_mask_decoder_extra_args (Dict | None): Extra arguments for constructing the SAM mask decoder. compile_image_encoder (bool): Whether to compile the image encoder for faster inference. @@ -253,6 +256,7 @@ class SAM2Model(torch.nn.Module): if proj_tpos_enc_in_obj_ptrs: assert add_tpos_enc_to_obj_ptrs # these options need to be used together self.proj_tpos_enc_in_obj_ptrs = proj_tpos_enc_in_obj_ptrs + self.use_signed_tpos_enc_to_obj_ptrs = use_signed_tpos_enc_to_obj_ptrs self.only_obj_ptrs_in_the_past_for_eval = only_obj_ptrs_in_the_past_for_eval # Part 2: memory attention to condition current frame's visual features @@ -309,9 +313,12 @@ class SAM2Model(torch.nn.Module): self.no_obj_ptr = torch.nn.Parameter(torch.zeros(1, self.hidden_dim)) trunc_normal_(self.no_obj_ptr, std=0.02) self.use_mlp_for_obj_ptr_proj = use_mlp_for_obj_ptr_proj + self.no_obj_embed_spatial = None + if no_obj_embed_spatial: + self.no_obj_embed_spatial = torch.nn.Parameter(torch.zeros(1, self.mem_dim)) + trunc_normal_(self.no_obj_embed_spatial, std=0.02) self._build_sam_heads() - self.add_all_frames_to_correct_as_cond = add_all_frames_to_correct_as_cond self.max_cond_frames_in_attn = max_cond_frames_in_attn # Model compilation @@ -533,8 +540,6 @@ class SAM2Model(torch.nn.Module): if self.pred_obj_scores: # Allow *soft* no obj ptr, unlike for masks if self.soft_no_obj_ptr: - # Only hard possible with gt - assert not self.teacher_force_obj_scores_for_mem lambda_is_obj_appearing = object_score_logits.sigmoid() else: lambda_is_obj_appearing = is_obj_appearing.float() @@ -647,6 +652,7 @@ class SAM2Model(torch.nn.Module): if self.num_maskmem == 0: # Disable memory and skip fusion return current_vision_feats[-1].permute(1, 2, 0).view(B, C, H, W) num_obj_ptr_tokens = 0 + tpos_sign_mul = -1 if track_in_reverse else 1 # Step 1: condition the visual features of the current frame on previous memories if not is_init_cond_frame: # Retrieve the memories encoded with the maskmem backbone @@ -664,7 +670,7 @@ class SAM2Model(torch.nn.Module): # the earliest one has t_pos=1 and the latest one has 
t_pos=self.num_maskmem-1 # We also allow taking the memory frame non-consecutively (with r>1), in which case # we take (self.num_maskmem - 2) frames among every r-th frames plus the last frame. - r = self.memory_temporal_stride_for_eval + r = 1 if self.training else self.memory_temporal_stride_for_eval for t_pos in range(1, self.num_maskmem): t_rel = self.num_maskmem - t_pos # how many frames before current frame if t_rel == 1: @@ -718,7 +724,14 @@ class SAM2Model(torch.nn.Module): ptr_cond_outputs = selected_cond_outputs pos_and_ptrs = [ # Temporal pos encoding contains how far away each pointer is from current frame - (abs(frame_idx - t), out["obj_ptr"]) + ( + ( + (frame_idx - t) * tpos_sign_mul + if self.use_signed_tpos_enc_to_obj_ptrs + else abs(frame_idx - t) + ), + out["obj_ptr"], + ) for t, out in ptr_cond_outputs.items() ] # Add up to (max_obj_ptrs_in_encoder - 1) non-conditioning frames before current frame @@ -787,6 +800,7 @@ class SAM2Model(torch.nn.Module): current_vision_feats, feat_sizes, pred_masks_high_res, + object_score_logits, is_mask_from_pts, ): """Encodes frame features and masks into a new memory representation for video segmentation.""" @@ -819,10 +833,17 @@ class SAM2Model(torch.nn.Module): ) maskmem_features = maskmem_out["vision_features"] maskmem_pos_enc = maskmem_out["vision_pos_enc"] + # add a no-object embedding to the spatial memory to indicate that the frame + # is predicted to be occluded (i.e. no object is appearing in the frame) + if self.no_obj_embed_spatial is not None: + is_obj_appearing = (object_score_logits > 0).float() + maskmem_features += (1 - is_obj_appearing[..., None, None]) * self.no_obj_embed_spatial[ + ..., None, None + ].expand(*maskmem_features.shape) return maskmem_features, maskmem_pos_enc - def track_step( + def _track_step( self, frame_idx, is_init_cond_frame, @@ -833,15 +854,7 @@ class SAM2Model(torch.nn.Module): mask_inputs, output_dict, num_frames, - track_in_reverse=False, # tracking in reverse time order (for demo usage) - # Whether to run the memory encoder on the predicted masks. Sometimes we might want - # to skip the memory encoder with `run_mem_encoder=False`. For example, - # in demo we might call `track_step` multiple times for each user click, - # and only encode the memory when the user finalizes their clicks. And in ablation - # settings like SAM training on static images, we don't need the memory encoder. - run_mem_encoder=True, - # The previously predicted SAM mask logits (which can be fed together with new clicks in demo). 
-        prev_sam_mask_logits=None,
+        prev_sam_mask_logits,
     ):
         """Performs a single tracking step, updating object masks and memory features based on current frame inputs."""
         current_out = {"point_inputs": point_inputs, "mask_inputs": mask_inputs}
@@ -861,7 +874,7 @@ class SAM2Model(torch.nn.Module):
             sam_outputs = self._use_mask_as_output(pix_feat, high_res_features, mask_inputs)
         else:
             # fused the visual feature with previous memory features in the memory bank
-            pix_feat_with_mem = self._prepare_memory_conditioned_features(
+            pix_feat = self._prepare_memory_conditioned_features(
                 frame_idx=frame_idx,
                 is_init_cond_frame=is_init_cond_frame,
                 current_vision_feats=current_vision_feats[-1:],
@@ -880,12 +893,78 @@ class SAM2Model(torch.nn.Module):
                 mask_inputs = prev_sam_mask_logits
             multimask_output = self._use_multimask(is_init_cond_frame, point_inputs)
             sam_outputs = self._forward_sam_heads(
-                backbone_features=pix_feat_with_mem,
+                backbone_features=pix_feat,
                 point_inputs=point_inputs,
                 mask_inputs=mask_inputs,
                 high_res_features=high_res_features,
                 multimask_output=multimask_output,
             )
+        return current_out, sam_outputs, high_res_features, pix_feat
+
+    def _encode_memory_in_output(
+        self,
+        current_vision_feats,
+        feat_sizes,
+        point_inputs,
+        run_mem_encoder,
+        high_res_masks,
+        object_score_logits,
+        current_out,
+    ):
+        """Finally run the memory encoder on the predicted mask to encode it into a new memory feature (that can be
+        used in future frames).
+        """
+        if run_mem_encoder and self.num_maskmem > 0:
+            high_res_masks_for_mem_enc = high_res_masks
+            maskmem_features, maskmem_pos_enc = self._encode_new_memory(
+                current_vision_feats=current_vision_feats,
+                feat_sizes=feat_sizes,
+                pred_masks_high_res=high_res_masks_for_mem_enc,
+                object_score_logits=object_score_logits,
+                is_mask_from_pts=(point_inputs is not None),
+            )
+            current_out["maskmem_features"] = maskmem_features
+            current_out["maskmem_pos_enc"] = maskmem_pos_enc
+        else:
+            current_out["maskmem_features"] = None
+            current_out["maskmem_pos_enc"] = None
+
+    def track_step(
+        self,
+        frame_idx,
+        is_init_cond_frame,
+        current_vision_feats,
+        current_vision_pos_embeds,
+        feat_sizes,
+        point_inputs,
+        mask_inputs,
+        output_dict,
+        num_frames,
+        track_in_reverse=False,  # tracking in reverse time order (for demo usage)
+        # Whether to run the memory encoder on the predicted masks. Sometimes we might want
+        # to skip the memory encoder with `run_mem_encoder=False`. For example,
+        # in demo we might call `track_step` multiple times for each user click,
+        # and only encode the memory when the user finalizes their clicks. And in ablation
+        # settings like SAM training on static images, we don't need the memory encoder.
+        run_mem_encoder=True,
+        # The previously predicted SAM mask logits (which can be fed together with new clicks in demo).
+ prev_sam_mask_logits=None, + ): + """Performs a single tracking step, updating object masks and memory features based on current frame inputs.""" + current_out, sam_outputs, _, _ = self._track_step( + frame_idx, + is_init_cond_frame, + current_vision_feats, + current_vision_pos_embeds, + feat_sizes, + point_inputs, + mask_inputs, + output_dict, + num_frames, + track_in_reverse, + prev_sam_mask_logits, + ) + ( _, _, @@ -893,28 +972,28 @@ class SAM2Model(torch.nn.Module): low_res_masks, high_res_masks, obj_ptr, - _, + object_score_logits, ) = sam_outputs current_out["pred_masks"] = low_res_masks current_out["pred_masks_high_res"] = high_res_masks current_out["obj_ptr"] = obj_ptr + if not self.training: + # Only add this in inference (to avoid unused param in activation checkpointing; + # it's mainly used in the demo to encode spatial memories w/ consolidated masks) + current_out["object_score_logits"] = object_score_logits # Finally run the memory encoder on the predicted mask to encode # it into a new memory feature (that can be used in future frames) - if run_mem_encoder and self.num_maskmem > 0: - high_res_masks_for_mem_enc = high_res_masks - maskmem_features, maskmem_pos_enc = self._encode_new_memory( - current_vision_feats=current_vision_feats, - feat_sizes=feat_sizes, - pred_masks_high_res=high_res_masks_for_mem_enc, - is_mask_from_pts=(point_inputs is not None), - ) - current_out["maskmem_features"] = maskmem_features - current_out["maskmem_pos_enc"] = maskmem_pos_enc - else: - current_out["maskmem_features"] = None - current_out["maskmem_pos_enc"] = None + self._encode_memory_in_output( + current_vision_feats, + feat_sizes, + point_inputs, + run_mem_encoder, + high_res_masks, + object_score_logits, + current_out, + ) return current_out From 98aa4bbd439aa978e45478e9e7fe207e6390eb0b Mon Sep 17 00:00:00 2001 From: Mohammed Yasin <32206511+Y-T-G@users.noreply.github.com> Date: Fri, 25 Oct 2024 19:45:40 +0800 Subject: [PATCH 04/12] Fix Python warning spam (#17162) --- ultralytics/utils/checks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ultralytics/utils/checks.py b/ultralytics/utils/checks.py index 60faef2c4b..9591d3dea2 100644 --- a/ultralytics/utils/checks.py +++ b/ultralytics/utils/checks.py @@ -335,7 +335,7 @@ def check_font(font="Arial.ttf"): return file -def check_python(minimum: str = "3.8.0", hard: bool = True, verbose: bool = True) -> bool: +def check_python(minimum: str = "3.8.0", hard: bool = True, verbose: bool = False) -> bool: """ Check current python version against the required minimum version. 
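The subtlest behavioral change in the SAM2.1 integration (PATCH 03/12) above is `use_signed_tpos_enc_to_obj_ptrs`: temporal positions of object pointers keep their sign rather than collapsing to absolute distance, so frames before and after the current frame no longer alias to the same encoding. A standalone toy sketch of the difference, using made-up frame indices rather than any Ultralytics API:

```python
# Toy illustration of the signed temporal positions enabled for SAM 2.1 checkpoints.
# Standalone sketch with invented frame indices; it mirrors the convention in
# SAM2Model._prepare_memory_conditioned_features but is not Ultralytics code.
frame_idx = 10                  # current frame
pointer_frames = [7, 8, 9, 11]  # frames contributing object pointers
track_in_reverse = False
tpos_sign_mul = -1 if track_in_reverse else 1

unsigned = [abs(frame_idx - t) for t in pointer_frames]
signed = [(frame_idx - t) * tpos_sign_mul for t in pointer_frames]

print(unsigned)  # [3, 2, 1, 1] -> frames 9 and 11 collide at distance 1
print(signed)    # [3, 2, 1, -1] -> direction relative to the current frame survives
```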
From f80d0d75c4d942b8252fdee521f64a7a76733ed5 Mon Sep 17 00:00:00 2001
From: Mohammed Yasin <32206511+Y-T-G@users.noreply.github.com>
Date: Fri, 25 Oct 2024 19:48:28 +0800
Subject: [PATCH 05/12] Fix inaccurate example in Export docs (#17161)

---
 docs/en/modes/export.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/en/modes/export.md b/docs/en/modes/export.md
index 4be5bd5b90..776d826445 100644
--- a/docs/en/modes/export.md
+++ b/docs/en/modes/export.md
@@ -136,13 +136,13 @@ INT8 quantization is an excellent way to compress the model and speed up inferen
         from ultralytics import YOLO
 
         model = YOLO("yolo11n.pt")  # Load a model
-        model.export(format="onnx", int8=True)
+        model.export(format="engine", int8=True)
         ```
 
     === "CLI"
 
         ```bash
-        yolo export model=yolo11n.pt format=onnx int8=True # export model with INT8 quantization
+        yolo export model=yolo11n.pt format=engine int8=True # export TensorRT model with INT8 quantization
         ```
 
 INT8 quantization can be applied to various formats, such as TensorRT and CoreML. More details can be found in the [Export section](../modes/export.md).

From 61a45508f8e8bc14246ae616b9de8a6104cb23ae Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Fri, 25 Oct 2024 13:49:42 +0200
Subject: [PATCH 06/12] Default W&B setting `False` (#17164)

Co-authored-by: UltralyticsAssistant
---
 ultralytics/utils/__init__.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/ultralytics/utils/__init__.py b/ultralytics/utils/__init__.py
index 50ef7e5ca1..d9cd96e3c4 100644
--- a/ultralytics/utils/__init__.py
+++ b/ultralytics/utils/__init__.py
@@ -568,12 +568,16 @@ def is_kaggle():
 
 def is_jupyter():
     """
-    Check if the current script is running inside a Jupyter Notebook. Verified on Colab, Jupyterlab, Kaggle, Paperspace.
+    Check if the current script is running inside a Jupyter Notebook.
 
     Returns:
         (bool): True if running inside a Jupyter Notebook, False otherwise.
+
+    Note:
+        - Only works on Colab and Kaggle; other environments like JupyterLab and Paperspace are not reliably detectable.
+        - The "get_ipython" in globals() check can produce false positives when the IPython package is installed manually.
""" - return "get_ipython" in globals() + return IS_COLAB or IS_KAGGLE def is_docker() -> bool: @@ -801,10 +805,10 @@ def get_user_config_dir(sub_dir="Ultralytics"): PROC_DEVICE_MODEL = read_device_model() # is_jetson() and is_raspberrypi() depend on this constant ONLINE = is_online() IS_COLAB = is_colab() +IS_KAGGLE = is_kaggle() IS_DOCKER = is_docker() IS_JETSON = is_jetson() IS_JUPYTER = is_jupyter() -IS_KAGGLE = is_kaggle() IS_PIP_PACKAGE = is_pip_package() IS_RASPBERRYPI = is_raspberrypi() GIT_DIR = get_git_dir() @@ -1195,7 +1199,7 @@ class SettingsManager(JSONDict): "neptune": True, # Neptune integration "raytune": True, # Ray Tune integration "tensorboard": True, # TensorBoard logging - "wandb": True, # Weights & Biases logging + "wandb": False, # Weights & Biases logging "vscode_msg": True, # VSCode messaging } From b5fd7f3378a7dbc551871b57b7c122f77b55193b Mon Sep 17 00:00:00 2001 From: Laughing <61612323+Laughing-q@users.noreply.github.com> Date: Fri, 25 Oct 2024 21:58:11 +0800 Subject: [PATCH 07/12] `ultralytics 8.3.23` fix `bbox2segment` when no segments generated (#17157) Co-authored-by: Glenn Jocher --- ultralytics/__init__.py | 2 +- ultralytics/data/converter.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py index b9f92151a6..a48d3646c0 100644 --- a/ultralytics/__init__.py +++ b/ultralytics/__init__.py @@ -1,6 +1,6 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license -__version__ = "8.3.22" +__version__ = "8.3.23" import os diff --git a/ultralytics/data/converter.py b/ultralytics/data/converter.py index fe1aac10ae..fa5821418a 100644 --- a/ultralytics/data/converter.py +++ b/ultralytics/data/converter.py @@ -632,9 +632,10 @@ def yolo_bbox2segment(im_dir, save_dir=None, sam_model="sam_b.pt"): txt_file = save_dir / lb_name cls = label["cls"] for i, s in enumerate(label["segments"]): + if len(s) == 0: + continue line = (int(cls[i]), *s.reshape(-1)) texts.append(("%g " * len(line)).rstrip() % line) - if texts: with open(txt_file, "a") as f: f.writelines(text + "\n" for text in texts) LOGGER.info(f"Generated segment labels saved in {save_dir}") From 1d6fd4c3cf6ccb33e4dc2372eecb470a1009bb8e Mon Sep 17 00:00:00 2001 From: Muhammad Rizwan Munawar Date: Sat, 26 Oct 2024 00:19:57 +0500 Subject: [PATCH 08/12] Update OBB predict examples with boats.jpg (#17052) Co-authored-by: UltralyticsAssistant Co-authored-by: Jan Knobloch <116908874+jk4e@users.noreply.github.com> Co-authored-by: Ultralytics Assistant <135830346+UltralyticsAssistant@users.noreply.github.com> --- docs/en/modes/predict.md | 2 +- docs/en/tasks/obb.md | 6 +++--- examples/tutorial.ipynb | 2 +- ultralytics/cfg/__init__.py | 4 +++- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/docs/en/modes/predict.md b/docs/en/modes/predict.md index a298294d59..4c69aa52d4 100644 --- a/docs/en/modes/predict.md +++ b/docs/en/modes/predict.md @@ -665,7 +665,7 @@ For more details see the [`Probs` class documentation](../reference/engine/resul model = YOLO("yolo11n-obb.pt") # Run inference on an image - results = model("bus.jpg") # results list + results = model("boats.jpg") # results list # View results for r in results: diff --git a/docs/en/tasks/obb.md b/docs/en/tasks/obb.md index 35e659ed47..621ffc783d 100644 --- a/docs/en/tasks/obb.md +++ b/docs/en/tasks/obb.md @@ -141,14 +141,14 @@ Use a trained YOLO11n-obb model to run predictions on images. 
model = YOLO("path/to/best.pt") # load a custom model # Predict with the model - results = model("https://ultralytics.com/images/bus.jpg") # predict on an image + results = model("https://ultralytics.com/images/boats.jpg") # predict on an image ``` === "CLI" ```bash - yolo obb predict model=yolo11n-obb.pt source='https://ultralytics.com/images/bus.jpg' # predict with official model - yolo obb predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' # predict with custom model + yolo obb predict model=yolo11n-obb.pt source='https://ultralytics.com/images/boats.jpg' # predict with official model + yolo obb predict model=path/to/best.pt source='https://ultralytics.com/images/boats.jpg' # predict with custom model ```

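The predict snippets above return standard `Results` objects; the rotated boxes live on their `obb` attribute. A short read-back sketch (attribute names follow the Ultralytics OBB results class, and the image URL matches the examples above):

```python
from ultralytics import YOLO

model = YOLO("yolo11n-obb.pt")
results = model("https://ultralytics.com/images/boats.jpg")

for r in results:
    print(r.obb.cls)    # class index per rotated box
    print(r.obb.conf)   # confidence per rotated box
    print(r.obb.xywhr)  # center-x, center-y, width, height, rotation (radians)
```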
diff --git a/examples/tutorial.ipynb b/examples/tutorial.ipynb index 98c659b864..75dd455e9a 100644 --- a/examples/tutorial.ipynb +++ b/examples/tutorial.ipynb @@ -583,7 +583,7 @@ "\n", "model = YOLO('yolo11n-obb.pt') # load a pretrained YOLO OBB model\n", "model.train(data='dota8.yaml', epochs=3) # train the model\n", - "model('https://ultralytics.com/images/bus.jpg') # predict on an image" + "model('https://ultralytics.com/images/boats.jpg') # predict on an image" ], "metadata": { "id": "IJNKClOOB5YS" diff --git a/ultralytics/cfg/__init__.py b/ultralytics/cfg/__init__.py index 7c36a3b918..0af93a37d3 100644 --- a/ultralytics/cfg/__init__.py +++ b/ultralytics/cfg/__init__.py @@ -809,7 +809,9 @@ def entrypoint(debug=""): # Mode if mode in {"predict", "track"} and "source" not in overrides: - overrides["source"] = DEFAULT_CFG.source or ASSETS + overrides["source"] = ( + "https://ultralytics.com/images/boats.jpg" if task == "obb" else DEFAULT_CFG.source or ASSETS + ) LOGGER.warning(f"WARNING ⚠️ 'source' argument is missing. Using default 'source={overrides['source']}'.") elif mode in {"train", "val"}: if "data" not in overrides and "resume" not in overrides: From 8d203cf40dadb9343241d2f65bd4c84820f27430 Mon Sep 17 00:00:00 2001 From: Muhammad Rizwan Munawar Date: Sat, 26 Oct 2024 17:04:33 +0500 Subject: [PATCH 09/12] Add explorer depreciation message in `datasets/index.md` (#17179) Co-authored-by: UltralyticsAssistant --- docs/en/datasets/index.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/en/datasets/index.md b/docs/en/datasets/index.md index 4b6bd6c968..5f165b2477 100644 --- a/docs/en/datasets/index.md +++ b/docs/en/datasets/index.md @@ -19,7 +19,11 @@ Ultralytics provides support for various datasets to facilitate computer vision Watch: Ultralytics Datasets Overview

-## Ultralytics Explorer 🚀 NEW
+## Ultralytics Explorer
+
+!!! warning "Community Note ⚠️"
+
+    As of **`ultralytics>=8.3.10`**, Ultralytics Explorer support has been deprecated. But don't worry! You can now access similar and even enhanced functionality through [Ultralytics HUB](https://hub.ultralytics.com/), our intuitive no-code platform designed to streamline your workflow. With Ultralytics HUB, you can continue exploring, visualizing, and managing your data effortlessly, all without writing a single line of code. Make sure to check it out and take advantage of its powerful features! 🚀
 
 Create [embeddings](https://www.ultralytics.com/glossary/embeddings) for your dataset, search for similar images, run SQL queries, perform semantic search and even search using natural language! You can get started with our GUI app or build your own using the API. Learn more [here](explorer/index.md).

From d02a0e5d4fc3d587ab32261cb5da532cbc8978ed Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 27 Oct 2024 18:32:56 +0100
Subject: [PATCH 10/12] Ultralytics Cleanup Disk action in docker.yaml (#17194)

---
 .github/workflows/docker.yaml | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml
index 8d9f749e19..c299bc5bfd 100644
--- a/.github/workflows/docker.yaml
+++ b/.github/workflows/docker.yaml
@@ -84,11 +84,8 @@ jobs:
     outputs:
       new_release: ${{ steps.check_tag.outputs.new_release }}
     steps:
-      - name: Cleanup disk
-        # Free up to 30GB of disk space per https://github.com/ultralytics/ultralytics/pull/15848
-        uses: jlumbroso/free-disk-space@v1.3.1
-        with:
-          tool-cache: true
+      - name: Cleanup disk space
+        uses: ultralytics/actions/cleanup-disk@main
 
       - name: Checkout repo
         uses: actions/checkout@v4

From e1e54602dfbf6b9c52aaaa340681a5a8b8f089f4 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 27 Oct 2024 19:09:17 +0100
Subject: [PATCH 11/12] Disable HUB CI temporarily (#17196)

---
 .github/workflows/ci.yaml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 796a08968a..43f5d4cfeb 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -39,7 +39,8 @@ on:
 
 jobs:
   HUB:
-    if: github.repository == 'ultralytics/ultralytics' && (github.event_name == 'schedule' || github.event_name == 'push' || (github.event_name == 'workflow_dispatch' && github.event.inputs.hub == 'true'))
+    # if: github.repository == 'ultralytics/ultralytics' && (github.event_name == 'schedule' || github.event_name == 'push' || (github.event_name == 'workflow_dispatch' && github.event.inputs.hub == 'true'))
+    if: github.repository == 'ultralytics/ultralytics' && 'workflow_dispatch' && github.event.inputs.hub == 'true'
     runs-on: ${{ matrix.os }}
     strategy:
       fail-fast: false

From da1bf9d79d4063eb9f389f9e7509cd936e21a9cc Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Mon, 28 Oct 2024 20:17:35 +0100
Subject: [PATCH 12/12] Pin `numpy<=2.0.0` on macOS (#17221)

---
 pyproject.toml                 | 3 ++-
 ultralytics/engine/exporter.py | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index f6cb23204a..2545739bab 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -61,7 +61,8 @@ classifiers = [
 
 # Required dependencies ------------------------------------------------------------------------------------------------
 dependencies = [
-    "numpy>=1.23.0", # temporary patch for compat errors https://github.com/ultralytics/yolov5/actions/runs/9538130424/job/26286956354
+    "numpy>=1.23.0",
+    "numpy<2.0.0; sys_platform == 'darwin'", # macOS OpenVINO errors https://github.com/ultralytics/ultralytics/pull/17221
     "matplotlib>=3.3.0",
     "opencv-python>=4.6.0",
     "pillow>=7.1.2",

diff --git a/ultralytics/engine/exporter.py b/ultralytics/engine/exporter.py
index 6d403a2afb..49e84af9f5 100644
--- a/ultralytics/engine/exporter.py
+++ b/ultralytics/engine/exporter.py
@@ -457,6 +457,7 @@ class Exporter:
     @try_export
     def export_openvino(self, prefix=colorstr("OpenVINO:")):
        """YOLO OpenVINO export."""
+        # WARNING: numpy>=2.0.0 issue with OpenVINO on macOS https://github.com/ultralytics/ultralytics/pull/17221
        check_requirements(f'openvino{"<=2024.0.0" if ARM64 else ">=2024.0.0"}')  # fix OpenVINO issue on ARM64
        import openvino as ov
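For readers unfamiliar with PEP 508 environment markers, `"numpy<2.0.0; sys_platform == 'darwin'"` applies the pin only when installing on macOS. A rough runtime equivalent of what that marker guarantees (an illustrative sketch only; Ultralytics enforces this through pyproject.toml, not in code):

```python
# Illustrative runtime equivalent of the marker "numpy<2.0.0; sys_platform == 'darwin'".
# Sketch for clarity only; not part of the Ultralytics codebase.
import platform

import numpy as np

numpy_major = int(np.__version__.split(".")[0])
if platform.system() == "Darwin" and numpy_major >= 2:
    # This is the combination that breaks OpenVINO export per ultralytics PR #17221.
    raise RuntimeError("OpenVINO export on macOS requires numpy<2.0.0 (see ultralytics#17221)")
```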