diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 796a08968a..43f5d4cfeb 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -39,7 +39,8 @@ on:
 jobs:
   HUB:
-    if: github.repository == 'ultralytics/ultralytics' && (github.event_name == 'schedule' || github.event_name == 'push' || (github.event_name == 'workflow_dispatch' && github.event.inputs.hub == 'true'))
+    # if: github.repository == 'ultralytics/ultralytics' && (github.event_name == 'schedule' || github.event_name == 'push' || (github.event_name == 'workflow_dispatch' && github.event.inputs.hub == 'true'))
+    if: github.repository == 'ultralytics/ultralytics' && 'workflow_dispatch' && github.event.inputs.hub == 'true'
     runs-on: ${{ matrix.os }}
     strategy:
       fail-fast: false
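Note: in GitHub Actions expressions, a bare non-empty string literal such as `'workflow_dispatch'` is always truthy, so the replacement condition above effectively gates the HUB job on the repository check and the manual `hub=true` input alone. A sketch of the reduced logic (hypothetical helper, not GHA syntax):

```python
def hub_job_runs(repo: str, hub_input: str) -> bool:
    """Reduced form of the new `if:` expression: the literal 'workflow_dispatch' is truthy."""
    return repo == "ultralytics/ultralytics" and hub_input == "true"

assert hub_job_runs("ultralytics/ultralytics", "true")
assert not hub_job_runs("ultralytics/ultralytics", "false")
```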
diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml
index 8d9f749e19..c299bc5bfd 100644
--- a/.github/workflows/docker.yaml
+++ b/.github/workflows/docker.yaml
@@ -84,11 +84,8 @@ jobs:
     outputs:
       new_release: ${{ steps.check_tag.outputs.new_release }}
     steps:
-      - name: Cleanup disk
-        # Free up to 30GB of disk space per https://github.com/ultralytics/ultralytics/pull/15848
-        uses: jlumbroso/free-disk-space@v1.3.1
-        with:
-          tool-cache: true
+      - name: Cleanup disk space
+        uses: ultralytics/actions/cleanup-disk@main
       - name: Checkout repo
         uses: actions/checkout@v4
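The third-party `jlumbroso/free-disk-space` step is swapped for Ultralytics' own `cleanup-disk` composite action. To confirm how much headroom such a step actually reclaims, a CI step could log free space before and after with the standard library (a sketch, assuming a POSIX runner):

```python
import shutil

total, used, free = shutil.disk_usage("/")  # values in bytes
print(f"Disk: {free / 1e9:.1f} GB free of {total / 1e9:.1f} GB")
```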
diff --git a/README.md b/README.md
index c70ed6a41b..51f13230ed 100644
--- a/README.md
+++ b/README.md
@@ -17,6 +17,7 @@
+
diff --git a/README.zh-CN.md b/README.zh-CN.md
index 53cb7e05d6..d7665f166d 100644
--- a/README.zh-CN.md
+++ b/README.zh-CN.md
@@ -17,6 +17,7 @@
+
diff --git a/docs/en/datasets/index.md b/docs/en/datasets/index.md
index 4b6bd6c968..5f165b2477 100644
--- a/docs/en/datasets/index.md
+++ b/docs/en/datasets/index.md
@@ -19,7 +19,11 @@ Ultralytics provides support for various datasets to facilitate computer vision
Watch: Ultralytics Datasets Overview
diff --git a/examples/tutorial.ipynb b/examples/tutorial.ipynb
index 98c659b864..75dd455e9a 100644
--- a/examples/tutorial.ipynb
+++ b/examples/tutorial.ipynb
@@ -583,7 +583,7 @@
         "\n",
         "model = YOLO('yolo11n-obb.pt') # load a pretrained YOLO OBB model\n",
         "model.train(data='dota8.yaml', epochs=3) # train the model\n",
-        "model('https://ultralytics.com/images/bus.jpg') # predict on an image"
+        "model('https://ultralytics.com/images/boats.jpg') # predict on an image"
       ],
       "metadata": {
         "id": "IJNKClOOB5YS"
diff --git a/pyproject.toml b/pyproject.toml
index f6cb23204a..2545739bab 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -61,7 +61,8 @@ classifiers = [

 # Required dependencies ------------------------------------------------------------------------------------------------
 dependencies = [
-    "numpy>=1.23.0", # temporary patch for compat errors https://github.com/ultralytics/yolov5/actions/runs/9538130424/job/26286956354
+    "numpy>=1.23.0",
+    "numpy<2.0.0; sys_platform == 'darwin'", # macOS OpenVINO errors https://github.com/ultralytics/ultralytics/pull/17221
     "matplotlib>=3.3.0",
     "opencv-python>=4.6.0",
     "pillow>=7.1.2",
diff --git a/tests/test_cuda.py b/tests/test_cuda.py
index 89f8c39b25..4fd1a7aee3 100644
--- a/tests/test_cuda.py
+++ b/tests/test_cuda.py
@@ -116,7 +116,7 @@ def test_predict_sam():
     from ultralytics.models.sam import Predictor as SAMPredictor

     # Load a model
-    model = SAM(WEIGHTS_DIR / "sam_b.pt")
+    model = SAM(WEIGHTS_DIR / "sam2.1_b.pt")

     # Display model information (optional)
     model.info()
diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py
index ac22fe8620..a48d3646c0 100644
--- a/ultralytics/__init__.py
+++ b/ultralytics/__init__.py
@@ -1,6 +1,6 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license

-__version__ = "8.3.21"
+__version__ = "8.3.23"

 import os
diff --git a/ultralytics/cfg/__init__.py b/ultralytics/cfg/__init__.py
index 153ab27e38..0af93a37d3 100644
--- a/ultralytics/cfg/__init__.py
+++ b/ultralytics/cfg/__init__.py
@@ -787,7 +787,7 @@ def entrypoint(debug=""):
             from ultralytics import FastSAM

             model = FastSAM(model)
-        elif "sam_" in stem or "sam2_" in stem:
+        elif "sam_" in stem or "sam2_" in stem or "sam2.1_" in stem:
             from ultralytics import SAM

             model = SAM(model)
@@ -809,7 +809,9 @@ def entrypoint(debug=""):

     # Mode
     if mode in {"predict", "track"} and "source" not in overrides:
-        overrides["source"] = DEFAULT_CFG.source or ASSETS
+        overrides["source"] = (
+            "https://ultralytics.com/images/boats.jpg" if task == "obb" else DEFAULT_CFG.source or ASSETS
+        )
         LOGGER.warning(f"WARNING ⚠️ 'source' argument is missing. Using default 'source={overrides['source']}'.")
     elif mode in {"train", "val"}:
         if "data" not in overrides and "resume" not in overrides:
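The `entrypoint` change routes any checkpoint whose stem contains `sam_`, `sam2_`, or the new `sam2.1_` through the `SAM` class. A minimal sketch of that dispatch (with a hypothetical `model_class_for` helper); note `"sam2.1_"` must be matched explicitly because `"sam2_"` is not a substring of `"sam2.1_b"`:

```python
from pathlib import Path

def model_class_for(checkpoint: str) -> str:
    """Mirror of the stem-based routing in entrypoint()."""
    stem = Path(checkpoint).stem  # "sam2.1_b.pt" -> "sam2.1_b"
    if "sam_" in stem or "sam2_" in stem or "sam2.1_" in stem:
        return "SAM"
    return "YOLO"

assert model_class_for("sam2.1_b.pt") == "SAM"
assert model_class_for("yolo11n.pt") == "YOLO"
```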
diff --git a/ultralytics/cfg/solutions/default.yaml b/ultralytics/cfg/solutions/default.yaml
index a353fd2a21..69e430b8c3 100644
--- a/ultralytics/cfg/solutions/default.yaml
+++ b/ultralytics/cfg/solutions/default.yaml
@@ -1,18 +1,19 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
-
 # Configuration for Ultralytics Solutions

-model: "yolo11n.pt" # The Ultralytics YOLO11 model to be used (e.g., yolo11n.pt for YOLO11 nano version and yolov8n.pt for YOLOv8 nano version)
-
+# Object counting settings
 region: # Object counting, queue or speed estimation region points. Default region points are [(20, 400), (1080, 404), (1080, 360), (20, 360)]
-line_width: 2 # Width of the annotator used to draw regions on the image/video frames + bounding boxes and tracks drawing. Default value is 2.
-show: True # Flag to control whether to display output image or not, you can set this as False i.e. when deploying it on some embedded devices.
 show_in: True # Flag to display objects moving *into* the defined region
 show_out: True # Flag to display objects moving *out of* the defined region
-classes: # To count specific classes. i.e, if you want to detect, track and count the person with COCO model, you can use classes=0, Default its None
+
+# Heatmaps settings
+colormap: # Colormap for heatmap, Only OPENCV supported colormaps can be used. By default COLORMAP_PARULA will be used for visualization.
+
+# Workouts monitoring settings
 up_angle: 145.0 # Workouts up_angle for counts, 145.0 is default value. You can adjust it for different workouts, based on position of keypoints.
 down_angle: 90 # Workouts down_angle for counts, 90 is default value. You can change it for different workouts, based on position of keypoints.
 kpts: [6, 8, 10] # Keypoints for workouts monitoring, i.e. If you want to consider keypoints for pushups that have mostly values of [6, 8, 10].
-colormap: # Colormap for heatmap, Only OPENCV supported colormaps can be used. By default COLORMAP_PARULA will be used for visualization.
+
+# Analytics settings
 analytics_type: "line" # Analytics type i.e "line", "pie", "bar" or "area" charts. By default, "line" analytics will be used for processing.
 json_file: # parking system regions file path.
diff --git a/ultralytics/data/converter.py b/ultralytics/data/converter.py
index fe1aac10ae..fa5821418a 100644
--- a/ultralytics/data/converter.py
+++ b/ultralytics/data/converter.py
@@ -632,9 +632,10 @@ def yolo_bbox2segment(im_dir, save_dir=None, sam_model="sam_b.pt"):
         txt_file = save_dir / lb_name
         cls = label["cls"]
         for i, s in enumerate(label["segments"]):
+            if len(s) == 0:
+                continue
             line = (int(cls[i]), *s.reshape(-1))
             texts.append(("%g " * len(line)).rstrip() % line)
-
     if texts:
         with open(txt_file, "a") as f:
             f.writelines(text + "\n" for text in texts)
     LOGGER.info(f"Generated segment labels saved in {save_dir}")
diff --git a/ultralytics/engine/exporter.py b/ultralytics/engine/exporter.py
index 6d403a2afb..49e84af9f5 100644
--- a/ultralytics/engine/exporter.py
+++ b/ultralytics/engine/exporter.py
@@ -457,6 +457,7 @@ class Exporter:
     @try_export
     def export_openvino(self, prefix=colorstr("OpenVINO:")):
         """YOLO OpenVINO export."""
+        # WARNING: numpy>=2.0.0 issue with OpenVINO on macOS https://github.com/ultralytics/ultralytics/pull/17221
         check_requirements(f'openvino{"<=2024.0.0" if ARM64 else ">=2024.0.0"}')  # fix OpenVINO issue on ARM64
         import openvino as ov
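The `numpy<2.0.0` pin added in `pyproject.toml` above uses a PEP 508 environment marker so it applies only on macOS, matching the OpenVINO warning added here in `exporter.py`. The marker can be checked with the `packaging` library (assumed installed):

```python
from packaging.markers import Marker

marker = Marker("sys_platform == 'darwin'")
print(marker.evaluate())  # True on macOS, False elsewhere; pip applies numpy<2.0.0 only when True
```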
diff --git a/ultralytics/models/sam/build.py b/ultralytics/models/sam/build.py
index e110531244..cee5133a09 100644
--- a/ultralytics/models/sam/build.py
+++ b/ultralytics/models/sam/build.py
@@ -263,6 +263,7 @@ def _build_sam2(
     memory_attention = MemoryAttention(d_model=256, pos_enc_at_input=True, num_layers=4, layer=MemoryAttentionLayer())
     memory_encoder = MemoryEncoder(out_dim=64)

+    is_sam2_1 = checkpoint is not None and "sam2.1" in checkpoint
     sam2 = SAM2Model(
         image_encoder=image_encoder,
         memory_attention=memory_attention,
@@ -288,6 +289,9 @@ def _build_sam2(
         multimask_max_pt_num=1,
         use_mlp_for_obj_ptr_proj=True,
         compile_image_encoder=False,
+        no_obj_embed_spatial=is_sam2_1,
+        proj_tpos_enc_in_obj_ptrs=is_sam2_1,
+        use_signed_tpos_enc_to_obj_ptrs=is_sam2_1,
         sam_mask_decoder_extra_args=dict(
             dynamic_multimask_via_stability=True,
             dynamic_multimask_stability_delta=0.05,
@@ -313,6 +317,10 @@ sam_model_map = {
     "sam2_s.pt": build_sam2_s,
     "sam2_b.pt": build_sam2_b,
     "sam2_l.pt": build_sam2_l,
+    "sam2.1_t.pt": build_sam2_t,
+    "sam2.1_s.pt": build_sam2_s,
+    "sam2.1_b.pt": build_sam2_b,
+    "sam2.1_l.pt": build_sam2_l,
 }
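With the four new `sam_model_map` entries, SAM 2.1 checkpoints load through the same `SAM` interface as earlier models (mirroring the updated `tests/test_cuda.py` above):

```python
from ultralytics import SAM

model = SAM("sam2.1_b.pt")  # builds via build_sam2_b with the SAM 2.1 flags enabled
model.info()  # optional: display model information
```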
diff --git a/ultralytics/models/sam/modules/sam.py b/ultralytics/models/sam/modules/sam.py
index 2728b0b481..562314b2b9 100644
--- a/ultralytics/models/sam/modules/sam.py
+++ b/ultralytics/models/sam/modules/sam.py
@@ -161,18 +161,19 @@ class SAM2Model(torch.nn.Module):
         use_multimask_token_for_obj_ptr: bool = False,
         iou_prediction_use_sigmoid=False,
         memory_temporal_stride_for_eval=1,
-        add_all_frames_to_correct_as_cond=False,
         non_overlap_masks_for_mem_enc=False,
         use_obj_ptrs_in_encoder=False,
         max_obj_ptrs_in_encoder=16,
         add_tpos_enc_to_obj_ptrs=True,
         proj_tpos_enc_in_obj_ptrs=False,
+        use_signed_tpos_enc_to_obj_ptrs=False,
         only_obj_ptrs_in_the_past_for_eval=False,
         pred_obj_scores: bool = False,
         pred_obj_scores_mlp: bool = False,
         fixed_no_obj_ptr: bool = False,
         soft_no_obj_ptr: bool = False,
         use_mlp_for_obj_ptr_proj: bool = False,
+        no_obj_embed_spatial: bool = False,
         sam_mask_decoder_extra_args=None,
         compile_image_encoder: bool = False,
     ):
@@ -205,8 +206,6 @@ class SAM2Model(torch.nn.Module):
             use_multimask_token_for_obj_ptr (bool): Whether to use multimask tokens for object pointers.
             iou_prediction_use_sigmoid (bool): Whether to use sigmoid to restrict IoU prediction to [0-1].
             memory_temporal_stride_for_eval (int): Memory bank's temporal stride during evaluation.
-            add_all_frames_to_correct_as_cond (bool): Whether to append frames with correction clicks to conditioning
-                frame list.
             non_overlap_masks_for_mem_enc (bool): Whether to apply non-overlapping constraints on object masks in
                 memory encoder during evaluation.
             use_obj_ptrs_in_encoder (bool): Whether to cross-attend to object pointers from other frames in the encoder.
@@ -216,6 +215,9 @@ class SAM2Model(torch.nn.Module):
                 the encoder.
             proj_tpos_enc_in_obj_ptrs (bool): Whether to add an extra linear projection layer for temporal positional
                 encoding in object pointers.
+            use_signed_tpos_enc_to_obj_ptrs (bool): Whether to use signed distance (instead of unsigned absolute distance)
+                in the temporal positional encoding in the object pointers, only relevant when both `use_obj_ptrs_in_encoder=True`
+                and `add_tpos_enc_to_obj_ptrs=True`.
             only_obj_ptrs_in_the_past_for_eval (bool): Whether to only attend to object pointers in the past
                 during evaluation.
             pred_obj_scores (bool): Whether to predict if there is an object in the frame.
@@ -223,6 +225,7 @@ class SAM2Model(torch.nn.Module):
             fixed_no_obj_ptr (bool): Whether to have a fixed no-object pointer when there is no object present.
             soft_no_obj_ptr (bool): Whether to mix in no-object pointer softly for easier recovery and error mitigation.
             use_mlp_for_obj_ptr_proj (bool): Whether to use MLP for object pointer projection.
+            no_obj_embed_spatial (bool): Whether to add a no-object embedding to spatial frames.
             sam_mask_decoder_extra_args (Dict | None): Extra arguments for constructing the SAM mask decoder.
             compile_image_encoder (bool): Whether to compile the image encoder for faster inference.
@@ -253,6 +256,7 @@ class SAM2Model(torch.nn.Module):
         if proj_tpos_enc_in_obj_ptrs:
             assert add_tpos_enc_to_obj_ptrs  # these options need to be used together
         self.proj_tpos_enc_in_obj_ptrs = proj_tpos_enc_in_obj_ptrs
+        self.use_signed_tpos_enc_to_obj_ptrs = use_signed_tpos_enc_to_obj_ptrs
         self.only_obj_ptrs_in_the_past_for_eval = only_obj_ptrs_in_the_past_for_eval

         # Part 2: memory attention to condition current frame's visual features
@@ -309,9 +313,12 @@ class SAM2Model(torch.nn.Module):
             self.no_obj_ptr = torch.nn.Parameter(torch.zeros(1, self.hidden_dim))
             trunc_normal_(self.no_obj_ptr, std=0.02)
         self.use_mlp_for_obj_ptr_proj = use_mlp_for_obj_ptr_proj
+        self.no_obj_embed_spatial = None
+        if no_obj_embed_spatial:
+            self.no_obj_embed_spatial = torch.nn.Parameter(torch.zeros(1, self.mem_dim))
+            trunc_normal_(self.no_obj_embed_spatial, std=0.02)

         self._build_sam_heads()
-        self.add_all_frames_to_correct_as_cond = add_all_frames_to_correct_as_cond
         self.max_cond_frames_in_attn = max_cond_frames_in_attn

         # Model compilation
@@ -533,8 +540,6 @@ class SAM2Model(torch.nn.Module):
         if self.pred_obj_scores:
             # Allow *soft* no obj ptr, unlike for masks
             if self.soft_no_obj_ptr:
-                # Only hard possible with gt
-                assert not self.teacher_force_obj_scores_for_mem
                 lambda_is_obj_appearing = object_score_logits.sigmoid()
             else:
                 lambda_is_obj_appearing = is_obj_appearing.float()
@@ -647,6 +652,7 @@ class SAM2Model(torch.nn.Module):
         if self.num_maskmem == 0:  # Disable memory and skip fusion
             return current_vision_feats[-1].permute(1, 2, 0).view(B, C, H, W)
         num_obj_ptr_tokens = 0
+        tpos_sign_mul = -1 if track_in_reverse else 1
         # Step 1: condition the visual features of the current frame on previous memories
         if not is_init_cond_frame:
             # Retrieve the memories encoded with the maskmem backbone
@@ -664,7 +670,7 @@ class SAM2Model(torch.nn.Module):
             # the earliest one has t_pos=1 and the latest one has t_pos=self.num_maskmem-1
             # We also allow taking the memory frame non-consecutively (with r>1), in which case
             # we take (self.num_maskmem - 2) frames among every r-th frames plus the last frame.
-            r = self.memory_temporal_stride_for_eval
+            r = 1 if self.training else self.memory_temporal_stride_for_eval
             for t_pos in range(1, self.num_maskmem):
                 t_rel = self.num_maskmem - t_pos  # how many frames before current frame
                 if t_rel == 1:
@@ -718,7 +724,14 @@ class SAM2Model(torch.nn.Module):
                 ptr_cond_outputs = selected_cond_outputs
                 pos_and_ptrs = [
                     # Temporal pos encoding contains how far away each pointer is from current frame
-                    (abs(frame_idx - t), out["obj_ptr"])
+                    (
+                        (
+                            (frame_idx - t) * tpos_sign_mul
+                            if self.use_signed_tpos_enc_to_obj_ptrs
+                            else abs(frame_idx - t)
+                        ),
+                        out["obj_ptr"],
+                    )
                     for t, out in ptr_cond_outputs.items()
                 ]
                 # Add up to (max_obj_ptrs_in_encoder - 1) non-conditioning frames before current frame
@@ -787,6 +800,7 @@ class SAM2Model(torch.nn.Module):
         current_vision_feats,
         feat_sizes,
         pred_masks_high_res,
+        object_score_logits,
         is_mask_from_pts,
     ):
         """Encodes frame features and masks into a new memory representation for video segmentation."""
@@ -819,10 +833,17 @@ class SAM2Model(torch.nn.Module):
         )
         maskmem_features = maskmem_out["vision_features"]
         maskmem_pos_enc = maskmem_out["vision_pos_enc"]
+        # add a no-object embedding to the spatial memory to indicate that the frame
+        # is predicted to be occluded (i.e. no object is appearing in the frame)
+        if self.no_obj_embed_spatial is not None:
+            is_obj_appearing = (object_score_logits > 0).float()
+            maskmem_features += (1 - is_obj_appearing[..., None, None]) * self.no_obj_embed_spatial[
+                ..., None, None
+            ].expand(*maskmem_features.shape)

         return maskmem_features, maskmem_pos_enc

-    def track_step(
+    def _track_step(
         self,
         frame_idx,
         is_init_cond_frame,
@@ -833,15 +854,8 @@ class SAM2Model(torch.nn.Module):
         mask_inputs,
         output_dict,
         num_frames,
-        track_in_reverse=False,  # tracking in reverse time order (for demo usage)
-        # Whether to run the memory encoder on the predicted masks. Sometimes we might want
-        # to skip the memory encoder with `run_mem_encoder=False`. For example,
-        # in demo we might call `track_step` multiple times for each user click,
-        # and only encode the memory when the user finalizes their clicks. And in ablation
-        # settings like SAM training on static images, we don't need the memory encoder.
-        run_mem_encoder=True,
-        # The previously predicted SAM mask logits (which can be fed together with new clicks in demo).
-        prev_sam_mask_logits=None,
+        track_in_reverse,
+        prev_sam_mask_logits,
     ):
         """Performs a single tracking step, updating object masks and memory features based on current frame inputs."""
         current_out = {"point_inputs": point_inputs, "mask_inputs": mask_inputs}
@@ -861,7 +874,7 @@ class SAM2Model(torch.nn.Module):
             sam_outputs = self._use_mask_as_output(pix_feat, high_res_features, mask_inputs)
         else:
             # fused the visual feature with previous memory features in the memory bank
-            pix_feat_with_mem = self._prepare_memory_conditioned_features(
+            pix_feat = self._prepare_memory_conditioned_features(
                 frame_idx=frame_idx,
                 is_init_cond_frame=is_init_cond_frame,
                 current_vision_feats=current_vision_feats[-1:],
@@ -880,12 +893,78 @@ class SAM2Model(torch.nn.Module):
                 mask_inputs = prev_sam_mask_logits
             multimask_output = self._use_multimask(is_init_cond_frame, point_inputs)
             sam_outputs = self._forward_sam_heads(
-                backbone_features=pix_feat_with_mem,
+                backbone_features=pix_feat,
                 point_inputs=point_inputs,
                 mask_inputs=mask_inputs,
                 high_res_features=high_res_features,
                 multimask_output=multimask_output,
             )
+        return current_out, sam_outputs, high_res_features, pix_feat
+
+    def _encode_memory_in_output(
+        self,
+        current_vision_feats,
+        feat_sizes,
+        point_inputs,
+        run_mem_encoder,
+        high_res_masks,
+        object_score_logits,
+        current_out,
+    ):
+        """Finally run the memory encoder on the predicted mask to encode it into a new memory feature (that can be
+        used in future frames).
+        """
+        if run_mem_encoder and self.num_maskmem > 0:
+            high_res_masks_for_mem_enc = high_res_masks
+            maskmem_features, maskmem_pos_enc = self._encode_new_memory(
+                current_vision_feats=current_vision_feats,
+                feat_sizes=feat_sizes,
+                pred_masks_high_res=high_res_masks_for_mem_enc,
+                object_score_logits=object_score_logits,
+                is_mask_from_pts=(point_inputs is not None),
+            )
+            current_out["maskmem_features"] = maskmem_features
+            current_out["maskmem_pos_enc"] = maskmem_pos_enc
+        else:
+            current_out["maskmem_features"] = None
+            current_out["maskmem_pos_enc"] = None
+
+    def track_step(
+        self,
+        frame_idx,
+        is_init_cond_frame,
+        current_vision_feats,
+        current_vision_pos_embeds,
+        feat_sizes,
+        point_inputs,
+        mask_inputs,
+        output_dict,
+        num_frames,
+        track_in_reverse=False,  # tracking in reverse time order (for demo usage)
+        # Whether to run the memory encoder on the predicted masks. Sometimes we might want
+        # to skip the memory encoder with `run_mem_encoder=False`. For example,
+        # in demo we might call `track_step` multiple times for each user click,
+        # and only encode the memory when the user finalizes their clicks. And in ablation
+        # settings like SAM training on static images, we don't need the memory encoder.
+        run_mem_encoder=True,
+        # The previously predicted SAM mask logits (which can be fed together with new clicks in demo).
+        prev_sam_mask_logits=None,
+    ):
+        """Performs a single tracking step, updating object masks and memory features based on current frame inputs."""
+        current_out, sam_outputs, _, _ = self._track_step(
+            frame_idx,
+            is_init_cond_frame,
+            current_vision_feats,
+            current_vision_pos_embeds,
+            feat_sizes,
+            point_inputs,
+            mask_inputs,
+            output_dict,
+            num_frames,
+            track_in_reverse,
+            prev_sam_mask_logits,
+        )
+
         (
             _,
             _,
@@ -893,28 +972,28 @@ class SAM2Model(torch.nn.Module):
             low_res_masks,
             high_res_masks,
             obj_ptr,
-            _,
+            object_score_logits,
         ) = sam_outputs

         current_out["pred_masks"] = low_res_masks
         current_out["pred_masks_high_res"] = high_res_masks
         current_out["obj_ptr"] = obj_ptr
+        if not self.training:
+            # Only add this in inference (to avoid unused param in activation checkpointing;
+            # it's mainly used in the demo to encode spatial memories w/ consolidated masks)
+            current_out["object_score_logits"] = object_score_logits

         # Finally run the memory encoder on the predicted mask to encode
         # it into a new memory feature (that can be used in future frames)
-        if run_mem_encoder and self.num_maskmem > 0:
-            high_res_masks_for_mem_enc = high_res_masks
-            maskmem_features, maskmem_pos_enc = self._encode_new_memory(
-                current_vision_feats=current_vision_feats,
-                feat_sizes=feat_sizes,
-                pred_masks_high_res=high_res_masks_for_mem_enc,
-                is_mask_from_pts=(point_inputs is not None),
-            )
-            current_out["maskmem_features"] = maskmem_features
-            current_out["maskmem_pos_enc"] = maskmem_pos_enc
-        else:
-            current_out["maskmem_features"] = None
-            current_out["maskmem_pos_enc"] = None
+        self._encode_memory_in_output(
+            current_vision_feats,
+            feat_sizes,
+            point_inputs,
+            run_mem_encoder,
+            high_res_masks,
+            object_score_logits,
+            current_out,
+        )

         return current_out
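The `use_signed_tpos_enc_to_obj_ptrs` path keeps the sign of the frame offset instead of folding future and past pointers together with `abs()`, and `tpos_sign_mul` flips the sign when tracking in reverse. A self-contained sketch of that term:

```python
def tpos(frame_idx: int, t: int, signed: bool, track_in_reverse: bool = False) -> int:
    """Temporal position of an object pointer relative to the current frame."""
    tpos_sign_mul = -1 if track_in_reverse else 1
    return (frame_idx - t) * tpos_sign_mul if signed else abs(frame_idx - t)

print(tpos(10, 7, signed=True))    # 3  (past frame)
print(tpos(10, 13, signed=True))   # -3 (future frame keeps its sign)
print(tpos(10, 13, signed=False))  # 3  (unsigned: future folded onto past)
```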
diff --git a/ultralytics/solutions/parking_management.py b/ultralytics/solutions/parking_management.py
index fa815938ab..a62de99524 100644
--- a/ultralytics/solutions/parking_management.py
+++ b/ultralytics/solutions/parking_management.py
@@ -168,7 +168,6 @@ class ParkingManagement(BaseSolution):
     Examples:
         >>> from ultralytics.solutions import ParkingManagement
         >>> parking_manager = ParkingManagement(model="yolov8n.pt", json_file="parking_regions.json")
-        >>> results = parking_manager(source="parking_lot_video.mp4")
         >>> print(f"Occupied spaces: {parking_manager.pr_info['Occupancy']}")
         >>> print(f"Available spaces: {parking_manager.pr_info['Available']}")
     """
diff --git a/ultralytics/solutions/solutions.py b/ultralytics/solutions/solutions.py
index 1af0c0ba09..e43aba6441 100644
--- a/ultralytics/solutions/solutions.py
+++ b/ultralytics/solutions/solutions.py
@@ -1,16 +1,13 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license

 from collections import defaultdict
-from pathlib import Path

 import cv2

 from ultralytics import YOLO
-from ultralytics.utils import LOGGER, yaml_load
+from ultralytics.utils import DEFAULT_CFG_DICT, DEFAULT_SOL_DICT, LOGGER
 from ultralytics.utils.checks import check_imshow, check_requirements

-DEFAULT_SOL_CFG_PATH = Path(__file__).resolve().parents[1] / "cfg/solutions/default.yaml"
-

 class BaseSolution:
     """
@@ -55,15 +52,18 @@ class BaseSolution:
         self.Point = Point

         # Load config and update with args
-        self.CFG = yaml_load(DEFAULT_SOL_CFG_PATH)
-        self.CFG.update(kwargs)
-        LOGGER.info(f"Ultralytics Solutions: ✅ {self.CFG}")
+        DEFAULT_SOL_DICT.update(kwargs)
+        DEFAULT_CFG_DICT.update(kwargs)
+        self.CFG = {**DEFAULT_SOL_DICT, **DEFAULT_CFG_DICT}
+        LOGGER.info(f"Ultralytics Solutions: ✅ {DEFAULT_SOL_DICT}")

         self.region = self.CFG["region"]  # Store region data for other classes usage
-        self.line_width = self.CFG["line_width"]  # Store line_width for usage
+        self.line_width = (
+            self.CFG["line_width"] if self.CFG["line_width"] is not None else 2
+        )  # Store line_width for usage

         # Load Model and store classes names
-        self.model = YOLO(self.CFG["model"])
+        self.model = YOLO(self.CFG["model"] if self.CFG["model"] else "yolov8n.pt")
         self.names = self.model.names

         # Initialize environment and region setup
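In `{**DEFAULT_SOL_DICT, **DEFAULT_CFG_DICT}` the right-hand dict wins on key collisions, so a key such as `line_width` that exists in the main config with value `None` overrides the solutions default; that is why the explicit `if ... is not None else 2` fallback is added. A toy illustration (values are hypothetical):

```python
defaults_sol = {"region": None, "line_width": 2}
defaults_cfg = {"line_width": None, "model": None}

merged = {**defaults_sol, **defaults_cfg}  # right-hand operand wins on collisions
print(merged["line_width"])  # None -> hence the explicit `else 2` fallback above
```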
diff --git a/ultralytics/utils/__init__.py b/ultralytics/utils/__init__.py
index 05a4f464b7..d9cd96e3c4 100644
--- a/ultralytics/utils/__init__.py
+++ b/ultralytics/utils/__init__.py
@@ -38,6 +38,7 @@
 FILE = Path(__file__).resolve()
 ROOT = FILE.parents[1]  # YOLO
 ASSETS = ROOT / "assets"  # default images
 DEFAULT_CFG_PATH = ROOT / "cfg/default.yaml"
+DEFAULT_SOL_CFG_PATH = ROOT / "cfg/solutions/default.yaml"  # Ultralytics solutions yaml path
 NUM_THREADS = min(8, max(1, os.cpu_count() - 1))  # number of YOLO multiprocessing threads
 AUTOINSTALL = str(os.getenv("YOLO_AUTOINSTALL", True)).lower() == "true"  # global auto-install mode
 VERBOSE = str(os.getenv("YOLO_VERBOSE", True)).lower() == "true"  # global verbose mode
@@ -508,6 +509,7 @@ def yaml_print(yaml_file: Union[str, Path, dict]) -> None:

 # Default configuration
 DEFAULT_CFG_DICT = yaml_load(DEFAULT_CFG_PATH)
+DEFAULT_SOL_DICT = yaml_load(DEFAULT_SOL_CFG_PATH)  # Ultralytics solutions configuration
 for k, v in DEFAULT_CFG_DICT.items():
     if isinstance(v, str) and v.lower() == "none":
         DEFAULT_CFG_DICT[k] = None
@@ -566,12 +568,16 @@ def is_kaggle():

 def is_jupyter():
     """
-    Check if the current script is running inside a Jupyter Notebook. Verified on Colab, Jupyterlab, Kaggle, Paperspace.
+    Check if the current script is running inside a Jupyter Notebook.

     Returns:
         (bool): True if running inside a Jupyter Notebook, False otherwise.
+
+    Note:
+        - Only works on Colab and Kaggle, other environments like Jupyterlab and Paperspace are not reliably detectable.
+        - "get_ipython" in globals() method suffers false positives when IPython package installed manually.
     """
-    return "get_ipython" in globals()
+    return IS_COLAB or IS_KAGGLE


 def is_docker() -> bool:
@@ -799,10 +805,10 @@ def get_user_config_dir(sub_dir="Ultralytics"):
 PROC_DEVICE_MODEL = read_device_model()  # is_jetson() and is_raspberrypi() depend on this constant
 ONLINE = is_online()
 IS_COLAB = is_colab()
+IS_KAGGLE = is_kaggle()
 IS_DOCKER = is_docker()
 IS_JETSON = is_jetson()
 IS_JUPYTER = is_jupyter()
-IS_KAGGLE = is_kaggle()
 IS_PIP_PACKAGE = is_pip_package()
 IS_RASPBERRYPI = is_raspberrypi()
 GIT_DIR = get_git_dir()
@@ -1193,7 +1199,7 @@ class SettingsManager(JSONDict):
             "neptune": True,  # Neptune integration
             "raytune": True,  # Ray Tune integration
             "tensorboard": True,  # TensorBoard logging
-            "wandb": True,  # Weights & Biases logging
+            "wandb": False,  # Weights & Biases logging
             "vscode_msg": True,  # VSCode messaging
         }
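Two behavior changes worth noting here: `is_jupyter()` now reports True only on Colab and Kaggle (which is why `IS_KAGGLE` must be computed before `IS_JUPYTER`, hence the reordering), and the default for Weights & Biases logging flips to `False`. Users can opt back in through the settings interface (sketch using the documented API):

```python
from ultralytics import settings

settings.update({"wandb": True})  # re-enable Weights & Biases logging
print(settings["wandb"])
```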
""" - return "get_ipython" in globals() + return IS_COLAB or IS_KAGGLE def is_docker() -> bool: @@ -799,10 +805,10 @@ def get_user_config_dir(sub_dir="Ultralytics"): PROC_DEVICE_MODEL = read_device_model() # is_jetson() and is_raspberrypi() depend on this constant ONLINE = is_online() IS_COLAB = is_colab() +IS_KAGGLE = is_kaggle() IS_DOCKER = is_docker() IS_JETSON = is_jetson() IS_JUPYTER = is_jupyter() -IS_KAGGLE = is_kaggle() IS_PIP_PACKAGE = is_pip_package() IS_RASPBERRYPI = is_raspberrypi() GIT_DIR = get_git_dir() @@ -1193,7 +1199,7 @@ class SettingsManager(JSONDict): "neptune": True, # Neptune integration "raytune": True, # Ray Tune integration "tensorboard": True, # TensorBoard logging - "wandb": True, # Weights & Biases logging + "wandb": False, # Weights & Biases logging "vscode_msg": True, # VSCode messaging } diff --git a/ultralytics/utils/checks.py b/ultralytics/utils/checks.py index 60faef2c4b..9591d3dea2 100644 --- a/ultralytics/utils/checks.py +++ b/ultralytics/utils/checks.py @@ -335,7 +335,7 @@ def check_font(font="Arial.ttf"): return file -def check_python(minimum: str = "3.8.0", hard: bool = True, verbose: bool = True) -> bool: +def check_python(minimum: str = "3.8.0", hard: bool = True, verbose: bool = False) -> bool: """ Check current python version against the required minimum version.