diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index c54ee4494c..796a08968a 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -56,7 +56,7 @@ jobs: shell: bash # for Windows compatibility run: | python -m pip install --upgrade pip wheel - pip install -e . --extra-index-url https://download.pytorch.org/whl/cpu + pip install . --extra-index-url https://download.pytorch.org/whl/cpu - name: Check environment run: | yolo checks @@ -213,7 +213,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Install requirements - run: pip install -e . pytest-cov + run: pip install . pytest-cov - name: Check environment run: | yolo checks diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml index 8a3d41a91a..0f0fe5e942 100644 --- a/.github/workflows/docker.yaml +++ b/.github/workflows/docker.yaml @@ -81,6 +81,8 @@ jobs: # - dockerfile: "Dockerfile-conda" # tags: "latest-conda" # platforms: "linux/amd64" + outputs: + new_release: ${{ steps.check_tag.outputs.new_release }} steps: - name: Cleanup disk # Free up to 30GB of disk space per https://github.com/ultralytics/ultralytics/pull/15848 @@ -111,7 +113,6 @@ jobs: VERSION=$(grep "^__version__ =" ultralytics/__init__.py | awk -F'"' '{print $2}') echo "Retrieved Ultralytics version: $VERSION" echo "version=$VERSION" >> $GITHUB_OUTPUT - VERSION_TAG=$(echo "${{ matrix.tags }}" | sed "s/latest/${VERSION}/") echo "Intended version tag: $VERSION_TAG" echo "version_tag=$VERSION_TAG" >> $GITHUB_OUTPUT @@ -123,13 +124,13 @@ jobs: MESSAGE=$(echo $RESPONSE | jq -r '.message') if [[ "$MESSAGE" == "null" ]]; then echo "Tag $VERSION_TAG already exists on DockerHub." - echo "exists=true" >> $GITHUB_OUTPUT + echo "new_release=false" >> $GITHUB_OUTPUT elif [[ "$MESSAGE" == *"404"* ]]; then echo "Tag $VERSION_TAG does not exist on DockerHub." - echo "exists=false" >> $GITHUB_OUTPUT + echo "new_release=true" >> $GITHUB_OUTPUT else echo "Unexpected response from DockerHub. Please check manually." 
- echo "exists=false" >> $GITHUB_OUTPUT + echo "new_release=false" >> $GITHUB_OUTPUT fi env: VERSION_TAG: ${{ steps.get_version.outputs.version_tag }} @@ -159,7 +160,7 @@ jobs: run: docker run ultralytics/ultralytics:${{ matrix.tags }} yolo benchmark model=yolo11n.pt imgsz=160 verbose=0.309 - name: Push Docker Image with Ultralytics version tag - if: (github.event_name == 'push' || (github.event.inputs[matrix.dockerfile] == 'true' && github.event.inputs.push == 'true')) && steps.check_tag.outputs.exists == 'false' && matrix.dockerfile != 'Dockerfile-conda' + if: (github.event_name == 'push' || (github.event.inputs[matrix.dockerfile] == 'true' && github.event.inputs.push == 'true')) && steps.check_tag.outputs.new_release == 'true' && matrix.dockerfile != 'Dockerfile-conda' run: | docker push ultralytics/ultralytics:${{ steps.get_version.outputs.version_tag }} @@ -173,8 +174,27 @@ jobs: docker push $t fi - - name: Notify on failure - if: github.event_name == 'push' && failure() # do not notify on cancelled() as cancelling is performed by hand + trigger-actions: + runs-on: ubuntu-latest + needs: docker + # Only trigger actions on new Ultralytics releases + if: success() && github.repository == 'ultralytics/ultralytics' && github.event_name == 'push' && needs.docker.outputs.new_release == 'true' + steps: + - name: Trigger Additional GitHub Actions + env: + GH_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN }} + run: | + gh workflow run deploy_cloud_run.yml \ + --repo ultralytics/assistant \ + --ref main + + notify: + runs-on: ubuntu-latest + needs: [docker, trigger-actions] + if: always() + steps: + - name: Check for failure and notify + if: needs.docker.result == 'failure' && github.repository == 'ultralytics/ultralytics' && github.event_name == 'push' uses: slackapi/slack-github-action@v1.27.0 with: payload: | diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 8276a7696a..d59dd901ab 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -17,6 +17,8 @@ jobs: if: github.repository == 'ultralytics/ultralytics' && github.actor == 'glenn-jocher' name: Publish runs-on: ubuntu-latest + permissions: + id-token: write # for PyPI trusted publishing steps: - name: Checkout code uses: actions/checkout@v4 @@ -85,12 +87,13 @@ jobs: if publish: print('Ready to publish new version to PyPI ✅.') id: check_pypi + - name: Build package + if: (github.event_name == 'push' || github.event.inputs.pypi == 'true') && steps.check_pypi.outputs.increment == 'True' + run: python -m build - name: Publish to PyPI continue-on-error: true - if: (github.event_name == 'push' || github.event.inputs.pypi == 'true') && steps.check_pypi.outputs.increment == 'True' - run: | - python -m build - python -m twine upload dist/* -u __token__ -p ${{ secrets.PYPI_TOKEN }} + if: (github.event_name == 'push' || github.event.inputs.pypi == 'true') && steps.check_pypi.outputs.increment == 'True' + uses: pypa/gh-action-pypi-publish@release/v1 - name: Publish new tag if: (github.event_name == 'push' || github.event.inputs.pypi == 'true') && steps.check_pypi.outputs.increment == 'True' run: | diff --git a/README.md b/README.md index b39b089a2e..cce98e50b0 100644 --- a/README.md +++ b/README.md @@ -10,8 +10,8 @@ Ultralytics CI Ultralytics YOLO Citation Ultralytics Docker Pulls - Ultralytics Discord - Ultralytics Forums + Ultralytics Discord + Ultralytics Forums Ultralytics Reddit
Run Ultralytics on Gradient @@ -22,7 +22,7 @@ [Ultralytics](https://www.ultralytics.com/) [YOLO11](https://github.com/ultralytics/ultralytics) is a cutting-edge, state-of-the-art (SOTA) model that builds upon the success of previous YOLO versions and introduces new features and improvements to further boost performance and flexibility. YOLO11 is designed to be fast, accurate, and easy to use, making it an excellent choice for a wide range of object detection and tracking, instance segmentation, image classification and pose estimation tasks. -We hope that the resources here will help you get the most out of YOLO. Please browse the Ultralytics Docs for details, raise an issue on GitHub for support, questions, or discussions, become a member of the Ultralytics Discord, Reddit and Forums! +We hope that the resources here will help you get the most out of YOLO. Please browse the Ultralytics Docs for details, raise an issue on GitHub for support, questions, or discussions, become a member of the Ultralytics Discord, Reddit and Forums! To request an Enterprise License please complete the form at [Ultralytics Licensing](https://www.ultralytics.com/license). @@ -41,7 +41,7 @@ To request an Enterprise License please complete the form at [Ultralytics Licens space Ultralytics BiliBili space - Ultralytics Discord + Ultralytics Discord @@ -210,7 +210,7 @@ See [OBB Docs](https://docs.ultralytics.com/tasks/obb/) for usage examples with Our key integrations with leading AI platforms extend the functionality of Ultralytics' offerings, enhancing tasks like dataset labeling, training, visualization, and model management. Discover how Ultralytics, in collaboration with [Roboflow](https://roboflow.com/?ref=ultralytics), ClearML, [Comet](https://bit.ly/yolov8-readme-comet), Neural Magic and [OpenVINO](https://docs.ultralytics.com/integrations/openvino/), can optimize your AI workflow.
- + Ultralytics active learning integrations

@@ -237,7 +237,7 @@ Our key integrations with leading AI platforms extend the functionality of Ultra Experience seamless AI with [Ultralytics HUB](https://www.ultralytics.com/hub) ⭐, the all-in-one solution for data visualization, YOLO11 🚀 model training and deployment, without any coding. Transform images into actionable insights and bring your AI visions to life with ease using our cutting-edge platform and user-friendly [Ultralytics App](https://www.ultralytics.com/app-install). Start your journey for **Free** now! - + Ultralytics HUB preview image ##
Contribute
@@ -274,5 +274,5 @@ For Ultralytics bug reports and feature requests please visit [GitHub Issues](ht space Ultralytics BiliBili space - Ultralytics Discord + Ultralytics Discord diff --git a/README.zh-CN.md b/README.zh-CN.md index e43aba2399..ca49bb8ad1 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -10,8 +10,8 @@ Ultralytics CI Ultralytics YOLO Citation Ultralytics Docker Pulls - Ultralytics Discord - Ultralytics Forums + Ultralytics Discord + Ultralytics Forums Ultralytics Reddit
Run Ultralytics on Gradient @@ -22,7 +22,7 @@ [Ultralytics](https://www.ultralytics.com/) [YOLO11](https://github.com/ultralytics/ultralytics) 是一个尖端的、最先进(SOTA)的模型,基于之前 YOLO 版本的成功,并引入了新功能和改进以进一步提升性能和灵活性。YOLO11 被设计得快速、准确且易于使用,是进行广泛对象检测和跟踪、实例分割、图像分类和姿态估计任务的理想选择。 -我们希望这里的资源能帮助你充分利用 YOLO。请浏览 Ultralytics 文档 以获取详细信息,在 GitHub 上提出问题或讨论,成为 Ultralytics DiscordReddit论坛 的成员! +我们希望这里的资源能帮助你充分利用 YOLO。请浏览 Ultralytics 文档 以获取详细信息,在 GitHub 上提出问题或讨论,成为 Ultralytics DiscordReddit论坛 的成员! 想申请企业许可证,请完成 [Ultralytics Licensing](https://www.ultralytics.com/license) 上的表单。 @@ -41,7 +41,7 @@ space Ultralytics BiliBili space - Ultralytics Discord + Ultralytics Discord @@ -210,7 +210,7 @@ YOLO11 [检测](https://docs.ultralytics.com/tasks/detect/)、[分割](https://d 我们与领先的 AI 平台的关键集成扩展了 Ultralytics 产品的功能,增强了数据集标记、训练、可视化和模型管理等任务的能力。了解 Ultralytics 如何与 [Roboflow](https://roboflow.com/?ref=ultralytics)、ClearML、[Comet](https://bit.ly/yolov8-readme-comet)、Neural Magic 和 [OpenVINO](https://docs.ultralytics.com/integrations/openvino/) 合作,优化您的 AI 工作流程。
- + Ultralytics active learning integrations

@@ -237,7 +237,7 @@ YOLO11 [检测](https://docs.ultralytics.com/tasks/detect/)、[分割](https://d 体验无缝 AI 使用 [Ultralytics HUB](https://www.ultralytics.com/hub) ⭐,一个集数据可视化、YOLO11 🚀 模型训练和部署于一体的解决方案,无需编写代码。利用我们最先进的平台和用户友好的 [Ultralytics 应用](https://www.ultralytics.com/app-install),将图像转换为可操作见解,并轻松实现您的 AI 愿景。免费开始您的旅程! - + Ultralytics HUB preview image ##
贡献
@@ -274,5 +274,5 @@ Ultralytics 提供两种许可选项以适应各种用例: space Ultralytics BiliBili space - Ultralytics Discord + Ultralytics Discord diff --git a/docker/Dockerfile b/docker/Dockerfile index 3283c65076..37b0640752 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -3,7 +3,7 @@ # Image is CUDA-optimized for YOLO11 single/multi-GPU training and inference # Start FROM PyTorch image https://hub.docker.com/r/pytorch/pytorch or nvcr.io/nvidia/pytorch:23.03-py3 -FROM pytorch/pytorch:2.3.1-cuda12.1-cudnn8-runtime +FROM pytorch/pytorch:2.4.1-cuda12.1-cudnn9-runtime # Set environment variables # Avoid DDP error "MKL_THREADING_LAYER=INTEL is incompatible with libgomp.so.1 library" https://github.com/pytorch/pytorch/issues/37377 @@ -11,7 +11,8 @@ ENV PYTHONUNBUFFERED=1 \ PYTHONDONTWRITEBYTECODE=1 \ PIP_NO_CACHE_DIR=1 \ PIP_BREAK_SYSTEM_PACKAGES=1 \ - MKL_THREADING_LAYER=GNU + MKL_THREADING_LAYER=GNU \ + OMP_NUM_THREADS=1 # Downloads to user config dir ADD https://github.com/ultralytics/assets/releases/download/v0.0.0/Arial.ttf \ diff --git a/docs/README.md b/docs/README.md index 03285c41b4..a3d3edb40e 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,5 +1,5 @@
-Ultralytics logo +Ultralytics logo # 📚 Ultralytics Docs @@ -10,7 +10,7 @@ [![Check Domains](https://github.com/ultralytics/docs/actions/workflows/check_domains.yml/badge.svg)](https://github.com/ultralytics/docs/actions/workflows/check_domains.yml) [![Ultralytics Actions](https://github.com/ultralytics/docs/actions/workflows/format.yml/badge.svg)](https://github.com/ultralytics/docs/actions/workflows/format.yml) -Discord Ultralytics Forums Ultralytics Reddit +Discord Ultralytics Forums Ultralytics Reddit ## 🛠️ Installation @@ -142,5 +142,5 @@ For Ultralytics bug reports and feature requests please visit [GitHub Issues](ht space Ultralytics BiliBili space - Ultralytics Discord + Ultralytics Discord diff --git a/docs/build_docs.py b/docs/build_docs.py index e342312bd6..483a2dd051 100644 --- a/docs/build_docs.py +++ b/docs/build_docs.py @@ -226,7 +226,7 @@ def remove_macros(): # Create a set of indices to remove (including lines before and after) indices_to_remove = set() for i in macros_indices: - indices_to_remove.update(range(i - 1, i + 4)) # i-1, i, i+1, i+2, i+3 + indices_to_remove.update(range(i - 1, i + 3)) # i-1, i, i+1, i+2 # Create new list of lines, excluding the ones to remove new_lines = [line for i, line in enumerate(lines) if i not in indices_to_remove] diff --git a/docs/en/datasets/classify/index.md b/docs/en/datasets/classify/index.md index 700f4af35a..e8876ce9eb 100644 --- a/docs/en/datasets/classify/index.md +++ b/docs/en/datasets/classify/index.md @@ -113,6 +113,7 @@ Ultralytics supports the following datasets with automatic download: - [Imagenette](imagenette.md): A smaller subset of ImageNet that contains 10 easily distinguishable classes for quicker training and testing. - [Imagewoof](imagewoof.md): A more challenging subset of ImageNet containing 10 dog breed categories for image classification tasks. - [MNIST](mnist.md): A dataset of 70,000 grayscale images of handwritten digits for image classification tasks. +- [MNIST160](mnist.md): First 8 images of each MNIST category from the MNIST dataset. Dataset contains 160 images total. ### Adding your own dataset diff --git a/docs/en/datasets/explorer/api.md b/docs/en/datasets/explorer/api.md index a3e525daa1..b2a7438adc 100644 --- a/docs/en/datasets/explorer/api.md +++ b/docs/en/datasets/explorer/api.md @@ -6,6 +6,10 @@ keywords: Ultralytics, Explorer API, dataset exploration, SQL queries, similarit # Ultralytics Explorer API +!!! warning "Community Note ⚠️" + + As of **`ultralytics>=8.3.10`**, Ultralytics explorer support has been deprecated. But don't worry! You can now access similar and even enhanced functionality through [Ultralytics HUB](https://hub.ultralytics.com/), our intuitive no-code platform designed to streamline your workflow. With Ultralytics HUB, you can continue exploring, visualizing, and managing your data effortlessly, all without writing a single line of code. Make sure to check it out and take advantage of its powerful features!🚀 + ## Introduction Open In Colab diff --git a/docs/en/datasets/explorer/dashboard.md b/docs/en/datasets/explorer/dashboard.md index 3bc3a21e46..92c1ba78b3 100644 --- a/docs/en/datasets/explorer/dashboard.md +++ b/docs/en/datasets/explorer/dashboard.md @@ -6,6 +6,10 @@ keywords: Ultralytics Explorer GUI, semantic search, vector similarity, SQL quer # Explorer GUI +!!! warning "Community Note ⚠️" + + As of **`ultralytics>=8.3.10`**, Ultralytics explorer support has been deprecated. But don't worry! 
You can now access similar and even enhanced functionality through [Ultralytics HUB](https://hub.ultralytics.com/), our intuitive no-code platform designed to streamline your workflow. With Ultralytics HUB, you can continue exploring, visualizing, and managing your data effortlessly, all without writing a single line of code. Make sure to check it out and take advantage of its powerful features!🚀 + Explorer GUI is like a playground built using [Ultralytics Explorer API](api.md). It allows you to run semantic/vector similarity search, SQL queries and even search using natural language with our Ask AI feature powered by LLMs.

diff --git a/docs/en/datasets/explorer/explorer.ipynb b/docs/en/datasets/explorer/explorer.ipynb index c0fac941a8..42da7a61e3 100644 --- a/docs/en/datasets/explorer/explorer.ipynb +++ b/docs/en/datasets/explorer/explorer.ipynb @@ -30,6 +30,18 @@ "" ] }, + { + "cell_type": "markdown", + "source": [ + "## Ultralytics Explorer support deprecated ⚠️\n", + "\n", + "As of **`ultralytics>=8.3.10`**, Ultralytics explorer support has been deprecated. But don’t worry! You can now access similar and even enhanced functionality through [Ultralytics HUB](https://hub.ultralytics.com/), our intuitive no-code platform designed to streamline your workflow. With Ultralytics HUB, you can continue exploring, visualizing, and managing your data effortlessly, all without writing a single line of code. Make sure to check it out and take advantage of its powerful features!🚀" + ], + "metadata": { + "id": "RHe1PX5c7uK2" + }, + "id": "RHe1PX5c7uK2" + }, { "cell_type": "markdown", "id": "2454d9ba-9db4-4b37-98e8-201ba285c92f", diff --git a/docs/en/datasets/explorer/index.md b/docs/en/datasets/explorer/index.md index d7e7ab66d4..6db5fa1673 100644 --- a/docs/en/datasets/explorer/index.md +++ b/docs/en/datasets/explorer/index.md @@ -6,6 +6,10 @@ keywords: Ultralytics Explorer, CV datasets, semantic search, SQL queries, vecto # Ultralytics Explorer +!!! warning "Community Note ⚠️" + + As of **`ultralytics>=8.3.10`**, Ultralytics explorer support has been deprecated. But don't worry! You can now access similar and even enhanced functionality through [Ultralytics HUB](https://hub.ultralytics.com/), our intuitive no-code platform designed to streamline your workflow. With Ultralytics HUB, you can continue exploring, visualizing, and managing your data effortlessly, all without writing a single line of code. Make sure to check it out and take advantage of its powerful features!🚀 +

Ultralytics Explorer Screenshot 1

diff --git a/docs/en/datasets/index.md b/docs/en/datasets/index.md index a53d2040d1..9d7a10ed7e 100644 --- a/docs/en/datasets/index.md +++ b/docs/en/datasets/index.md @@ -19,7 +19,7 @@ Ultralytics provides support for various datasets to facilitate computer vision Watch: Ultralytics Datasets Overview

-## NEW 🚀 Ultralytics Explorer +## Ultralytics Explorer 🚀 NEW Create [embeddings](https://www.ultralytics.com/glossary/embeddings) for your dataset, search for similar images, run SQL queries, perform semantic search and even search using natural language! You can get started with our GUI app or build your own using the API. Learn more [here](explorer/index.md). @@ -85,6 +85,7 @@ Pose estimation is a technique used to determine the pose of the object relative - [Imagenette](classify/imagenette.md): A smaller subset of ImageNet that contains 10 easily distinguishable classes for quicker training and testing. - [Imagewoof](classify/imagewoof.md): A more challenging subset of ImageNet containing 10 dog breed categories for image classification tasks. - [MNIST](classify/mnist.md): A dataset of 70,000 grayscale images of handwritten digits for image classification tasks. +- [MNIST160](classify/mnist.md): First 8 images of each MNIST category from the MNIST dataset. Dataset contains 160 images total. ## [Oriented Bounding Boxes (OBB)](obb/index.md) diff --git a/docs/en/datasets/pose/hand-keypoints.md b/docs/en/datasets/pose/hand-keypoints.md index 86548a0233..dd3c19b1a4 100644 --- a/docs/en/datasets/pose/hand-keypoints.md +++ b/docs/en/datasets/pose/hand-keypoints.md @@ -8,7 +8,7 @@ keywords: Hand KeyPoints, pose estimation, dataset, keypoints, MediaPipe, YOLO, ## Introduction -The hand-keypoints dataset contains 26,768 images of hands annotated with keypoints, making it suitable for training models like Ultralytics YOLO for pose estimation tasks. The annotations were generated using the Google MediaPipe library, ensuring high accuracy and consistency, and the dataset is compatible [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics) formats. +The hand-keypoints dataset contains 26,768 images of hands annotated with keypoints, making it suitable for training models like Ultralytics YOLO for pose estimation tasks. The annotations were generated using the Google MediaPipe library, ensuring high [accuracy](https://www.ultralytics.com/glossary/accuracy) and consistency, and the dataset is compatible with [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics) formats. 
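A minimal training sketch for this dataset could look like the following; it is illustrative only, with the `hand-keypoints.yaml` config name and `yolo11n-pose.pt` weights assumed from the surrounding Ultralytics pose docs.

```python
from ultralytics import YOLO

# Illustrative sketch: fine-tune a pretrained YOLO11 pose model on the hand-keypoints dataset.
# "hand-keypoints.yaml" and "yolo11n-pose.pt" are assumed names; adjust to your setup.
model = YOLO("yolo11n-pose.pt")  # load pretrained pose weights
results = model.train(data="hand-keypoints.yaml", epochs=100, imgsz=640)  # train on the hand-keypoints dataset
```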
## Hand Landmarks diff --git a/docs/en/guides/analytics.md b/docs/en/guides/analytics.md index 1b7049c601..d073cd25b5 100644 --- a/docs/en/guides/analytics.md +++ b/docs/en/guides/analytics.md @@ -40,103 +40,32 @@ This guide provides a comprehensive overview of three fundamental types of [data ```python import cv2 - from ultralytics import YOLO, solutions - - model = YOLO("yolo11n.pt") + from ultralytics import solutions cap = cv2.VideoCapture("Path/to/video/file.mp4") assert cap.isOpened(), "Error reading video file" - w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) - out = cv2.VideoWriter("line_plot.avi", cv2.VideoWriter_fourcc(*"MJPG"), fps, (w, h)) + w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) - analytics = solutions.Analytics( - type="line", - writer=out, - im0_shape=(w, h), - view_img=True, + out = cv2.VideoWriter( + "ultralytics_analytics.avi", + cv2.VideoWriter_fourcc(*"MJPG"), + fps, + (1920, 1080), # This is fixed ) - total_counts = 0 - frame_count = 0 - - while cap.isOpened(): - success, frame = cap.read() - - if success: - frame_count += 1 - results = model.track(frame, persist=True, verbose=True) - - if results[0].boxes.id is not None: - boxes = results[0].boxes.xyxy.cpu() - for box in boxes: - total_counts += 1 - - analytics.update_line(frame_count, total_counts) - - total_counts = 0 - if cv2.waitKey(1) & 0xFF == ord("q"): - break - else: - break - - cap.release() - out.release() - cv2.destroyAllWindows() - ``` - - === "Multiple Lines" - - ```python - import cv2 - - from ultralytics import YOLO, solutions - - model = YOLO("yolo11n.pt") - - cap = cv2.VideoCapture("Path/to/video/file.mp4") - assert cap.isOpened(), "Error reading video file" - w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) - out = cv2.VideoWriter("multiple_line_plot.avi", cv2.VideoWriter_fourcc(*"MJPG"), fps, (w, h)) analytics = solutions.Analytics( - type="line", - writer=out, - im0_shape=(w, h), - view_img=True, - max_points=200, + analytics_type="line", + show=True, ) frame_count = 0 - data = {} - labels = [] - while cap.isOpened(): - success, frame = cap.read() - + success, im0 = cap.read() if success: frame_count += 1 - - results = model.track(frame, persist=True) - - if results[0].boxes.id is not None: - boxes = results[0].boxes.xyxy.cpu() - track_ids = results[0].boxes.id.int().cpu().tolist() - clss = results[0].boxes.cls.cpu().tolist() - - for box, track_id, cls in zip(boxes, track_ids, clss): - # Store each class label - if model.names[int(cls)] not in labels: - labels.append(model.names[int(cls)]) - - # Store each class count - if model.names[int(cls)] in data: - data[model.names[int(cls)]] += 1 - else: - data[model.names[int(cls)]] = 0 - - # update lines every frame - analytics.update_multiple_lines(data, labels, frame_count) - data = {} # clear the data list for next frame + im0 = analytics.process_data(im0, frame_count) # update analytics graph every frame + out.write(im0) # write the video file else: break @@ -150,43 +79,32 @@ This guide provides a comprehensive overview of three fundamental types of [data ```python import cv2 - from ultralytics import YOLO, solutions - - model = YOLO("yolo11n.pt") + from ultralytics import solutions cap = cv2.VideoCapture("Path/to/video/file.mp4") assert cap.isOpened(), "Error reading video file" + w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, 
cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) - out = cv2.VideoWriter("pie_chart.avi", cv2.VideoWriter_fourcc(*"MJPG"), fps, (w, h)) + out = cv2.VideoWriter( + "ultralytics_analytics.avi", + cv2.VideoWriter_fourcc(*"MJPG"), + fps, + (1920, 1080), # This is fixed + ) analytics = solutions.Analytics( - type="pie", - writer=out, - im0_shape=(w, h), - view_img=True, + analytics_type="pie", + show=True, ) - clswise_count = {} - + frame_count = 0 while cap.isOpened(): - success, frame = cap.read() + success, im0 = cap.read() if success: - results = model.track(frame, persist=True, verbose=True) - if results[0].boxes.id is not None: - boxes = results[0].boxes.xyxy.cpu() - clss = results[0].boxes.cls.cpu().tolist() - for box, cls in zip(boxes, clss): - if model.names[int(cls)] in clswise_count: - clswise_count[model.names[int(cls)]] += 1 - else: - clswise_count[model.names[int(cls)]] = 1 - - analytics.update_pie(clswise_count) - clswise_count = {} - - if cv2.waitKey(1) & 0xFF == ord("q"): - break + frame_count += 1 + im0 = analytics.process_data(im0, frame_count) # update analytics graph every frame + out.write(im0) # write the video file else: break @@ -200,43 +118,32 @@ This guide provides a comprehensive overview of three fundamental types of [data ```python import cv2 - from ultralytics import YOLO, solutions - - model = YOLO("yolo11n.pt") + from ultralytics import solutions cap = cv2.VideoCapture("Path/to/video/file.mp4") assert cap.isOpened(), "Error reading video file" + w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) - out = cv2.VideoWriter("bar_plot.avi", cv2.VideoWriter_fourcc(*"MJPG"), fps, (w, h)) + out = cv2.VideoWriter( + "ultralytics_analytics.avi", + cv2.VideoWriter_fourcc(*"MJPG"), + fps, + (1920, 1080), # This is fixed + ) analytics = solutions.Analytics( - type="bar", - writer=out, - im0_shape=(w, h), - view_img=True, + analytics_type="bar", + show=True, ) - clswise_count = {} - + frame_count = 0 while cap.isOpened(): - success, frame = cap.read() + success, im0 = cap.read() if success: - results = model.track(frame, persist=True, verbose=True) - if results[0].boxes.id is not None: - boxes = results[0].boxes.xyxy.cpu() - clss = results[0].boxes.cls.cpu().tolist() - for box, cls in zip(boxes, clss): - if model.names[int(cls)] in clswise_count: - clswise_count[model.names[int(cls)]] += 1 - else: - clswise_count[model.names[int(cls)]] = 1 - - analytics.update_bar(clswise_count) - clswise_count = {} - - if cv2.waitKey(1) & 0xFF == ord("q"): - break + frame_count += 1 + im0 = analytics.process_data(im0, frame_count) # update analytics graph every frame + out.write(im0) # write the video file else: break @@ -250,46 +157,32 @@ This guide provides a comprehensive overview of three fundamental types of [data ```python import cv2 - from ultralytics import YOLO, solutions + from ultralytics import solutions - model = YOLO("yolo11n.pt") - - cap = cv2.VideoCapture("path/to/video/file.mp4") + cap = cv2.VideoCapture("Path/to/video/file.mp4") assert cap.isOpened(), "Error reading video file" + w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) - out = cv2.VideoWriter("area_plot.avi", cv2.VideoWriter_fourcc(*"MJPG"), fps, (w, h)) + out = cv2.VideoWriter( + "ultralytics_analytics.avi", + cv2.VideoWriter_fourcc(*"MJPG"), + fps, + (1920, 1080), # This is fixed + ) analytics = solutions.Analytics( - type="area", - writer=out, - im0_shape=(w, h), - view_img=True, + 
analytics_type="area", + show=True, ) - clswise_count = {} frame_count = 0 - while cap.isOpened(): - success, frame = cap.read() + success, im0 = cap.read() if success: frame_count += 1 - results = model.track(frame, persist=True, verbose=True) - - if results[0].boxes.id is not None: - boxes = results[0].boxes.xyxy.cpu() - clss = results[0].boxes.cls.cpu().tolist() - - for box, cls in zip(boxes, clss): - if model.names[int(cls)] in clswise_count: - clswise_count[model.names[int(cls)]] += 1 - else: - clswise_count[model.names[int(cls)]] = 1 - - analytics.update_area(frame_count, clswise_count) - clswise_count = {} - if cv2.waitKey(1) & 0xFF == ord("q"): - break + im0 = analytics.process_data(im0, frame_count) # update analytics graph every frame + out.write(im0) # write the video file else: break @@ -302,23 +195,12 @@ This guide provides a comprehensive overview of three fundamental types of [data Here's a table with the `Analytics` arguments: -| Name | Type | Default | Description | -| -------------- | ----------------- | ------------- | -------------------------------------------------------------------------------- | -| `type` | `str` | `None` | Type of data or object. | -| `im0_shape` | `tuple` | `None` | Shape of the initial image. | -| `writer` | `cv2.VideoWriter` | `None` | Object for writing video files. | -| `title` | `str` | `ultralytics` | Title for the visualization. | -| `x_label` | `str` | `x` | Label for the x-axis. | -| `y_label` | `str` | `y` | Label for the y-axis. | -| `bg_color` | `str` | `white` | Background color. | -| `fg_color` | `str` | `black` | Foreground color. | -| `line_color` | `str` | `yellow` | Color of the lines. | -| `line_width` | `int` | `2` | Width of the lines. | -| `fontsize` | `int` | `13` | Font size for text. | -| `view_img` | `bool` | `False` | Flag to display the image or video. | -| `save_img` | `bool` | `True` | Flag to save the image or video. | -| `max_points` | `int` | `50` | For multiple lines, total points drawn on frame, before deleting initial points. | -| `points_width` | `int` | `15` | Width of line points highlighter. | +| Name | Type | Default | Description | +| ---------------- | ------ | ------- | ---------------------------------------------------- | +| `analytics_type` | `str` | `line` | Type of graph i.e "line", "bar", "area", "pie" | +| `model` | `str` | `None` | Path to Ultralytics YOLO Model File | +| `line_width` | `int` | `2` | Line thickness for bounding boxes. | +| `show` | `bool` | `False` | Flag to control whether to display the video stream. 
| ### Arguments `model.track` @@ -344,21 +226,33 @@ Example: ```python import cv2 -from ultralytics import YOLO, solutions +from ultralytics import solutions -model = YOLO("yolo11n.pt") cap = cv2.VideoCapture("Path/to/video/file.mp4") -out = cv2.VideoWriter("line_plot.avi", cv2.VideoWriter_fourcc(*"MJPG"), fps, (w, h)) +assert cap.isOpened(), "Error reading video file" + +w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) -analytics = solutions.Analytics(type="line", writer=out, im0_shape=(w, h), view_img=True) +out = cv2.VideoWriter( + "ultralytics_analytics.avi", + cv2.VideoWriter_fourcc(*"MJPG"), + fps, + (1920, 1080), # This is fixed +) +analytics = solutions.Analytics( + analytics_type="line", + show=True, +) + +frame_count = 0 while cap.isOpened(): - success, frame = cap.read() + success, im0 = cap.read() if success: - results = model.track(frame, persist=True) - total_counts = sum([1 for box in results[0].boxes.xyxy]) - analytics.update_line(frame_count, total_counts) - if cv2.waitKey(1) & 0xFF == ord("q"): + frame_count += 1 + im0 = analytics.process_data(im0, frame_count) # update analytics graph every frame + out.write(im0) # write the video file + else: break cap.release() @@ -382,24 +276,33 @@ Use the following example to generate a bar plot: ```python import cv2 -from ultralytics import YOLO, solutions +from ultralytics import solutions -model = YOLO("yolo11n.pt") cap = cv2.VideoCapture("Path/to/video/file.mp4") -out = cv2.VideoWriter("bar_plot.avi", cv2.VideoWriter_fourcc(*"MJPG"), fps, (w, h)) +assert cap.isOpened(), "Error reading video file" + +w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) -analytics = solutions.Analytics(type="bar", writer=out, im0_shape=(w, h), view_img=True) +out = cv2.VideoWriter( + "ultralytics_analytics.avi", + cv2.VideoWriter_fourcc(*"MJPG"), + fps, + (1920, 1080), # This is fixed +) +analytics = solutions.Analytics( + analytics_type="bar", + show=True, +) + +frame_count = 0 while cap.isOpened(): - success, frame = cap.read() + success, im0 = cap.read() if success: - results = model.track(frame, persist=True) - clswise_count = { - model.names[int(cls)]: boxes.size(0) - for cls, boxes in zip(results[0].boxes.cls.tolist(), results[0].boxes.xyxy) - } - analytics.update_bar(clswise_count) - if cv2.waitKey(1) & 0xFF == ord("q"): + frame_count += 1 + im0 = analytics.process_data(im0, frame_count) # update analytics graph every frame + out.write(im0) # write the video file + else: break cap.release() @@ -423,24 +326,33 @@ Here's a quick example: ```python import cv2 -from ultralytics import YOLO, solutions +from ultralytics import solutions -model = YOLO("yolo11n.pt") cap = cv2.VideoCapture("Path/to/video/file.mp4") -out = cv2.VideoWriter("pie_chart.avi", cv2.VideoWriter_fourcc(*"MJPG"), fps, (w, h)) +assert cap.isOpened(), "Error reading video file" + +w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) -analytics = solutions.Analytics(type="pie", writer=out, im0_shape=(w, h), view_img=True) +out = cv2.VideoWriter( + "ultralytics_analytics.avi", + cv2.VideoWriter_fourcc(*"MJPG"), + fps, + (1920, 1080), # This is fixed +) +analytics = solutions.Analytics( + analytics_type="pie", + show=True, +) + +frame_count = 0 while cap.isOpened(): - success, frame = cap.read() + success, im0 = cap.read() if success: - results = model.track(frame, persist=True) - clswise_count = { - 
model.names[int(cls)]: boxes.size(0) - for cls, boxes in zip(results[0].boxes.cls.tolist(), results[0].boxes.xyxy) - } - analytics.update_pie(clswise_count) - if cv2.waitKey(1) & 0xFF == ord("q"): + frame_count += 1 + im0 = analytics.process_data(im0, frame_count) # update analytics graph every frame + out.write(im0) # write the video file + else: break cap.release() @@ -459,21 +371,33 @@ Example for tracking and updating a line graph: ```python import cv2 -from ultralytics import YOLO, solutions +from ultralytics import solutions -model = YOLO("yolo11n.pt") cap = cv2.VideoCapture("Path/to/video/file.mp4") -out = cv2.VideoWriter("line_plot.avi", cv2.VideoWriter_fourcc(*"MJPG"), fps, (w, h)) +assert cap.isOpened(), "Error reading video file" + +w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) + +out = cv2.VideoWriter( + "ultralytics_analytics.avi", + cv2.VideoWriter_fourcc(*"MJPG"), + fps, + (1920, 1080), # This is fixed +) -analytics = solutions.Analytics(type="line", writer=out, im0_shape=(w, h), view_img=True) +analytics = solutions.Analytics( + analytics_type="line", + show=True, +) +frame_count = 0 while cap.isOpened(): - success, frame = cap.read() + success, im0 = cap.read() if success: - results = model.track(frame, persist=True) - total_counts = sum([1 for box in results[0].boxes.xyxy]) - analytics.update_line(frame_count, total_counts) - if cv2.waitKey(1) & 0xFF == ord("q"): + frame_count += 1 + im0 = analytics.process_data(im0, frame_count) # update analytics graph every frame + out.write(im0) # write the video file + else: break cap.release() diff --git a/docs/en/guides/heatmaps.md b/docs/en/guides/heatmaps.md index 7d5aad6c2b..f33993134f 100644 --- a/docs/en/guides/heatmaps.md +++ b/docs/en/guides/heatmaps.md @@ -34,11 +34,6 @@ A heatmap generated with [Ultralytics YOLO11](https://github.com/ultralytics/ult | ![Ultralytics YOLO11 Transportation Heatmap](https://github.com/ultralytics/docs/releases/download/0/ultralytics-yolov8-transportation-heatmap.avif) | ![Ultralytics YOLO11 Retail Heatmap](https://github.com/ultralytics/docs/releases/download/0/ultralytics-yolov8-retail-heatmap.avif) | | Ultralytics YOLO11 Transportation Heatmap | Ultralytics YOLO11 Retail Heatmap | -!!! tip "Heatmap Configuration" - - - `heatmap_alpha`: Ensure this value is within the range (0.0 - 1.0). - - `decay_factor`: Used for removing heatmap after an object is no longer in the frame, its value should also be in the range (0.0 - 1.0). - !!! 
example "Heatmaps using Ultralytics YOLO11 Example" === "Heatmap" @@ -46,10 +41,9 @@ A heatmap generated with [Ultralytics YOLO11](https://github.com/ultralytics/ult ```python import cv2 - from ultralytics import YOLO, solutions + from ultralytics import solutions - model = YOLO("yolo11n.pt") - cap = cv2.VideoCapture("path/to/video/file.mp4") + cap = cv2.VideoCapture("Path/to/video/file.mp4") assert cap.isOpened(), "Error reading video file" w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) @@ -57,11 +51,10 @@ A heatmap generated with [Ultralytics YOLO11](https://github.com/ultralytics/ult video_writer = cv2.VideoWriter("heatmap_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h)) # Init heatmap - heatmap_obj = solutions.Heatmap( + heatmap = solutions.Heatmap( + show=True, + model="yolo11n.pt", colormap=cv2.COLORMAP_PARULA, - view_img=True, - shape="circle", - names=model.names, ) while cap.isOpened(): @@ -69,9 +62,7 @@ A heatmap generated with [Ultralytics YOLO11](https://github.com/ultralytics/ult if not success: print("Video frame is empty or video processing has been successfully completed.") break - tracks = model.track(im0, persist=True, show=False) - - im0 = heatmap_obj.generate_heatmap(im0, tracks) + im0 = heatmap.generate_heatmap(im0) video_writer.write(im0) cap.release() @@ -84,25 +75,24 @@ A heatmap generated with [Ultralytics YOLO11](https://github.com/ultralytics/ult ```python import cv2 - from ultralytics import YOLO, solutions + from ultralytics import solutions - model = YOLO("yolo11n.pt") - cap = cv2.VideoCapture("path/to/video/file.mp4") + cap = cv2.VideoCapture("Path/to/video/file.mp4") assert cap.isOpened(), "Error reading video file" w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) # Video writer video_writer = cv2.VideoWriter("heatmap_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h)) - line_points = [(20, 400), (1080, 404)] # line for object counting + # line for object counting + line_points = [(20, 400), (1080, 404)] # Init heatmap - heatmap_obj = solutions.Heatmap( + heatmap = solutions.Heatmap( + show=True, + model="yolo11n.pt", colormap=cv2.COLORMAP_PARULA, - view_img=True, - shape="circle", - count_reg_pts=line_points, - names=model.names, + region=line_points, ) while cap.isOpened(): @@ -110,9 +100,7 @@ A heatmap generated with [Ultralytics YOLO11](https://github.com/ultralytics/ult if not success: print("Video frame is empty or video processing has been successfully completed.") break - - tracks = model.track(im0, persist=True, show=False) - im0 = heatmap_obj.generate_heatmap(im0, tracks) + im0 = heatmap.generate_heatmap(im0) video_writer.write(im0) cap.release() @@ -125,10 +113,9 @@ A heatmap generated with [Ultralytics YOLO11](https://github.com/ultralytics/ult ```python import cv2 - from ultralytics import YOLO, solutions + from ultralytics import solutions - model = YOLO("yolo11n.pt") - cap = cv2.VideoCapture("path/to/video/file.mp4") + cap = cv2.VideoCapture("Path/to/video/file.mp4") assert cap.isOpened(), "Error reading video file" w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) @@ -139,12 +126,11 @@ A heatmap generated with [Ultralytics YOLO11](https://github.com/ultralytics/ult region_points = [(20, 400), (1080, 404), (1080, 360), (20, 360), (20, 400)] # Init heatmap - heatmap_obj = solutions.Heatmap( + heatmap = solutions.Heatmap( + show=True, + 
model="yolo11n.pt", colormap=cv2.COLORMAP_PARULA, - view_img=True, - shape="circle", - count_reg_pts=region_points, - names=model.names, + region=region_points, ) while cap.isOpened(): @@ -152,9 +138,7 @@ A heatmap generated with [Ultralytics YOLO11](https://github.com/ultralytics/ult if not success: print("Video frame is empty or video processing has been successfully completed.") break - - tracks = model.track(im0, persist=True, show=False) - im0 = heatmap_obj.generate_heatmap(im0, tracks) + im0 = heatmap.generate_heatmap(im0) video_writer.write(im0) cap.release() @@ -167,10 +151,9 @@ A heatmap generated with [Ultralytics YOLO11](https://github.com/ultralytics/ult ```python import cv2 - from ultralytics import YOLO, solutions + from ultralytics import solutions - model = YOLO("yolo11n.pt") - cap = cv2.VideoCapture("path/to/video/file.mp4") + cap = cv2.VideoCapture("Path/to/video/file.mp4") assert cap.isOpened(), "Error reading video file" w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) @@ -181,12 +164,11 @@ A heatmap generated with [Ultralytics YOLO11](https://github.com/ultralytics/ult region_points = [(20, 400), (1080, 404), (1080, 360), (20, 360)] # Init heatmap - heatmap_obj = solutions.Heatmap( + heatmap = solutions.Heatmap( + show=True, + model="yolo11n.pt", colormap=cv2.COLORMAP_PARULA, - view_img=True, - shape="circle", - count_reg_pts=region_points, - names=model.names, + region=region_points, ) while cap.isOpened(): @@ -194,9 +176,7 @@ A heatmap generated with [Ultralytics YOLO11](https://github.com/ultralytics/ult if not success: print("Video frame is empty or video processing has been successfully completed.") break - - tracks = model.track(im0, persist=True, show=False) - im0 = heatmap_obj.generate_heatmap(im0, tracks) + im0 = heatmap.generate_heatmap(im0) video_writer.write(im0) cap.release() @@ -204,54 +184,25 @@ A heatmap generated with [Ultralytics YOLO11](https://github.com/ultralytics/ult cv2.destroyAllWindows() ``` - === "Im0" - - ```python - import cv2 - - from ultralytics import YOLO, solutions - - model = YOLO("yolo11n.pt") # YOLO11 custom/pretrained model - - im0 = cv2.imread("path/to/image.png") # path to image file - h, w = im0.shape[:2] # image height and width - - # Heatmap Init - heatmap_obj = solutions.Heatmap( - colormap=cv2.COLORMAP_PARULA, - view_img=True, - shape="circle", - names=model.names, - ) - - results = model.track(im0, persist=True) - im0 = heatmap_obj.generate_heatmap(im0, tracks=results) - cv2.imwrite("ultralytics_output.png", im0) - ``` - === "Specific Classes" ```python import cv2 - from ultralytics import YOLO, solutions + from ultralytics import solutions - model = YOLO("yolo11n.pt") - cap = cv2.VideoCapture("path/to/video/file.mp4") + cap = cv2.VideoCapture("Path/to/video/file.mp4") assert cap.isOpened(), "Error reading video file" w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) # Video writer video_writer = cv2.VideoWriter("heatmap_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h)) - classes_for_heatmap = [0, 2] # classes for heatmap - # Init heatmap - heatmap_obj = solutions.Heatmap( - colormap=cv2.COLORMAP_PARULA, - view_img=True, - shape="circle", - names=model.names, + heatmap = solutions.Heatmap( + show=True, + model="yolo11n.pt", + classes=[0, 2], ) while cap.isOpened(): @@ -259,9 +210,7 @@ A heatmap generated with [Ultralytics YOLO11](https://github.com/ultralytics/ult if not success: print("Video 
frame is empty or video processing has been successfully completed.") break - tracks = model.track(im0, persist=True, show=False, classes=classes_for_heatmap) - - im0 = heatmap_obj.generate_heatmap(im0, tracks) + im0 = heatmap.generate_heatmap(im0) video_writer.write(im0) cap.release() @@ -271,25 +220,14 @@ A heatmap generated with [Ultralytics YOLO11](https://github.com/ultralytics/ult ### Arguments `Heatmap()` -| Name | Type | Default | Description | -| ------------------ | ---------------- | ------------------ | ----------------------------------------------------------------- | -| `names` | `list` | `None` | Dictionary of class names. | -| `imw` | `int` | `0` | Image width. | -| `imh` | `int` | `0` | Image height. | -| `colormap` | `int` | `cv2.COLORMAP_JET` | Colormap to use for the heatmap. | -| `heatmap_alpha` | `float` | `0.5` | Alpha blending value for heatmap overlay. | -| `view_img` | `bool` | `False` | Whether to display the image with the heatmap overlay. | -| `view_in_counts` | `bool` | `True` | Whether to display the count of objects entering the region. | -| `view_out_counts` | `bool` | `True` | Whether to display the count of objects exiting the region. | -| `count_reg_pts` | `list` or `None` | `None` | Points defining the counting region (either a line or a polygon). | -| `count_txt_color` | `tuple` | `(0, 0, 0)` | Text color for displaying counts. | -| `count_bg_color` | `tuple` | `(255, 255, 255)` | Background color for displaying counts. | -| `count_reg_color` | `tuple` | `(255, 0, 255)` | Color for the counting region. | -| `region_thickness` | `int` | `5` | Thickness of the region line. | -| `line_dist_thresh` | `int` | `15` | Distance threshold for line-based counting. | -| `line_thickness` | `int` | `2` | Thickness of the lines used in drawing. | -| `decay_factor` | `float` | `0.99` | Decay factor for the heatmap to reduce intensity over time. | -| `shape` | `str` | `"circle"` | Shape of the heatmap blobs ('circle' or 'rect'). | +| Name | Type | Default | Description | +| ------------ | ------ | ------------------ | ----------------------------------------------------------------- | +| `colormap` | `int` | `cv2.COLORMAP_JET` | Colormap to use for the heatmap. | +| `show` | `bool` | `False` | Whether to display the image with the heatmap overlay. | +| `show_in` | `bool` | `True` | Whether to display the count of objects entering the region. | +| `show_out` | `bool` | `True` | Whether to display the count of objects exiting the region. | +| `region` | `list` | `None` | Points defining the counting region (either a line or a polygon). | +| `line_width` | `int` | `2` | Thickness of the lines used in drawing. 
| ### Arguments `model.track` @@ -337,18 +275,16 @@ Yes, Ultralytics YOLO11 supports object tracking and heatmap generation concurre ```python import cv2 -from ultralytics import YOLO, solutions +from ultralytics import solutions -model = YOLO("yolo11n.pt") cap = cv2.VideoCapture("path/to/video/file.mp4") -heatmap_obj = solutions.Heatmap(colormap=cv2.COLORMAP_PARULA, view_img=True, shape="circle", names=model.names) +heatmap = solutions.Heatmap(colormap=cv2.COLORMAP_PARULA, show=True, model="yolo11n.pt") while cap.isOpened(): success, im0 = cap.read() if not success: break - tracks = model.track(im0, persist=True, show=False) - im0 = heatmap_obj.generate_heatmap(im0, tracks) + im0 = heatmap.generate_heatmap(im0) cv2.imshow("Heatmap", im0) if cv2.waitKey(1) & 0xFF == ord("q"): break @@ -370,19 +306,16 @@ You can visualize specific object classes by specifying the desired classes in t ```python import cv2 -from ultralytics import YOLO, solutions +from ultralytics import solutions -model = YOLO("yolo11n.pt") cap = cv2.VideoCapture("path/to/video/file.mp4") -heatmap_obj = solutions.Heatmap(colormap=cv2.COLORMAP_PARULA, view_img=True, shape="circle", names=model.names) +heatmap = solutions.Heatmap(show=True, model="yolo11n.pt", classes=[0, 2]) -classes_for_heatmap = [0, 2] # Classes to visualize while cap.isOpened(): success, im0 = cap.read() if not success: break - tracks = model.track(im0, persist=True, show=False, classes=classes_for_heatmap) - im0 = heatmap_obj.generate_heatmap(im0, tracks) + im0 = heatmap.generate_heatmap(im0) cv2.imshow("Heatmap", im0) if cv2.waitKey(1) & 0xFF == ord("q"): break diff --git a/docs/en/guides/object-counting.md b/docs/en/guides/object-counting.md index 8467271b38..cefc9ae281 100644 --- a/docs/en/guides/object-counting.md +++ b/docs/en/guides/object-counting.md @@ -53,9 +53,8 @@ Object counting with [Ultralytics YOLO11](https://github.com/ultralytics/ultraly ```python import cv2 - from ultralytics import YOLO, solutions + from ultralytics import solutions - model = YOLO("yolo11n.pt") cap = cv2.VideoCapture("path/to/video/file.mp4") assert cap.isOpened(), "Error reading video file" w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) @@ -68,21 +67,18 @@ Object counting with [Ultralytics YOLO11](https://github.com/ultralytics/ultraly # Init Object Counter counter = solutions.ObjectCounter( - view_img=True, - reg_pts=region_points, - names=model.names, - draw_tracks=True, - line_thickness=2, + show=True, + region=region_points, + model="yolo11n.pt", ) + # Process video while cap.isOpened(): success, im0 = cap.read() if not success: print("Video frame is empty or video processing has been successfully completed.") break - tracks = model.track(im0, persist=True, show=False) - - im0 = counter.start_counting(im0, tracks) + im0 = counter.count(im0) video_writer.write(im0) cap.release() @@ -95,34 +91,32 @@ Object counting with [Ultralytics YOLO11](https://github.com/ultralytics/ultraly ```python import cv2 - from ultralytics import YOLO, solutions + from ultralytics import solutions - model = YOLO("yolo11n-obb.pt") cap = cv2.VideoCapture("path/to/video/file.mp4") assert cap.isOpened(), "Error reading video file" w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) - # Define region points - region_points = [(20, 400), (1080, 404), (1080, 360), (20, 360)] + # line or region points + line_points = [(20, 400), (1080, 400)] # Video writer video_writer = 
cv2.VideoWriter("object_counting_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h)) # Init Object Counter counter = solutions.ObjectCounter( - view_img=True, - reg_pts=region_points, - names=model.names, - line_thickness=2, + show=True, + region=line_points, + model="yolo11n-obb.pt", ) + # Process video while cap.isOpened(): success, im0 = cap.read() if not success: print("Video frame is empty or video processing has been successfully completed.") break - tracks = model.track(im0, persist=True, show=False) - im0 = counter.start_counting(im0, tracks) + im0 = counter.count(im0) video_writer.write(im0) cap.release() @@ -135,14 +129,13 @@ Object counting with [Ultralytics YOLO11](https://github.com/ultralytics/ultraly ```python import cv2 - from ultralytics import YOLO, solutions + from ultralytics import solutions - model = YOLO("yolo11n.pt") cap = cv2.VideoCapture("path/to/video/file.mp4") assert cap.isOpened(), "Error reading video file" w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) - # Define region points as a polygon with 5 points + # Define region points region_points = [(20, 400), (1080, 404), (1080, 360), (20, 360), (20, 400)] # Video writer @@ -150,20 +143,18 @@ Object counting with [Ultralytics YOLO11](https://github.com/ultralytics/ultraly # Init Object Counter counter = solutions.ObjectCounter( - view_img=True, - reg_pts=region_points, - names=model.names, - draw_tracks=True, - line_thickness=2, + show=True, + region=region_points, + model="yolo11n.pt", ) + # Process video while cap.isOpened(): success, im0 = cap.read() if not success: print("Video frame is empty or video processing has been successfully completed.") break - tracks = model.track(im0, persist=True, show=False) - im0 = counter.start_counting(im0, tracks) + im0 = counter.count(im0) video_writer.write(im0) cap.release() @@ -176,14 +167,13 @@ Object counting with [Ultralytics YOLO11](https://github.com/ultralytics/ultraly ```python import cv2 - from ultralytics import YOLO, solutions + from ultralytics import solutions - model = YOLO("yolo11n.pt") cap = cv2.VideoCapture("path/to/video/file.mp4") assert cap.isOpened(), "Error reading video file" w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) - # Define line points + # Define region points line_points = [(20, 400), (1080, 400)] # Video writer @@ -191,20 +181,18 @@ Object counting with [Ultralytics YOLO11](https://github.com/ultralytics/ultraly # Init Object Counter counter = solutions.ObjectCounter( - view_img=True, - reg_pts=line_points, - names=model.names, - draw_tracks=True, - line_thickness=2, + show=True, + region=line_points, + model="yolo11n.pt", ) + # Process video while cap.isOpened(): success, im0 = cap.read() if not success: print("Video frame is empty or video processing has been successfully completed.") break - tracks = model.track(im0, persist=True, show=False) - im0 = counter.start_counting(im0, tracks) + im0 = counter.count(im0) video_writer.write(im0) cap.release() @@ -217,35 +205,29 @@ Object counting with [Ultralytics YOLO11](https://github.com/ultralytics/ultraly ```python import cv2 - from ultralytics import YOLO, solutions + from ultralytics import solutions - model = YOLO("yolo11n.pt") cap = cv2.VideoCapture("path/to/video/file.mp4") assert cap.isOpened(), "Error reading video file" w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) - line_points = 
[(20, 400), (1080, 400)] # line or region points - classes_to_count = [0, 2] # person and car classes for count - # Video writer video_writer = cv2.VideoWriter("object_counting_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h)) # Init Object Counter counter = solutions.ObjectCounter( - view_img=True, - reg_pts=line_points, - names=model.names, - draw_tracks=True, - line_thickness=2, + show=True, + model="yolo11n.pt", + classes=[0, 1], ) + # Process video while cap.isOpened(): success, im0 = cap.read() if not success: print("Video frame is empty or video processing has been successfully completed.") break - tracks = model.track(im0, persist=True, show=False, classes=classes_to_count) - im0 = counter.start_counting(im0, tracks) + im0 = counter.count(im0) video_writer.write(im0) cap.release() @@ -253,23 +235,18 @@ Object counting with [Ultralytics YOLO11](https://github.com/ultralytics/ultraly cv2.destroyAllWindows() ``` -???+ tip "Region is Movable" - - You can move the region anywhere in the frame by clicking on its edges - ### Argument `ObjectCounter` Here's a table with the `ObjectCounter` arguments: -| Name | Type | Default | Description | -| ----------------- | ------ | -------------------------- | ---------------------------------------------------------------------- | -| `names` | `dict` | `None` | Dictionary of classes names. | -| `reg_pts` | `list` | `[(20, 400), (1260, 400)]` | List of points defining the counting region. | -| `line_thickness` | `int` | `2` | Line thickness for bounding boxes. | -| `view_img` | `bool` | `False` | Flag to control whether to display the video stream. | -| `view_in_counts` | `bool` | `True` | Flag to control whether to display the in counts on the video stream. | -| `view_out_counts` | `bool` | `True` | Flag to control whether to display the out counts on the video stream. | -| `draw_tracks` | `bool` | `False` | Flag to control whether to draw the object tracks. | +| Name | Type | Default | Description | +| ------------ | ------ | -------------------------- | ---------------------------------------------------------------------- | +| `model` | `str` | `None` | Path to Ultralytics YOLO Model File | +| `region` | `list` | `[(20, 400), (1260, 400)]` | List of points defining the counting region. | +| `line_width` | `int` | `2` | Line thickness for bounding boxes. | +| `show` | `bool` | `False` | Flag to control whether to display the video stream. | +| `show_in` | `bool` | `True` | Flag to control whether to display the in counts on the video stream. | +| `show_out` | `bool` | `True` | Flag to control whether to display the out counts on the video stream. | ### Arguments `model.track` @@ -282,38 +259,34 @@ Here's a table with the `ObjectCounter` arguments: To count objects in a video using Ultralytics YOLO11, you can follow these steps: 1. Import the necessary libraries (`cv2`, `ultralytics`). -2. Load a pretrained YOLO11 model. -3. Define the counting region (e.g., a polygon, line, etc.). -4. Set up the video capture and initialize the object counter. -5. Process each frame to track objects and count them within the defined region. +2. Define the counting region (e.g., a polygon, line, etc.). +3. Set up the video capture and initialize the object counter. +4. Process each frame to track objects and count them within the defined region. 
Here's a simple example for counting in a region: ```python import cv2 -from ultralytics import YOLO, solutions +from ultralytics import solutions def count_objects_in_region(video_path, output_video_path, model_path): """Count objects in a specific region within a video.""" - model = YOLO(model_path) cap = cv2.VideoCapture(video_path) assert cap.isOpened(), "Error reading video file" w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) - region_points = [(20, 400), (1080, 404), (1080, 360), (20, 360)] video_writer = cv2.VideoWriter(output_video_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h)) - counter = solutions.ObjectCounter( - view_img=True, reg_pts=region_points, names=model.names, draw_tracks=True, line_thickness=2 - ) + + region_points = [(20, 400), (1080, 404), (1080, 360), (20, 360)] + counter = solutions.ObjectCounter(show=True, region=region_points, model=model_path) while cap.isOpened(): success, im0 = cap.read() if not success: print("Video frame is empty or video processing has been successfully completed.") break - tracks = model.track(im0, persist=True, show=False) - im0 = counter.start_counting(im0, tracks) + im0 = counter.count(im0) video_writer.write(im0) cap.release() @@ -343,28 +316,25 @@ To count specific classes of objects using Ultralytics YOLO11, you need to speci ```python import cv2 -from ultralytics import YOLO, solutions +from ultralytics import solutions def count_specific_classes(video_path, output_video_path, model_path, classes_to_count): """Count specific classes of objects in a video.""" - model = YOLO(model_path) cap = cv2.VideoCapture(video_path) assert cap.isOpened(), "Error reading video file" w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) - line_points = [(20, 400), (1080, 400)] video_writer = cv2.VideoWriter(output_video_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h)) - counter = solutions.ObjectCounter( - view_img=True, reg_pts=line_points, names=model.names, draw_tracks=True, line_thickness=2 - ) + + line_points = [(20, 400), (1080, 400)] + counter = solutions.ObjectCounter(show=True, region=line_points, model=model_path, classes=classes_to_count) while cap.isOpened(): success, im0 = cap.read() if not success: print("Video frame is empty or video processing has been successfully completed.") break - tracks = model.track(im0, persist=True, show=False, classes=classes_to_count) - im0 = counter.start_counting(im0, tracks) + im0 = counter.count(im0) video_writer.write(im0) cap.release() diff --git a/docs/en/guides/preprocessing_annotated_data.md b/docs/en/guides/preprocessing_annotated_data.md index bca2268145..62f6969492 100644 --- a/docs/en/guides/preprocessing_annotated_data.md +++ b/docs/en/guides/preprocessing_annotated_data.md @@ -120,6 +120,10 @@ Common tools for visualizations include: ### Using Ultralytics Explorer for EDA +!!! warning "Community Note ⚠️" + + As of **`ultralytics>=8.3.10`**, Ultralytics explorer support has been deprecated. But don't worry! You can now access similar and even enhanced functionality through [Ultralytics HUB](https://hub.ultralytics.com/), our intuitive no-code platform designed to streamline your workflow. With Ultralytics HUB, you can continue exploring, visualizing, and managing your data effortlessly, all without writing a single line of code. 
Make sure to check it out and take advantage of its powerful features!🚀 + For a more advanced approach to EDA, you can use the Ultralytics Explorer tool. It offers robust capabilities for exploring computer vision datasets. By supporting semantic search, SQL queries, and vector similarity search, the tool makes it easy to analyze and understand your data. With Ultralytics Explorer, you can create [embeddings](https://www.ultralytics.com/glossary/embeddings) for your dataset to find similar images, run SQL queries for detailed analysis, and perform semantic searches, all through a user-friendly graphical interface.
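For readers still pinned to an `ultralytics` release earlier than 8.3.10, where the Explorer tool remains available, a minimal sketch of an EDA session might look like the following (the dataset name, model file, image path, and SQL query are placeholders rather than part of the original guide, and the optional explorer dependencies such as lancedb are assumed to be installed):

```python
from ultralytics import Explorer

# Build an embeddings table for the dataset so similarity search and SQL queries can run against it
explorer = Explorer(data="coco8.yaml", model="yolov8n.pt")
explorer.create_embeddings_table()

# Retrieve images similar to a query image (path is a placeholder)
similar = explorer.get_similar(img="path/to/image.jpg", limit=10)

# Filter the dataset with a SQL query for more detailed analysis
results = explorer.sql_query("SELECT * FROM 'table' WHERE labels LIKE '%person%' LIMIT 10")
```

On newer releases, the same exploration workflows are available through Ultralytics HUB as described in the note above.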

diff --git a/docs/en/guides/queue-management.md b/docs/en/guides/queue-management.md index 8f6610bc9e..32cb5b8a4e 100644 --- a/docs/en/guides/queue-management.md +++ b/docs/en/guides/queue-management.md @@ -40,10 +40,9 @@ Queue management using [Ultralytics YOLO11](https://github.com/ultralytics/ultra ```python import cv2 - from ultralytics import YOLO, solutions + from ultralytics import solutions - model = YOLO("yolo11n.pt") - cap = cv2.VideoCapture("path/to/video/file.mp4") + cap = cv2.VideoCapture("Path/to/video/file.mp4") assert cap.isOpened(), "Error reading video file" w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) @@ -53,18 +52,15 @@ Queue management using [Ultralytics YOLO11](https://github.com/ultralytics/ultra queue_region = [(20, 400), (1080, 404), (1080, 360), (20, 360)] queue = solutions.QueueManager( - names=model.names, - reg_pts=queue_region, - line_thickness=3, + model="yolo11n.pt", + region=queue_region, ) while cap.isOpened(): success, im0 = cap.read() if success: - tracks = model.track(im0, persist=True) - out = queue.process_queue(im0, tracks) - + out = queue.process_queue(im0) video_writer.write(im0) if cv2.waitKey(1) & 0xFF == ord("q"): break @@ -82,10 +78,9 @@ Queue management using [Ultralytics YOLO11](https://github.com/ultralytics/ultra ```python import cv2 - from ultralytics import YOLO, solutions + from ultralytics import solutions - model = YOLO("yolo11n.pt") - cap = cv2.VideoCapture("path/to/video/file.mp4") + cap = cv2.VideoCapture("Path/to/video/file.mp4") assert cap.isOpened(), "Error reading video file" w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) @@ -95,18 +90,15 @@ Queue management using [Ultralytics YOLO11](https://github.com/ultralytics/ultra queue_region = [(20, 400), (1080, 404), (1080, 360), (20, 360)] queue = solutions.QueueManager( - names=model.names, - reg_pts=queue_region, - line_thickness=3, + model="yolo11n.pt", + classes=3, ) while cap.isOpened(): success, im0 = cap.read() if success: - tracks = model.track(im0, persist=True, classes=0) # Only person class - out = queue.process_queue(im0, tracks) - + out = queue.process_queue(im0) video_writer.write(im0) if cv2.waitKey(1) & 0xFF == ord("q"): break @@ -121,13 +113,12 @@ Queue management using [Ultralytics YOLO11](https://github.com/ultralytics/ultra ### Arguments `QueueManager` -| Name | Type | Default | Description | -| ---------------- | ---------------- | -------------------------- | -------------------------------------------------------------------------------- | -| `names` | `dict` | `model.names` | A dictionary mapping class IDs to class names. | -| `reg_pts` | `list of tuples` | `[(20, 400), (1260, 400)]` | Points defining the counting region polygon. Defaults to a predefined rectangle. | -| `line_thickness` | `int` | `2` | Thickness of the annotation lines. | -| `view_img` | `bool` | `False` | Whether to display the image frames. | -| `draw_tracks` | `bool` | `False` | Whether to draw tracks of the objects. | +| Name | Type | Default | Description | +| ------------ | ------ | -------------------------- | ---------------------------------------------------- | +| `model` | `str` | `None` | Path to Ultralytics YOLO Model File | +| `region` | `list` | `[(20, 400), (1260, 400)]` | List of points defining the queue region. | +| `line_width` | `int` | `2` | Line thickness for bounding boxes. 
| +| `show` | `bool` | `False` | Flag to control whether to display the video stream. | ### Arguments `model.track` @@ -149,23 +140,21 @@ Here's a minimal example: ```python import cv2 -from ultralytics import YOLO, solutions +from ultralytics import solutions -model = YOLO("yolo11n.pt") cap = cv2.VideoCapture("path/to/video.mp4") queue_region = [(20, 400), (1080, 404), (1080, 360), (20, 360)] queue = solutions.QueueManager( - names=model.names, - reg_pts=queue_region, - line_thickness=3, + model="yolo11n.pt", + region=queue_region, + line_width=3, ) while cap.isOpened(): success, im0 = cap.read() if success: - tracks = model.track(im0, show=False, persist=True, verbose=False) - out = queue.process_queue(im0, tracks) + out = queue.process_queue(im0) cv2.imshow("Queue Management", im0) if cv2.waitKey(1) & 0xFF == ord("q"): break @@ -207,9 +196,9 @@ Example for airports: ```python queue_region_airport = [(50, 600), (1200, 600), (1200, 550), (50, 550)] queue_airport = solutions.QueueManager( - names=model.names, - reg_pts=queue_region_airport, - line_thickness=3, + model="yolo11n.pt", + region=queue_region_airport, + line_width=3, ) ``` diff --git a/docs/en/guides/raspberry-pi.md b/docs/en/guides/raspberry-pi.md index c25557e8a3..96e903b5b3 100644 --- a/docs/en/guides/raspberry-pi.md +++ b/docs/en/guides/raspberry-pi.md @@ -41,7 +41,7 @@ Raspberry Pi is a small, affordable, single-board computer. It has become popula ## What is Raspberry Pi OS? -[Raspberry Pi OS](https://www.raspberrypi.com/software) (formerly known as Raspbian) is a Unix-like operating system based on the Debian GNU/Linux distribution for the Raspberry Pi family of compact single-board computers distributed by the Raspberry Pi Foundation. Raspberry Pi OS is highly optimized for the Raspberry Pi with ARM CPUs and uses a modified LXDE desktop environment with the Openbox stacking window manager. Raspberry Pi OS is under active development, with an emphasis on improving the stability and performance of as many Debian packages as possible on Raspberry Pi. +[Raspberry Pi OS](https://www.raspberrypi.com/software/) (formerly known as Raspbian) is a Unix-like operating system based on the Debian GNU/Linux distribution for the Raspberry Pi family of compact single-board computers distributed by the Raspberry Pi Foundation. Raspberry Pi OS is highly optimized for the Raspberry Pi with ARM CPUs and uses a modified LXDE desktop environment with the Openbox stacking window manager. Raspberry Pi OS is under active development, with an emphasis on improving the stability and performance of as many Debian packages as possible on Raspberry Pi. ## Flash Raspberry Pi OS to Raspberry Pi @@ -249,7 +249,7 @@ To reproduce the above Ultralytics benchmarks on all [export formats](../modes/e ## Use Raspberry Pi Camera -When using Raspberry Pi for Computer Vision projects, it can be essentially to grab real-time video feeds to perform inference. The onboard MIPI CSI connector on the Raspberry Pi allows you to connect official Raspberry PI camera modules. In this guide, we have used a [Raspberry Pi Camera Module 3](https://www.raspberrypi.com/products/camera-module-3) to grab the video feeds and perform inference using YOLOv8 models. +When using Raspberry Pi for Computer Vision projects, it can be essentially to grab real-time video feeds to perform inference. The onboard MIPI CSI connector on the Raspberry Pi allows you to connect official Raspberry PI camera modules. 
In this guide, we have used a [Raspberry Pi Camera Module 3](https://www.raspberrypi.com/products/camera-module-3/) to grab the video feeds and perform inference using YOLOv8 models. !!! tip @@ -257,7 +257,7 @@ When using Raspberry Pi for Computer Vision projects, it can be essentially to g !!! note - Raspberry Pi 5 uses smaller CSI connectors than the Raspberry Pi 4 (15-pin vs 22-pin), so you will need a [15-pin to 22pin adapter cable](https://www.raspberrypi.com/products/camera-cable) to connect to a Raspberry Pi Camera. + Raspberry Pi 5 uses smaller CSI connectors than the Raspberry Pi 4 (15-pin vs 22-pin), so you will need a [15-pin to 22pin adapter cable](https://www.raspberrypi.com/products/camera-cable/) to connect to a Raspberry Pi Camera. ### Test the Camera diff --git a/docs/en/guides/ros-quickstart.md b/docs/en/guides/ros-quickstart.md index 27371131dd..9d7be73497 100644 --- a/docs/en/guides/ros-quickstart.md +++ b/docs/en/guides/ros-quickstart.md @@ -7,7 +7,7 @@ keywords: Ultralytics, YOLO, object detection, deep learning, machine learning, # ROS (Robot Operating System) quickstart guide

- [Video embed: ROS Introduction (captioned) from Open Robotics on Vimeo]
+ [Video embed: ROS Introduction (captioned) from Open Robotics on Vimeo]

## What is ROS? diff --git a/docs/en/guides/security-alarm-system.md b/docs/en/guides/security-alarm-system.md index a9523dd61c..e856248571 100644 --- a/docs/en/guides/security-alarm-system.md +++ b/docs/en/guides/security-alarm-system.md @@ -8,7 +8,7 @@ keywords: YOLO11, Security Alarm System, real-time object detection, Ultralytics Security Alarm System -The Security Alarm System Project utilizing Ultralytics YOLO11 integrates advanced [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) capabilities to enhance security measures. YOLO11, developed by Ultralytics, provides real-time object detection, allowing the system to identify and respond to potential security threats promptly. This project offers several advantages: +The Security Alarm System Project utilizing Ultralytics YOLO11 integrates advanced [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) capabilities to enhance security measures. YOLO11, developed by Ultralytics, provides real-time [object detection](https://www.ultralytics.com/glossary/object-detection), allowing the system to identify and respond to potential security threats promptly. This project offers several advantages: - **Real-time Detection:** YOLO11's efficiency enables the Security Alarm System to detect and respond to security incidents in real-time, minimizing response time. - **[Accuracy](https://www.ultralytics.com/glossary/accuracy):** YOLO11 is known for its accuracy in object detection, reducing false positives and enhancing the reliability of the security alarm system. diff --git a/docs/en/guides/speed-estimation.md b/docs/en/guides/speed-estimation.md index 6a6c192de1..48a9aa09eb 100644 --- a/docs/en/guides/speed-estimation.md +++ b/docs/en/guides/speed-estimation.md @@ -45,40 +45,33 @@ keywords: Ultralytics YOLO11, speed estimation, object tracking, computer vision ```python import cv2 - from ultralytics import YOLO, solutions + from ultralytics import solutions - model = YOLO("yolo11n.pt") - names = model.model.names + cap = cv2.VideoCapture("Path/to/video/file.mp4") - cap = cv2.VideoCapture("path/to/video/file.mp4") assert cap.isOpened(), "Error reading video file" w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) - # Video writer - video_writer = cv2.VideoWriter("speed_estimation.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h)) + video_writer = cv2.VideoWriter("speed_management.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h)) - line_pts = [(0, 360), (1280, 360)] + speed_region = [(20, 400), (1080, 404), (1080, 360), (20, 360)] - # Init speed-estimation obj - speed_obj = solutions.SpeedEstimator( - reg_pts=line_pts, - names=names, - view_img=True, - ) + speed = solutions.SpeedEstimator(model="yolo11n.pt", region=speed_region, show=True) while cap.isOpened(): success, im0 = cap.read() - if not success: - print("Video frame is empty or video processing has been successfully completed.") - break - tracks = model.track(im0, persist=True) + if success: + out = speed.estimate_speed(im0) + video_writer.write(im0) + if cv2.waitKey(1) & 0xFF == ord("q"): + break + continue - im0 = speed_obj.estimate_speed(im0, tracks) - video_writer.write(im0) + print("Video frame is empty or video processing has been successfully completed.") + break cap.release() - video_writer.release() cv2.destroyAllWindows() ``` @@ -88,13 +81,12 @@ keywords: Ultralytics YOLO11, speed estimation, object tracking, computer vision ### Arguments `SpeedEstimator` -| Name | Type | 
Default | Description | -| ------------------ | ------ | -------------------------- | ---------------------------------------------------- | -| `names` | `dict` | `None` | Dictionary of class names. | -| `reg_pts` | `list` | `[(20, 400), (1260, 400)]` | List of region points for speed estimation. | -| `view_img` | `bool` | `False` | Whether to display the image with annotations. | -| `line_thickness` | `int` | `2` | Thickness of the lines for drawing boxes and tracks. | -| `spdl_dist_thresh` | `int` | `10` | Distance threshold for speed calculation. | +| Name | Type | Default | Description | +| ------------ | ------ | -------------------------- | ---------------------------------------------------- | +| `model` | `str` | `None` | Path to Ultralytics YOLO Model File | +| `region` | `list` | `[(20, 400), (1260, 400)]` | List of points defining the counting region. | +| `line_width` | `int` | `2` | Line thickness for bounding boxes. | +| `show` | `bool` | `False` | Flag to control whether to display the video stream. | ### Arguments `model.track` @@ -111,10 +103,7 @@ Estimating object speed with Ultralytics YOLO11 involves combining [object detec ```python import cv2 -from ultralytics import YOLO, solutions - -model = YOLO("yolo11n.pt") -names = model.model.names +from ultralytics import solutions cap = cv2.VideoCapture("path/to/video/file.mp4") w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) @@ -122,17 +111,16 @@ video_writer = cv2.VideoWriter("speed_estimation.avi", cv2.VideoWriter_fourcc(*" # Initialize SpeedEstimator speed_obj = solutions.SpeedEstimator( - reg_pts=[(0, 360), (1280, 360)], - names=names, - view_img=True, + region=[(0, 360), (1280, 360)], + model="yolo11n.pt", + show=True, ) while cap.isOpened(): success, im0 = cap.read() if not success: break - tracks = model.track(im0, persist=True, show=False) - im0 = speed_obj.estimate_speed(im0, tracks) + im0 = speed_obj.estimate_speed(im0) video_writer.write(im0) cap.release() diff --git a/docs/en/guides/steps-of-a-cv-project.md b/docs/en/guides/steps-of-a-cv-project.md index b0f03c1eac..ca067547da 100644 --- a/docs/en/guides/steps-of-a-cv-project.md +++ b/docs/en/guides/steps-of-a-cv-project.md @@ -147,7 +147,7 @@ It's important to keep in mind that proper dataset management is vital for effic It's important to assess your model's performance using various metrics and refine it to improve [accuracy](https://www.ultralytics.com/glossary/accuracy). [Evaluating](../modes/val.md) helps identify areas where the model excels and where it may need improvement. Fine-tuning ensures the model is optimized for the best possible performance. -- **[Performance Metrics](./yolo-performance-metrics.md):** Use metrics like accuracy, [precision](https://www.ultralytics.com/glossary/precision), recall, and F1-score to evaluate your model's performance. These metrics provide insights into how well your model is making predictions. +- **[Performance Metrics](./yolo-performance-metrics.md):** Use metrics like accuracy, [precision](https://www.ultralytics.com/glossary/precision), [recall](https://www.ultralytics.com/glossary/recall), and F1-score to evaluate your model's performance. These metrics provide insights into how well your model is making predictions. - **[Hyperparameter Tuning](./hyperparameter-tuning.md):** Adjust hyperparameters to optimize model performance. Techniques like grid search or random search can help find the best hyperparameter values. 
- Fine-Tuning: Make small adjustments to the model architecture or training process to enhance performance. This might involve tweaking [learning rates](https://www.ultralytics.com/glossary/learning-rate), [batch sizes](https://www.ultralytics.com/glossary/batch-size), or other model parameters. diff --git a/docs/en/guides/workouts-monitoring.md b/docs/en/guides/workouts-monitoring.md index af996894b3..78d894e81d 100644 --- a/docs/en/guides/workouts-monitoring.md +++ b/docs/en/guides/workouts-monitoring.md @@ -41,18 +41,16 @@ Monitoring workouts through pose estimation with [Ultralytics YOLO11](https://gi ```python import cv2 - from ultralytics import YOLO, solutions + from ultralytics import solutions - model = YOLO("yolo11n-pose.pt") cap = cv2.VideoCapture("path/to/video/file.mp4") assert cap.isOpened(), "Error reading video file" w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) - gym_object = solutions.AIGym( - line_thickness=2, - view_img=True, - pose_type="pushup", - kpts_to_check=[6, 8, 10], + gym = solutions.AIGym( + model="yolo11n-pose.pt", + show=True, + kpts=[6, 8, 10], ) while cap.isOpened(): @@ -60,9 +58,7 @@ Monitoring workouts through pose estimation with [Ultralytics YOLO11](https://gi if not success: print("Video frame is empty or video processing has been successfully completed.") break - results = model.track(im0, verbose=False) # Tracking recommended - # results = model.predict(im0) # Prediction also supported - im0 = gym_object.start_counting(im0, results) + im0 = gym.monitor(im0) cv2.destroyAllWindows() ``` @@ -72,20 +68,17 @@ Monitoring workouts through pose estimation with [Ultralytics YOLO11](https://gi ```python import cv2 - from ultralytics import YOLO, solutions + from ultralytics import solutions - model = YOLO("yolo11n-pose.pt") cap = cv2.VideoCapture("path/to/video/file.mp4") assert cap.isOpened(), "Error reading video file" w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) video_writer = cv2.VideoWriter("workouts.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h)) - gym_object = solutions.AIGym( - line_thickness=2, - view_img=True, - pose_type="pushup", - kpts_to_check=[6, 8, 10], + gym = solutions.AIGym( + show=True, + kpts=[6, 8, 10], ) while cap.isOpened(): @@ -93,33 +86,26 @@ Monitoring workouts through pose estimation with [Ultralytics YOLO11](https://gi if not success: print("Video frame is empty or video processing has been successfully completed.") break - results = model.track(im0, verbose=False) # Tracking recommended - # results = model.predict(im0) # Prediction also supported - im0 = gym_object.start_counting(im0, results) + im0 = gym.monitor(im0) video_writer.write(im0) cv2.destroyAllWindows() video_writer.release() ``` -???+ tip "Support" - - "pushup", "pullup" and "abworkout" supported - ### KeyPoints Map ![keyPoints Order Ultralytics YOLO11 Pose](https://github.com/ultralytics/docs/releases/download/0/keypoints-order-ultralytics-yolov8-pose.avif) ### Arguments `AIGym` -| Name | Type | Default | Description | -| ----------------- | ------- | -------- | -------------------------------------------------------------------------------------- | -| `kpts_to_check` | `list` | `None` | List of three keypoints index, for counting specific workout, followed by keypoint Map | -| `line_thickness` | `int` | `2` | Thickness of the lines drawn. | -| `view_img` | `bool` | `False` | Flag to display the image. 
| -| `pose_up_angle` | `float` | `145.0` | Angle threshold for the 'up' pose. | -| `pose_down_angle` | `float` | `90.0` | Angle threshold for the 'down' pose. | -| `pose_type` | `str` | `pullup` | Type of pose to detect (`'pullup`', `pushup`, `abworkout`, `squat`). | +| Name | Type | Default | Description | +| ------------ | ------- | ------- | -------------------------------------------------------------------------------------- | +| `kpts` | `list` | `None` | List of three keypoints index, for counting specific workout, followed by keypoint Map | +| `line_width` | `int` | `2` | Thickness of the lines drawn. | +| `show` | `bool` | `False` | Flag to display the image. | +| `up_angle` | `float` | `145.0` | Angle threshold for the 'up' pose. | +| `down_angle` | `float` | `90.0` | Angle threshold for the 'down' pose. | ### Arguments `model.predict` @@ -138,18 +124,16 @@ To monitor your workouts using Ultralytics YOLO11, you can utilize the pose esti ```python import cv2 -from ultralytics import YOLO, solutions +from ultralytics import solutions -model = YOLO("yolo11n-pose.pt") cap = cv2.VideoCapture("path/to/video/file.mp4") assert cap.isOpened(), "Error reading video file" w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) -gym_object = solutions.AIGym( - line_thickness=2, - view_img=True, - pose_type="pushup", - kpts_to_check=[6, 8, 10], +gym = solutions.AIGym( + line_width=2, + show=True, + kpts=[6, 8, 10], ) while cap.isOpened(): @@ -157,8 +141,7 @@ while cap.isOpened(): if not success: print("Video frame is empty or video processing has been successfully completed.") break - results = model.track(im0, verbose=False) - im0 = gym_object.start_counting(im0, results) + im0 = gym.monitor(im0) cv2.destroyAllWindows() ``` @@ -188,11 +171,10 @@ Yes, Ultralytics YOLO11 can be adapted for custom workout routines. 
The `AIGym` ```python from ultralytics import solutions -gym_object = solutions.AIGym( - line_thickness=2, - view_img=True, - pose_type="squat", - kpts_to_check=[6, 8, 10], +gym = solutions.AIGym( + line_width=2, + show=True, + kpts=[6, 8, 10], ) ``` @@ -205,20 +187,18 @@ To save the workout monitoring output, you can modify the code to include a vide ```python import cv2 -from ultralytics import YOLO, solutions +from ultralytics import solutions -model = YOLO("yolo11n-pose.pt") cap = cv2.VideoCapture("path/to/video/file.mp4") assert cap.isOpened(), "Error reading video file" w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) video_writer = cv2.VideoWriter("workouts.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h)) -gym_object = solutions.AIGym( - line_thickness=2, - view_img=True, - pose_type="pushup", - kpts_to_check=[6, 8, 10], +gym = solutions.AIGym( + line_width=2, + show=True, + kpts=[6, 8, 10], ) while cap.isOpened(): @@ -226,8 +206,7 @@ while cap.isOpened(): if not success: print("Video frame is empty or video processing has been successfully completed.") break - results = model.track(im0, verbose=False) - im0 = gym_object.start_counting(im0, results) + im0 = gym.monitor(im0) video_writer.write(im0) cv2.destroyAllWindows() diff --git a/docs/en/help/CI.md b/docs/en/help/CI.md index 93b1ad3222..c63d678eb8 100644 --- a/docs/en/help/CI.md +++ b/docs/en/help/CI.md @@ -27,6 +27,7 @@ Below is the table showing the status of these CI tests for our main repositorie | [yolov3](https://github.com/ultralytics/yolov3) | [![YOLOv3 CI](https://github.com/ultralytics/yolov3/actions/workflows/ci-testing.yml/badge.svg)](https://github.com/ultralytics/yolov3/actions/workflows/ci-testing.yml) | [![Publish Docker Images](https://github.com/ultralytics/yolov3/actions/workflows/docker.yml/badge.svg)](https://github.com/ultralytics/yolov3/actions/workflows/docker.yml) | [![Check Broken links](https://github.com/ultralytics/yolov3/actions/workflows/links.yml/badge.svg)](https://github.com/ultralytics/yolov3/actions/workflows/links.yml) | [![CodeQL](https://github.com/ultralytics/yolov3/actions/workflows/codeql-analysis.yml/badge.svg)](https://github.com/ultralytics/yolov3/actions/workflows/codeql-analysis.yml) | | | [yolov5](https://github.com/ultralytics/yolov5) | [![YOLOv5 CI](https://github.com/ultralytics/yolov5/actions/workflows/ci-testing.yml/badge.svg)](https://github.com/ultralytics/yolov5/actions/workflows/ci-testing.yml) | [![Publish Docker Images](https://github.com/ultralytics/yolov5/actions/workflows/docker.yml/badge.svg)](https://github.com/ultralytics/yolov5/actions/workflows/docker.yml) | [![Check Broken links](https://github.com/ultralytics/yolov5/actions/workflows/links.yml/badge.svg)](https://github.com/ultralytics/yolov5/actions/workflows/links.yml) | [![CodeQL](https://github.com/ultralytics/yolov5/actions/workflows/codeql-analysis.yml/badge.svg)](https://github.com/ultralytics/yolov5/actions/workflows/codeql-analysis.yml) | | | [ultralytics](https://github.com/ultralytics/ultralytics) | [![ultralytics CI](https://github.com/ultralytics/ultralytics/actions/workflows/ci.yaml/badge.svg)](https://github.com/ultralytics/ultralytics/actions/workflows/ci.yaml) | [![Publish Docker Images](https://github.com/ultralytics/ultralytics/actions/workflows/docker.yaml/badge.svg)](https://github.com/ultralytics/ultralytics/actions/workflows/docker.yaml) | [![Check Broken 
links](https://github.com/ultralytics/ultralytics/actions/workflows/links.yml/badge.svg)](https://github.com/ultralytics/ultralytics/actions/workflows/links.yml) | [![CodeQL](https://github.com/ultralytics/ultralytics/actions/workflows/codeql.yaml/badge.svg)](https://github.com/ultralytics/ultralytics/actions/workflows/codeql.yaml) | [![Publish to PyPI and Deploy Docs](https://github.com/ultralytics/ultralytics/actions/workflows/publish.yml/badge.svg)](https://github.com/ultralytics/ultralytics/actions/workflows/publish.yml) | +| [hub-sdk](https://github.com/ultralytics/hub-sdk) | [![HUB-SDK CI](https://github.com/ultralytics/hub-sdk/actions/workflows/ci.yml/badge.svg)](https://github.com/ultralytics/hub-sdk/actions/workflows/ci.yml) | | [![Check Broken links](https://github.com/ultralytics/hub-sdk/actions/workflows/links.yml/badge.svg)](https://github.com/ultralytics/hub-sdk/actions/workflows/links.yml) | [![CodeQL](https://github.com/ultralytics/hub-sdk/actions/workflows/codeql.yaml/badge.svg)](https://github.com/ultralytics/hub-sdk/actions/workflows/codeql.yaml) | [![Publish to PyPI](https://github.com/ultralytics/hub-sdk/actions/workflows/publish.yml/badge.svg)](https://github.com/ultralytics/hub-sdk/actions/workflows/publish.yml) | | [hub](https://github.com/ultralytics/hub) | [![HUB CI](https://github.com/ultralytics/hub/actions/workflows/ci.yaml/badge.svg)](https://github.com/ultralytics/hub/actions/workflows/ci.yaml) | | [![Check Broken links](https://github.com/ultralytics/hub/actions/workflows/links.yml/badge.svg)](https://github.com/ultralytics/hub/actions/workflows/links.yml) | | | | [docs](https://github.com/ultralytics/docs) | | | [![Check Broken links](https://github.com/ultralytics/docs/actions/workflows/links.yml/badge.svg)](https://github.com/ultralytics/docs/actions/workflows/links.yml)[![Check Domains](https://github.com/ultralytics/docs/actions/workflows/check_domains.yml/badge.svg)](https://github.com/ultralytics/docs/actions/workflows/check_domains.yml) | | [![pages-build-deployment](https://github.com/ultralytics/docs/actions/workflows/pages/pages-build-deployment/badge.svg)](https://github.com/ultralytics/docs/actions/workflows/pages/pages-build-deployment) | @@ -56,7 +57,7 @@ To quickly get a glimpse of the code coverage status of the `ultralytics` python In the sunburst graphic below, the innermost circle is the entire project, moving away from the center are folders then, finally, a single file. The size and color of each slice is representing the number of statements and the coverage, respectively. - + Ultralytics Codecov Image diff --git a/docs/en/hub/app/android.md b/docs/en/hub/app/android.md index bca298fa9d..d5d19ef91d 100644 --- a/docs/en/hub/app/android.md +++ b/docs/en/hub/app/android.md @@ -6,7 +6,7 @@ keywords: Ultralytics, Android app, real-time object detection, YOLO models, Ten # Ultralytics Android App: Real-time [Object Detection](https://www.ultralytics.com/glossary/object-detection) with YOLO Models - + Ultralytics HUB preview image
@@ -22,7 +22,7 @@ keywords: Ultralytics, Android app, real-time object detection, YOLO models, Ten space Ultralytics BiliBili space - Ultralytics Discord + Ultralytics Discord

diff --git a/docs/en/hub/app/index.md b/docs/en/hub/app/index.md index e812d68678..c044aa0553 100644 --- a/docs/en/hub/app/index.md +++ b/docs/en/hub/app/index.md @@ -6,7 +6,7 @@ keywords: Ultralytics HUB, YOLO models, mobile app, iOS, Android, hardware accel # Ultralytics HUB App - + Ultralytics HUB preview image
@@ -22,7 +22,7 @@ keywords: Ultralytics HUB, YOLO models, mobile app, iOS, Android, hardware accel space Ultralytics BiliBili space - Ultralytics Discord + Ultralytics Discord

diff --git a/docs/en/hub/app/ios.md b/docs/en/hub/app/ios.md index be896fe80b..061267b59f 100644 --- a/docs/en/hub/app/ios.md +++ b/docs/en/hub/app/ios.md @@ -6,7 +6,7 @@ keywords: Ultralytics, iOS App, YOLO models, real-time object detection, Apple N # Ultralytics iOS App: Real-time [Object Detection](https://www.ultralytics.com/glossary/object-detection) with YOLO Models - + Ultralytics HUB preview image
@@ -22,7 +22,7 @@ keywords: Ultralytics, iOS App, YOLO models, real-time object detection, Apple N space Ultralytics BiliBili space - Ultralytics Discord + Ultralytics Discord

diff --git a/docs/en/hub/index.md b/docs/en/hub/index.md index 24dbdd3f57..c2ebbce553 100644 --- a/docs/en/hub/index.md +++ b/docs/en/hub/index.md @@ -7,7 +7,7 @@ keywords: Ultralytics HUB, YOLO models, train YOLO, YOLOv5, YOLOv8, object detec # Ultralytics HUB
- + 中文 | 한국어 | 日本語 | @@ -22,13 +22,13 @@ keywords: Ultralytics HUB, YOLO models, train YOLO, YOLOv5, YOLOv8, object detec

-CI CPU Open In Colab Discord Ultralytics Forums Ultralytics Reddit +CI CPU Open In Colab Discord Ultralytics Forums Ultralytics Reddit
👋 Hello from the [Ultralytics](https://www.ultralytics.com/) Team! We've been working hard these last few months to launch [Ultralytics HUB](https://www.ultralytics.com/hub), a new web tool for training and deploying all your YOLOv5 and YOLOv8 🚀 models from one spot! -We hope that the resources here will help you get the most out of HUB. Please browse the HUB Docs for details, raise an issue on GitHub for support, and join our Discord community for questions and discussions! +We hope that the resources here will help you get the most out of HUB. Please browse the HUB Docs for details, raise an issue on GitHub for support, and join our Discord community for questions and discussions!

@@ -44,7 +44,7 @@ We hope that the resources here will help you get the most out of HUB. Please br space Ultralytics BiliBili space - Ultralytics Discord + Ultralytics Discord
## Introduction @@ -61,7 +61,7 @@ We hope that the resources here will help you get the most out of HUB. Please br Watch: Train Your Custom YOLO Models In A Few Clicks with Ultralytics HUB

-We hope that the resources here will help you get the most out of HUB. Please browse the HUB Docs for details, raise an issue on GitHub for support, and join our Discord community for questions and discussions! +We hope that the resources here will help you get the most out of HUB. Please browse the HUB Docs for details, raise an issue on GitHub for support, and join our Discord community for questions and discussions! - [**Quickstart**](quickstart.md): Start training and deploying models in seconds. - [**Datasets**](datasets.md): Learn how to prepare and upload your datasets. diff --git a/docs/en/hub/quickstart.md b/docs/en/hub/quickstart.md index 3fbcf23af1..ad10e26e94 100644 --- a/docs/en/hub/quickstart.md +++ b/docs/en/hub/quickstart.md @@ -98,4 +98,4 @@ You can report a bug, request a feature, or ask a question on Discord community for questions and discussions! + You can join our Discord community for questions and discussions! diff --git a/docs/en/index.md b/docs/en/index.md index 45c5dab7d8..8217f972da 100644 --- a/docs/en/index.md +++ b/docs/en/index.md @@ -6,24 +6,24 @@ keywords: Ultralytics, YOLO, YOLO11, object detection, image segmentation, deep
Ultralytics YOLO banner -中文 | -한국어 | -日本語 | -Русский | -Deutsch | -Français | +中文 | +한국어 | +日本語 | +Русский | +Deutsch | +Français | Español | -Português | -Türkçe | -Tiếng Việt | -العربية +Português | +Türkçe | +Tiếng Việt | +العربية

Ultralytics CI YOLO Citation Docker Pulls -Discord -Ultralytics Forums +Discord +Ultralytics Forums Ultralytics Reddit
Run on Gradient @@ -49,7 +49,7 @@ Explore the Ultralytics Docs, a comprehensive resource designed to help you unde space Ultralytics BiliBili space - Ultralytics Discord + Ultralytics Discord
## Where to Start @@ -58,7 +58,7 @@ Explore the Ultralytics Docs, a comprehensive resource designed to help you unde - **Predict** new images and videos with YOLO   [:octicons-image-16: Predict on Images](modes/predict.md){ .md-button } - **Train** a new YOLO model on your own custom dataset   [:fontawesome-solid-brain: Train a Model](modes/train.md){ .md-button } - **Tasks** YOLO tasks like segment, classify, pose and track   [:material-magnify-expand: Explore Tasks](tasks/index.md){ .md-button } -- **[YOLO11](models/yolo11.md) NEW 🚀**: Ultralytics' latest SOTA models   [:material-magnify-expand: Explore new YOLO11 models](models/yolo11.md){ .md-button } +- **[YOLO11](models/yolo11.md) 🚀 NEW**: Ultralytics' latest SOTA models   [:material-magnify-expand: Explore new YOLO11 models](models/yolo11.md){ .md-button }
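To make the Install, Predict, and Train pointers above concrete, a minimal sketch of a first session could look like this (the weights and dataset names are the standard small Ultralytics examples, and the image path is a placeholder to replace with your own source):

```python
from ultralytics import YOLO

# Load a pretrained YOLO11 model
model = YOLO("yolo11n.pt")

# Run prediction on a single image (replace the path with your own source)
results = model.predict("path/to/image.jpg", imgsz=640, conf=0.25)

# Fine-tune the model on a small example dataset
model.train(data="coco8.yaml", epochs=3, imgsz=640)
```

Each step maps directly onto the Quickstart, Predict, and Train pages linked above.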


@@ -84,7 +84,7 @@ Explore the Ultralytics Docs, a comprehensive resource designed to help you unde - [YOLOv8](https://github.com/ultralytics/ultralytics) released in 2023 by Ultralytics. YOLOv8 introduced new features and improvements for enhanced performance, flexibility, and efficiency, supporting a full range of vision AI tasks, - [YOLOv9](models/yolov9.md) introduces innovative methods like Programmable Gradient Information (PGI) and the Generalized Efficient Layer Aggregation Network (GELAN). - [YOLOv10](models/yolov10.md) is created by researchers from [Tsinghua University](https://www.tsinghua.edu.cn/en/) using the [Ultralytics](https://www.ultralytics.com/) [Python package](https://pypi.org/project/ultralytics/). This version provides real-time [object detection](tasks/detect.md) advancements by introducing an End-to-End head that eliminates Non-Maximum Suppression (NMS) requirements. -- **[YOLO11](models/yolo11.md) NEW 🚀**: Ultralytics' latest YOLO models delivering state-of-the-art (SOTA) performance across multiple tasks, including [detection](tasks/detect.md), [segmentation](tasks/segment.md), [pose estimation](tasks/pose.md), [tracking](modes/track.md), and [classification](tasks/classify.md), leverage capabilities across diverse AI applications and domains. +- **[YOLO11](models/yolo11.md) 🚀 NEW**: Ultralytics' latest YOLO models delivering state-of-the-art (SOTA) performance across multiple tasks, including [detection](tasks/detect.md), [segmentation](tasks/segment.md), [pose estimation](tasks/pose.md), [tracking](modes/track.md), and [classification](tasks/classify.md), leverage capabilities across diverse AI applications and domains. ## YOLO Licenses: How is Ultralytics YOLO licensed? diff --git a/docs/en/integrations/index.md b/docs/en/integrations/index.md index bb4de86c81..bdb8b9c907 100644 --- a/docs/en/integrations/index.md +++ b/docs/en/integrations/index.md @@ -27,65 +27,65 @@ Welcome to the Ultralytics Integrations page! This page provides an overview of ## Training Integrations +- [Amazon SageMaker](amazon-sagemaker.md): Leverage Amazon SageMaker to efficiently build, train, and deploy Ultralytics models, providing an all-in-one platform for the ML lifecycle. + - [ClearML](clearml.md): Automate your Ultralytics ML workflows, monitor experiments, and foster team collaboration. - [Comet ML](comet.md): Enhance your model development with Ultralytics by tracking, comparing, and optimizing your machine learning experiments. - [DVC](dvc.md): Implement version control for your Ultralytics machine learning projects, synchronizing data, code, and models effectively. -- [MLFlow](mlflow.md): Streamline the entire ML lifecycle of Ultralytics models, from experimentation and reproducibility to deployment. - -- [Ultralytics HUB](https://hub.ultralytics.com/): Access and contribute to a community of pre-trained Ultralytics models. +- [Google Colab](google-colab.md): Use Google Colab to train and evaluate Ultralytics models in a cloud-based environment that supports collaboration and sharing. -- [Neptune](https://neptune.ai/): Maintain a comprehensive log of your ML experiments with Ultralytics in this metadata store designed for MLOps. +- [IBM Watsonx](ibm-watsonx.md): See how IBM Watsonx simplifies the training and evaluation of Ultralytics models with its cutting-edge AI tools, effortless integration, and advanced model management system. -- [Ray Tune](ray-tune.md): Optimize the hyperparameters of your Ultralytics models at any scale. 
+- [JupyterLab](jupyterlab.md): Find out how to use JupyterLab's interactive and customizable environment to train and evaluate Ultralytics models with ease and efficiency. -- [TensorBoard](tensorboard.md): Visualize your Ultralytics ML workflows, monitor model metrics, and foster team collaboration. +- [Kaggle](kaggle.md): Explore how you can use Kaggle to train and evaluate Ultralytics models in a cloud-based environment with pre-installed libraries, GPU support, and a vibrant community for collaboration and sharing. -- [Weights & Biases (W&B)](weights-biases.md): Monitor experiments, visualize metrics, and foster reproducibility and collaboration on Ultralytics projects. +- [MLFlow](mlflow.md): Streamline the entire ML lifecycle of Ultralytics models, from experimentation and reproducibility to deployment. -- [Amazon SageMaker](amazon-sagemaker.md): Leverage Amazon SageMaker to efficiently build, train, and deploy Ultralytics models, providing an all-in-one platform for the ML lifecycle. +- [Neptune](https://neptune.ai/): Maintain a comprehensive log of your ML experiments with Ultralytics in this metadata store designed for MLOps. - [Paperspace Gradient](paperspace.md): Paperspace Gradient simplifies working on YOLO11 projects by providing easy-to-use cloud tools for training, testing, and deploying your models quickly. -- [Google Colab](google-colab.md): Use Google Colab to train and evaluate Ultralytics models in a cloud-based environment that supports collaboration and sharing. +- [Ray Tune](ray-tune.md): Optimize the hyperparameters of your Ultralytics models at any scale. -- [Kaggle](kaggle.md): Explore how you can use Kaggle to train and evaluate Ultralytics models in a cloud-based environment with pre-installed libraries, GPU support, and a vibrant community for collaboration and sharing. +- [TensorBoard](tensorboard.md): Visualize your Ultralytics ML workflows, monitor model metrics, and foster team collaboration. -- [JupyterLab](jupyterlab.md): Find out how to use JupyterLab's interactive and customizable environment to train and evaluate Ultralytics models with ease and efficiency. +- [Ultralytics HUB](https://hub.ultralytics.com/): Access and contribute to a community of pre-trained Ultralytics models. -- [IBM Watsonx](ibm-watsonx.md): See how IBM Watsonx simplifies the training and evaluation of Ultralytics models with its cutting-edge AI tools, effortless integration, and advanced model management system. +- [Weights & Biases (W&B)](weights-biases.md): Monitor experiments, visualize metrics, and foster reproducibility and collaboration on Ultralytics projects. ## Deployment Integrations -- [Neural Magic](neural-magic.md): Leverage Quantization Aware Training (QAT) and pruning techniques to optimize Ultralytics models for superior performance and leaner size. +- [CoreML](coreml.md): CoreML, developed by [Apple](https://www.apple.com/), is a framework designed for efficiently integrating machine learning models into applications across iOS, macOS, watchOS, and tvOS, using Apple's hardware for effective and secure [model deployment](https://www.ultralytics.com/glossary/model-deployment). - [Gradio](gradio.md) 🚀 NEW: Deploy Ultralytics models with Gradio for real-time, interactive object detection demos. -- [TorchScript](torchscript.md): Developed as part of the [PyTorch](https://pytorch.org/) framework, TorchScript enables efficient execution and deployment of machine learning models in various production environments without the need for Python dependencies. 
+- [NCNN](ncnn.md): Developed by [Tencent](http://www.tencent.com/), NCNN is an efficient [neural network](https://www.ultralytics.com/glossary/neural-network-nn) inference framework tailored for mobile devices. It enables direct deployment of AI models into apps, optimizing performance across various mobile platforms. + +- [Neural Magic](neural-magic.md): Leverage Quantization Aware Training (QAT) and pruning techniques to optimize Ultralytics models for superior performance and leaner size. - [ONNX](onnx.md): An open-source format created by [Microsoft](https://www.microsoft.com/) for facilitating the transfer of AI models between various frameworks, enhancing the versatility and deployment flexibility of Ultralytics models. - [OpenVINO](openvino.md): Intel's toolkit for optimizing and deploying [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) models efficiently across various Intel CPU and GPU platforms. -- [TensorRT](tensorrt.md): Developed by [NVIDIA](https://www.nvidia.com/), this high-performance [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) inference framework and model format optimizes AI models for accelerated speed and efficiency on NVIDIA GPUs, ensuring streamlined deployment. +- [PaddlePaddle](paddlepaddle.md): An open-source deep learning platform by [Baidu](https://www.baidu.com/), PaddlePaddle enables the efficient deployment of AI models and focuses on the scalability of industrial applications. -- [CoreML](coreml.md): CoreML, developed by [Apple](https://www.apple.com/), is a framework designed for efficiently integrating machine learning models into applications across iOS, macOS, watchOS, and tvOS, using Apple's hardware for effective and secure [model deployment](https://www.ultralytics.com/glossary/model-deployment). +- [TF GraphDef](tf-graphdef.md): Developed by [Google](https://www.google.com/), GraphDef is TensorFlow's format for representing computation graphs, enabling optimized execution of machine learning models across diverse hardware. - [TF SavedModel](tf-savedmodel.md): Developed by [Google](https://www.google.com/), TF SavedModel is a universal serialization format for [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) models, enabling easy sharing and deployment across a wide range of platforms, from servers to edge devices. -- [TF GraphDef](tf-graphdef.md): Developed by [Google](https://www.google.com/), GraphDef is TensorFlow's format for representing computation graphs, enabling optimized execution of machine learning models across diverse hardware. +- [TF.js](tfjs.md): Developed by [Google](https://www.google.com/) to facilitate machine learning in browsers and Node.js, TF.js allows JavaScript-based deployment of ML models. - [TFLite](tflite.md): Developed by [Google](https://www.google.com/), TFLite is a lightweight framework for deploying machine learning models on mobile and edge devices, ensuring fast, efficient inference with minimal memory footprint. - [TFLite Edge TPU](edge-tpu.md): Developed by [Google](https://www.google.com/) for optimizing TensorFlow Lite models on Edge TPUs, this model format ensures high-speed, efficient [edge computing](https://www.ultralytics.com/glossary/edge-computing). -- [TF.js](tfjs.md): Developed by [Google](https://www.google.com/) to facilitate machine learning in browsers and Node.js, TF.js allows JavaScript-based deployment of ML models. 
- -- [PaddlePaddle](paddlepaddle.md): An open-source deep learning platform by [Baidu](https://www.baidu.com/), PaddlePaddle enables the efficient deployment of AI models and focuses on the scalability of industrial applications. +- [TensorRT](tensorrt.md): Developed by [NVIDIA](https://www.nvidia.com/), this high-performance [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) inference framework and model format optimizes AI models for accelerated speed and efficiency on NVIDIA GPUs, ensuring streamlined deployment. -- [NCNN](ncnn.md): Developed by [Tencent](http://www.tencent.com/), NCNN is an efficient [neural network](https://www.ultralytics.com/glossary/neural-network-nn) inference framework tailored for mobile devices. It enables direct deployment of AI models into apps, optimizing performance across various mobile platforms. +- [TorchScript](torchscript.md): Developed as part of the [PyTorch](https://pytorch.org/) framework, TorchScript enables efficient execution and deployment of machine learning models in various production environments without the need for Python dependencies. - [VS Code](vscode.md): An extension for VS Code that provides code snippets for accelerating development workflows with Ultralytics and also for anyone looking for examples to help learn or get started with Ultralytics. diff --git a/docs/en/integrations/kaggle.md b/docs/en/integrations/kaggle.md index efcb78eb81..2e2c00cac6 100644 --- a/docs/en/integrations/kaggle.md +++ b/docs/en/integrations/kaggle.md @@ -48,7 +48,7 @@ These options include: When working with Kaggle, you might come across some common issues. Here are some points to help you navigate the platform smoothly: -- **Access to GPUs**: In your Kaggle notebooks, you can activate a GPU at any time, with usage allowed for up to 30 hours per week. Kaggle provides the Nvidia Tesla P100 GPU with 16GB of memory and also offers the option of using a Nvidia GPU T4 x2. Powerful hardware accelerates your machine-learning tasks, making model training and inference much faster. +- **Access to GPUs**: In your Kaggle notebooks, you can activate a GPU at any time, with usage allowed for up to 30 hours per week. Kaggle provides the NVIDIA Tesla P100 GPU with 16GB of memory and also offers the option of using a NVIDIA GPU T4 x2. Powerful hardware accelerates your machine-learning tasks, making model training and inference much faster. - **Kaggle Kernels**: Kaggle Kernels are free Jupyter notebook servers that can integrate GPUs, allowing you to perform machine learning operations on cloud computers. You don't have to rely on your own computer's CPU, avoiding overload and freeing up your local resources. - **Kaggle Datasets**: Kaggle datasets are free to download. However, it's important to check the license for each dataset to understand any usage restrictions. Some datasets may have limitations on academic publications or commercial use. You can download datasets directly to your Kaggle notebook or anywhere else via the Kaggle API. - **Saving and Committing Notebooks**: To save and commit a notebook on Kaggle, click "Save Version." This saves the current state of your notebook. Once the background kernel finishes generating the output files, you can access them from the Output tab on the main notebook page. @@ -101,7 +101,7 @@ Training a YOLO11 model on Kaggle is straightforward. 
First, access the [Kaggle Kaggle offers several advantages for training YOLO11 models: -- **Free GPU Access**: Utilize powerful GPUs like Nvidia Tesla P100 or T4 x2 for up to 30 hours per week. +- **Free GPU Access**: Utilize powerful GPUs like NVIDIA Tesla P100 or T4 x2 for up to 30 hours per week. - **Pre-installed Libraries**: Libraries like TensorFlow and PyTorch are pre-installed, simplifying the setup. - **Community Collaboration**: Engage with a vast community of data scientists and machine learning enthusiasts. - **Version Control**: Easily manage different versions of your notebooks and revert to previous versions if needed. diff --git a/docs/en/integrations/openvino.md b/docs/en/integrations/openvino.md index 8395f949f1..9bd45cb5f1 100644 --- a/docs/en/integrations/openvino.md +++ b/docs/en/integrations/openvino.md @@ -148,7 +148,7 @@ This table represents the benchmark results for five different models (YOLOv8n, ### Intel Arc GPU -Intel® Arc™ represents Intel's foray into the dedicated GPU market. The Arc™ series, designed to compete with leading GPU manufacturers like AMD and Nvidia, caters to both the laptop and desktop markets. The series includes mobile versions for compact devices like laptops, and larger, more powerful versions for desktop computers. +Intel® Arc™ represents Intel's foray into the dedicated GPU market. The Arc™ series, designed to compete with leading GPU manufacturers like AMD and NVIDIA, caters to both the laptop and desktop markets. The series includes mobile versions for compact devices like laptops, and larger, more powerful versions for desktop computers. The Arc™ series is divided into three categories: Arc™ 3, Arc™ 5, and Arc™ 7, with each number indicating the performance level. Each category includes several models, and the 'M' in the GPU model name signifies a mobile, integrated variant. diff --git a/docs/en/integrations/vscode.md b/docs/en/integrations/vscode.md index 521abde311..faf8c893cf 100644 --- a/docs/en/integrations/vscode.md +++ b/docs/en/integrations/vscode.md @@ -181,7 +181,7 @@ There are over 💯 keyword arguments for all of the various Ultralytics [tasks] conf=0.25, # (float) minimum confidence threshold iou=0.7, # (float) intersection over union (IoU) threshold for NMS vid_stride=1, # (int) video frame-rate stride - stream_buffer=False, # (bool) buffer all streaming frames (True) or return the most recent frame (False) + stream_buffer=False, # (bool) buffer incoming frames in a queue (True) or only keep the most recent frame (False) visualize=False, # (bool) visualize model features augment=False, # (bool) apply image augmentation to prediction sources agnostic_nms=False, # (bool) class-agnostic NMS diff --git a/docs/en/integrations/weights-biases.md b/docs/en/integrations/weights-biases.md index 9f2cbb2fa0..9777632e4c 100644 --- a/docs/en/integrations/weights-biases.md +++ b/docs/en/integrations/weights-biases.md @@ -6,7 +6,7 @@ keywords: YOLO11, Weights & Biases, model training, experiment tracking, Ultraly # Enhancing YOLO11 Experiment Tracking and Visualization with Weights & Biases -[Object detection](https://www.ultralytics.com/glossary/object-detection) models like [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics) have become integral to many [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) applications. However, training, evaluating, and deploying these complex models introduces several challenges. 
Tracking key training metrics, comparing model variants, analyzing model behavior, and detecting issues require substantial instrumentation and experiment management. +[Object detection](https://www.ultralytics.com/glossary/object-detection) models like [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics) have become integral to many [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) applications. However, training, evaluating, and deploying these complex models introduce several challenges. Tracking key training metrics, comparing model variants, analyzing model behavior, and detecting issues require significant instrumentation and experiment management.


@@ -19,7 +19,7 @@ keywords: YOLO11, Weights & Biases, model training, experiment tracking, Ultraly Watch: How to use Ultralytics YOLO11 with Weights and Biases

-This guide showcases Ultralytics YOLO11 integration with Weights & Biases' for enhanced experiment tracking, model-checkpointing, and visualization of model performance. It also includes instructions for setting up the integration, training, fine-tuning, and visualizing results using Weights & Biases' interactive features. +This guide showcases Ultralytics YOLO11 integration with Weights & Biases for enhanced experiment tracking, model-checkpointing, and visualization of model performance. It also includes instructions for setting up the integration, training, fine-tuning, and visualizing results using Weights & Biases' interactive features. ## Weights & Biases @@ -42,8 +42,8 @@ To install the required packages, run: === "CLI" ```bash - # Install the required packages for YOLO11 and Weights & Biases - pip install --upgrade ultralytics==8.0.186 wandb + # Install the required packages for Ultralytics YOLO and Weights & Biases + pip install -U ultralytics wandb ``` For detailed instructions and best practices related to the installation process, be sure to check our [YOLO11 Installation guide](../quickstart.md). While installing the required packages for YOLO11, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips. @@ -56,12 +56,20 @@ Start by initializing the Weights & Biases environment in your workspace. You ca !!! tip "Initial SDK Setup" + === "Python" + + ```python + import wandb + + # Initialize your Weights & Biases environment + wandb.login(key="") + ``` + === "CLI" ```bash # Initialize your Weights & Biases environment - import wandb - wandb.login() + wandb login ``` Navigate to the Weights & Biases authorization page to create and retrieve your API key. Use this key to authenticate your environment with W&B. @@ -75,50 +83,42 @@ Before diving into the usage instructions for YOLO11 model training with Weights === "Python" ```python - import wandb - from wandb.integration.ultralytics import add_wandb_callback - from ultralytics import YOLO - # Initialize a Weights & Biases run - wandb.init(project="ultralytics", job_type="training") - # Load a YOLO model model = YOLO("yolo11n.pt") - # Add W&B Callback for Ultralytics - add_wandb_callback(model, enable_model_checkpointing=True) - # Train and Fine-Tune the Model - model.train(project="ultralytics", data="coco8.yaml", epochs=5, imgsz=640) - - # Validate the Model - model.val() - - # Perform Inference and Log Results - model(["path/to/image1", "path/to/image2"]) - - # Finalize the W&B Run - wandb.finish() + model.train(data="coco8.yaml", epochs=5, project="ultralytics", name="yolo11n") ``` -### Understanding the Code + === "CLI" -Let's understand the steps showcased in the usage code snippet above. + ```bash + # Train a YOLO11 model with Weights & Biases + yolo train data=coco8.yaml epochs=5 project=ultralytics name=yolo11n + ``` -- **Step 1: Initialize a Weights & Biases Run**: Start by initializing a Weights & Biases run, specifying the project name and the job type. This run will track and manage the training and validation processes of your model. +### W&B Arguments -- **Step 2: Define the YOLO11 Model and Dataset**: Specify the model variant and the dataset you wish to use. The YOLO model is then initialized with the specified model file. 
+| Argument | Default | Description | +| -------- | ------- | ------------------------------------------------------------------------------------------------------------------ | +| project | `None` | Specifies the name of the project logged locally and in W&B. This way you can group multiple runs together. | +| name | `None` | The name of the training run. This determines the name used to create subfolders and the name used for W&B logging | -- **Step 3: Add Weights & Biases Callback for Ultralytics**: This step is crucial as it enables the automatic logging of training metrics and validation results to Weights & Biases, providing a detailed view of the model's performance. +!!! tip "Enable or Disable Weights & Biases" -- **Step 4: Train and Fine-Tune the Model**: Begin training the model with the specified dataset, number of epochs, and image size. The training process includes logging of metrics and predictions at the end of each [epoch](https://www.ultralytics.com/glossary/epoch), offering a comprehensive view of the model's learning progress. + If you want to enable or disable Weights & Biases logging, you can use the `wandb` command. By default, Weights & Biases logging is enabled. -- **Step 5: Validate the Model**: After training, the model is validated. This step is crucial for assessing the model's performance on unseen data and ensuring its generalizability. + === "CLI" -- **Step 6: Perform Inference and Log Results**: The model performs predictions on specified images. These predictions, along with visual overlays and insights, are automatically logged in a W&B Table for interactive exploration. + ```bash + # Enable Weights & Biases logging + wandb enabled -- **Step 7: Finalize the W&B Run**: This step marks the end of data logging and saves the final state of your model's training and validation process in the W&B dashboard. + # Disable Weights & Biases logging + wandb disabled + ``` ### Understanding the Output @@ -126,7 +126,7 @@ Upon running the usage code snippet above, you can expect the following key outp - The setup of a new run with its unique ID, indicating the start of the training process. - A concise summary of the model's structure, including the number of layers and parameters. -- Regular updates on important metrics such as box loss, cls loss, dfl loss, [precision](https://www.ultralytics.com/glossary/precision), [recall](https://www.ultralytics.com/glossary/recall), and mAP scores during each training epoch. +- Regular updates on important metrics such as box loss, cls loss, dfl loss, [precision](https://www.ultralytics.com/glossary/precision), [recall](https://www.ultralytics.com/glossary/recall), and mAP scores during each training [epoch](https://www.ultralytics.com/glossary/epoch). - At the end of training, detailed metrics including the model's inference speed, and overall [accuracy](https://www.ultralytics.com/glossary/accuracy) metrics are displayed. - Links to the Weights & Biases dashboard for in-depth analysis and visualization of the training process, along with information on local log file locations. @@ -138,7 +138,7 @@ After running the usage code snippet, you can access the Weights & Biases (W&B) - **Real-Time Metrics Tracking**: Observe metrics like loss, accuracy, and validation scores as they evolve during the training, offering immediate insights for model tuning. [See how experiments are tracked using Weights & Biases](https://imgur.com/D6NVnmN). 
-- **Hyperparameter Optimization**: Weights & Biases aids in fine-tuning critical parameters such as [learning rate](https://www.ultralytics.com/glossary/learning-rate), batch size, and more, enhancing the performance of YOLO11. +- **Hyperparameter Optimization**: Weights & Biases aids in fine-tuning critical parameters such as [learning rate](https://www.ultralytics.com/glossary/learning-rate), [batch size](https://www.ultralytics.com/glossary/batch-size), and more, enhancing the performance of YOLO11. - **Comparative Analysis**: The platform allows side-by-side comparisons of different training runs, essential for assessing the impact of various model configurations. @@ -154,7 +154,7 @@ By using these features, you can effectively track, analyze, and optimize your Y ## Summary -This guide helped you explore Ultralytics' YOLO11 integration with Weights & Biases. It illustrates the ability of this integration to efficiently track and visualize model training and prediction results. +This guide helped you explore the Ultralytics YOLO integration with Weights & Biases. It illustrates the ability of this integration to efficiently track and visualize model training and prediction results. For further details on usage, visit [Weights & Biases' official documentation](https://docs.wandb.ai/guides/integrations/ultralytics/). @@ -162,83 +162,83 @@ Also, be sure to check out the [Ultralytics integration guide page](../integrati ## FAQ -### How do I install the required packages for YOLO11 and Weights & Biases? +### How do I integrate Weights & Biases with Ultralytics YOLO11? -To install the required packages for YOLO11 and Weights & Biases, open your command line interface and run: +To integrate Weights & Biases with Ultralytics YOLO11: + +1. Install the required packages: ```bash -pip install --upgrade ultralytics==8.0.186 wandb +pip install -U ultralytics wandb ``` -For further guidance on installation steps, refer to our [YOLO11 Installation guide](../quickstart.md). If you encounter issues, consult the [Common Issues guide](../guides/yolo-common-issues.md) for troubleshooting tips. +2. Log in to your Weights & Biases account: -### What are the benefits of integrating Ultralytics YOLO11 with Weights & Biases? +```python +import wandb -Integrating Ultralytics YOLO11 with Weights & Biases offers several benefits including: +wandb.login(key="") +``` -- **Real-Time Metrics Tracking:** Observe metric changes during training for immediate insights. -- **Hyperparameter Optimization:** Improve model performance by fine-tuning learning rate, [batch size](https://www.ultralytics.com/glossary/batch-size), etc. -- **Comparative Analysis:** Side-by-side comparison of different training runs. -- **Resource Monitoring:** Keep track of CPU, GPU, and memory usage. -- **Model Artifacts Management:** Easy access and sharing of model checkpoints. +3. Train your YOLO11 model with W&B logging enabled: -Explore these features in detail in the Weights & Biases Dashboard section above. +```python +from ultralytics import YOLO -### How can I configure Weights & Biases for YOLO11 training? +model = YOLO("yolo11n.pt") +model.train(data="coco8.yaml", epochs=5, project="ultralytics", name="yolo11n") +``` -To configure Weights & Biases for YOLO11 training, follow these steps: +This will automatically log metrics, hyperparameters, and model artifacts to your W&B project. -1. Run the command to initialize Weights & Biases: - ```bash - import wandb - wandb.login() - ``` -2. 
Retrieve your API key from the Weights & Biases website. -3. Use the API key to authenticate your development environment. +### What are the key features of Weights & Biases integration with YOLO11? -Detailed setup instructions can be found in the Configuring Weights & Biases section above. +The key features include: -### How do I train a YOLO11 model using Weights & Biases? +- Real-time metrics tracking during training +- Hyperparameter optimization tools +- Comparative analysis of different training runs +- Visualization of training progress through graphs +- Resource monitoring (CPU, GPU, memory usage) +- Model artifacts management and sharing +- Viewing inference results with image overlays -For training a YOLO11 model using Weights & Biases, use the following steps in a Python script: +These features help in tracking experiments, optimizing models, and collaborating more effectively on YOLO11 projects. -```python -import wandb -from wandb.integration.ultralytics import add_wandb_callback +### How can I view the Weights & Biases dashboard for my YOLO11 training? -from ultralytics import YOLO +After running your training script with W&B integration: -# Initialize a Weights & Biases run -wandb.init(project="ultralytics", job_type="training") +1. A link to your W&B dashboard will be provided in the console output. +2. Click on the link or go to [wandb.ai](https://wandb.ai) and log in to your account. +3. Navigate to your project to view detailed metrics, visualizations, and model performance data. -# Load a YOLO model -model = YOLO("yolo11n.pt") +The dashboard offers insights into your model's training process, allowing you to analyze and improve your YOLO11 models effectively. -# Add W&B Callback for Ultralytics -add_wandb_callback(model, enable_model_checkpointing=True) +### Can I disable Weights & Biases logging for YOLO11 training? -# Train and Fine-Tune the Model -model.train(project="ultralytics", data="coco8.yaml", epochs=5, imgsz=640) +Yes, you can disable W&B logging using the following command: -# Validate the Model -model.val() +```bash +wandb disabled +``` -# Perform Inference and Log Results -model(["path/to/image1", "path/to/image2"]) +To re-enable logging, use: -# Finalize the W&B Run -wandb.finish() +```bash +wandb enabled ``` -This script initializes Weights & Biases, sets up the model, trains it, and logs results. For more details, visit the Usage section above. +This allows you to control when you want to use W&B logging without modifying your training scripts. -### Why should I use Ultralytics YOLO11 with Weights & Biases over other platforms? +### How does Weights & Biases help in optimizing YOLO11 models? -Ultralytics YOLO11 integrated with Weights & Biases offers several unique advantages: +Weights & Biases helps optimize YOLO11 models by: -- **High Efficiency:** Real-time tracking of training metrics and performance optimization. -- **Scalability:** Easily manage large-scale training jobs with robust resource monitoring and utilization tools. -- **Interactivity:** A user-friendly interactive UI for [data visualization](https://www.ultralytics.com/glossary/data-visualization) and model management. -- **Community and Support:** Strong integration documentation and community support with flexible customization and enhancement options. +1. Providing detailed visualizations of training metrics +2. Enabling easy comparison between different model versions +3. Offering tools for [hyperparameter tuning](https://www.ultralytics.com/glossary/hyperparameter-tuning) +4. 
Allowing for collaborative analysis of model performance +5. Facilitating easy sharing of model artifacts and results -For comparisons with other platforms like Comet and ClearML, refer to [Ultralytics integrations](../integrations/index.md). +These features help researchers and developers iterate faster and make data-driven decisions to improve their YOLO11 models. diff --git a/docs/en/macros/predict-args.md b/docs/en/macros/predict-args.md index 2bb669eb7b..35c285afe0 100644 --- a/docs/en/macros/predict-args.md +++ b/docs/en/macros/predict-args.md @@ -1,17 +1,17 @@ -| Argument | Type | Default | Description | -| --------------- | -------------- | ---------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `source` | `str` | `'ultralytics/assets'` | Specifies the data source for inference. Can be an image path, video file, directory, URL, or device ID for live feeds. Supports a wide range of formats and sources, enabling flexible application across [different types of input](/modes/predict.md/#inference-sources). | -| `conf` | `float` | `0.25` | Sets the minimum confidence threshold for detections. Objects detected with confidence below this threshold will be disregarded. Adjusting this value can help reduce false positives. | -| `iou` | `float` | `0.7` | [Intersection Over Union](https://www.ultralytics.com/glossary/intersection-over-union-iou) (IoU) threshold for Non-Maximum Suppression (NMS). Lower values result in fewer detections by eliminating overlapping boxes, useful for reducing duplicates. | -| `imgsz` | `int or tuple` | `640` | Defines the image size for inference. Can be a single integer `640` for square resizing or a (height, width) tuple. Proper sizing can improve detection [accuracy](https://www.ultralytics.com/glossary/accuracy) and processing speed. | -| `half` | `bool` | `False` | Enables half-[precision](https://www.ultralytics.com/glossary/precision) (FP16) inference, which can speed up model inference on supported GPUs with minimal impact on accuracy. | -| `device` | `str` | `None` | Specifies the device for inference (e.g., `cpu`, `cuda:0` or `0`). Allows users to select between CPU, a specific GPU, or other compute devices for model execution. | -| `max_det` | `int` | `300` | Maximum number of detections allowed per image. Limits the total number of objects the model can detect in a single inference, preventing excessive outputs in dense scenes. | -| `vid_stride` | `int` | `1` | Frame stride for video inputs. Allows skipping frames in videos to speed up processing at the cost of temporal resolution. A value of 1 processes every frame, higher values skip frames. | -| `stream_buffer` | `bool` | `False` | Determines the frame processing strategy for video streams. If `False` processing only the most recent frame, minimizing latency (optimized for real-time applications). If `True' processes all frames in order, ensuring no frames are skipped. | -| `visualize` | `bool` | `False` | Activates visualization of model features during inference, providing insights into what the model is "seeing". Useful for debugging and model interpretation. | -| `augment` | `bool` | `False` | Enables test-time augmentation (TTA) for predictions, potentially improving detection robustness at the cost of inference speed. 
| -| `agnostic_nms` | `bool` | `False` | Enables class-agnostic Non-Maximum Suppression (NMS), which merges overlapping boxes of different classes. Useful in multi-class detection scenarios where class overlap is common. | -| `classes` | `list[int]` | `None` | Filters predictions to a set of class IDs. Only detections belonging to the specified classes will be returned. Useful for focusing on relevant objects in multi-class detection tasks. | -| `retina_masks` | `bool` | `False` | Uses high-resolution segmentation masks if available in the model. This can enhance mask quality for segmentation tasks, providing finer detail. | -| `embed` | `list[int]` | `None` | Specifies the layers from which to extract feature vectors or [embeddings](https://www.ultralytics.com/glossary/embeddings). Useful for downstream tasks like clustering or similarity search. | +| Argument | Type | Default | Description | +| --------------- | -------------- | ---------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `source` | `str` | `'ultralytics/assets'` | Specifies the data source for inference. Can be an image path, video file, directory, URL, or device ID for live feeds. Supports a wide range of formats and sources, enabling flexible application across [different types of input](/modes/predict.md/#inference-sources). | +| `conf` | `float` | `0.25` | Sets the minimum confidence threshold for detections. Objects detected with confidence below this threshold will be disregarded. Adjusting this value can help reduce false positives. | +| `iou` | `float` | `0.7` | [Intersection Over Union](https://www.ultralytics.com/glossary/intersection-over-union-iou) (IoU) threshold for Non-Maximum Suppression (NMS). Lower values result in fewer detections by eliminating overlapping boxes, useful for reducing duplicates. | +| `imgsz` | `int or tuple` | `640` | Defines the image size for inference. Can be a single integer `640` for square resizing or a (height, width) tuple. Proper sizing can improve detection [accuracy](https://www.ultralytics.com/glossary/accuracy) and processing speed. | +| `half` | `bool` | `False` | Enables half-[precision](https://www.ultralytics.com/glossary/precision) (FP16) inference, which can speed up model inference on supported GPUs with minimal impact on accuracy. | +| `device` | `str` | `None` | Specifies the device for inference (e.g., `cpu`, `cuda:0` or `0`). Allows users to select between CPU, a specific GPU, or other compute devices for model execution. | +| `max_det` | `int` | `300` | Maximum number of detections allowed per image. Limits the total number of objects the model can detect in a single inference, preventing excessive outputs in dense scenes. | +| `vid_stride` | `int` | `1` | Frame stride for video inputs. Allows skipping frames in videos to speed up processing at the cost of temporal resolution. A value of 1 processes every frame, higher values skip frames. | +| `stream_buffer` | `bool` | `False` | Determines whether to queue incoming frames for video streams. If `False`, old frames are dropped to accommodate new frames (optimized for real-time applications). If `True`, queues new frames in a buffer, ensuring no frames are skipped, but this adds latency if the inference FPS is lower than the stream FPS.
| +| `visualize` | `bool` | `False` | Activates visualization of model features during inference, providing insights into what the model is "seeing". Useful for debugging and model interpretation. | +| `augment` | `bool` | `False` | Enables test-time augmentation (TTA) for predictions, potentially improving detection robustness at the cost of inference speed. | +| `agnostic_nms` | `bool` | `False` | Enables class-agnostic Non-Maximum Suppression (NMS), which merges overlapping boxes of different classes. Useful in multi-class detection scenarios where class overlap is common. | +| `classes` | `list[int]` | `None` | Filters predictions to a set of class IDs. Only detections belonging to the specified classes will be returned. Useful for focusing on relevant objects in multi-class detection tasks. | +| `retina_masks` | `bool` | `False` | Uses high-resolution segmentation masks if available in the model. This can enhance mask quality for segmentation tasks, providing finer detail. | +| `embed` | `list[int]` | `None` | Specifies the layers from which to extract feature vectors or [embeddings](https://www.ultralytics.com/glossary/embeddings). Useful for downstream tasks like clustering or similarity search. | diff --git a/docs/en/models/index.md b/docs/en/models/index.md index baa5c9b260..5e9d07f3d5 100644 --- a/docs/en/models/index.md +++ b/docs/en/models/index.md @@ -8,6 +8,8 @@ keywords: Ultralytics, supported models, YOLOv3, YOLOv4, YOLOv5, YOLOv6, YOLOv7, Welcome to Ultralytics' model documentation! We offer support for a wide range of models, each tailored to specific tasks like [object detection](../tasks/detect.md), [instance segmentation](../tasks/segment.md), [image classification](../tasks/classify.md), [pose estimation](../tasks/pose.md), and [multi-object tracking](../modes/track.md). If you're interested in contributing your model architecture to Ultralytics, check out our [Contributing Guide](../help/contributing.md). +![Ultralytics YOLO11 Comparison Plots](https://github.com/user-attachments/assets/a311a4ed-bbf2-43b5-8012-5f183a28a845) + ## Featured Models Here are some of the key models supported: @@ -20,7 +22,7 @@ Here are some of the key models supported: 6. **[YOLOv8](yolov8.md)**: The latest version of the YOLO family, featuring enhanced capabilities such as [instance segmentation](https://www.ultralytics.com/glossary/instance-segmentation), pose/keypoints estimation, and classification. 7. **[YOLOv9](yolov9.md)**: An experimental model trained on the Ultralytics [YOLOv5](yolov5.md) codebase implementing Programmable Gradient Information (PGI). 8. **[YOLOv10](yolov10.md)**: By Tsinghua University, featuring NMS-free training and efficiency-accuracy driven architecture, delivering state-of-the-art performance and latency. -9. **[YOLO11](yolo11.md) NEW 🚀**: Ultralytics' latest YOLO models delivering state-of-the-art (SOTA) performance across multiple tasks. +9. **[YOLO11](yolo11.md) 🚀 NEW**: Ultralytics' latest YOLO models delivering state-of-the-art (SOTA) performance across multiple tasks. 10. **[Segment Anything Model (SAM)](sam.md)**: Meta's original Segment Anything Model (SAM). 11. **[Segment Anything Model 2 (SAM2)](sam-2.md)**: The next generation of Meta's Segment Anything Model (SAM) for videos and images. 12. **[Mobile Segment Anything Model (MobileSAM)](mobile-sam.md)**: MobileSAM for mobile applications, by Kyung Hee University. 
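To make the refreshed `predict` argument table above (docs/en/macros/predict-args.md) concrete, here is a minimal, illustrative sketch that combines a few of those arguments in a single call. The sample image URL and the chosen values are arbitrary placeholders rather than recommendations from this PR:

```python
from ultralytics import YOLO

# Load a pretrained YOLO11n detection model
model = YOLO("yolo11n.pt")

# Combine several of the documented inference arguments in one predict() call
results = model.predict(
    source="https://ultralytics.com/images/bus.jpg",  # image path, URL, directory, video, or stream
    conf=0.5,  # raise the confidence threshold to filter out weak detections
    iou=0.5,  # stricter NMS overlap threshold
    imgsz=640,  # square inference resolution
    max_det=50,  # cap the number of detections per image
    classes=[0],  # keep only class 0 (person for COCO-trained weights)
)

# Each Results object exposes the filtered boxes with xyxy coordinates, confidences, and class IDs
for result in results:
    print(result.boxes.xyxy, result.boxes.conf, result.boxes.cls)
```

Arguments such as `half`, `device`, `vid_stride`, and `stream_buffer` from the same table follow the identical keyword pattern.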
diff --git a/docs/en/models/mobile-sam.md b/docs/en/models/mobile-sam.md index 0d7df2a2ca..0529db6603 100644 --- a/docs/en/models/mobile-sam.md +++ b/docs/en/models/mobile-sam.md @@ -90,8 +90,17 @@ You can download the model [here](https://github.com/ChaoningZhang/MobileSAM/blo # Load the model model = SAM("mobile_sam.pt") - # Predict a segment based on a point prompt + # Predict a segment based on a single point prompt model.predict("ultralytics/assets/zidane.jpg", points=[900, 370], labels=[1]) + + # Predict multiple segments based on multiple points prompt + model.predict("ultralytics/assets/zidane.jpg", points=[[400, 370], [900, 370]], labels=[1, 1]) + + # Predict a segment based on multiple points prompt per object + model.predict("ultralytics/assets/zidane.jpg", points=[[[400, 370], [900, 370]]], labels=[[1, 1]]) + + # Predict a segment using both positive and negative prompts. + model.predict("ultralytics/assets/zidane.jpg", points=[[[400, 370], [900, 370]]], labels=[[1, 0]]) ``` ### Box Prompt @@ -106,8 +115,17 @@ You can download the model [here](https://github.com/ChaoningZhang/MobileSAM/blo # Load the model model = SAM("mobile_sam.pt") - # Predict a segment based on a box prompt - model.predict("ultralytics/assets/zidane.jpg", bboxes=[439, 437, 524, 709]) + # Predict a segment based on a single point prompt + model.predict("ultralytics/assets/zidane.jpg", points=[900, 370], labels=[1]) + + # Predict multiple segments based on multiple points prompt + model.predict("ultralytics/assets/zidane.jpg", points=[[400, 370], [900, 370]], labels=[1, 1]) + + # Predict a segment based on multiple points prompt per object + model.predict("ultralytics/assets/zidane.jpg", points=[[[400, 370], [900, 370]]], labels=[[1, 1]]) + + # Predict a segment using both positive and negative prompts. + model.predict("ultralytics/assets/zidane.jpg", points=[[[400, 370], [900, 370]]], labels=[[1, 0]]) ``` We have implemented `MobileSAM` and `SAM` using the same API. For more usage information, please see the [SAM page](sam.md). diff --git a/docs/en/models/sam.md b/docs/en/models/sam.md index 304fc00287..1a5c0db4a7 100644 --- a/docs/en/models/sam.md +++ b/docs/en/models/sam.md @@ -58,8 +58,17 @@ The Segment Anything Model can be employed for a multitude of downstream tasks t # Run inference with bboxes prompt results = model("ultralytics/assets/zidane.jpg", bboxes=[439, 437, 524, 709]) - # Run inference with points prompt - results = model("ultralytics/assets/zidane.jpg", points=[900, 370], labels=[1]) + # Run inference with single point + results = model("ultralytics/assets/zidane.jpg", points=[900, 370], labels=[1]) + + # Run inference with multiple points + results = model("ultralytics/assets/zidane.jpg", points=[[400, 370], [900, 370]], labels=[1, 1]) + + # Run inference with multiple points prompt per object + results = model("ultralytics/assets/zidane.jpg", points=[[[400, 370], [900, 370]]], labels=[[1, 1]]) + + # Run inference with negative points prompt + results = model("ultralytics/assets/zidane.jpg", points=[[[400, 370], [900, 370]]], labels=[[1, 0]]) ``` !!!
example "Segment everything" @@ -107,8 +116,16 @@ The Segment Anything Model can be employed for a multitude of downstream tasks t predictor.set_image("ultralytics/assets/zidane.jpg") # set with image file predictor.set_image(cv2.imread("ultralytics/assets/zidane.jpg")) # set with np.ndarray results = predictor(bboxes=[439, 437, 524, 709]) + + # Run inference with single point prompt results = predictor(points=[900, 370], labels=[1]) + # Run inference with multiple points prompt + results = predictor(points=[[400, 370], [900, 370]], labels=[[1, 1]]) + + # Run inference with negative points prompt + results = predictor(points=[[[400, 370], [900, 370]]], labels=[[1, 0]]) + # Reset image predictor.reset_image() ``` @@ -245,6 +262,15 @@ model("ultralytics/assets/zidane.jpg", bboxes=[439, 437, 524, 709]) # Segment with points prompt model("ultralytics/assets/zidane.jpg", points=[900, 370], labels=[1]) + +# Segment with multiple points prompt +model("ultralytics/assets/zidane.jpg", points=[[400, 370], [900, 370]], labels=[[1, 1]]) + +# Segment with multiple points prompt per object +model("ultralytics/assets/zidane.jpg", points=[[[400, 370], [900, 370]]], labels=[[1, 1]]) + +# Segment with negative points prompt. +model("ultralytics/assets/zidane.jpg", points=[[[400, 370], [900, 370]]], labels=[[1, 0]]) ``` Alternatively, you can run inference with SAM in the command line interface (CLI): diff --git a/docs/en/models/yolo11.md b/docs/en/models/yolo11.md index dbb8318fe1..0c755147ab 100644 --- a/docs/en/models/yolo11.md +++ b/docs/en/models/yolo11.md @@ -8,19 +8,19 @@ keywords: YOLO11, state-of-the-art object detection, YOLO series, Ultralytics, c ## Overview -YOLO11 is the latest iteration in the [Ultralytics](https://www.ultralytics.com) YOLO series of real-time object detectors, redefining what's possible with cutting-edge [accuracy](https://www.ultralytics.com/glossary/accuracy), speed, and efficiency. Building upon the impressive advancements of previous YOLO versions, YOLO11 introduces significant improvements in architecture and training methods, making it a versatile choice for a wide range of [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) tasks. +YOLO11 is the latest iteration in the [Ultralytics](https://www.ultralytics.com/) YOLO series of real-time object detectors, redefining what's possible with cutting-edge [accuracy](https://www.ultralytics.com/glossary/accuracy), speed, and efficiency. Building upon the impressive advancements of previous YOLO versions, YOLO11 introduces significant improvements in architecture and training methods, making it a versatile choice for a wide range of [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) tasks. ![Ultralytics YOLO11 Comparison Plots](https://github.com/user-attachments/assets/a311a4ed-bbf2-43b5-8012-5f183a28a845)


-
- Watch: Ultralytics YOLO11 Announcement at YOLO Vision 2024 + Watch: How to Use Ultralytics YOLO11 for Object Detection and Tracking | How to Benchmark | YOLO11 RELEASED 🚀

## Key Features diff --git a/docs/en/modes/predict.md b/docs/en/modes/predict.md index cb8ca25e7e..a298294d59 100644 --- a/docs/en/modes/predict.md +++ b/docs/en/modes/predict.md @@ -120,6 +120,7 @@ YOLO11 can process different types of input sources for inference, as shown in t | YouTube ✅ | `'https://youtu.be/LNwODJXcvt4'` | `str` | URL to a YouTube video. | | stream ✅ | `'rtsp://example.com/media.mp4'` | `str` | URL for streaming protocols such as RTSP, RTMP, TCP, or an IP address. | | multi-stream ✅ | `'list.streams'` | `str` or `Path` | `*.streams` text file with one stream URL per row, i.e. 8 streams will run at batch-size 8. | +| webcam ✅ | `0` | `int` | Index of the connected camera device to run inference on. | Below are code examples for using each source type: @@ -376,6 +377,20 @@ Below are code examples for using each source type: Each row in the file represents a streaming source, allowing you to monitor and perform inference on several video streams at once. + === "Webcam" + + You can run inference on a connected camera device by passing the index of that particular camera to `source`. + + ```python + from ultralytics import YOLO + + # Load a pretrained YOLO11n model + model = YOLO("yolo11n.pt") + + # Run inference on the source + results = model(source=0, stream=True) # generator of Results objects + ``` + ## Inference Arguments `model.predict()` accepts multiple arguments that can be passed at inference time to override defaults: @@ -408,6 +423,10 @@ YOLO11 supports various image and video formats, as specified in [ultralytics/da The below table contains valid Ultralytics image formats. +!!! note + + HEIC images are supported for inference only, not for training. + | Image Suffixes | Example Predict Command | Reference | | -------------- | -------------------------------- | -------------------------------------------------------------------------- | | `.bmp` | `yolo predict source=image.bmp` | [Microsoft BMP File Format](https://en.wikipedia.org/wiki/BMP_file_format) | @@ -420,6 +439,7 @@ The below table contains valid Ultralytics image formats. | `.tiff` | `yolo predict source=image.tiff` | [Tag Image File Format](https://en.wikipedia.org/wiki/TIFF) | | `.webp` | `yolo predict source=image.webp` | [WebP](https://en.wikipedia.org/wiki/WebP) | | `.pfm` | `yolo predict source=image.pfm` | [Portable FloatMap](https://en.wikipedia.org/wiki/Netpbm#File_formats) | +| `.HEIC` | `yolo predict source=image.HEIC` | [High Efficiency Image Format](https://en.wikipedia.org/wiki/HEIF) | ### Videos diff --git a/docs/en/reference/cfg/__init__.md b/docs/en/reference/cfg/__init__.md index 5997b37f2b..69652aa06c 100644 --- a/docs/en/reference/cfg/__init__.md +++ b/docs/en/reference/cfg/__init__.md @@ -47,10 +47,6 @@ keywords: Ultralytics, YOLO, configuration, cfg2dict, get_cfg, check_cfg, save_d



-## ::: ultralytics.cfg.handle_explorer - -



- ## ::: ultralytics.cfg.handle_streamlit_inference



diff --git a/docs/en/reference/data/converter.md b/docs/en/reference/data/converter.md index d4ba3d58d5..073c760e53 100644 --- a/docs/en/reference/data/converter.md +++ b/docs/en/reference/data/converter.md @@ -41,4 +41,8 @@ keywords: Ultralytics, data conversion, YOLO models, COCO, DOTA, YOLO bbox2segme ## ::: ultralytics.data.converter.yolo_bbox2segment +



+ +## ::: ultralytics.data.converter.create_synthetic_coco_dataset +

diff --git a/docs/en/reference/data/explorer/explorer.md b/docs/en/reference/data/explorer/explorer.md deleted file mode 100644 index 22aa6d0620..0000000000 --- a/docs/en/reference/data/explorer/explorer.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -comments: true -description: Explore the Ultralytics data explorer functions including YOLO dataset handling, image querying, embedding generation, and similarity indexing. -keywords: Ultralytics, YOLO, data explorer, image querying, embeddings, similarity index, python, machine learning ---- - -# Reference for `ultralytics/data/explorer/explorer.py` - -!!! note - - This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/explorer/explorer.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/explorer/explorer.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/data/explorer/explorer.py) 🛠️. Thank you 🙏! - -
- -## ::: ultralytics.data.explorer.explorer.ExplorerDataset - -



- -## ::: ultralytics.data.explorer.explorer.Explorer - -

diff --git a/docs/en/reference/data/explorer/gui/dash.md b/docs/en/reference/data/explorer/gui/dash.md deleted file mode 100644 index b2e5120364..0000000000 --- a/docs/en/reference/data/explorer/gui/dash.md +++ /dev/null @@ -1,57 +0,0 @@ ---- -comments: true -description: Explore the functionalities of Ultralytics Explorer with our comprehensive GUI dash documentation. -keywords: Ultralytics, Explorer, GUI, dash, documentation, data explorer, AI query, SQL query, image similarity ---- - -# Reference for `ultralytics/data/explorer/gui/dash.py` - -!!! note - - This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/explorer/gui/dash.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/explorer/gui/dash.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/data/explorer/gui/dash.py) 🛠️. Thank you 🙏! - -
- -## ::: ultralytics.data.explorer.gui.dash._get_explorer - -



- -## ::: ultralytics.data.explorer.gui.dash.init_explorer_form - -



- -## ::: ultralytics.data.explorer.gui.dash.query_form - -



- -## ::: ultralytics.data.explorer.gui.dash.ai_query_form - -



- -## ::: ultralytics.data.explorer.gui.dash.find_similar_imgs - -



- -## ::: ultralytics.data.explorer.gui.dash.similarity_form - -



- -## ::: ultralytics.data.explorer.gui.dash.run_sql_query - -



- -## ::: ultralytics.data.explorer.gui.dash.run_ai_query - -



- -## ::: ultralytics.data.explorer.gui.dash.reset_explorer - -



- -## ::: ultralytics.data.explorer.gui.dash.utralytics_explorer_docs_callback - -



- -## ::: ultralytics.data.explorer.gui.dash.layout - -

diff --git a/docs/en/reference/data/explorer/utils.md b/docs/en/reference/data/explorer/utils.md deleted file mode 100644 index 9a953a0665..0000000000 --- a/docs/en/reference/data/explorer/utils.md +++ /dev/null @@ -1,33 +0,0 @@ ---- -comments: true -description: Explore various utility functions in ultralytics.data.explorer.utils including schema definitions, batch sanitization, and query results plotting. -keywords: Ultralytics, data explorer, utils, schema, sanitize batch, plot query results, SQL query, machine learning ---- - -# Reference for `ultralytics/data/explorer/utils.py` - -!!! note - - This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/explorer/utils.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/explorer/utils.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/data/explorer/utils.py) 🛠️. Thank you 🙏! - -
- -## ::: ultralytics.data.explorer.utils.get_table_schema - -



- -## ::: ultralytics.data.explorer.utils.get_sim_index_schema - -



- -## ::: ultralytics.data.explorer.utils.sanitize_batch - -



- -## ::: ultralytics.data.explorer.utils.plot_query_result - -



- -## ::: ultralytics.data.explorer.utils.prompt_sql_query - -

diff --git a/docs/en/reference/solutions/solutions.md new file index 0000000000..727a5fa752 --- /dev/null +++ b/docs/en/reference/solutions/solutions.md @@ -0,0 +1,16 @@ +--- +description: Explore the Ultralytics Solution Base class for real-time object counting, virtual gym, heatmaps, and speed estimation using Ultralytics YOLO. Learn to implement Ultralytics solutions effectively. +keywords: Ultralytics, Solutions, Object counting, Speed Estimation, Heatmaps, Queue Management, AI Gym, YOLO, pose detection, gym step counting, real-time pose estimation, Python +--- + +# Reference for `ultralytics/solutions/solutions.py` + +!!! note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/solutions/solutions.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/solutions/solutions.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/solutions/solutions.py) 🛠️. Thank you 🙏! + +
+ +## ::: ultralytics.solutions.solutions.BaseSolution + +
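The `BaseSolution` class referenced above is the new common parent for the solutions refactored elsewhere in this PR (see the updated tests/test_solutions.py later in this diff). Below is a rough usage sketch based on those tests; the video path and region coordinates are placeholders, and the return value is discarded just as the tests do:

```python
import cv2

from ultralytics import solutions

# Counting region; replace these placeholder coordinates with points from your own scene
region_points = [(20, 400), (1080, 404), (1080, 360), (20, 360)]

# Solutions now construct their own model internally from a weights path instead of a YOLO instance
counter = solutions.ObjectCounter(region=region_points, model="yolo11n.pt", show=False)

cap = cv2.VideoCapture("path/to/video.mp4")  # placeholder video source
while cap.isOpened():
    success, frame = cap.read()
    if not success:
        break
    _ = counter.count(frame)  # track and count objects in the current frame

cap.release()
cv2.destroyAllWindows()
```

Per the updated tests, the same constructor pattern applies to `solutions.Heatmap`, `solutions.SpeedEstimator`, `solutions.QueueManager`, and `solutions.AIGym` (which is driven with `gym.monitor(frame)`).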

diff --git a/docs/en/reference/utils/torch_utils.md b/docs/en/reference/utils/torch_utils.md index 4f8f3d1b9c..ac31ec2c33 100644 --- a/docs/en/reference/utils/torch_utils.md +++ b/docs/en/reference/utils/torch_utils.md @@ -35,6 +35,10 @@ keywords: Ultralytics, torch utils, model optimization, device selection, infere



+## ::: ultralytics.utils.torch_utils.get_gpu_info + +



+ ## ::: ultralytics.utils.torch_utils.select_device



diff --git a/docs/en/tasks/obb.md b/docs/en/tasks/obb.md index 7554dc2b5e..35e659ed47 100644 --- a/docs/en/tasks/obb.md +++ b/docs/en/tasks/obb.md @@ -49,7 +49,7 @@ YOLO11 pretrained OBB models are shown here, which are pretrained on the [DOTAv1 ## Train -Train YOLO11n-obb on the `dota8.yaml` dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) at image size 640. For a full list of available arguments see the [Configuration](../usage/cfg.md) page. +Train YOLO11n-obb on the DOTA8 dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) at image size 640. For a full list of available arguments see the [Configuration](../usage/cfg.md) page. !!! example diff --git a/docs/en/tasks/pose.md b/docs/en/tasks/pose.md index 5fa566029c..0523239fc5 100644 --- a/docs/en/tasks/pose.md +++ b/docs/en/tasks/pose.md @@ -73,7 +73,7 @@ YOLO11 pretrained Pose models are shown here. Detect, Segment and Pose models ar ## Train -Train a YOLO11-pose model on the COCO128-pose dataset. +Train a YOLO11-pose model on the COCO8-pose dataset. !!! example @@ -110,7 +110,7 @@ YOLO pose dataset format can be found in detail in the [Dataset Guide](../datase ## Val -Validate trained YOLO11n-pose model [accuracy](https://www.ultralytics.com/glossary/accuracy) on the COCO128-pose dataset. No arguments are needed as the `model` retains its training `data` and arguments as model attributes. +Validate trained YOLO11n-pose model [accuracy](https://www.ultralytics.com/glossary/accuracy) on the COCO8-pose dataset. No arguments are needed as the `model` retains its training `data` and arguments as model attributes. !!! example diff --git a/docs/en/tasks/segment.md b/docs/en/tasks/segment.md index e6ad5add3a..c422c6fd62 100644 --- a/docs/en/tasks/segment.md +++ b/docs/en/tasks/segment.md @@ -41,7 +41,7 @@ YOLO11 pretrained Segment models are shown here. Detect, Segment and Pose models ## Train -Train YOLO11n-seg on the COCO128-seg dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) at image size 640. For a full list of available arguments see the [Configuration](../usage/cfg.md) page. +Train YOLO11n-seg on the COCO8-seg dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) at image size 640. For a full list of available arguments see the [Configuration](../usage/cfg.md) page. !!! example @@ -78,7 +78,7 @@ YOLO segmentation dataset format can be found in detail in the [Dataset Guide](. ## Val -Validate trained YOLO11n-seg model [accuracy](https://www.ultralytics.com/glossary/accuracy) on the COCO128-seg dataset. No arguments are needed as the `model` retains its training `data` and arguments as model attributes. +Validate trained YOLO11n-seg model [accuracy](https://www.ultralytics.com/glossary/accuracy) on the COCO8-seg dataset. No arguments are needed as the `model` retains its training `data` and arguments as model attributes. !!! 
example diff --git a/docs/en/usage/cfg.md b/docs/en/usage/cfg.md index 8f8ac6025f..a7fef9e2ac 100644 --- a/docs/en/usage/cfg.md +++ b/docs/en/usage/cfg.md @@ -41,8 +41,8 @@ Ultralytics commands use the following syntax: Where: -- `TASK` (optional) is one of ([detect](../tasks/detect.md), [segment](../tasks/segment.md), [classify](../tasks/classify.md), [pose](../tasks/pose.md)) -- `MODE` (required) is one of ([train](../modes/train.md), [val](../modes/val.md), [predict](../modes/predict.md), [export](../modes/export.md), [track](../modes/track.md)) +- `TASK` (optional) is one of ([detect](../tasks/detect.md), [segment](../tasks/segment.md), [classify](../tasks/classify.md), [pose](../tasks/pose.md), [obb](../tasks/obb.md)) +- `MODE` (required) is one of ([train](../modes/train.md), [val](../modes/val.md), [predict](../modes/predict.md), [export](../modes/export.md), [track](../modes/track.md), [benchmark](../modes/benchmark.md)) - `ARGS` (optional) are `arg=value` pairs like `imgsz=640` that override defaults. Default `ARG` values are defined on this page from the `cfg/defaults.yaml` [file](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/default.yaml). @@ -59,7 +59,7 @@ YOLO models can be used for a variety of tasks, including detection, segmentatio | Argument | Default | Description | | -------- | ---------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `task` | `'detect'` | Specifies the YOLO task to be executed. Options include `detect` for [object detection](https://www.ultralytics.com/glossary/object-detection), `segment` for segmentation, `classify` for classification, `pose` for pose estimation and `OBB` for oriented bounding boxes. Each task is tailored to specific types of output and problems within image and video analysis. | +| `task` | `'detect'` | Specifies the YOLO task to be executed. Options include `detect` for [object detection](https://www.ultralytics.com/glossary/object-detection), `segment` for segmentation, `classify` for classification, `pose` for pose estimation and `obb` for oriented bounding boxes. Each task is tailored to specific types of output and problems within image and video analysis. | [Tasks Guide](../tasks/index.md){ .md-button } diff --git a/docs/en/usage/python.md b/docs/en/usage/python.md index af0546f434..177f5d45b0 100644 --- a/docs/en/usage/python.md +++ b/docs/en/usage/python.md @@ -51,7 +51,7 @@ Train mode is used for training a YOLO11 model on a custom dataset. In this mode !!! example "Train" - === "From pretrained(recommended)" + === "From pretrained (recommended)" ```python from ultralytics import YOLO @@ -256,50 +256,6 @@ Benchmark mode is used to profile the speed and accuracy of various export forma [Benchmark Examples](../modes/benchmark.md){ .md-button } -## Explorer - -Explorer API can be used to explore datasets with advanced semantic, vector-similarity and SQL search among other features. It also enabled searching for images based on their content using natural language by utilizing the power of LLMs. The Explorer API allows you to write your own dataset exploration notebooks or scripts to get insights into your datasets. - -!!! 
example "Semantic Search Using Explorer" - - === "Using Images" - - ```python - from ultralytics import Explorer - - # create an Explorer object - exp = Explorer(data="coco8.yaml", model="yolo11n.pt") - exp.create_embeddings_table() - - similar = exp.get_similar(img="https://ultralytics.com/images/bus.jpg", limit=10) - print(similar.head()) - - # Search using multiple indices - similar = exp.get_similar( - img=["https://ultralytics.com/images/bus.jpg", "https://ultralytics.com/images/bus.jpg"], limit=10 - ) - print(similar.head()) - ``` - - === "Using Dataset Indices" - - ```python - from ultralytics import Explorer - - # create an Explorer object - exp = Explorer(data="coco8.yaml", model="yolo11n.pt") - exp.create_embeddings_table() - - similar = exp.get_similar(idx=1, limit=10) - print(similar.head()) - - # Search using multiple indices - similar = exp.get_similar(idx=[1, 10], limit=10) - print(similar.head()) - ``` - -[Explorer](../datasets/explorer/index.md){ .md-button } - ## Using Trainers `YOLO` model class is a high-level wrapper on the Trainer classes. Each YOLO task has its own trainer that inherits from `BaseTrainer`. diff --git a/docs/en/usage/simple-utilities.md b/docs/en/usage/simple-utilities.md index 0a947adaf1..45d3dc66c3 100644 --- a/docs/en/usage/simple-utilities.md +++ b/docs/en/usage/simple-utilities.md @@ -25,10 +25,6 @@ The `ultralytics` package comes with a myriad of utilities that can support, enh ## Data -### YOLO Data Explorer - -[YOLO Explorer](../datasets/explorer/index.md) was added in the `8.1.0` anniversary update and is a powerful tool you can use to better understand your dataset. One of the key functions that YOLO Explorer provides, is the ability to use text queries to find object instances in your dataset. - ### Auto Labeling / Annotations Dataset annotation is a very resource intensive and time-consuming process. If you have a YOLO [object detection](https://www.ultralytics.com/glossary/object-detection) model trained on a reasonable amount of data, you can use it and [SAM](../models/sam.md) to auto-annotate additional data (segmentation format). diff --git a/docs/en/yolov5/environments/docker_image_quickstart_tutorial.md b/docs/en/yolov5/environments/docker_image_quickstart_tutorial.md index 55b92316dc..023f24c505 100644 --- a/docs/en/yolov5/environments/docker_image_quickstart_tutorial.md +++ b/docs/en/yolov5/environments/docker_image_quickstart_tutorial.md @@ -12,7 +12,7 @@ You can also explore other quickstart options for YOLOv5, such as our [Colab Not ## Prerequisites -1. **NVIDIA Driver**: Version 455.23 or higher. Download from [Nvidia's website](https://www.nvidia.com/Download/index.aspx). +1. **NVIDIA Driver**: Version 455.23 or higher. Download from [NVIDIA's website](https://www.nvidia.com/Download/index.aspx). 2. **NVIDIA-Docker**: Allows Docker to interact with your local GPU. Installation instructions are available on the [NVIDIA-Docker GitHub repository](https://github.com/NVIDIA/nvidia-docker). 3. **Docker Engine - CE**: Version 19.03 or higher. Download and installation instructions can be found on the [Docker website](https://docs.docker.com/get-started/get-docker/). diff --git a/docs/en/yolov5/index.md b/docs/en/yolov5/index.md index 3605058463..17be5e24a0 100644 --- a/docs/en/yolov5/index.md +++ b/docs/en/yolov5/index.md @@ -8,7 +8,7 @@ keywords: YOLOv5, Ultralytics, object detection, computer vision, deep learning,

- + Ultralytics YOLOv5 v7.0 banner

@@ -80,7 +80,7 @@ This badge indicates that all [YOLOv5 GitHub Actions](https://github.com/ultraly space Ultralytics BiliBili space - Ultralytics Discord + Ultralytics Discord
## Connect and Contribute diff --git a/docs/en/yolov5/tutorials/train_custom_data.md b/docs/en/yolov5/tutorials/train_custom_data.md index aa093e4b81..8b465c5239 100644 --- a/docs/en/yolov5/tutorials/train_custom_data.md +++ b/docs/en/yolov5/tutorials/train_custom_data.md @@ -18,7 +18,7 @@ pip install -r requirements.txt # install ## Train On Custom Data - + Ultralytics active learning

diff --git a/docs/mkdocs_github_authors.yaml b/docs/mkdocs_github_authors.yaml index 4f7f3b3a38..0e0423c248 100644 --- a/docs/mkdocs_github_authors.yaml +++ b/docs/mkdocs_github_authors.yaml @@ -1,3 +1,6 @@ +107626595+pderrenger@users.noreply.github.com: + avatar: https://avatars.githubusercontent.com/u/107626595?v=4 + username: pderrenger 116908874+jk4e@users.noreply.github.com: avatar: https://avatars.githubusercontent.com/u/116908874?v=4 username: jk4e @@ -109,6 +112,9 @@ lakshantha@ultralytics.com: lakshanthad@yahoo.com: avatar: https://avatars.githubusercontent.com/u/20147381?v=4 username: lakshanthad +makei05@outlook.de: + avatar: https://avatars.githubusercontent.com/u/78843978?v=4 + username: Skillnoob matthewnoyce@icloud.com: avatar: https://avatars.githubusercontent.com/u/131261051?v=4 username: MatthewNoyce diff --git a/docs/overrides/javascript/extra.js b/docs/overrides/javascript/extra.js index 3233a64411..b106acdfe0 100644 --- a/docs/overrides/javascript/extra.js +++ b/docs/overrides/javascript/extra.js @@ -67,3 +67,86 @@ window.onhashchange = function() { hash: window.location.pathname + window.location.search + window.location.hash }, '*'); }; + +// Add Inkeep button +document.addEventListener("DOMContentLoaded", () => { + const inkeepScript = document.createElement("script"); + inkeepScript.src = "https://unpkg.com/@inkeep/uikit-js@0.3.11/dist/embed.js"; + inkeepScript.type = "module"; + inkeepScript.defer = true; + document.head.appendChild(inkeepScript); + + // Configure and initialize the widget + const addInkeepWidget = () => { + const inkeepWidget = Inkeep().embed({ + componentType: "ChatButton", + colorModeSync: { + observedElement: document.documentElement, + isDarkModeCallback: (el) => { + const currentTheme = el.getAttribute("data-color-mode"); + return currentTheme === "dark"; + }, + colorModeAttribute: "data-color-mode", + }, + properties: { + chatButtonType: "PILL", + fixedPositionXOffset: "1rem", + fixedPositionYOffset: "3rem", + chatButtonBgColor: "#E1FF25", + baseSettings: { + apiKey: "13dfec2e75982bc9bae3199a08e13b86b5fbacd64e9b2f89", + integrationId: "cm1shscmm00y26sj83lgxzvkw", + organizationId: "org_e3869az6hQZ0mXdF", + primaryBrandColor: "#E1FF25", + organizationDisplayName: "Ultralytics", + theme: { + stylesheetUrls: ["/stylesheets/style.css"], + }, + // ...optional settings + }, + modalSettings: { + // optional settings + }, + searchSettings: { + // optional settings + }, + aiChatSettings: { + chatSubjectName: "Ultralytics", + botAvatarSrcUrl: "https://storage.googleapis.com/organization-image-assets/ultralytics-botAvatarSrcUrl-1727908259285.png", + botAvatarDarkSrcUrl: "https://storage.googleapis.com/organization-image-assets/ultralytics-botAvatarDarkSrcUrl-1727908258478.png", + quickQuestions: [ + "What's new in Ultralytics YOLO11?", + "How can I get started with Ultralytics HUB?", + "How does Ultralytics Enterprise Licensing work?" 
+ ], + getHelpCallToActions: [ + { + name: "Ask on Ultralytics GitHub", + url: "https://github.com/ultralytics/ultralytics", + icon: { + builtIn: "FaGithub" + } + }, + { + name: "Ask on Ultralytics Discourse", + url: "https://community.ultralytics.com/", + icon: { + builtIn: "FaDiscourse" + } + }, + { + name: "Ask on Ultralytics Discord", + url: "https://discord.com/invite/ultralytics", + icon: { + builtIn: "FaDiscord" + } + } + ], + }, + }, + }); + }; + inkeepScript.addEventListener("load", () => { + addInkeepWidget(); // initialize the widget + }); +}); diff --git a/docs/overrides/stylesheets/style.css b/docs/overrides/stylesheets/style.css index a9a89d9013..a5bdcc56ab 100644 --- a/docs/overrides/stylesheets/style.css +++ b/docs/overrides/stylesheets/style.css @@ -264,3 +264,9 @@ div.highlight { } } /* MkDocs Ultralytics Plugin ---------------------------------------------------------------------------------------- */ + +/* Inkeep button font color ----------------------------------------------------------------------------------------- */ +.ikp-floating-button { + color: #111f68; +} +/* Inkeep button ---------------------------------------------------------------------------------------------------- */ diff --git a/examples/YOLOv8-Action-Recognition/action_recognition.py b/examples/YOLOv8-Action-Recognition/action_recognition.py index aad74375a5..0853981ed4 100644 --- a/examples/YOLOv8-Action-Recognition/action_recognition.py +++ b/examples/YOLOv8-Action-Recognition/action_recognition.py @@ -263,7 +263,7 @@ def crop_and_pad(frame, box, margin_percent): def run( - weights: str = "yolov8n.pt", + weights: str = "yolo11n.pt", device: str = "", source: str = "https://www.youtube.com/watch?v=dQw4w9WgXcQ", output_path: Optional[str] = None, @@ -279,7 +279,7 @@ def run( Run action recognition on a video source using YOLO for object detection and a video classifier. Args: - weights (str): Path to the YOLO model weights. Defaults to "yolov8n.pt". + weights (str): Path to the YOLO model weights. Defaults to "yolo11n.pt". device (str): Device to run the model on. Use 'cuda' for NVIDIA GPU, 'mps' for Apple Silicon, or 'cpu'. Defaults to auto-detection. source (str): Path to mp4 video file or YouTube URL. Defaults to a sample YouTube video. output_path (Optional[str], optional): Path to save the output video. Defaults to None. @@ -421,7 +421,7 @@ def run( def parse_opt(): """Parse command line arguments.""" parser = argparse.ArgumentParser() - parser.add_argument("--weights", type=str, default="yolov8n.pt", help="ultralytics detector model path") + parser.add_argument("--weights", type=str, default="yolo11n.pt", help="ultralytics detector model path") parser.add_argument("--device", default="", help='cuda device, i.e. 
0 or 0,1,2,3 or cpu/mps, "" for auto-detection') parser.add_argument( "--source", diff --git a/mkdocs.yml b/mkdocs.yml index f3e7d9d106..ee1a83766c 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -162,9 +162,7 @@ nav: - solutions/index.md - Guides: - guides/index.md - - Explorer: - - datasets/explorer/index.md - - NEW 🚀 Live Inference: guides/streamlit-live-inference.md # for promotion of new pages + - Live Inference 🚀 NEW: guides/streamlit-live-inference.md # for promotion of new pages - Languages: - 🇬🇧  English: https://ultralytics.com/docs/ - 🇨🇳  简体中文: https://docs.ultralytics.com/zh/ @@ -251,7 +249,7 @@ nav: - YOLOv8: models/yolov8.md - YOLOv9: models/yolov9.md - YOLOv10: models/yolov10.md - - NEW 🚀 YOLO11: models/yolo11.md + - YOLO11 🚀 NEW: models/yolo11.md - SAM (Segment Anything Model): models/sam.md - SAM 2 (Segment Anything Model 2): models/sam-2.md - MobileSAM (Mobile Segment Anything Model): models/mobile-sam.md @@ -261,11 +259,6 @@ nav: - YOLO-World (Real-Time Open-Vocabulary Object Detection): models/yolo-world.md - Datasets: - datasets/index.md - - Explorer: - - datasets/explorer/index.md - - Explorer API: datasets/explorer/api.md - - Explorer Dashboard: datasets/explorer/dashboard.md - - VOC Exploration Example: datasets/explorer/explorer.ipynb - Detection: - datasets/detect/index.md - Argoverse: datasets/detect/argoverse.md @@ -314,7 +307,7 @@ nav: - DOTA8: datasets/obb/dota8.md - Multi-Object Tracking: - datasets/track/index.md - - NEW 🚀 Solutions: + - Solutions 🚀 NEW: - solutions/index.md - Analytics: guides/analytics.md - Object Counting: guides/object-counting.md @@ -330,7 +323,7 @@ nav: - Distance Calculation: guides/distance-calculation.md - Queue Management: guides/queue-management.md - Parking Management: guides/parking-management.md - - NEW 🚀 Live Inference: guides/streamlit-live-inference.md + - Live Inference 🚀 NEW: guides/streamlit-live-inference.md - Guides: - guides/index.md - YOLO Common Issues: guides/yolo-common-issues.md @@ -392,35 +385,35 @@ nav: - Clearml Logging: yolov5/tutorials/clearml_logging_integration.md - Integrations: - integrations/index.md - - TorchScript: integrations/torchscript.md + - Amazon SageMaker: integrations/amazon-sagemaker.md + - ClearML: integrations/clearml.md + - Comet ML: integrations/comet.md + - CoreML: integrations/coreml.md + - DVC: integrations/dvc.md + - Google Colab: integrations/google-colab.md + - Gradio: integrations/gradio.md + - IBM Watsonx: integrations/ibm-watsonx.md + - JupyterLab: integrations/jupyterlab.md + - Kaggle: integrations/kaggle.md + - MLflow: integrations/mlflow.md + - NCNN: integrations/ncnn.md + - Neural Magic: integrations/neural-magic.md - ONNX: integrations/onnx.md - OpenVINO: integrations/openvino.md - - TensorRT: integrations/tensorrt.md - - CoreML: integrations/coreml.md - - TF SavedModel: integrations/tf-savedmodel.md - - TF GraphDef: integrations/tf-graphdef.md - - TFLite: integrations/tflite.md - - TFLite Edge TPU: integrations/edge-tpu.md - - TF.js: integrations/tfjs.md - PaddlePaddle: integrations/paddlepaddle.md - - NCNN: integrations/ncnn.md - - Comet ML: integrations/comet.md + - Paperspace Gradient: integrations/paperspace.md - Ray Tune: integrations/ray-tune.md - Roboflow: integrations/roboflow.md - - MLflow: integrations/mlflow.md - - ClearML: integrations/clearml.md - - DVC: integrations/dvc.md - - Weights & Biases: integrations/weights-biases.md - - Neural Magic: integrations/neural-magic.md - - Gradio: integrations/gradio.md + - TF GraphDef: integrations/tf-graphdef.md + - TF 
SavedModel: integrations/tf-savedmodel.md + - TF.js: integrations/tfjs.md + - TFLite: integrations/tflite.md + - TFLite Edge TPU: integrations/edge-tpu.md - TensorBoard: integrations/tensorboard.md - - Amazon SageMaker: integrations/amazon-sagemaker.md - - Paperspace Gradient: integrations/paperspace.md - - Google Colab: integrations/google-colab.md - - Kaggle: integrations/kaggle.md - - JupyterLab: integrations/jupyterlab.md - - IBM Watsonx: integrations/ibm-watsonx.md + - TensorRT: integrations/tensorrt.md + - TorchScript: integrations/torchscript.md - VS Code: integrations/vscode.md + - Weights & Biases: integrations/weights-biases.md - HUB: - hub/index.md - Web: @@ -476,11 +469,6 @@ nav: - build: reference/data/build.md - converter: reference/data/converter.md - dataset: reference/data/dataset.md - - explorer: - - explorer: reference/data/explorer/explorer.md - - gui: - - dash: reference/data/explorer/gui/dash.md - - utils: reference/data/explorer/utils.md - loaders: reference/data/loaders.md - split_dota: reference/data/split_dota.md - utils: reference/data/utils.md @@ -761,3 +749,6 @@ plugins: yolov5/environments/yolov5_amazon_web_services_quickstart_tutorial.md: yolov5/environments/aws_quickstart_tutorial.md yolov5/environments/yolov5_google_cloud_platform_quickstart_tutorial.md: yolov5/environments/google_cloud_quickstart_tutorial.md yolov5/environments/yolov5_docker_image_quickstart_tutorial.md: yolov5/environments/docker_image_quickstart_tutorial.md + reference/data/explorer/explorer.md: datasets/explorer/index.md + reference/data/explorer/gui/dash.md: datasets/explorer/index.md + reference/data/explorer/utils.md: datasets/explorer/index.md diff --git a/pyproject.toml b/pyproject.toml index 20a28df2c5..3fb80e62af 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,7 @@ # For comprehensive documentation and usage instructions, visit: https://docs.ultralytics.com [build-system] -requires = ["setuptools>=57.0.0", "wheel"] +requires = ["setuptools>=70.0.0", "wheel"] build-backend = "setuptools.build_meta" # Project settings ----------------------------------------------------------------------------------------------------- @@ -34,7 +34,6 @@ keywords = ["machine-learning", "deep-learning", "computer-vision", "ML", "DL", authors = [ { name = "Glenn Jocher", email = "glenn.jocher@ultralytics.com" }, { name = "Jing Qiu", email = "jing.qiu@ultralytics.com" }, - { name = "Ayush Chaurasia" }, ] maintainers = [ { name = "Ultralytics", email = "hello@ultralytics.com" }, @@ -108,10 +107,9 @@ export = [ "numpy==1.23.5; platform_machine == 'aarch64'", # fix error: `np.bool` was a deprecated alias for the builtin `bool` when using TensorRT models on NVIDIA Jetson "h5py!=3.11.0; platform_machine == 'aarch64'", # fix h5py build issues due to missing aarch64 wheels in 3.11 release ] -explorer = [ - "lancedb", # vector search - "duckdb<=0.9.2", # SQL queries, duckdb==0.10.0 bug https://github.com/ultralytics/ultralytics/pull/8181 - "streamlit", # visualizing with GUI +solutions = [ + "shapely>=2.0.0", # shapely for point and polygon data matching + "streamlit", # for live inference on web browser i.e `yolo streamlit-predict` ] logging = [ "comet", # https://docs.ultralytics.com/integrations/comet/ diff --git a/tests/test_cli.py b/tests/test_cli.py index 3eadf3c24e..05e06bd7aa 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -97,9 +97,12 @@ def test_mobilesam(): # Source source = ASSETS / "zidane.jpg" - # Predict a segment based on a point prompt + # Predict a segment based on a 1D 
point prompt and 1D labels. model.predict(source, points=[900, 370], labels=[1]) + # Predict a segment based on 3D points and 2D labels (multiple points per object). + model.predict(source, points=[[[900, 370], [1000, 100]]], labels=[[1, 1]]) + # Predict a segment based on a box prompt model.predict(source, bboxes=[439, 437, 524, 709], save=True) diff --git a/tests/test_cuda.py b/tests/test_cuda.py index 3b08edc699..89f8c39b25 100644 --- a/tests/test_cuda.py +++ b/tests/test_cuda.py @@ -127,9 +127,21 @@ def test_predict_sam(): # Run inference with bboxes prompt model(SOURCE, bboxes=[439, 437, 524, 709], device=0) - # Run inference with points prompt + # Run inference with no labels + model(ASSETS / "zidane.jpg", points=[900, 370], device=0) + + # Run inference with 1D points and 1D labels model(ASSETS / "zidane.jpg", points=[900, 370], labels=[1], device=0) + # Run inference with 2D points and 1D labels + model(ASSETS / "zidane.jpg", points=[[900, 370]], labels=[1], device=0) + + # Run inference with multiple 2D points and 1D labels + model(ASSETS / "zidane.jpg", points=[[400, 370], [900, 370]], labels=[1, 1], device=0) + + # Run inference with 3D points and 2D labels (multiple points per object) + model(ASSETS / "zidane.jpg", points=[[[900, 370], [1000, 100]]], labels=[[1, 1]], device=0) + # Create SAMPredictor overrides = dict(conf=0.25, task="segment", mode="predict", imgsz=1024, model=WEIGHTS_DIR / "mobile_sam.pt") predictor = SAMPredictor(overrides=overrides) diff --git a/tests/test_explorer.py b/tests/test_explorer.py deleted file mode 100644 index 45b0a31e36..0000000000 --- a/tests/test_explorer.py +++ /dev/null @@ -1,66 +0,0 @@ -# Ultralytics YOLO 🚀, AGPL-3.0 license - -import PIL -import pytest - -from ultralytics import Explorer -from ultralytics.utils import ASSETS -from ultralytics.utils.torch_utils import TORCH_1_13 - - -@pytest.mark.slow -@pytest.mark.skipif(not TORCH_1_13, reason="Explorer requires torch>=1.13") -def test_similarity(): - """Test the correctness and response length of similarity calculations and SQL queries in the Explorer.""" - exp = Explorer(data="coco8.yaml") - exp.create_embeddings_table() - similar = exp.get_similar(idx=1) - assert len(similar) == 4 - similar = exp.get_similar(img=ASSETS / "bus.jpg") - assert len(similar) == 4 - similar = exp.get_similar(idx=[1, 2], limit=2) - assert len(similar) == 2 - sim_idx = exp.similarity_index() - assert len(sim_idx) == 4 - sql = exp.sql_query("WHERE labels LIKE '%zebra%'") - assert len(sql) == 1 - - -@pytest.mark.slow -@pytest.mark.skipif(not TORCH_1_13, reason="Explorer requires torch>=1.13") -def test_det(): - """Test detection functionalities and verify embedding table includes bounding boxes.""" - exp = Explorer(data="coco8.yaml", model="yolo11n.pt") - exp.create_embeddings_table(force=True) - assert len(exp.table.head()["bboxes"]) > 0 - similar = exp.get_similar(idx=[1, 2], limit=10) - assert len(similar) > 0 - # This is a loose test, just checks errors not correctness - similar = exp.plot_similar(idx=[1, 2], limit=10) - assert isinstance(similar, PIL.Image.Image) - - -@pytest.mark.slow -@pytest.mark.skipif(not TORCH_1_13, reason="Explorer requires torch>=1.13") -def test_seg(): - """Test segmentation functionalities and ensure the embedding table includes segmentation masks.""" - exp = Explorer(data="coco8-seg.yaml", model="yolo11n-seg.pt") - exp.create_embeddings_table(force=True) - assert len(exp.table.head()["masks"]) > 0 - similar = exp.get_similar(idx=[1, 2], limit=10) - assert len(similar) > 0 - 
similar = exp.plot_similar(idx=[1, 2], limit=10) - assert isinstance(similar, PIL.Image.Image) - - -@pytest.mark.slow -@pytest.mark.skipif(not TORCH_1_13, reason="Explorer requires torch>=1.13") -def test_pose(): - """Test pose estimation functionality and verify the embedding table includes keypoints.""" - exp = Explorer(data="coco8-pose.yaml", model="yolo11n-pose.pt") - exp.create_embeddings_table(force=True) - assert len(exp.table.head()["keypoints"]) > 0 - similar = exp.get_similar(idx=[1, 2], limit=10) - assert len(similar) > 0 - similar = exp.plot_similar(idx=[1, 2], limit=10) - assert isinstance(similar, PIL.Image.Image) diff --git a/tests/test_solutions.py b/tests/test_solutions.py index fabec621d3..d3ba2d5fc2 100644 --- a/tests/test_solutions.py +++ b/tests/test_solutions.py @@ -14,25 +14,22 @@ WORKOUTS_SOLUTION_DEMO = "https://github.com/ultralytics/assets/releases/downloa def test_major_solutions(): """Test the object counting, heatmap, speed estimation and queue management solution.""" safe_download(url=MAJOR_SOLUTIONS_DEMO) - model = YOLO("yolo11n.pt") - names = model.names cap = cv2.VideoCapture("solutions_ci_demo.mp4") assert cap.isOpened(), "Error reading video file" region_points = [(20, 400), (1080, 404), (1080, 360), (20, 360)] - counter = solutions.ObjectCounter(reg_pts=region_points, names=names, view_img=False) - heatmap = solutions.Heatmap(colormap=cv2.COLORMAP_PARULA, names=names, view_img=False) - speed = solutions.SpeedEstimator(reg_pts=region_points, names=names, view_img=False) - queue = solutions.QueueManager(names=names, reg_pts=region_points, view_img=False) + counter = solutions.ObjectCounter(region=region_points, model="yolo11n.pt", show=False) + heatmap = solutions.Heatmap(colormap=cv2.COLORMAP_PARULA, model="yolo11n.pt", show=False) + speed = solutions.SpeedEstimator(region=region_points, model="yolo11n.pt", show=False) + queue = solutions.QueueManager(region=region_points, model="yolo11n.pt", show=False) while cap.isOpened(): success, im0 = cap.read() if not success: break original_im0 = im0.copy() - tracks = model.track(im0, persist=True, show=False) - _ = counter.start_counting(original_im0.copy(), tracks) - _ = heatmap.generate_heatmap(original_im0.copy(), tracks) - _ = speed.estimate_speed(original_im0.copy(), tracks) - _ = queue.process_queue(original_im0.copy(), tracks) + _ = counter.count(original_im0.copy()) + _ = heatmap.generate_heatmap(original_im0.copy()) + _ = speed.estimate_speed(original_im0.copy()) + _ = queue.process_queue(original_im0.copy()) cap.release() cv2.destroyAllWindows() @@ -41,16 +38,14 @@ def test_major_solutions(): def test_aigym(): """Test the workouts monitoring solution.""" safe_download(url=WORKOUTS_SOLUTION_DEMO) - model = YOLO("yolo11n-pose.pt") cap = cv2.VideoCapture("solution_ci_pose_demo.mp4") assert cap.isOpened(), "Error reading video file" - gym_object = solutions.AIGym(line_thickness=2, pose_type="squat", kpts_to_check=[5, 11, 13]) + gym = solutions.AIGym(line_width=2, kpts=[5, 11, 13]) while cap.isOpened(): success, im0 = cap.read() if not success: break - results = model.track(im0, verbose=False) - _ = gym_object.start_counting(im0, results) + _ = gym.monitor(im0) cap.release() cv2.destroyAllWindows() diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py index 80b45c154b..5360c25e18 100644 --- a/ultralytics/__init__.py +++ b/ultralytics/__init__.py @@ -1,13 +1,13 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license -__version__ = "8.3.4" +__version__ = "8.3.12" import os -# Set ENV Variables (place before 
imports) -os.environ["OMP_NUM_THREADS"] = "1" # reduce CPU utilization during training +# Set ENV variables (place before imports) +if not os.environ.get("OMP_NUM_THREADS"): + os.environ["OMP_NUM_THREADS"] = "1" # default for reduced CPU utilization during training -from ultralytics.data.explorer.explorer import Explorer from ultralytics.models import NAS, RTDETR, SAM, YOLO, FastSAM, YOLOWorld from ultralytics.utils import ASSETS, SETTINGS from ultralytics.utils.checks import check_yolo as checks @@ -26,5 +26,4 @@ __all__ = ( "checks", "download", "settings", - "Explorer", ) diff --git a/ultralytics/cfg/__init__.py b/ultralytics/cfg/__init__.py index 7058c3d4a5..2eb7ff1c01 100644 --- a/ultralytics/cfg/__init__.py +++ b/ultralytics/cfg/__init__.py @@ -79,14 +79,11 @@ CLI_HELP_MSG = f""" 4. Export a YOLO11n classification model to ONNX format at image size 224 by 128 (no TASK required) yolo export model=yolo11n-cls.pt format=onnx imgsz=224,128 - - 5. Explore your datasets using semantic search and SQL with a simple GUI powered by Ultralytics Explorer API - yolo explorer data=data.yaml model=yolo11n.pt - 6. Streamlit real-time webcam inference GUI + 5. Streamlit real-time webcam inference GUI yolo streamlit-predict - 7. Run special commands: + 6. Run special commands: yolo help yolo checks yolo version @@ -546,35 +543,6 @@ def handle_yolo_settings(args: List[str]) -> None: LOGGER.warning(f"WARNING ⚠️ settings error: '{e}'. Please see {url} for help.") -def handle_explorer(args: List[str]): - """ - Launches a graphical user interface that provides tools for interacting with and analyzing datasets using the - Ultralytics Explorer API. It checks for the required 'streamlit' package and informs the user that the Explorer - dashboard is loading. - - Args: - args (List[str]): A list of optional command line arguments. - - Examples: - ```bash - yolo explorer data=data.yaml model=yolo11n.pt - ``` - - Notes: - - Requires 'streamlit' package version 1.29.0 or higher. - - The function does not take any arguments or return any values. - - It is typically called from the command line interface using the 'yolo explorer' command. - """ - checks.check_requirements("streamlit>=1.29.0") - LOGGER.info("💡 Loading Explorer dashboard...") - cmd = ["streamlit", "run", ROOT / "data/explorer/gui/dash.py", "--server.maxMessageSize", "2048"] - new = dict(parse_key_value_pair(a) for a in args) - check_dict_alignment(base={k: DEFAULT_CFG_DICT[k] for k in ["model", "data"]}, custom=new) - for k, v in new.items(): - cmd += [k, v] - subprocess.run(cmd) - - def handle_streamlit_inference(): """ Open the Ultralytics Live Inference Streamlit app for real-time object detection. 
@@ -669,9 +637,10 @@ def smart_value(v): elif v_lower == "false": return False else: - with contextlib.suppress(Exception): + try: return eval(v) - return v + except: # noqa E722 + return v def entrypoint(debug=""): @@ -714,7 +683,6 @@ def entrypoint(debug=""): "login": lambda: handle_yolo_hub(args), "logout": lambda: handle_yolo_hub(args), "copy-cfg": copy_default_cfg, - "explorer": lambda: handle_explorer(args[1:]), "streamlit-predict": lambda: handle_streamlit_inference(), } full_args_dict = {**DEFAULT_CFG_DICT, **{k: None for k in TASKS}, **{k: None for k in MODES}, **special} diff --git a/ultralytics/cfg/default.yaml b/ultralytics/cfg/default.yaml index da616d651e..7922f63592 100644 --- a/ultralytics/cfg/default.yaml +++ b/ultralytics/cfg/default.yaml @@ -1,7 +1,7 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license # Default training settings and hyperparameters for medium-augmentation COCO training -task: detect # (str) YOLO task, i.e. detect, segment, classify, pose +task: detect # (str) YOLO task, i.e. detect, segment, classify, pose, obb mode: train # (str) YOLO mode, i.e. train, val, predict, export, track, benchmark # Train settings ------------------------------------------------------------------------------------------------------- diff --git a/ultralytics/cfg/solutions/default.yaml b/ultralytics/cfg/solutions/default.yaml new file mode 100644 index 0000000000..e4e1b845a0 --- /dev/null +++ b/ultralytics/cfg/solutions/default.yaml @@ -0,0 +1,17 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +# Configuration for Ultralytics Solutions + +model: "yolo11n.pt" # The Ultralytics YOLO11 model to be used (e.g., yolo11n.pt for YOLO11 nano version and yolov8n.pt for YOLOv8 nano version) + +region: # Object counting, queue or speed estimation region points. Default region points are [(20, 400), (1080, 404), (1080, 360), (20, 360)] +line_width: 2 # Width of the annotator used to draw regions on the image/video frames + bounding boxes and tracks drawing. Default value is 2. +show: True # Flag to control whether to display output image or not, you can set this as False i.e. when deploying it on some embedded devices. +show_in: True # Flag to display objects moving *into* the defined region +show_out: True # Flag to display objects moving *out of* the defined region +classes: # To count specific classes. i.e, if you want to detect, track and count the person with COCO model, you can use classes=0, Default its None +up_angle: 145.0 # Workouts up_angle for counts, 145.0 is default value. You can adjust it for different workouts, based on position of keypoints. +down_angle: 90 # Workouts down_angle for counts, 90 is default value. You can change it for different workouts, based on position of keypoints. +kpts: [6, 8, 10] # Keypoints for workouts monitoring, i.e. If you want to consider keypoints for pushups that have mostly values of [6, 8, 10]. +colormap: # Colormap for heatmap, Only OPENCV supported colormaps can be used. By default COLORMAP_PARULA will be used for visualization. +analytics_type: "line" # Analytics type i.e "line", "pie", "bar" or "area" charts. By default, "line" analytics will be used for processing. 
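The config file above pairs with the refactored solutions API exercised in the updated `tests/test_solutions.py` later in this patch: each solution now takes `model`, `region`, `show`, etc. directly and runs detection and tracking internally, instead of receiving `names` and pre-computed `tracks`, and the new YAML appears intended to supply defaults for any keyword left unset. A minimal usage sketch under those assumptions (the `video.mp4` path is a placeholder and not part of this patch; argument names mirror the updated tests and the YAML keys above):

```python
import cv2

from ultralytics import solutions

# Values mirror ultralytics/cfg/solutions/default.yaml: yolo11n.pt model and the default region points
region_points = [(20, 400), (1080, 404), (1080, 360), (20, 360)]
counter = solutions.ObjectCounter(region=region_points, model="yolo11n.pt", show=False)

cap = cv2.VideoCapture("video.mp4")  # placeholder input video, not part of this patch
assert cap.isOpened(), "Error reading video file"
while cap.isOpened():
    success, im0 = cap.read()
    if not success:
        break
    _ = counter.count(im0)  # detection, tracking and counting run internally; no separate model.track() call
cap.release()
cv2.destroyAllWindows()
```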
diff --git a/ultralytics/data/annotator.py b/ultralytics/data/annotator.py index 5cb0058dcb..30d02d9d73 100644 --- a/ultralytics/data/annotator.py +++ b/ultralytics/data/annotator.py @@ -21,7 +21,7 @@ def auto_annotate(data, det_model="yolov8x.pt", sam_model="sam_b.pt", device="", Examples: >>> from ultralytics.data.annotator import auto_annotate - >>> auto_annotate(data="ultralytics/assets", det_model="yolov8n.pt", sam_model="mobile_sam.pt") + >>> auto_annotate(data="ultralytics/assets", det_model="yolo11n.pt", sam_model="mobile_sam.pt") Notes: - The function creates a new directory for output if not specified. diff --git a/ultralytics/data/base.py b/ultralytics/data/base.py index f18c2d54dc..02b3b87ba8 100644 --- a/ultralytics/data/base.py +++ b/ultralytics/data/base.py @@ -90,13 +90,15 @@ class BaseDataset(Dataset): self.ims, self.im_hw0, self.im_hw = [None] * self.ni, [None] * self.ni, [None] * self.ni self.npy_files = [Path(f).with_suffix(".npy") for f in self.im_files] self.cache = cache.lower() if isinstance(cache, str) else "ram" if cache is True else None - if (self.cache == "ram" and self.check_cache_ram()) or self.cache == "disk": - if self.cache == "ram" and hyp.deterministic: + if self.cache == "ram" and self.check_cache_ram(): + if hyp.deterministic: LOGGER.warning( "WARNING ⚠️ cache='ram' may produce non-deterministic training results. " "Consider cache='disk' as a deterministic alternative if your disk space allows." ) self.cache_images() + elif self.cache == "disk" and self.check_cache_disk(): + self.cache_images() # Transforms self.transforms = self.build_transforms(hyp=hyp) @@ -206,25 +208,55 @@ class BaseDataset(Dataset): if not f.exists(): np.save(f.as_posix(), cv2.imread(self.im_files[i]), allow_pickle=False) + def check_cache_disk(self, safety_margin=0.5): + """Check image caching requirements vs available disk space.""" + import shutil + + b, gb = 0, 1 << 30 # bytes of cached images, bytes per gigabytes + n = min(self.ni, 30) # extrapolate from 30 random images + for _ in range(n): + im_file = random.choice(self.im_files) + im = cv2.imread(im_file) + if im is None: + continue + b += im.nbytes + if not os.access(Path(im_file).parent, os.W_OK): + self.cache = None + LOGGER.info(f"{self.prefix}Skipping caching images to disk, directory not writeable ⚠️") + return False + disk_required = b * self.ni / n * (1 + safety_margin) # bytes required to cache dataset to disk + total, used, free = shutil.disk_usage(Path(self.im_files[0]).parent) + if disk_required > free: + self.cache = None + LOGGER.info( + f"{self.prefix}{disk_required / gb:.1f}GB disk space required, " + f"with {int(safety_margin * 100)}% safety margin but only " + f"{free / gb:.1f}/{total / gb:.1f}GB free, not caching images to disk ⚠️" + ) + return False + return True + def check_cache_ram(self, safety_margin=0.5): """Check image caching requirements vs available memory.""" b, gb = 0, 1 << 30 # bytes of cached images, bytes per gigabytes n = min(self.ni, 30) # extrapolate from 30 random images for _ in range(n): im = cv2.imread(random.choice(self.im_files)) # sample image + if im is None: + continue ratio = self.imgsz / max(im.shape[0], im.shape[1]) # max(h, w) # ratio b += im.nbytes * ratio**2 mem_required = b * self.ni / n * (1 + safety_margin) # GB required to cache dataset into RAM mem = psutil.virtual_memory() - success = mem_required < mem.available # to cache or not to cache, that is the question - if not success: + if mem_required > mem.available: self.cache = None LOGGER.info( 
f"{self.prefix}{mem_required / gb:.1f}GB RAM required to cache images " f"with {int(safety_margin * 100)}% safety margin but only " f"{mem.available / gb:.1f}/{mem.total / gb:.1f}GB available, not caching images ⚠️" ) - return success + return False + return True def set_rectangle(self): """Sets the shape of bounding boxes for YOLO detections as rectangles.""" diff --git a/ultralytics/data/converter.py b/ultralytics/data/converter.py index 03dbf0ade1..fe1aac10ae 100644 --- a/ultralytics/data/converter.py +++ b/ultralytics/data/converter.py @@ -1,13 +1,18 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license import json +import random +import shutil from collections import defaultdict +from concurrent.futures import ThreadPoolExecutor, as_completed from pathlib import Path import cv2 import numpy as np +from PIL import Image -from ultralytics.utils import LOGGER, TQDM +from ultralytics.utils import DATASETS_DIR, LOGGER, NUM_THREADS, TQDM +from ultralytics.utils.downloads import download from ultralytics.utils.files import increment_path @@ -588,15 +593,13 @@ def yolo_bbox2segment(im_dir, save_dir=None, sam_model="sam_b.pt"): - im_dir ├─ 001.jpg - ├─ .. + ├─ ... └─ NNN.jpg - labels ├─ 001.txt - ├─ .. + ├─ ... └─ NNN.txt """ - from tqdm import tqdm - from ultralytics import SAM from ultralytics.data import YOLODataset from ultralytics.utils import LOGGER @@ -610,7 +613,7 @@ def yolo_bbox2segment(im_dir, save_dir=None, sam_model="sam_b.pt"): LOGGER.info("Detection labels detected, generating segment labels by SAM model!") sam_model = SAM(sam_model) - for label in tqdm(dataset.labels, total=len(dataset.labels), desc="Generating segment labels"): + for label in TQDM(dataset.labels, total=len(dataset.labels), desc="Generating segment labels"): h, w = label["shape"] boxes = label["bboxes"] if len(boxes) == 0: # skip empty labels @@ -635,3 +638,61 @@ def yolo_bbox2segment(im_dir, save_dir=None, sam_model="sam_b.pt"): with open(txt_file, "a") as f: f.writelines(text + "\n" for text in texts) LOGGER.info(f"Generated segment labels saved in {save_dir}") + + +def create_synthetic_coco_dataset(): + """ + Creates a synthetic COCO dataset with random images based on filenames from label lists. + + This function downloads COCO labels, reads image filenames from label list files, + creates synthetic images for train2017 and val2017 subsets, and organizes + them in the COCO dataset structure. It uses multithreading to generate images efficiently. + + Examples: + >>> from ultralytics.data.converter import create_synthetic_coco_dataset + >>> create_synthetic_coco_dataset() + + Notes: + - Requires internet connection to download label files. + - Generates random RGB images of varying sizes (480x480 to 640x640 pixels). + - Existing test2017 directory is removed as it's not needed. + - Reads image filenames from train2017.txt and val2017.txt files. 
+ """ + + def create_synthetic_image(image_file): + """Generates synthetic images with random sizes and colors for dataset augmentation or testing purposes.""" + if not image_file.exists(): + size = (random.randint(480, 640), random.randint(480, 640)) + Image.new( + "RGB", + size=size, + color=(random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)), + ).save(image_file) + + # Download labels + dir = DATASETS_DIR / "coco" + url = "https://github.com/ultralytics/assets/releases/download/v0.0.0/" + label_zip = "coco2017labels-segments.zip" + download([url + label_zip], dir=dir.parent) + + # Create synthetic images + shutil.rmtree(dir / "labels" / "test2017", ignore_errors=True) # Remove test2017 directory as not needed + with ThreadPoolExecutor(max_workers=NUM_THREADS) as executor: + for subset in ["train2017", "val2017"]: + subset_dir = dir / "images" / subset + subset_dir.mkdir(parents=True, exist_ok=True) + + # Read image filenames from label list file + label_list_file = dir / f"{subset}.txt" + if label_list_file.exists(): + with open(label_list_file) as f: + image_files = [dir / line.strip() for line in f] + + # Submit all tasks + futures = [executor.submit(create_synthetic_image, image_file) for image_file in image_files] + for _ in TQDM(as_completed(futures), total=len(futures), desc=f"Generating images for {subset}"): + pass # The actual work is done in the background + else: + print(f"Warning: Labels file {label_list_file} does not exist. Skipping image creation for {subset}.") + + print("Synthetic COCO dataset created successfully.") diff --git a/ultralytics/data/dataset.py b/ultralytics/data/dataset.py index 7216fa006a..e9462b342b 100644 --- a/ultralytics/data/dataset.py +++ b/ultralytics/data/dataset.py @@ -1,6 +1,5 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license -import contextlib import json from collections import defaultdict from itertools import repeat @@ -483,7 +482,7 @@ class ClassificationDataset: desc = f"{self.prefix}Scanning {self.root}..." 
path = Path(self.root).with_suffix(".cache") # *.cache file path - with contextlib.suppress(FileNotFoundError, AssertionError, AttributeError): + try: cache = load_dataset_cache_file(path) # attempt to load a *.cache file assert cache["version"] == DATASET_CACHE_VERSION # matches current version assert cache["hash"] == get_hash([x[0] for x in self.samples]) # identical hash @@ -495,24 +494,25 @@ class ClassificationDataset: LOGGER.info("\n".join(cache["msgs"])) # display warnings return samples - # Run scan if *.cache retrieval failed - nf, nc, msgs, samples, x = 0, 0, [], [], {} - with ThreadPool(NUM_THREADS) as pool: - results = pool.imap(func=verify_image, iterable=zip(self.samples, repeat(self.prefix))) - pbar = TQDM(results, desc=desc, total=len(self.samples)) - for sample, nf_f, nc_f, msg in pbar: - if nf_f: - samples.append(sample) - if msg: - msgs.append(msg) - nf += nf_f - nc += nc_f - pbar.desc = f"{desc} {nf} images, {nc} corrupt" - pbar.close() - if msgs: - LOGGER.info("\n".join(msgs)) - x["hash"] = get_hash([x[0] for x in self.samples]) - x["results"] = nf, nc, len(samples), samples - x["msgs"] = msgs # warnings - save_dataset_cache_file(self.prefix, path, x, DATASET_CACHE_VERSION) - return samples + except (FileNotFoundError, AssertionError, AttributeError): + # Run scan if *.cache retrieval failed + nf, nc, msgs, samples, x = 0, 0, [], [], {} + with ThreadPool(NUM_THREADS) as pool: + results = pool.imap(func=verify_image, iterable=zip(self.samples, repeat(self.prefix))) + pbar = TQDM(results, desc=desc, total=len(self.samples)) + for sample, nf_f, nc_f, msg in pbar: + if nf_f: + samples.append(sample) + if msg: + msgs.append(msg) + nf += nf_f + nc += nc_f + pbar.desc = f"{desc} {nf} images, {nc} corrupt" + pbar.close() + if msgs: + LOGGER.info("\n".join(msgs)) + x["hash"] = get_hash([x[0] for x in self.samples]) + x["results"] = nf, nc, len(samples), samples + x["msgs"] = msgs # warnings + save_dataset_cache_file(self.prefix, path, x, DATASET_CACHE_VERSION) + return samples diff --git a/ultralytics/data/explorer/__init__.py b/ultralytics/data/explorer/__init__.py deleted file mode 100644 index ce594dc1fd..0000000000 --- a/ultralytics/data/explorer/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Ultralytics YOLO 🚀, AGPL-3.0 license - -from .utils import plot_query_result - -__all__ = ["plot_query_result"] diff --git a/ultralytics/data/explorer/explorer.py b/ultralytics/data/explorer/explorer.py deleted file mode 100644 index 0407c1a288..0000000000 --- a/ultralytics/data/explorer/explorer.py +++ /dev/null @@ -1,460 +0,0 @@ -# Ultralytics YOLO 🚀, AGPL-3.0 license - -from io import BytesIO -from pathlib import Path -from typing import Any, List, Tuple, Union - -import cv2 -import numpy as np -import torch -from matplotlib import pyplot as plt -from PIL import Image -from tqdm import tqdm - -from ultralytics.data.augment import Format -from ultralytics.data.dataset import YOLODataset -from ultralytics.data.utils import check_det_dataset -from ultralytics.models.yolo.model import YOLO -from ultralytics.utils import LOGGER, USER_CONFIG_DIR, IterableSimpleNamespace, checks - -from .utils import get_sim_index_schema, get_table_schema, plot_query_result, prompt_sql_query, sanitize_batch - - -class ExplorerDataset(YOLODataset): - """Extends YOLODataset for advanced data exploration and manipulation in model training workflows.""" - - def __init__(self, *args, data: dict = None, **kwargs) -> None: - """Initializes the ExplorerDataset with the provided data arguments, extending the 
YOLODataset class.""" - super().__init__(*args, data=data, **kwargs) - - def load_image(self, i: int) -> Union[Tuple[np.ndarray, Tuple[int, int], Tuple[int, int]], Tuple[None, None, None]]: - """Loads 1 image from dataset index 'i' without any resize ops.""" - im, f, fn = self.ims[i], self.im_files[i], self.npy_files[i] - if im is None: # not cached in RAM - if fn.exists(): # load npy - im = np.load(fn) - else: # read image - im = cv2.imread(f) # BGR - if im is None: - raise FileNotFoundError(f"Image Not Found {f}") - h0, w0 = im.shape[:2] # orig hw - return im, (h0, w0), im.shape[:2] - - return self.ims[i], self.im_hw0[i], self.im_hw[i] - - def build_transforms(self, hyp: IterableSimpleNamespace = None): - """Creates transforms for dataset images without resizing.""" - return Format( - bbox_format="xyxy", - normalize=False, - return_mask=self.use_segments, - return_keypoint=self.use_keypoints, - batch_idx=True, - mask_ratio=hyp.mask_ratio, - mask_overlap=hyp.overlap_mask, - ) - - -class Explorer: - """Utility class for image embedding, table creation, and similarity querying using LanceDB and YOLO models.""" - - def __init__( - self, - data: Union[str, Path] = "coco128.yaml", - model: str = "yolov8n.pt", - uri: str = USER_CONFIG_DIR / "explorer", - ) -> None: - """Initializes the Explorer class with dataset path, model, and URI for database connection.""" - # Note duckdb==0.10.0 bug https://github.com/ultralytics/ultralytics/pull/8181 - checks.check_requirements(["lancedb>=0.4.3", "duckdb<=0.9.2"]) - import lancedb - - self.connection = lancedb.connect(uri) - self.table_name = f"{Path(data).name.lower()}_{model.lower()}" - self.sim_idx_base_name = ( - f"{self.table_name}_sim_idx".lower() - ) # Use this name and append thres and top_k to reuse the table - self.model = YOLO(model) - self.data = data # None - self.choice_set = None - - self.table = None - self.progress = 0 - - def create_embeddings_table(self, force: bool = False, split: str = "train") -> None: - """ - Create LanceDB table containing the embeddings of the images in the dataset. The table will be reused if it - already exists. Pass force=True to overwrite the existing table. - - Args: - force (bool): Whether to overwrite the existing table or not. Defaults to False. - split (str): Split of the dataset to use. Defaults to 'train'. - - Example: - ```python - exp = Explorer() - exp.create_embeddings_table() - ``` - """ - if self.table is not None and not force: - LOGGER.info("Table already exists. Reusing it. Pass force=True to overwrite it.") - return - if self.table_name in self.connection.table_names() and not force: - LOGGER.info(f"Table {self.table_name} already exists. Reusing it. Pass force=True to overwrite it.") - self.table = self.connection.open_table(self.table_name) - self.progress = 1 - return - if self.data is None: - raise ValueError("Data must be provided to create embeddings table") - - data_info = check_det_dataset(self.data) - if split not in data_info: - raise ValueError( - f"Split {split} is not found in the dataset. 
Available keys in the dataset are {list(data_info.keys())}" - ) - - choice_set = data_info[split] - choice_set = choice_set if isinstance(choice_set, list) else [choice_set] - self.choice_set = choice_set - dataset = ExplorerDataset(img_path=choice_set, data=data_info, augment=False, cache=False, task=self.model.task) - - # Create the table schema - batch = dataset[0] - vector_size = self.model.embed(batch["im_file"], verbose=False)[0].shape[0] - table = self.connection.create_table(self.table_name, schema=get_table_schema(vector_size), mode="overwrite") - table.add( - self._yield_batches( - dataset, - data_info, - self.model, - exclude_keys=["img", "ratio_pad", "resized_shape", "ori_shape", "batch_idx"], - ) - ) - - self.table = table - - def _yield_batches(self, dataset: ExplorerDataset, data_info: dict, model: YOLO, exclude_keys: List[str]): - """Generates batches of data for embedding, excluding specified keys.""" - for i in tqdm(range(len(dataset))): - self.progress = float(i + 1) / len(dataset) - batch = dataset[i] - for k in exclude_keys: - batch.pop(k, None) - batch = sanitize_batch(batch, data_info) - batch["vector"] = model.embed(batch["im_file"], verbose=False)[0].detach().tolist() - yield [batch] - - def query( - self, imgs: Union[str, np.ndarray, List[str], List[np.ndarray]] = None, limit: int = 25 - ) -> Any: # pyarrow.Table - """ - Query the table for similar images. Accepts a single image or a list of images. - - Args: - imgs (str or list): Path to the image or a list of paths to the images. - limit (int): Number of results to return. - - Returns: - (pyarrow.Table): An arrow table containing the results. Supports converting to: - - pandas dataframe: `result.to_pandas()` - - dict of lists: `result.to_pydict()` - - Example: - ```python - exp = Explorer() - exp.create_embeddings_table() - similar = exp.query(img="https://ultralytics.com/images/zidane.jpg") - ``` - """ - if self.table is None: - raise ValueError("Table is not created. Please create the table first.") - if isinstance(imgs, str): - imgs = [imgs] - assert isinstance(imgs, list), f"img must be a string or a list of strings. Got {type(imgs)}" - embeds = self.model.embed(imgs) - # Get avg if multiple images are passed (len > 1) - embeds = torch.mean(torch.stack(embeds), 0).cpu().numpy() if len(embeds) > 1 else embeds[0].cpu().numpy() - return self.table.search(embeds).limit(limit).to_arrow() - - def sql_query( - self, query: str, return_type: str = "pandas" - ) -> Union[Any, None]: # pandas.DataFrame or pyarrow.Table - """ - Run a SQL-Like query on the table. Utilizes LanceDB predicate pushdown. - - Args: - query (str): SQL query to run. - return_type (str): Type of the result to return. Can be either 'pandas' or 'arrow'. Defaults to 'pandas'. - - Returns: - (pyarrow.Table): An arrow table containing the results. - - Example: - ```python - exp = Explorer() - exp.create_embeddings_table() - query = "SELECT * FROM 'table' WHERE labels LIKE '%person%'" - result = exp.sql_query(query) - ``` - """ - assert return_type in { - "pandas", - "arrow", - }, f"Return type should be either `pandas` or `arrow`, but got {return_type}" - import duckdb - - if self.table is None: - raise ValueError("Table is not created. Please create the table first.") - - # Note: using filter pushdown would be a better long term solution. Temporarily using duckdb for this. - table = self.table.to_arrow() # noqa NOTE: Don't comment this. 
This line is used by DuckDB - if not query.startswith("SELECT") and not query.startswith("WHERE"): - raise ValueError( - f"Query must start with SELECT or WHERE. You can either pass the entire query or just the WHERE " - f"clause. found {query}" - ) - if query.startswith("WHERE"): - query = f"SELECT * FROM 'table' {query}" - LOGGER.info(f"Running query: {query}") - - rs = duckdb.sql(query) - if return_type == "arrow": - return rs.arrow() - elif return_type == "pandas": - return rs.df() - - def plot_sql_query(self, query: str, labels: bool = True) -> Image.Image: - """ - Plot the results of a SQL-Like query on the table. - - Args: - query (str): SQL query to run. - labels (bool): Whether to plot the labels or not. - - Returns: - (PIL.Image): Image containing the plot. - - Example: - ```python - exp = Explorer() - exp.create_embeddings_table() - query = "SELECT * FROM 'table' WHERE labels LIKE '%person%'" - result = exp.plot_sql_query(query) - ``` - """ - result = self.sql_query(query, return_type="arrow") - if len(result) == 0: - LOGGER.info("No results found.") - return None - img = plot_query_result(result, plot_labels=labels) - return Image.fromarray(img) - - def get_similar( - self, - img: Union[str, np.ndarray, List[str], List[np.ndarray]] = None, - idx: Union[int, List[int]] = None, - limit: int = 25, - return_type: str = "pandas", - ) -> Any: # pandas.DataFrame or pyarrow.Table - """ - Query the table for similar images. Accepts a single image or a list of images. - - Args: - img (str or list): Path to the image or a list of paths to the images. - idx (int or list): Index of the image in the table or a list of indexes. - limit (int): Number of results to return. Defaults to 25. - return_type (str): Type of the result to return. Can be either 'pandas' or 'arrow'. Defaults to 'pandas'. - - Returns: - (pandas.DataFrame): A dataframe containing the results. - - Example: - ```python - exp = Explorer() - exp.create_embeddings_table() - similar = exp.get_similar(img="https://ultralytics.com/images/zidane.jpg") - ``` - """ - assert return_type in {"pandas", "arrow"}, f"Return type should be `pandas` or `arrow`, but got {return_type}" - img = self._check_imgs_or_idxs(img, idx) - similar = self.query(img, limit=limit) - - if return_type == "arrow": - return similar - elif return_type == "pandas": - return similar.to_pandas() - - def plot_similar( - self, - img: Union[str, np.ndarray, List[str], List[np.ndarray]] = None, - idx: Union[int, List[int]] = None, - limit: int = 25, - labels: bool = True, - ) -> Image.Image: - """ - Plot the similar images. Accepts images or indexes. - - Args: - img (str or list): Path to the image or a list of paths to the images. - idx (int or list): Index of the image in the table or a list of indexes. - labels (bool): Whether to plot the labels or not. - limit (int): Number of results to return. Defaults to 25. - - Returns: - (PIL.Image): Image containing the plot. - - Example: - ```python - exp = Explorer() - exp.create_embeddings_table() - similar = exp.plot_similar(img="https://ultralytics.com/images/zidane.jpg") - ``` - """ - similar = self.get_similar(img, idx, limit, return_type="arrow") - if len(similar) == 0: - LOGGER.info("No results found.") - return None - img = plot_query_result(similar, plot_labels=labels) - return Image.fromarray(img) - - def similarity_index(self, max_dist: float = 0.2, top_k: float = None, force: bool = False) -> Any: # pd.DataFrame - """ - Calculate the similarity index of all the images in the table. 
Here, the index will contain the data points that - are max_dist or closer to the image in the embedding space at a given index. - - Args: - max_dist (float): maximum L2 distance between the embeddings to consider. Defaults to 0.2. - top_k (float): Percentage of the closest data points to consider when counting. Used to apply limit. - vector search. Defaults: None. - force (bool): Whether to overwrite the existing similarity index or not. Defaults to True. - - Returns: - (pandas.DataFrame): A dataframe containing the similarity index. Each row corresponds to an image, - and columns include indices of similar images and their respective distances. - - Example: - ```python - exp = Explorer() - exp.create_embeddings_table() - sim_idx = exp.similarity_index() - ``` - """ - if self.table is None: - raise ValueError("Table is not created. Please create the table first.") - sim_idx_table_name = f"{self.sim_idx_base_name}_thres_{max_dist}_top_{top_k}".lower() - if sim_idx_table_name in self.connection.table_names() and not force: - LOGGER.info("Similarity matrix already exists. Reusing it. Pass force=True to overwrite it.") - return self.connection.open_table(sim_idx_table_name).to_pandas() - - if top_k and not (1.0 >= top_k >= 0.0): - raise ValueError(f"top_k must be between 0.0 and 1.0. Got {top_k}") - if max_dist < 0.0: - raise ValueError(f"max_dist must be greater than 0. Got {max_dist}") - - top_k = int(top_k * len(self.table)) if top_k else len(self.table) - top_k = max(top_k, 1) - features = self.table.to_lance().to_table(columns=["vector", "im_file"]).to_pydict() - im_files = features["im_file"] - embeddings = features["vector"] - - sim_table = self.connection.create_table(sim_idx_table_name, schema=get_sim_index_schema(), mode="overwrite") - - def _yield_sim_idx(): - """Generates a dataframe with similarity indices and distances for images.""" - for i in tqdm(range(len(embeddings))): - sim_idx = self.table.search(embeddings[i]).limit(top_k).to_pandas().query(f"_distance <= {max_dist}") - yield [ - { - "idx": i, - "im_file": im_files[i], - "count": len(sim_idx), - "sim_im_files": sim_idx["im_file"].tolist(), - } - ] - - sim_table.add(_yield_sim_idx()) - self.sim_index = sim_table - return sim_table.to_pandas() - - def plot_similarity_index(self, max_dist: float = 0.2, top_k: float = None, force: bool = False) -> Image: - """ - Plot the similarity index of all the images in the table. Here, the index will contain the data points that are - max_dist or closer to the image in the embedding space at a given index. - - Args: - max_dist (float): maximum L2 distance between the embeddings to consider. Defaults to 0.2. - top_k (float): Percentage of closest data points to consider when counting. Used to apply limit when - running vector search. Defaults to 0.01. - force (bool): Whether to overwrite the existing similarity index or not. Defaults to True. - - Returns: - (PIL.Image): Image containing the plot. 
- - Example: - ```python - exp = Explorer() - exp.create_embeddings_table() - - similarity_idx_plot = exp.plot_similarity_index() - similarity_idx_plot.show() # view image preview - similarity_idx_plot.save("path/to/save/similarity_index_plot.png") # save contents to file - ``` - """ - sim_idx = self.similarity_index(max_dist=max_dist, top_k=top_k, force=force) - sim_count = sim_idx["count"].tolist() - sim_count = np.array(sim_count) - - indices = np.arange(len(sim_count)) - - # Create the bar plot - plt.bar(indices, sim_count) - - # Customize the plot (optional) - plt.xlabel("data idx") - plt.ylabel("Count") - plt.title("Similarity Count") - buffer = BytesIO() - plt.savefig(buffer, format="png") - buffer.seek(0) - - # Use Pillow to open the image from the buffer - return Image.fromarray(np.array(Image.open(buffer))) - - def _check_imgs_or_idxs( - self, img: Union[str, np.ndarray, List[str], List[np.ndarray], None], idx: Union[None, int, List[int]] - ) -> List[np.ndarray]: - """Determines whether to fetch images or indexes based on provided arguments and returns image paths.""" - if img is None and idx is None: - raise ValueError("Either img or idx must be provided.") - if img is not None and idx is not None: - raise ValueError("Only one of img or idx must be provided.") - if idx is not None: - idx = idx if isinstance(idx, list) else [idx] - img = self.table.to_lance().take(idx, columns=["im_file"]).to_pydict()["im_file"] - - return img if isinstance(img, list) else [img] - - def ask_ai(self, query): - """ - Ask AI a question. - - Args: - query (str): Question to ask. - - Returns: - (pandas.DataFrame): A dataframe containing filtered results to the SQL query. - - Example: - ```python - exp = Explorer() - exp.create_embeddings_table() - answer = exp.ask_ai("Show images with 1 person and 2 dogs") - ``` - """ - result = prompt_sql_query(query) - try: - return self.sql_query(result) - except Exception as e: - LOGGER.error("AI generated query is not valid. 
Please try again with a different prompt") - LOGGER.error(e) - return None diff --git a/ultralytics/data/explorer/gui/__init__.py b/ultralytics/data/explorer/gui/__init__.py deleted file mode 100644 index 9e68dc1224..0000000000 --- a/ultralytics/data/explorer/gui/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# Ultralytics YOLO 🚀, AGPL-3.0 license diff --git a/ultralytics/data/explorer/gui/dash.py b/ultralytics/data/explorer/gui/dash.py deleted file mode 100644 index 81f1f62a8a..0000000000 --- a/ultralytics/data/explorer/gui/dash.py +++ /dev/null @@ -1,282 +0,0 @@ -# Ultralytics YOLO 🚀, AGPL-3.0 license - -import sys -import time -from threading import Thread - -from ultralytics import Explorer -from ultralytics.utils import ROOT, SETTINGS -from ultralytics.utils.checks import check_requirements - -check_requirements(("streamlit>=1.29.0", "streamlit-select>=0.3")) - -import streamlit as st -from streamlit_select import image_select - - -def _get_explorer(): - """Initializes and returns an instance of the Explorer class.""" - exp = Explorer(data=st.session_state.get("dataset"), model=st.session_state.get("model")) - thread = Thread( - target=exp.create_embeddings_table, - kwargs={"force": st.session_state.get("force_recreate_embeddings"), "split": st.session_state.get("split")}, - ) - thread.start() - progress_bar = st.progress(0, text="Creating embeddings table...") - while exp.progress < 1: - time.sleep(0.1) - progress_bar.progress(exp.progress, text=f"Progress: {exp.progress * 100}%") - thread.join() - st.session_state["explorer"] = exp - progress_bar.empty() - - -def init_explorer_form(data=None, model=None): - """Initializes an Explorer instance and creates embeddings table with progress tracking.""" - if data is None: - datasets = ROOT / "cfg" / "datasets" - ds = [d.name for d in datasets.glob("*.yaml")] - else: - ds = [data] - - if model is None: - models = [ - "yolov8n.pt", - "yolov8s.pt", - "yolov8m.pt", - "yolov8l.pt", - "yolov8x.pt", - "yolov8n-seg.pt", - "yolov8s-seg.pt", - "yolov8m-seg.pt", - "yolov8l-seg.pt", - "yolov8x-seg.pt", - "yolov8n-pose.pt", - "yolov8s-pose.pt", - "yolov8m-pose.pt", - "yolov8l-pose.pt", - "yolov8x-pose.pt", - ] - else: - models = [model] - - splits = ["train", "val", "test"] - - with st.form(key="explorer_init_form"): - col1, col2, col3 = st.columns(3) - with col1: - st.selectbox("Select dataset", ds, key="dataset") - with col2: - st.selectbox("Select model", models, key="model") - with col3: - st.selectbox("Select split", splits, key="split") - st.checkbox("Force recreate embeddings", key="force_recreate_embeddings") - - st.form_submit_button("Explore", on_click=_get_explorer) - - -def query_form(): - """Sets up a form in Streamlit to initialize Explorer with dataset and model selection.""" - with st.form("query_form"): - col1, col2 = st.columns([0.8, 0.2]) - with col1: - st.text_input( - "Query", - "WHERE labels LIKE '%person%' AND labels LIKE '%dog%'", - label_visibility="collapsed", - key="query", - ) - with col2: - st.form_submit_button("Query", on_click=run_sql_query) - - -def ai_query_form(): - """Sets up a Streamlit form for user input to initialize Explorer with dataset and model selection.""" - with st.form("ai_query_form"): - col1, col2 = st.columns([0.8, 0.2]) - with col1: - st.text_input("Query", "Show images with 1 person and 1 dog", label_visibility="collapsed", key="ai_query") - with col2: - st.form_submit_button("Ask AI", on_click=run_ai_query) - - -def find_similar_imgs(imgs): - """Initializes a Streamlit form for AI-based image querying 
with custom input.""" - exp = st.session_state["explorer"] - similar = exp.get_similar(img=imgs, limit=st.session_state.get("limit"), return_type="arrow") - paths = similar.to_pydict()["im_file"] - st.session_state["imgs"] = paths - st.session_state["res"] = similar - - -def similarity_form(selected_imgs): - """Initializes a form for AI-based image querying with custom input in Streamlit.""" - st.write("Similarity Search") - with st.form("similarity_form"): - subcol1, subcol2 = st.columns([1, 1]) - with subcol1: - st.number_input( - "limit", min_value=None, max_value=None, value=25, label_visibility="collapsed", key="limit" - ) - - with subcol2: - disabled = not len(selected_imgs) - st.write("Selected: ", len(selected_imgs)) - st.form_submit_button( - "Search", - disabled=disabled, - on_click=find_similar_imgs, - args=(selected_imgs,), - ) - if disabled: - st.error("Select at least one image to search.") - - -# def persist_reset_form(): -# with st.form("persist_reset"): -# col1, col2 = st.columns([1, 1]) -# with col1: -# st.form_submit_button("Reset", on_click=reset) -# -# with col2: -# st.form_submit_button("Persist", on_click=update_state, args=("PERSISTING", True)) - - -def run_sql_query(): - """Executes an SQL query and returns the results.""" - st.session_state["error"] = None - query = st.session_state.get("query") - if query.rstrip().lstrip(): - exp = st.session_state["explorer"] - res = exp.sql_query(query, return_type="arrow") - st.session_state["imgs"] = res.to_pydict()["im_file"] - st.session_state["res"] = res - - -def run_ai_query(): - """Execute SQL query and update session state with query results.""" - if not SETTINGS["openai_api_key"]: - st.session_state["error"] = ( - 'OpenAI API key not found in settings. Please run yolo settings openai_api_key="..."' - ) - return - import pandas # scope for faster 'import ultralytics' - - st.session_state["error"] = None - query = st.session_state.get("ai_query") - if query.rstrip().lstrip(): - exp = st.session_state["explorer"] - res = exp.ask_ai(query) - if not isinstance(res, pandas.DataFrame) or res.empty: - st.session_state["error"] = "No results found using AI generated query. Try another query or rerun it." - return - st.session_state["imgs"] = res["im_file"].to_list() - st.session_state["res"] = res - - -def reset_explorer(): - """Resets the explorer to its initial state by clearing session variables.""" - st.session_state["explorer"] = None - st.session_state["imgs"] = None - st.session_state["error"] = None - - -def utralytics_explorer_docs_callback(): - """Resets the explorer to its initial state by clearing session variables.""" - with st.container(border=True): - st.image( - "https://raw.githubusercontent.com/ultralytics/assets/main/logo/Ultralytics_Logotype_Original.svg", - width=100, - ) - st.markdown( - "
This demo is built using Ultralytics Explorer API. Visit API docs to try examples & learn more
", - unsafe_allow_html=True, - help=None, - ) - st.link_button("Ultrlaytics Explorer API", "https://docs.ultralytics.com/datasets/explorer/") - - -def layout(data=None, model=None): - """Resets explorer session variables and provides documentation with a link to API docs.""" - st.set_page_config(layout="wide", initial_sidebar_state="collapsed") - st.markdown("
Ultralytics Explorer Demo
", unsafe_allow_html=True) - - if st.session_state.get("explorer") is None: - init_explorer_form(data, model) - return - - st.button(":arrow_backward: Select Dataset", on_click=reset_explorer) - exp = st.session_state.get("explorer") - col1, col2 = st.columns([0.75, 0.25], gap="small") - imgs = [] - if st.session_state.get("error"): - st.error(st.session_state["error"]) - elif st.session_state.get("imgs"): - imgs = st.session_state.get("imgs") - else: - imgs = exp.table.to_lance().to_table(columns=["im_file"]).to_pydict()["im_file"] - st.session_state["res"] = exp.table.to_arrow() - total_imgs, selected_imgs = len(imgs), [] - with col1: - subcol1, subcol2, subcol3, subcol4, subcol5 = st.columns(5) - with subcol1: - st.write("Max Images Displayed:") - with subcol2: - num = st.number_input( - "Max Images Displayed", - min_value=0, - max_value=total_imgs, - value=min(500, total_imgs), - key="num_imgs_displayed", - label_visibility="collapsed", - ) - with subcol3: - st.write("Start Index:") - with subcol4: - start_idx = st.number_input( - "Start Index", - min_value=0, - max_value=total_imgs, - value=0, - key="start_index", - label_visibility="collapsed", - ) - with subcol5: - reset = st.button("Reset", use_container_width=False, key="reset") - if reset: - st.session_state["imgs"] = None - st.experimental_rerun() - - query_form() - ai_query_form() - if total_imgs: - labels, boxes, masks, kpts, classes = None, None, None, None, None - task = exp.model.task - if st.session_state.get("display_labels"): - labels = st.session_state.get("res").to_pydict()["labels"][start_idx : start_idx + num] - boxes = st.session_state.get("res").to_pydict()["bboxes"][start_idx : start_idx + num] - masks = st.session_state.get("res").to_pydict()["masks"][start_idx : start_idx + num] - kpts = st.session_state.get("res").to_pydict()["keypoints"][start_idx : start_idx + num] - classes = st.session_state.get("res").to_pydict()["cls"][start_idx : start_idx + num] - imgs_displayed = imgs[start_idx : start_idx + num] - selected_imgs = image_select( - f"Total samples: {total_imgs}", - images=imgs_displayed, - use_container_width=False, - # indices=[i for i in range(num)] if select_all else None, - labels=labels, - classes=classes, - bboxes=boxes, - masks=masks if task == "segment" else None, - kpts=kpts if task == "pose" else None, - ) - - with col2: - similarity_form(selected_imgs) - st.checkbox("Labels", value=False, key="display_labels") - utralytics_explorer_docs_callback() - - -if __name__ == "__main__": - kwargs = dict(zip(sys.argv[1::2], sys.argv[2::2])) - layout(**kwargs) diff --git a/ultralytics/data/explorer/utils.py b/ultralytics/data/explorer/utils.py deleted file mode 100644 index 76f2557275..0000000000 --- a/ultralytics/data/explorer/utils.py +++ /dev/null @@ -1,167 +0,0 @@ -# Ultralytics YOLO 🚀, AGPL-3.0 license - -import getpass -from typing import List - -import cv2 -import numpy as np - -from ultralytics.data.augment import LetterBox -from ultralytics.utils import LOGGER as logger -from ultralytics.utils import SETTINGS -from ultralytics.utils.checks import check_requirements -from ultralytics.utils.ops import xyxy2xywh -from ultralytics.utils.plotting import plot_images - - -def get_table_schema(vector_size): - """Extracts and returns the schema of a database table.""" - from lancedb.pydantic import LanceModel, Vector - - class Schema(LanceModel): - im_file: str - labels: List[str] - cls: List[int] - bboxes: List[List[float]] - masks: List[List[List[int]]] - keypoints: List[List[List[float]]] - vector: 
Vector(vector_size) - - return Schema - - -def get_sim_index_schema(): - """Returns a LanceModel schema for a database table with specified vector size.""" - from lancedb.pydantic import LanceModel - - class Schema(LanceModel): - idx: int - im_file: str - count: int - sim_im_files: List[str] - - return Schema - - -def sanitize_batch(batch, dataset_info): - """Sanitizes input batch for inference, ensuring correct format and dimensions.""" - batch["cls"] = batch["cls"].flatten().int().tolist() - box_cls_pair = sorted(zip(batch["bboxes"].tolist(), batch["cls"]), key=lambda x: x[1]) - batch["bboxes"] = [box for box, _ in box_cls_pair] - batch["cls"] = [cls for _, cls in box_cls_pair] - batch["labels"] = [dataset_info["names"][i] for i in batch["cls"]] - batch["masks"] = batch["masks"].tolist() if "masks" in batch else [[[]]] - batch["keypoints"] = batch["keypoints"].tolist() if "keypoints" in batch else [[[]]] - return batch - - -def plot_query_result(similar_set, plot_labels=True): - """ - Plot images from the similar set. - - Args: - similar_set (list): Pyarrow or pandas object containing the similar data points - plot_labels (bool): Whether to plot labels or not - """ - import pandas # scope for faster 'import ultralytics' - - similar_set = ( - similar_set.to_dict(orient="list") if isinstance(similar_set, pandas.DataFrame) else similar_set.to_pydict() - ) - empty_masks = [[[]]] - empty_boxes = [[]] - images = similar_set.get("im_file", []) - bboxes = similar_set.get("bboxes", []) if similar_set.get("bboxes") is not empty_boxes else [] - masks = similar_set.get("masks") if similar_set.get("masks")[0] != empty_masks else [] - kpts = similar_set.get("keypoints") if similar_set.get("keypoints")[0] != empty_masks else [] - cls = similar_set.get("cls", []) - - plot_size = 640 - imgs, batch_idx, plot_boxes, plot_masks, plot_kpts = [], [], [], [], [] - for i, imf in enumerate(images): - im = cv2.imread(imf) - im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) - h, w = im.shape[:2] - r = min(plot_size / h, plot_size / w) - imgs.append(LetterBox(plot_size, center=False)(image=im).transpose(2, 0, 1)) - if plot_labels: - if len(bboxes) > i and len(bboxes[i]) > 0: - box = np.array(bboxes[i], dtype=np.float32) - box[:, [0, 2]] *= r - box[:, [1, 3]] *= r - plot_boxes.append(box) - if len(masks) > i and len(masks[i]) > 0: - mask = np.array(masks[i], dtype=np.uint8)[0] - plot_masks.append(LetterBox(plot_size, center=False)(image=mask)) - if len(kpts) > i and kpts[i] is not None: - kpt = np.array(kpts[i], dtype=np.float32) - kpt[:, :, :2] *= r - plot_kpts.append(kpt) - batch_idx.append(np.ones(len(np.array(bboxes[i], dtype=np.float32))) * i) - imgs = np.stack(imgs, axis=0) - masks = np.stack(plot_masks, axis=0) if plot_masks else np.zeros(0, dtype=np.uint8) - kpts = np.concatenate(plot_kpts, axis=0) if plot_kpts else np.zeros((0, 51), dtype=np.float32) - boxes = xyxy2xywh(np.concatenate(plot_boxes, axis=0)) if plot_boxes else np.zeros(0, dtype=np.float32) - batch_idx = np.concatenate(batch_idx, axis=0) - cls = np.concatenate([np.array(c, dtype=np.int32) for c in cls], axis=0) - - return plot_images( - imgs, batch_idx, cls, bboxes=boxes, masks=masks, kpts=kpts, max_subplots=len(images), save=False, threaded=False - ) - - -def prompt_sql_query(query): - """Plots images with optional labels from a similar data set.""" - check_requirements("openai>=1.6.1") - from openai import OpenAI - - if not SETTINGS["openai_api_key"]: - logger.warning("OpenAI API key not found in settings. 
Please enter your API key below.") - openai_api_key = getpass.getpass("OpenAI API key: ") - SETTINGS.update({"openai_api_key": openai_api_key}) - openai = OpenAI(api_key=SETTINGS["openai_api_key"]) - - messages = [ - { - "role": "system", - "content": """ - You are a helpful data scientist proficient in SQL. You need to output exactly one SQL query based on - the following schema and a user request. You only need to output the format with fixed selection - statement that selects everything from "'table'", like `SELECT * from 'table'` - - Schema: - im_file: string not null - labels: list not null - child 0, item: string - cls: list not null - child 0, item: int64 - bboxes: list> not null - child 0, item: list - child 0, item: double - masks: list>> not null - child 0, item: list> - child 0, item: list - child 0, item: int64 - keypoints: list>> not null - child 0, item: list> - child 0, item: list - child 0, item: double - vector: fixed_size_list[256] not null - child 0, item: float - - Some details about the schema: - - the "labels" column contains the string values like 'person' and 'dog' for the respective objects - in each image - - the "cls" column contains the integer values on these classes that map them the labels - - Example of a correct query: - request - Get all data points that contain 2 or more people and at least one dog - correct query- - SELECT * FROM 'table' WHERE ARRAY_LENGTH(cls) >= 2 AND ARRAY_LENGTH(FILTER(labels, x -> x = 'person')) >= 2 AND ARRAY_LENGTH(FILTER(labels, x -> x = 'dog')) >= 1; - """, - }, - {"role": "user", "content": f"{query}"}, - ] - - response = openai.chat.completions.create(model="gpt-3.5-turbo", messages=messages) - return response.choices[0].message.content diff --git a/ultralytics/data/loaders.py b/ultralytics/data/loaders.py index e91f2082c5..ead7d6138a 100644 --- a/ultralytics/data/loaders.py +++ b/ultralytics/data/loaders.py @@ -18,11 +18,29 @@ from PIL import Image from ultralytics.data.utils import FORMATS_HELP_MSG, IMG_FORMATS, VID_FORMATS from ultralytics.utils import IS_COLAB, IS_KAGGLE, LOGGER, ops from ultralytics.utils.checks import check_requirements +from ultralytics.utils.patches import imread @dataclass class SourceTypes: - """Class to represent various types of input sources for predictions.""" + """ + Class to represent various types of input sources for predictions. + + This class uses dataclass to define boolean flags for different types of input sources that can be used for + making predictions with YOLO models. + + Attributes: + stream (bool): Flag indicating if the input source is a video stream. + screenshot (bool): Flag indicating if the input source is a screenshot. + from_img (bool): Flag indicating if the input source is an image file. + + Examples: + >>> source_types = SourceTypes(stream=True, screenshot=False, from_img=False) + >>> print(source_types.stream) + True + >>> print(source_types.from_img) + False + """ stream: bool = False screenshot: bool = False @@ -32,38 +50,47 @@ class SourceTypes: class LoadStreams: """ - Stream Loader for various types of video streams, Supports RTSP, RTMP, HTTP, and TCP streams. + Stream Loader for various types of video streams. + + Supports RTSP, RTMP, HTTP, and TCP streams. This class handles the loading and processing of multiple video + streams simultaneously, making it suitable for real-time video analysis tasks. Attributes: - sources (str): The source input paths or URLs for the video streams. - vid_stride (int): Video frame-rate stride, defaults to 1. 
- buffer (bool): Whether to buffer input streams, defaults to False. + sources (List[str]): The source input paths or URLs for the video streams. + vid_stride (int): Video frame-rate stride. + buffer (bool): Whether to buffer input streams. running (bool): Flag to indicate if the streaming thread is running. mode (str): Set to 'stream' indicating real-time capture. - imgs (list): List of image frames for each stream. - fps (list): List of FPS for each stream. - frames (list): List of total frames for each stream. - threads (list): List of threads for each stream. - shape (list): List of shapes for each stream. - caps (list): List of cv2.VideoCapture objects for each stream. + imgs (List[List[np.ndarray]]): List of image frames for each stream. + fps (List[float]): List of FPS for each stream. + frames (List[int]): List of total frames for each stream. + threads (List[Thread]): List of threads for each stream. + shape (List[Tuple[int, int, int]]): List of shapes for each stream. + caps (List[cv2.VideoCapture]): List of cv2.VideoCapture objects for each stream. bs (int): Batch size for processing. Methods: - __init__: Initialize the stream loader. update: Read stream frames in daemon thread. close: Close stream loader and release resources. __iter__: Returns an iterator object for the class. __next__: Returns source paths, transformed, and original images for processing. __len__: Return the length of the sources object. - Example: - ```bash - yolo predict source='rtsp://example.com/media.mp4' - ``` + Examples: + >>> stream_loader = LoadStreams("rtsp://example.com/stream1.mp4") + >>> for sources, imgs, _ in stream_loader: + ... # Process the images + ... pass + >>> stream_loader.close() + + Notes: + - The class uses threading to efficiently load frames from multiple streams simultaneously. + - It automatically handles YouTube links, converting them to the best available stream URL. + - The class implements a buffer system to manage frame storage and retrieval. 
""" def __init__(self, sources="file.streams", vid_stride=1, buffer=False): - """Initialize instance variables and check for consistent input stream shapes.""" + """Initialize stream loader for multiple video sources, supporting various stream types.""" torch.backends.cudnn.benchmark = True # faster for fixed-size inference self.buffer = buffer # buffer input streams self.running = True # running flag for Thread @@ -114,7 +141,7 @@ class LoadStreams: LOGGER.info("") # newline def update(self, i, cap, stream): - """Read stream `i` frames in daemon thread.""" + """Read stream frames in daemon thread and update image buffer.""" n, f = 0, self.frames[i] # frame number, frame array while self.running and cap.isOpened() and n < (f - 1): if len(self.imgs[i]) < 30: # keep a <=30-image buffer @@ -134,7 +161,7 @@ class LoadStreams: time.sleep(0.01) # wait until the buffer is empty def close(self): - """Close stream loader and release resources.""" + """Terminates stream loader, stops threads, and releases video capture resources.""" self.running = False # stop flag for Thread for thread in self.threads: if thread.is_alive(): @@ -152,7 +179,7 @@ class LoadStreams: return self def __next__(self): - """Returns source paths, transformed and original images for processing.""" + """Returns the next batch of frames from multiple video streams for processing.""" self.count += 1 images = [] @@ -179,16 +206,16 @@ class LoadStreams: return self.sources, images, [""] * self.bs def __len__(self): - """Return the length of the sources object.""" + """Return the number of video streams in the LoadStreams object.""" return self.bs # 1E12 frames = 32 streams at 30 FPS for 30 years class LoadScreenshots: """ - YOLOv8 screenshot dataloader. + Ultralytics screenshot dataloader for capturing and processing screen images. - This class manages the loading of screenshot images for processing with YOLOv8. - Suitable for use with `yolo predict source=screen`. + This class manages the loading of screenshot images for processing with YOLO. It is suitable for use with + `yolo predict source=screen`. Attributes: source (str): The source input indicating which screen to capture. @@ -201,15 +228,21 @@ class LoadScreenshots: frame (int): Counter for captured frames. sct (mss.mss): Screen capture object from `mss` library. bs (int): Batch size, set to 1. - monitor (dict): Monitor configuration details. + fps (int): Frames per second, set to 30. + monitor (Dict[str, int]): Monitor configuration details. Methods: __iter__: Returns an iterator object. __next__: Captures the next screenshot and returns it. + + Examples: + >>> loader = LoadScreenshots("0 100 100 640 480") # screen 0, top-left (100,100), 640x480 + >>> for source, im, im0s, vid_cap, s in loader: + ... 
print(f"Captured frame: {im.shape}") """ def __init__(self, source): - """Source = [screen_number left top width height] (pixels).""" + """Initialize screenshot capture with specified screen and region parameters.""" check_requirements("mss") import mss # noqa @@ -236,11 +269,11 @@ class LoadScreenshots: self.monitor = {"left": self.left, "top": self.top, "width": self.width, "height": self.height} def __iter__(self): - """Returns an iterator of the object.""" + """Yields the next screenshot image from the specified screen or region for processing.""" return self def __next__(self): - """Screen capture with 'mss' to get raw pixels from the screen as np array.""" + """Captures and returns the next screenshot as a numpy array using the mss library.""" im0 = np.asarray(self.sct.grab(self.monitor))[:, :, :3] # BGRA to BGR s = f"screen {self.screen} (LTWH): {self.left},{self.top},{self.width},{self.height}: " @@ -250,29 +283,45 @@ class LoadScreenshots: class LoadImagesAndVideos: """ - YOLOv8 image/video dataloader. + A class for loading and processing images and videos for YOLO object detection. - This class manages the loading and pre-processing of image and video data for YOLOv8. It supports loading from - various formats, including single image files, video files, and lists of image and video paths. + This class manages the loading and pre-processing of image and video data from various sources, including + single image files, video files, and lists of image and video paths. Attributes: - files (list): List of image and video file paths. + files (List[str]): List of image and video file paths. nf (int): Total number of files (images and videos). - video_flag (list): Flags indicating whether a file is a video (True) or an image (False). + video_flag (List[bool]): Flags indicating whether a file is a video (True) or an image (False). mode (str): Current mode, 'image' or 'video'. - vid_stride (int): Stride for video frame-rate, defaults to 1. - bs (int): Batch size, set to 1 for this class. + vid_stride (int): Stride for video frame-rate. + bs (int): Batch size. cap (cv2.VideoCapture): Video capture object for OpenCV. frame (int): Frame counter for video. frames (int): Total number of frames in the video. - count (int): Counter for iteration, initialized at 0 during `__iter__()`. + count (int): Counter for iteration, initialized at 0 during __iter__(). + ni (int): Number of images. Methods: - _new_video(path): Create a new cv2.VideoCapture object for a given video path. + __init__: Initialize the LoadImagesAndVideos object. + __iter__: Returns an iterator object for VideoStream or ImageFolder. + __next__: Returns the next batch of images or video frames along with their paths and metadata. + _new_video: Creates a new video capture object for the given path. + __len__: Returns the number of batches in the object. + + Examples: + >>> loader = LoadImagesAndVideos("path/to/data", batch=32, vid_stride=1) + >>> for paths, imgs, info in loader: + ... # Process batch of images or video frames + ... pass + + Notes: + - Supports various image formats including HEIC. + - Handles both local files and directories. + - Can read from a text file containing paths to images and videos. 
""" def __init__(self, path, batch=1, vid_stride=1): - """Initialize the Dataloader and raise FileNotFoundError if file not found.""" + """Initialize dataloader for images and videos, supporting various input formats.""" parent = None if isinstance(path, str) and Path(path).suffix == ".txt": # *.txt file with img/vid/dir on each line parent = Path(path).parent @@ -316,12 +365,12 @@ class LoadImagesAndVideos: raise FileNotFoundError(f"No images or videos found in {p}. {FORMATS_HELP_MSG}") def __iter__(self): - """Returns an iterator object for VideoStream or ImageFolder.""" + """Iterates through image/video files, yielding source paths, images, and metadata.""" self.count = 0 return self def __next__(self): - """Returns the next batch of images or video frames along with their paths and metadata.""" + """Returns the next batch of images or video frames with their paths and metadata.""" paths, imgs, info = [], [], [] while len(imgs) < self.bs: if self.count >= self.nf: # end of file list @@ -336,6 +385,7 @@ class LoadImagesAndVideos: if not self.cap or not self.cap.isOpened(): self._new_video(path) + success = False for _ in range(self.vid_stride): success = self.cap.grab() if not success: @@ -359,8 +409,19 @@ class LoadImagesAndVideos: if self.count < self.nf: self._new_video(self.files[self.count]) else: + # Handle image files (including HEIC) self.mode = "image" - im0 = cv2.imread(path) # BGR + if path.split(".")[-1].lower() == "heic": + # Load HEIC image using Pillow with pillow-heif + check_requirements("pillow-heif") + + from pillow_heif import register_heif_opener + + register_heif_opener() # Register HEIF opener with Pillow + with Image.open(path) as img: + im0 = cv2.cvtColor(np.asarray(img), cv2.COLOR_RGB2BGR) # convert image to BGR nparray + else: + im0 = imread(path) # BGR if im0 is None: LOGGER.warning(f"WARNING ⚠️ Image Read Error {path}") else: @@ -374,7 +435,7 @@ class LoadImagesAndVideos: return paths, imgs, info def _new_video(self, path): - """Creates a new video capture object for the given path.""" + """Creates a new video capture object for the given path and initializes video-related attributes.""" self.frame = 0 self.cap = cv2.VideoCapture(path) self.fps = int(self.cap.get(cv2.CAP_PROP_FPS)) @@ -383,40 +444,50 @@ class LoadImagesAndVideos: self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT) / self.vid_stride) def __len__(self): - """Returns the number of batches in the object.""" - return math.ceil(self.nf / self.bs) # number of files + """Returns the number of files (images and videos) in the dataset.""" + return math.ceil(self.nf / self.bs) # number of batches class LoadPilAndNumpy: """ Load images from PIL and Numpy arrays for batch processing. - This class is designed to manage loading and pre-processing of image data from both PIL and Numpy formats. - It performs basic validation and format conversion to ensure that the images are in the required format for - downstream processing. + This class manages loading and pre-processing of image data from both PIL and Numpy formats. It performs basic + validation and format conversion to ensure that the images are in the required format for downstream processing. Attributes: - paths (list): List of image paths or autogenerated filenames. - im0 (list): List of images stored as Numpy arrays. - mode (str): Type of data being processed, defaults to 'image'. + paths (List[str]): List of image paths or autogenerated filenames. + im0 (List[np.ndarray]): List of images stored as Numpy arrays. 
+ mode (str): Type of data being processed, set to 'image'. bs (int): Batch size, equivalent to the length of `im0`. Methods: - _single_check(im): Validate and format a single image to a Numpy array. + _single_check: Validate and format a single image to a Numpy array. + + Examples: + >>> from PIL import Image + >>> import numpy as np + >>> pil_img = Image.new("RGB", (100, 100)) + >>> np_img = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8) + >>> loader = LoadPilAndNumpy([pil_img, np_img]) + >>> paths, images, _ = next(iter(loader)) + >>> print(f"Loaded {len(images)} images") + Loaded 2 images """ def __init__(self, im0): - """Initialize PIL and Numpy Dataloader.""" + """Initializes a loader for PIL and Numpy images, converting inputs to a standardized format.""" if not isinstance(im0, list): im0 = [im0] - self.paths = [getattr(im, "filename", f"image{i}.jpg") for i, im in enumerate(im0)] + # use `image{i}.jpg` when Image.filename returns an empty path. + self.paths = [getattr(im, "filename", "") or f"image{i}.jpg" for i, im in enumerate(im0)] self.im0 = [self._single_check(im) for im in im0] self.mode = "image" self.bs = len(self.im0) @staticmethod def _single_check(im): - """Validate and format an image to numpy array.""" + """Validate and format an image to numpy array, ensuring RGB order and contiguous memory.""" assert isinstance(im, (Image.Image, np.ndarray)), f"Expected PIL/np.ndarray image type, but got {type(im)}" if isinstance(im, Image.Image): if im.mode != "RGB": @@ -426,41 +497,48 @@ class LoadPilAndNumpy: return im def __len__(self): - """Returns the length of the 'im0' attribute.""" + """Returns the length of the 'im0' attribute, representing the number of loaded images.""" return len(self.im0) def __next__(self): - """Returns batch paths, images, processed images, None, ''.""" + """Returns the next batch of images, paths, and metadata for processing.""" if self.count == 1: # loop only once as it's batch inference raise StopIteration self.count += 1 return self.paths, self.im0, [""] * self.bs def __iter__(self): - """Enables iteration for class LoadPilAndNumpy.""" + """Iterates through PIL/numpy images, yielding paths, raw images, and metadata for processing.""" self.count = 0 return self class LoadTensor: """ - Load images from torch.Tensor data. + A class for loading and processing tensor data for object detection tasks. - This class manages the loading and pre-processing of image data from PyTorch tensors for further processing. + This class handles the loading and pre-processing of image data from PyTorch tensors, preparing them for + further processing in object detection pipelines. Attributes: - im0 (torch.Tensor): The input tensor containing the image(s). + im0 (torch.Tensor): The input tensor containing the image(s) with shape (B, C, H, W). bs (int): Batch size, inferred from the shape of `im0`. - mode (str): Current mode, set to 'image'. - paths (list): List of image paths or filenames. - count (int): Counter for iteration, initialized at 0 during `__iter__()`. + mode (str): Current processing mode, set to 'image'. + paths (List[str]): List of image paths or auto-generated filenames. Methods: - _single_check(im, stride): Validate and possibly modify the input tensor. + _single_check: Validates and formats an input tensor. 
+ + Examples: + >>> import torch + >>> tensor = torch.rand(1, 3, 640, 640) + >>> loader = LoadTensor(tensor) + >>> paths, images, info = next(iter(loader)) + >>> print(f"Processed {len(images)} images") """ def __init__(self, im0) -> None: - """Initialize Tensor Dataloader.""" + """Initialize LoadTensor object for processing torch.Tensor image data.""" self.im0 = self._single_check(im0) self.bs = self.im0.shape[0] self.mode = "image" @@ -468,7 +546,7 @@ class LoadTensor: @staticmethod def _single_check(im, stride=32): - """Validate and format an image to torch.Tensor.""" + """Validates and formats a single image tensor, ensuring correct shape and normalization.""" s = ( f"WARNING ⚠️ torch.Tensor inputs should be BCHW i.e. shape(1, 3, 640, 640) " f"divisible by stride {stride}. Input shape{tuple(im.shape)} is incompatible." @@ -490,24 +568,24 @@ class LoadTensor: return im def __iter__(self): - """Returns an iterator object.""" + """Yields an iterator object for iterating through tensor image data.""" self.count = 0 return self def __next__(self): - """Return next item in the iterator.""" + """Yields the next batch of tensor images and metadata for processing.""" if self.count == 1: raise StopIteration self.count += 1 return self.paths, self.im0, [""] * self.bs def __len__(self): - """Returns the batch size.""" + """Returns the batch size of the tensor input.""" return self.bs def autocast_list(source): - """Merges a list of source of different types into a list of numpy arrays or PIL images.""" + """Merges a list of sources into a list of numpy arrays or PIL images for Ultralytics prediction.""" files = [] for im in source: if isinstance(im, (str, Path)): # filename or uri @@ -527,21 +605,24 @@ def get_best_youtube_url(url, method="pytube"): """ Retrieves the URL of the best quality MP4 video stream from a given YouTube video. - This function uses the specified method to extract the video info from YouTube. It supports the following methods: - - "pytube": Uses the pytube library to fetch the video streams. - - "pafy": Uses the pafy library to fetch the video streams. - - "yt-dlp": Uses the yt-dlp library to fetch the video streams. - - The function then finds the highest quality MP4 format that has a video codec but no audio codec, and returns the - URL of this video stream. - Args: url (str): The URL of the YouTube video. - method (str): The method to use for extracting video info. Default is "pytube". Other options are "pafy" and - "yt-dlp". + method (str): The method to use for extracting video info. Options are "pytube", "pafy", and "yt-dlp". + Defaults to "pytube". Returns: - (str): The URL of the best quality MP4 video stream, or None if no suitable stream is found. + (str | None): The URL of the best quality MP4 video stream, or None if no suitable stream is found. + + Examples: + >>> url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ" + >>> best_url = get_best_youtube_url(url) + >>> print(best_url) + https://rr4---sn-q4flrnek.googlevideo.com/videoplayback?expire=... + + Notes: + - Requires additional libraries based on the chosen method: pytubefix, pafy, or yt-dlp. + - The function prioritizes streams with at least 1080p resolution when available. + - For the "yt-dlp" method, it looks for formats with video codec, no audio, and *.mp4 extension. 
""" if method == "pytube": # Switched from pytube to pytubefix to resolve https://github.com/pytube/pytube/issues/1954 diff --git a/ultralytics/data/utils.py b/ultralytics/data/utils.py index e82d8bb759..3748ac2db5 100644 --- a/ultralytics/data/utils.py +++ b/ultralytics/data/utils.py @@ -1,6 +1,5 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license -import contextlib import hashlib import json import os @@ -36,7 +35,7 @@ from ultralytics.utils.downloads import download, safe_download, unzip_file from ultralytics.utils.ops import segments2boxes HELP_URL = "See https://docs.ultralytics.com/datasets for dataset formatting guidance." -IMG_FORMATS = {"bmp", "dng", "jpeg", "jpg", "mpo", "png", "tif", "tiff", "webp", "pfm"} # image suffixes +IMG_FORMATS = {"bmp", "dng", "jpeg", "jpg", "mpo", "png", "tif", "tiff", "webp", "pfm", "heic"} # image suffixes VID_FORMATS = {"asf", "avi", "gif", "m4v", "mkv", "mov", "mp4", "mpeg", "mpg", "ts", "wmv", "webm"} # video suffixes PIN_MEMORY = str(os.getenv("PIN_MEMORY", True)).lower() == "true" # global pin_memory for dataloaders FORMATS_HELP_MSG = f"Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}" @@ -60,12 +59,14 @@ def exif_size(img: Image.Image): """Returns exif-corrected PIL size.""" s = img.size # (width, height) if img.format == "JPEG": # only support JPEG images - with contextlib.suppress(Exception): + try: exif = img.getexif() if exif: rotation = exif.get(274, None) # the EXIF key for the orientation tag is 274 if rotation in {6, 8}: # rotation 270 or 90 s = s[1], s[0] + except: # noqa E722 + pass return s @@ -216,7 +217,7 @@ def polygons2masks_overlap(imgsz, segments, downsample_ratio=1): ms = [] for si in range(len(segments)): mask = polygon2mask(imgsz, [segments[si].reshape(-1)], downsample_ratio=downsample_ratio, color=1) - ms.append(mask) + ms.append(mask.astype(masks.dtype)) areas.append(mask.sum()) areas = np.asarray(areas) index = np.argsort(-areas) @@ -452,12 +453,12 @@ class HUBDatasetStats: path = Path(path).resolve() LOGGER.info(f"Starting HUB dataset checks for {path}....") - self.task = task # detect, segment, pose, classify + self.task = task # detect, segment, pose, classify, obb if self.task == "classify": unzip_dir = unzip_file(path) data = check_cls_dataset(unzip_dir) data["path"] = unzip_dir - else: # detect, segment, pose + else: # detect, segment, pose, obb _, data_dir, yaml_path = self._unzip(Path(path)) try: # Load YAML with checks diff --git a/ultralytics/engine/exporter.py b/ultralytics/engine/exporter.py index 73ee545f33..b2c0329168 100644 --- a/ultralytics/engine/exporter.py +++ b/ultralytics/engine/exporter.py @@ -1,52 +1,52 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license """ -Export a YOLOv8 PyTorch model to other formats. TensorFlow exports authored by https://github.com/zldrobit. +Export a YOLO PyTorch model to other formats. TensorFlow exports authored by https://github.com/zldrobit. 
Format | `format=argument` | Model --- | --- | --- -PyTorch | - | yolov8n.pt -TorchScript | `torchscript` | yolov8n.torchscript -ONNX | `onnx` | yolov8n.onnx -OpenVINO | `openvino` | yolov8n_openvino_model/ -TensorRT | `engine` | yolov8n.engine -CoreML | `coreml` | yolov8n.mlpackage -TensorFlow SavedModel | `saved_model` | yolov8n_saved_model/ -TensorFlow GraphDef | `pb` | yolov8n.pb -TensorFlow Lite | `tflite` | yolov8n.tflite -TensorFlow Edge TPU | `edgetpu` | yolov8n_edgetpu.tflite -TensorFlow.js | `tfjs` | yolov8n_web_model/ -PaddlePaddle | `paddle` | yolov8n_paddle_model/ -NCNN | `ncnn` | yolov8n_ncnn_model/ +PyTorch | - | yolo11n.pt +TorchScript | `torchscript` | yolo11n.torchscript +ONNX | `onnx` | yolo11n.onnx +OpenVINO | `openvino` | yolo11n_openvino_model/ +TensorRT | `engine` | yolo11n.engine +CoreML | `coreml` | yolo11n.mlpackage +TensorFlow SavedModel | `saved_model` | yolo11n_saved_model/ +TensorFlow GraphDef | `pb` | yolo11n.pb +TensorFlow Lite | `tflite` | yolo11n.tflite +TensorFlow Edge TPU | `edgetpu` | yolo11n_edgetpu.tflite +TensorFlow.js | `tfjs` | yolo11n_web_model/ +PaddlePaddle | `paddle` | yolo11n_paddle_model/ +NCNN | `ncnn` | yolo11n_ncnn_model/ Requirements: $ pip install "ultralytics[export]" Python: from ultralytics import YOLO - model = YOLO('yolov8n.pt') + model = YOLO('yolo11n.pt') results = model.export(format='onnx') CLI: - $ yolo mode=export model=yolov8n.pt format=onnx + $ yolo mode=export model=yolo11n.pt format=onnx Inference: - $ yolo predict model=yolov8n.pt # PyTorch - yolov8n.torchscript # TorchScript - yolov8n.onnx # ONNX Runtime or OpenCV DNN with dnn=True - yolov8n_openvino_model # OpenVINO - yolov8n.engine # TensorRT - yolov8n.mlpackage # CoreML (macOS-only) - yolov8n_saved_model # TensorFlow SavedModel - yolov8n.pb # TensorFlow GraphDef - yolov8n.tflite # TensorFlow Lite - yolov8n_edgetpu.tflite # TensorFlow Edge TPU - yolov8n_paddle_model # PaddlePaddle - yolov8n_ncnn_model # NCNN + $ yolo predict model=yolo11n.pt # PyTorch + yolo11n.torchscript # TorchScript + yolo11n.onnx # ONNX Runtime or OpenCV DNN with dnn=True + yolo11n_openvino_model # OpenVINO + yolo11n.engine # TensorRT + yolo11n.mlpackage # CoreML (macOS-only) + yolo11n_saved_model # TensorFlow SavedModel + yolo11n.pb # TensorFlow GraphDef + yolo11n.tflite # TensorFlow Lite + yolo11n_edgetpu.tflite # TensorFlow Edge TPU + yolo11n_paddle_model # PaddlePaddle + yolo11n_ncnn_model # NCNN TensorFlow.js: $ cd .. && git clone https://github.com/zldrobit/tfjs-yolov5-example.git && cd tfjs-yolov5-example $ npm install - $ ln -s ../../yolov5/yolov8n_web_model public/yolov8n_web_model + $ ln -s ../../yolo11n_web_model public/yolo11n_web_model $ npm start """ @@ -124,7 +124,7 @@ def gd_outputs(gd): def try_export(inner_func): - """YOLOv8 export decorator, i.e. @try_export.""" + """YOLO export decorator, i.e. @try_export.""" inner_args = get_default_args(inner_func) def outer_func(*args, **kwargs): @@ -183,11 +183,10 @@ class Exporter: # Get the closest match if format is invalid matches = difflib.get_close_matches(fmt, fmts, n=1, cutoff=0.6) # 60% similarity required to match - if matches: - LOGGER.warning(f"WARNING ⚠️ Invalid export format='{fmt}', updating to format='{matches[0]}'") - fmt = matches[0] - else: + if not matches: raise ValueError(f"Invalid export format='{fmt}'. 
Valid formats are {fmts}") + LOGGER.warning(f"WARNING ⚠️ Invalid export format='{fmt}', updating to format='{matches[0]}'") + fmt = matches[0] flags = [x == fmt for x in fmts] if sum(flags) != 1: raise ValueError(f"Invalid export format='{fmt}'. Valid formats are {fmts}") @@ -379,7 +378,7 @@ class Exporter: @try_export def export_torchscript(self, prefix=colorstr("TorchScript:")): - """YOLOv8 TorchScript model export.""" + """YOLO TorchScript model export.""" LOGGER.info(f"\n{prefix} starting export with torch {torch.__version__}...") f = self.file.with_suffix(".torchscript") @@ -396,7 +395,7 @@ class Exporter: @try_export def export_onnx(self, prefix=colorstr("ONNX:")): - """YOLOv8 ONNX export.""" + """YOLO ONNX export.""" requirements = ["onnx>=1.12.0"] if self.args.simplify: requirements += ["onnxslim==0.1.34", "onnxruntime" + ("-gpu" if torch.cuda.is_available() else "")] @@ -453,7 +452,7 @@ class Exporter: @try_export def export_openvino(self, prefix=colorstr("OpenVINO:")): - """YOLOv8 OpenVINO export.""" + """YOLO OpenVINO export.""" check_requirements(f'openvino{"<=2024.0.0" if ARM64 else ">=2024.0.0"}') # fix OpenVINO issue on ARM64 import openvino as ov @@ -467,7 +466,7 @@ class Exporter: def serialize(ov_model, file): """Set RT info, serialize and save metadata YAML.""" - ov_model.set_rt_info("YOLOv8", ["model_info", "model_type"]) + ov_model.set_rt_info("YOLO", ["model_info", "model_type"]) ov_model.set_rt_info(True, ["model_info", "reverse_input_channels"]) ov_model.set_rt_info(114, ["model_info", "pad_value"]) ov_model.set_rt_info([255.0], ["model_info", "scale_values"]) @@ -525,7 +524,7 @@ class Exporter: @try_export def export_paddle(self, prefix=colorstr("PaddlePaddle:")): - """YOLOv8 Paddle export.""" + """YOLO Paddle export.""" check_requirements(("paddlepaddle", "x2paddle")) import x2paddle # noqa from x2paddle.convert import pytorch2paddle # noqa @@ -539,7 +538,7 @@ class Exporter: @try_export def export_ncnn(self, prefix=colorstr("NCNN:")): - """YOLOv8 NCNN export using PNNX https://github.com/pnnx/pnnx.""" + """YOLO NCNN export using PNNX https://github.com/pnnx/pnnx.""" check_requirements("ncnn") import ncnn # noqa @@ -607,7 +606,7 @@ class Exporter: @try_export def export_coreml(self, prefix=colorstr("CoreML:")): - """YOLOv8 CoreML export.""" + """YOLO CoreML export.""" mlmodel = self.args.format.lower() == "mlmodel" # legacy *.mlmodel export format requested check_requirements("coremltools>=6.0,<=6.2" if mlmodel else "coremltools>=7.0") import coremltools as ct # noqa @@ -684,7 +683,7 @@ class Exporter: @try_export def export_engine(self, prefix=colorstr("TensorRT:")): - """YOLOv8 TensorRT export https://developer.nvidia.com/tensorrt.""" + """YOLO TensorRT export https://developer.nvidia.com/tensorrt.""" assert self.im.device.type != "cpu", "export running on CPU but must be on GPU, i.e. 
use 'device=0'" f_onnx, _ = self.export_onnx() # run before TRT import https://github.com/ultralytics/ultralytics/issues/7016 @@ -818,7 +817,7 @@ class Exporter: @try_export def export_saved_model(self, prefix=colorstr("TensorFlow SavedModel:")): - """YOLOv8 TensorFlow SavedModel export.""" + """YOLO TensorFlow SavedModel export.""" cuda = torch.cuda.is_available() try: import tensorflow as tf # noqa @@ -870,22 +869,19 @@ class Exporter: np_data = None if self.args.int8: tmp_file = f / "tmp_tflite_int8_calibration_images.npy" # int8 calibration images file - verbosity = "info" if self.args.data: f.mkdir() images = [batch["img"].permute(0, 2, 3, 1) for batch in self.get_int8_calibration_dataloader(prefix)] images = torch.cat(images, 0).float() np.save(str(tmp_file), images.numpy().astype(np.float32)) # BHWC np_data = [["images", tmp_file, [[[[0, 0, 0]]]], [[[[255, 255, 255]]]]]] - else: - verbosity = "error" LOGGER.info(f"{prefix} starting TFLite export with onnx2tf {onnx2tf.__version__}...") - onnx2tf.convert( + keras_model = onnx2tf.convert( input_onnx_file_path=f_onnx, output_folder_path=str(f), not_use_onnxsim=True, - verbosity=verbosity, + verbosity="error", # note INT8-FP16 activation bug https://github.com/ultralytics/ultralytics/issues/15873 output_integer_quantized_tflite=self.args.int8, quant_type="per-tensor", # "per-tensor" (faster) or "per-channel" (slower but more accurate) custom_input_op_name_np_data_path=np_data, @@ -906,11 +902,11 @@ class Exporter: for file in f.rglob("*.tflite"): f.unlink() if "quant_with_int16_act.tflite" in str(f) else self._add_tflite_metadata(file) - return str(f), tf.saved_model.load(f, tags=None, options=None) # load saved_model as Keras model + return str(f), keras_model # or keras_model = tf.saved_model.load(f, tags=None, options=None) @try_export def export_pb(self, keras_model, prefix=colorstr("TensorFlow GraphDef:")): - """YOLOv8 TensorFlow GraphDef *.pb export https://github.com/leimao/Frozen_Graph_TensorFlow.""" + """YOLO TensorFlow GraphDef *.pb export https://github.com/leimao/Frozen_Graph_TensorFlow.""" import tensorflow as tf # noqa from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2 # noqa @@ -926,7 +922,7 @@ class Exporter: @try_export def export_tflite(self, keras_model, nms, agnostic_nms, prefix=colorstr("TensorFlow Lite:")): - """YOLOv8 TensorFlow Lite export.""" + """YOLO TensorFlow Lite export.""" # BUG https://github.com/ultralytics/ultralytics/issues/13436 import tensorflow as tf # noqa @@ -942,7 +938,7 @@ class Exporter: @try_export def export_edgetpu(self, tflite_model="", prefix=colorstr("Edge TPU:")): - """YOLOv8 Edge TPU export https://coral.ai/docs/edgetpu/models-intro/.""" + """YOLO Edge TPU export https://coral.ai/docs/edgetpu/models-intro/.""" LOGGER.warning(f"{prefix} WARNING ⚠️ Edge TPU known bug https://github.com/ultralytics/ultralytics/issues/1185") cmd = "edgetpu_compiler --version" @@ -964,7 +960,15 @@ class Exporter: LOGGER.info(f"\n{prefix} starting export with Edge TPU compiler {ver}...") f = str(tflite_model).replace(".tflite", "_edgetpu.tflite") # Edge TPU model - cmd = f'edgetpu_compiler -s -d -k 10 --out_dir "{Path(f).parent}" "{tflite_model}"' + cmd = ( + "edgetpu_compiler " + f'--out_dir "{Path(f).parent}" ' + "--show_operations " + "--search_delegate " + "--delegate_search_step 3 " + "--timeout_sec 180 " + f'"{tflite_model}"' + ) LOGGER.info(f"{prefix} running '{cmd}'") subprocess.run(cmd, shell=True) self._add_tflite_metadata(f) @@ -972,7 +976,7 @@ class 
Exporter: @try_export def export_tfjs(self, prefix=colorstr("TensorFlow.js:")): - """YOLOv8 TensorFlow.js export.""" + """YOLO TensorFlow.js export.""" check_requirements("tensorflowjs") if ARM64: # Fix error: `np.object` was a deprecated alias for the builtin `object` when exporting to TF.js on ARM64 @@ -1071,7 +1075,7 @@ class Exporter: tmp_file.unlink() def _pipeline_coreml(self, model, weights_dir=None, prefix=colorstr("CoreML Pipeline:")): - """YOLOv8 CoreML pipeline.""" + """YOLO CoreML pipeline.""" import coremltools as ct # noqa LOGGER.info(f"{prefix} starting pipeline with coremltools {ct.__version__}...") diff --git a/ultralytics/engine/model.py b/ultralytics/engine/model.py index c4db53426a..c5b63eed83 100644 --- a/ultralytics/engine/model.py +++ b/ultralytics/engine/model.py @@ -72,16 +72,16 @@ class Model(nn.Module): Examples: >>> from ultralytics import YOLO - >>> model = YOLO("yolov8n.pt") + >>> model = YOLO("yolo11n.pt") >>> results = model.predict("image.jpg") - >>> model.train(data="coco128.yaml", epochs=3) + >>> model.train(data="coco8.yaml", epochs=3) >>> metrics = model.val() >>> model.export(format="onnx") """ def __init__( self, - model: Union[str, Path] = "yolov8n.pt", + model: Union[str, Path] = "yolo11n.pt", task: str = None, verbose: bool = False, ) -> None: @@ -106,7 +106,7 @@ class Model(nn.Module): ImportError: If required dependencies for specific model types (like HUB SDK) are not installed. Examples: - >>> model = Model("yolov8n.pt") + >>> model = Model("yolo11n.pt") >>> model = Model("path/to/model.yaml", task="detect") >>> model = Model("hub_model", verbose=True) """ @@ -168,7 +168,7 @@ class Model(nn.Module): Results object. Examples: - >>> model = YOLO("yolov8n.pt") + >>> model = YOLO("yolo11n.pt") >>> results = model("https://ultralytics.com/images/bus.jpg") >>> for r in results: ... print(f"Detected {len(r)} objects in image") @@ -192,7 +192,7 @@ class Model(nn.Module): Examples: >>> Model.is_triton_model("http://localhost:8000/v2/models/yolov8n") True - >>> Model.is_triton_model("yolov8n.pt") + >>> Model.is_triton_model("yolo11n.pt") False """ from urllib.parse import urlsplit @@ -217,7 +217,7 @@ class Model(nn.Module): Examples: >>> Model.is_hub_model("https://hub.ultralytics.com/models/MODEL") True - >>> Model.is_hub_model("yolov8n.pt") + >>> Model.is_hub_model("yolo11n.pt") False """ return model.startswith(f"{HUB_WEB_ROOT}/models/") @@ -274,7 +274,7 @@ class Model(nn.Module): Examples: >>> model = Model() - >>> model._load("yolov8n.pt") + >>> model._load("yolo11n.pt") >>> model._load("path/to/weights.pth", task="detect") """ if weights.lower().startswith(("https://", "http://", "rtsp://", "rtmp://", "tcp://")): @@ -307,7 +307,7 @@ class Model(nn.Module): information about supported model formats and operations. Examples: - >>> model = Model("yolov8n.pt") + >>> model = Model("yolo11n.pt") >>> model._check_is_pytorch_model() # No error raised >>> model = Model("yolov8n.onnx") >>> model._check_is_pytorch_model() # Raises TypeError @@ -338,7 +338,7 @@ class Model(nn.Module): AssertionError: If the model is not a PyTorch model. Examples: - >>> model = Model("yolov8n.pt") + >>> model = Model("yolo11n.pt") >>> model.reset_weights() """ self._check_is_pytorch_model() @@ -349,7 +349,7 @@ class Model(nn.Module): p.requires_grad = True return self - def load(self, weights: Union[str, Path] = "yolov8n.pt") -> "Model": + def load(self, weights: Union[str, Path] = "yolo11n.pt") -> "Model": """ Loads parameters from the specified weights file into the model. 
@@ -367,7 +367,7 @@ class Model(nn.Module): Examples: >>> model = Model() - >>> model.load("yolov8n.pt") + >>> model.load("yolo11n.pt") >>> model.load(Path("path/to/weights.pt")) """ self._check_is_pytorch_model() @@ -391,7 +391,7 @@ class Model(nn.Module): AssertionError: If the model is not a PyTorch model. Examples: - >>> model = Model("yolov8n.pt") + >>> model = Model("yolo11n.pt") >>> model.save("my_model.pt") """ self._check_is_pytorch_model() @@ -428,7 +428,7 @@ class Model(nn.Module): TypeError: If the model is not a PyTorch model. Examples: - >>> model = Model("yolov8n.pt") + >>> model = Model("yolo11n.pt") >>> model.info() # Prints model summary >>> info_list = model.info(detailed=True, verbose=False) # Returns detailed info as a list """ @@ -451,7 +451,7 @@ class Model(nn.Module): TypeError: If the model is not a PyTorch nn.Module. Examples: - >>> model = Model("yolov8n.pt") + >>> model = Model("yolo11n.pt") >>> model.fuse() >>> # Model is now fused and ready for optimized inference """ @@ -483,7 +483,7 @@ class Model(nn.Module): AssertionError: If the model is not a PyTorch model. Examples: - >>> model = YOLO("yolov8n.pt") + >>> model = YOLO("yolo11n.pt") >>> image = "https://ultralytics.com/images/bus.jpg" >>> embeddings = model.embed(image) >>> print(embeddings[0].shape) @@ -520,7 +520,7 @@ class Model(nn.Module): Results object. Examples: - >>> model = YOLO("yolov8n.pt") + >>> model = YOLO("yolo11n.pt") >>> results = model.predict(source="path/to/image.jpg", conf=0.25) >>> for r in results: ... print(r.boxes.data) # print detection bounding boxes @@ -543,7 +543,7 @@ class Model(nn.Module): prompts = args.pop("prompts", None) # for SAM-type models if not self.predictor: - self.predictor = predictor or self._smart_load("predictor")(overrides=args, _callbacks=self.callbacks) + self.predictor = (predictor or self._smart_load("predictor"))(overrides=args, _callbacks=self.callbacks) self.predictor.setup_model(model=self.model, verbose=is_cli) else: # only update args if predictor is already setup self.predictor.args = get_cfg(self.predictor.args, args) @@ -581,7 +581,7 @@ class Model(nn.Module): AttributeError: If the predictor does not have registered trackers. Examples: - >>> model = YOLO("yolov8n.pt") + >>> model = YOLO("yolo11n.pt") >>> results = model.track(source="path/to/video.mp4", show=True) >>> for r in results: ... print(r.boxes.id) # print tracking IDs @@ -624,8 +624,8 @@ class Model(nn.Module): AssertionError: If the model is not a PyTorch model. Examples: - >>> model = YOLO("yolov8n.pt") - >>> results = model.val(data="coco128.yaml", imgsz=640) + >>> model = YOLO("yolo11n.pt") + >>> results = model.val(data="coco8.yaml", imgsz=640) >>> print(results.box.map) # Print mAP50-95 """ custom = {"rect": True} # method defaults @@ -666,7 +666,7 @@ class Model(nn.Module): AssertionError: If the model is not a PyTorch model. Examples: - >>> model = YOLO("yolov8n.pt") + >>> model = YOLO("yolo11n.pt") >>> results = model.benchmark(data="coco8.yaml", imgsz=640, half=True) >>> print(results) """ @@ -716,7 +716,7 @@ class Model(nn.Module): RuntimeError: If the export process fails due to errors. Examples: - >>> model = YOLO("yolov8n.pt") + >>> model = YOLO("yolo11n.pt") >>> model.export(format="onnx", dynamic=True, simplify=True) 'path/to/exported/model.onnx' """ @@ -771,8 +771,8 @@ class Model(nn.Module): ModuleNotFoundError: If the HUB SDK is not installed. 
Examples: - >>> model = YOLO("yolov8n.pt") - >>> results = model.train(data="coco128.yaml", epochs=3) + >>> model = YOLO("yolo11n.pt") + >>> results = model.train(data="coco8.yaml", epochs=3) """ self._check_is_pytorch_model() if hasattr(self.session, "model") and self.session.model.id: # Ultralytics HUB session with loaded model @@ -836,7 +836,7 @@ class Model(nn.Module): AssertionError: If the model is not a PyTorch model. Examples: - >>> model = YOLO("yolov8n.pt") + >>> model = YOLO("yolo11n.pt") >>> results = model.tune(use_ray=True, iterations=20) >>> print(results) """ @@ -871,7 +871,7 @@ class Model(nn.Module): AssertionError: If the model is not a PyTorch model. Examples: - >>> model = Model("yolov8n.pt") + >>> model = Model("yolo11n.pt") >>> model = model._apply(lambda t: t.cuda()) # Move model to GPU """ self._check_is_pytorch_model() @@ -896,7 +896,7 @@ class Model(nn.Module): AttributeError: If the model or predictor does not have a 'names' attribute. Examples: - >>> model = YOLO("yolov8n.pt") + >>> model = YOLO("yolo11n.pt") >>> print(model.names) {0: 'person', 1: 'bicycle', 2: 'car', ...} """ @@ -924,7 +924,7 @@ class Model(nn.Module): AttributeError: If the model is not a PyTorch nn.Module instance. Examples: - >>> model = YOLO("yolov8n.pt") + >>> model = YOLO("yolo11n.pt") >>> print(model.device) device(type='cuda', index=0) # if CUDA is available >>> model = model.to("cpu") @@ -946,7 +946,7 @@ class Model(nn.Module): (object | None): The transform object of the model if available, otherwise None. Examples: - >>> model = YOLO("yolov8n.pt") + >>> model = YOLO("yolo11n.pt") >>> transforms = model.transforms >>> if transforms: ... print(f"Model transforms: {transforms}") @@ -975,9 +975,9 @@ class Model(nn.Module): Examples: >>> def on_train_start(trainer): ... print("Training is starting!") - >>> model = YOLO("yolov8n.pt") + >>> model = YOLO("yolo11n.pt") >>> model.add_callback("on_train_start", on_train_start) - >>> model.train(data="coco128.yaml", epochs=1) + >>> model.train(data="coco8.yaml", epochs=1) """ self.callbacks[event].append(func) @@ -994,7 +994,7 @@ class Model(nn.Module): recognized by the Ultralytics callback system. Examples: - >>> model = YOLO("yolov8n.pt") + >>> model = YOLO("yolo11n.pt") >>> model.add_callback("on_train_start", lambda: print("Training started")) >>> model.clear_callback("on_train_start") >>> # All callbacks for 'on_train_start' are now removed @@ -1024,7 +1024,7 @@ class Model(nn.Module): modifications, ensuring consistent behavior across different runs or experiments. 
Examples: - >>> model = YOLO("yolov8n.pt") + >>> model = YOLO("yolo11n.pt") >>> model.add_callback("on_train_start", custom_function) >>> model.reset_callbacks() # All callbacks are now reset to their default functions diff --git a/ultralytics/engine/predictor.py b/ultralytics/engine/predictor.py index 8ace18f611..16f12a88ea 100644 --- a/ultralytics/engine/predictor.py +++ b/ultralytics/engine/predictor.py @@ -381,7 +381,7 @@ class BasePredictor: # Save images else: - cv2.imwrite(save_path, im) + cv2.imwrite(str(Path(save_path).with_suffix(".jpg")), im) # save to JPG for best support def show(self, p=""): """Display an image in a window using the OpenCV imshow function.""" diff --git a/ultralytics/engine/results.py b/ultralytics/engine/results.py index 57cc4b04bc..7d8192d634 100644 --- a/ultralytics/engine/results.py +++ b/ultralytics/engine/results.py @@ -676,7 +676,7 @@ class Results(SimpleClass): Examples: >>> from ultralytics import YOLO - >>> model = YOLO("yolov8n.pt") + >>> model = YOLO("yolo11n.pt") >>> results = model("path/to/image.jpg") >>> for result in results: ... result.save_txt("output.txt") diff --git a/ultralytics/engine/trainer.py b/ultralytics/engine/trainer.py index 9fcc697040..352067397f 100644 --- a/ultralytics/engine/trainer.py +++ b/ultralytics/engine/trainer.py @@ -469,10 +469,8 @@ class BaseTrainer: if RANK in {-1, 0}: # Do final val with best.pt - LOGGER.info( - f"\n{epoch - self.start_epoch + 1} epochs completed in " - f"{(time.time() - self.train_time_start) / 3600:.3f} hours." - ) + seconds = time.time() - self.train_time_start + LOGGER.info(f"\n{epoch - self.start_epoch + 1} epochs completed in {seconds / 3600:.3f} hours.") self.final_eval() if self.args.plots: self.plot_metrics() @@ -504,7 +502,7 @@ class BaseTrainer: """Read results.csv into a dict using pandas.""" import pandas as pd # scope for faster 'import ultralytics' - return {k.strip(): v for k, v in pd.read_csv(self.csv).to_dict(orient="list").items()} + return pd.read_csv(self.csv).to_dict(orient="list") def save_model(self): """Save model training checkpoints with additional metadata.""" @@ -654,10 +652,11 @@ class BaseTrainer: def save_metrics(self, metrics): """Saves training metrics to a CSV file.""" keys, vals = list(metrics.keys()), list(metrics.values()) - n = len(metrics) + 1 # number of cols - s = "" if self.csv.exists() else (("%23s," * n % tuple(["epoch"] + keys)).rstrip(",") + "\n") # header + n = len(metrics) + 2 # number of cols + s = "" if self.csv.exists() else (("%s," * n % tuple(["epoch", "time"] + keys)).rstrip(",") + "\n") # header + t = time.time() - self.train_time_start with open(self.csv, "a") as f: - f.write(s + ("%23.5g," * n % tuple([self.epoch + 1] + vals)).rstrip(",") + "\n") + f.write(s + ("%.6g," * n % tuple([self.epoch + 1, t] + vals)).rstrip(",") + "\n") def plot_metrics(self): """Plot and display metrics visually.""" diff --git a/ultralytics/engine/tuner.py b/ultralytics/engine/tuner.py index 2f42eb603d..0330abb83d 100644 --- a/ultralytics/engine/tuner.py +++ b/ultralytics/engine/tuner.py @@ -12,7 +12,7 @@ Example: ```python from ultralytics import YOLO - model = YOLO("yolov8n.pt") + model = YOLO("yolo11n.pt") model.tune(data="coco8.yaml", epochs=10, iterations=300, optimizer="AdamW", plots=False, save=False, val=False) ``` """ @@ -54,7 +54,7 @@ class Tuner: ```python from ultralytics import YOLO - model = YOLO("yolov8n.pt") + model = YOLO("yolo11n.pt") model.tune(data="coco8.yaml", epochs=10, iterations=300, optimizer="AdamW", plots=False, save=False, val=False) 
``` @@ -62,7 +62,7 @@ class Tuner: ```python from ultralytics import YOLO - model = YOLO("yolov8n.pt") + model = YOLO("yolo11n.pt") model.tune(space={key1: val1, key2: val2}) # custom search space dictionary ``` """ diff --git a/ultralytics/engine/validator.py b/ultralytics/engine/validator.py index 5e0f0988c5..daa058a9de 100644 --- a/ultralytics/engine/validator.py +++ b/ultralytics/engine/validator.py @@ -119,6 +119,8 @@ class BaseValidator: self.args.plots &= trainer.stopper.possible_stop or (trainer.epoch == trainer.epochs - 1) model.eval() else: + if str(self.args.model).endswith(".yaml"): + LOGGER.warning("WARNING ⚠️ validating an untrained model YAML will result in 0 mAP.") callbacks.add_integration_callbacks(self) model = AutoBackend( weights=model or self.args.model, diff --git a/ultralytics/models/sam/predict.py b/ultralytics/models/sam/predict.py index 768d63d8f1..978f7cfd68 100644 --- a/ultralytics/models/sam/predict.py +++ b/ultralytics/models/sam/predict.py @@ -213,11 +213,14 @@ class Predictor(BasePredictor): Args: im (torch.Tensor): Preprocessed input image tensor with shape (N, C, H, W). bboxes (np.ndarray | List | None): Bounding boxes in XYXY format with shape (N, 4). - points (np.ndarray | List | None): Points indicating object locations with shape (N, 2), in pixels. - labels (np.ndarray | List | None): Point prompt labels with shape (N,). 1 for foreground, 0 for background. + points (np.ndarray | List | None): Points indicating object locations with shape (N, 2) or (N, num_points, 2), in pixels. + labels (np.ndarray | List | None): Point prompt labels with shape (N,) or (N, num_points). 1 for foreground, 0 for background. masks (np.ndarray | None): Low-res masks from previous predictions with shape (N, H, W). For SAM, H=W=256. multimask_output (bool): Flag to return multiple masks for ambiguous prompts. + Raises: + AssertionError: If the number of points don't match the number of labels, in case labels were passed. + Returns: (tuple): Tuple containing: - np.ndarray: Output masks with shape (C, H, W), where C is the number of generated masks. @@ -240,11 +243,15 @@ class Predictor(BasePredictor): points = points[None] if points.ndim == 1 else points # Assuming labels are all positive if users don't pass labels. if labels is None: - labels = np.ones(points.shape[0]) + labels = np.ones(points.shape[:-1]) labels = torch.as_tensor(labels, dtype=torch.int32, device=self.device) + assert ( + points.shape[-2] == labels.shape[-1] + ), f"Number of points {points.shape[-2]} should match number of labels {labels.shape[-1]}." 
points *= r - # (N, 2) --> (N, 1, 2), (N, ) --> (N, 1) - points, labels = points[:, None, :], labels[:, None] + if points.ndim == 2: + # (N, 2) --> (N, 1, 2), (N, ) --> (N, 1) + points, labels = points[:, None, :], labels[:, None] if bboxes is not None: bboxes = torch.as_tensor(bboxes, dtype=torch.float32, device=self.device) bboxes = bboxes[None] if bboxes.ndim == 1 else bboxes diff --git a/ultralytics/models/yolo/detect/predict.py b/ultralytics/models/yolo/detect/predict.py index 7a1799f2a5..136f988299 100644 --- a/ultralytics/models/yolo/detect/predict.py +++ b/ultralytics/models/yolo/detect/predict.py @@ -14,7 +14,7 @@ class DetectionPredictor(BasePredictor): from ultralytics.utils import ASSETS from ultralytics.models.yolo.detect import DetectionPredictor - args = dict(model="yolov8n.pt", source=ASSETS) + args = dict(model="yolo11n.pt", source=ASSETS) predictor = DetectionPredictor(overrides=args) predictor.predict_cli() ``` diff --git a/ultralytics/models/yolo/detect/train.py b/ultralytics/models/yolo/detect/train.py index 5be24c946e..e0dbb367f7 100644 --- a/ultralytics/models/yolo/detect/train.py +++ b/ultralytics/models/yolo/detect/train.py @@ -24,7 +24,7 @@ class DetectionTrainer(BaseTrainer): ```python from ultralytics.models.yolo.detect import DetectionTrainer - args = dict(model="yolov8n.pt", data="coco8.yaml", epochs=3) + args = dict(model="yolo11n.pt", data="coco8.yaml", epochs=3) trainer = DetectionTrainer(overrides=args) trainer.train() ``` diff --git a/ultralytics/models/yolo/detect/val.py b/ultralytics/models/yolo/detect/val.py index 0823f75144..05db8cba37 100644 --- a/ultralytics/models/yolo/detect/val.py +++ b/ultralytics/models/yolo/detect/val.py @@ -22,7 +22,7 @@ class DetectionValidator(BaseValidator): ```python from ultralytics.models.yolo.detect import DetectionValidator - args = dict(model="yolov8n.pt", data="coco8.yaml") + args = dict(model="yolo11n.pt", data="coco8.yaml") validator = DetectionValidator(args=args) validator() ``` @@ -75,7 +75,7 @@ class DetectionValidator(BaseValidator): ) # is COCO self.is_lvis = isinstance(val, str) and "lvis" in val and not self.is_coco # is LVIS self.class_map = converter.coco80_to_coco91_class() if self.is_coco else list(range(len(model.names))) - self.args.save_json |= (self.is_coco or self.is_lvis) and not self.training # run on final val if training COCO + self.args.save_json |= self.args.val and (self.is_coco or self.is_lvis) and not self.training # run final val self.names = model.names self.nc = len(model.names) self.metrics.names = self.names diff --git a/ultralytics/models/yolo/model.py b/ultralytics/models/yolo/model.py index 692537dd6a..6381960399 100644 --- a/ultralytics/models/yolo/model.py +++ b/ultralytics/models/yolo/model.py @@ -11,7 +11,7 @@ from ultralytics.utils import ROOT, yaml_load class YOLO(Model): """YOLO (You Only Look Once) object detection model.""" - def __init__(self, model="yolov8n.pt", task=None, verbose=False): + def __init__(self, model="yolo11n.pt", task=None, verbose=False): """Initialize YOLO model, switching to YOLOWorld if model filename contains '-world'.""" path = Path(model) if "-world" in path.stem and path.suffix in {".pt", ".yaml", ".yml"}: # if YOLOWorld PyTorch model diff --git a/ultralytics/nn/autobackend.py b/ultralytics/nn/autobackend.py index a0e2e43bbf..78949cb631 100644 --- a/ultralytics/nn/autobackend.py +++ b/ultralytics/nn/autobackend.py @@ -1,7 +1,6 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license import ast -import contextlib import json import platform import zipfile @@ 
-45,8 +44,10 @@ def check_class_names(names): def default_class_names(data=None): """Applies default class names to an input YAML file or returns numerical class names.""" if data: - with contextlib.suppress(Exception): + try: return yaml_load(check_yaml(data))["names"] + except: # noqa E722 + pass return {i: f"class{i}" for i in range(999)} # return default if above errors @@ -81,7 +82,7 @@ class AutoBackend(nn.Module): @torch.no_grad() def __init__( self, - weights="yolov8n.pt", + weights="yolo11n.pt", device=torch.device("cpu"), dnn=False, data=None, @@ -264,8 +265,8 @@ class AutoBackend(nn.Module): if -1 in tuple(model.get_tensor_shape(name)): dynamic = True context.set_input_shape(name, tuple(model.get_tensor_profile_shape(name, 0)[1])) - if dtype == np.float16: - fp16 = True + if dtype == np.float16: + fp16 = True else: output_names.append(name) shape = tuple(context.get_tensor_shape(name)) @@ -321,8 +322,10 @@ class AutoBackend(nn.Module): with open(w, "rb") as f: gd.ParseFromString(f.read()) frozen_func = wrap_frozen_graph(gd, inputs="x:0", outputs=gd_outputs(gd)) - with contextlib.suppress(StopIteration): # find metadata in SavedModel alongside GraphDef + try: # find metadata in SavedModel alongside GraphDef metadata = next(Path(w).resolve().parent.rglob(f"{Path(w).stem}_saved_model*/metadata.yaml")) + except StopIteration: + pass # TFLite or TFLite Edge TPU elif tflite or edgetpu: # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python @@ -345,10 +348,12 @@ class AutoBackend(nn.Module): input_details = interpreter.get_input_details() # inputs output_details = interpreter.get_output_details() # outputs # Load metadata - with contextlib.suppress(zipfile.BadZipFile): + try: with zipfile.ZipFile(w, "r") as model: meta_file = model.namelist()[0] metadata = ast.literal_eval(model.read(meta_file).decode("utf-8")) + except zipfile.BadZipFile: + pass # TF.js elif tfjs: diff --git a/ultralytics/nn/modules/head.py b/ultralytics/nn/modules/head.py index 5f2931777e..60911e779f 100644 --- a/ultralytics/nn/modules/head.py +++ b/ultralytics/nn/modules/head.py @@ -19,7 +19,7 @@ __all__ = "Detect", "Segment", "Pose", "Classify", "OBB", "RTDETRDecoder", "v10D class Detect(nn.Module): - """YOLOv8 Detect head for detection models.""" + """YOLO Detect head for detection models.""" dynamic = False # force grid reconstruction export = False # export mode @@ -30,7 +30,7 @@ class Detect(nn.Module): strides = torch.empty(0) # init def __init__(self, nc=80, ch=()): - """Initializes the YOLOv8 detection layer with specified number of classes and channels.""" + """Initializes the YOLO detection layer with specified number of classes and channels.""" super().__init__() self.nc = nc # number of classes self.nl = len(ch) # number of detection layers @@ -162,7 +162,7 @@ class Detect(nn.Module): class Segment(Detect): - """YOLOv8 Segment head for segmentation models.""" + """YOLO Segment head for segmentation models.""" def __init__(self, nc=80, nm=32, npr=256, ch=()): """Initialize the YOLO model attributes such as the number of masks, prototypes, and the convolution layers.""" @@ -187,7 +187,7 @@ class Segment(Detect): class OBB(Detect): - """YOLOv8 OBB detection head for detection with rotation models.""" + """YOLO OBB detection head for detection with rotation models.""" def __init__(self, nc=80, ne=1, ch=()): """Initialize OBB with number of classes `nc` and layer channels `ch`.""" @@ -217,7 +217,7 @@ class OBB(Detect): class Pose(Detect): - """YOLOv8 Pose head for keypoints 
models.""" + """YOLO Pose head for keypoints models.""" def __init__(self, nc=80, kpt_shape=(17, 3), ch=()): """Initialize YOLO network with default parameters and Convolutional Layers.""" @@ -257,10 +257,10 @@ class Pose(Detect): class Classify(nn.Module): - """YOLOv8 classification head, i.e. x(b,c1,20,20) to x(b,c2).""" + """YOLO classification head, i.e. x(b,c1,20,20) to x(b,c2).""" def __init__(self, c1, c2, k=1, s=1, p=None, g=1): - """Initializes YOLOv8 classification head to transform input tensor from (b,c1,20,20) to (b,c2) shape.""" + """Initializes YOLO classification head to transform input tensor from (b,c1,20,20) to (b,c2) shape.""" super().__init__() c_ = 1280 # efficientnet_b0 size self.conv = Conv(c1, c_, k, s, p, g) @@ -277,10 +277,10 @@ class Classify(nn.Module): class WorldDetect(Detect): - """Head for integrating YOLOv8 detection models with semantic understanding from text embeddings.""" + """Head for integrating YOLO detection models with semantic understanding from text embeddings.""" def __init__(self, nc=80, embed=512, with_bn=False, ch=()): - """Initialize YOLOv8 detection layer with nc classes and layer channels ch.""" + """Initialize YOLO detection layer with nc classes and layer channels ch.""" super().__init__(nc, ch) c3 = max(ch[0], min(self.nc, 100)) self.cv3 = nn.ModuleList(nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, embed, 1)) for x in ch) diff --git a/ultralytics/nn/tasks.py b/ultralytics/nn/tasks.py index 4ae6dba72e..407021c82a 100644 --- a/ultralytics/nn/tasks.py +++ b/ultralytics/nn/tasks.py @@ -2,6 +2,7 @@ import contextlib import pickle +import re import types from copy import deepcopy from pathlib import Path @@ -958,8 +959,10 @@ def parse_model(d, ch, verbose=True): # model_dict, input_channels(3) m = getattr(torch.nn, m[3:]) if "nn." in m else globals()[m] # get module for j, a in enumerate(args): if isinstance(a, str): - with contextlib.suppress(ValueError): + try: args[j] = locals()[a] if a in locals() else ast.literal_eval(a) + except ValueError: + pass n = n_ = max(round(n * depth), 1) if n > 1 else n # depth gain if m in { @@ -1058,10 +1061,10 @@ def parse_model(d, ch, verbose=True): # model_dict, input_channels(3) m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args) # module t = str(m)[8:-2].replace("__main__.", "") # module type - m.np = sum(x.numel() for x in m_.parameters()) # number params + m_.np = sum(x.numel() for x in m_.parameters()) # number params m_.i, m_.f, m_.type = i, f, t # attach index, 'from' index, type if verbose: - LOGGER.info(f"{i:>3}{str(f):>20}{n_:>3}{m.np:10.0f} {t:<45}{str(args):<30}") # print + LOGGER.info(f"{i:>3}{str(f):>20}{n_:>3}{m_.np:10.0f} {t:<45}{str(args):<30}") # print save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist layers.append(m_) if i == 0: @@ -1072,8 +1075,6 @@ def parse_model(d, ch, verbose=True): # model_dict, input_channels(3) def yaml_model_load(path): """Load a YOLOv8 model from a YAML file.""" - import re - path = Path(path) if path.stem in (f"yolov{d}{x}6" for x in "nsmlx" for d in (5, 8)): new_stem = re.sub(r"(\d+)([nslmx])6(.+)?$", r"\1\2-p6\3", path.stem) @@ -1100,11 +1101,10 @@ def guess_model_scale(model_path): Returns: (str): The size character of the model's scale, which can be n, s, m, l, or x. 
""" - with contextlib.suppress(AttributeError): - import re - + try: return re.search(r"yolo[v]?\d+([nslmx])", Path(model_path).stem).group(1) # n, s, m, l, or x - return "" + except AttributeError: + return "" def guess_model_task(model): @@ -1137,17 +1137,23 @@ def guess_model_task(model): # Guess from model cfg if isinstance(model, dict): - with contextlib.suppress(Exception): + try: return cfg2task(model) + except: # noqa E722 + pass # Guess from PyTorch model if isinstance(model, nn.Module): # PyTorch model for x in "model.args", "model.model.args", "model.model.model.args": - with contextlib.suppress(Exception): + try: return eval(x)["task"] + except: # noqa E722 + pass for x in "model.yaml", "model.model.yaml", "model.model.model.yaml": - with contextlib.suppress(Exception): + try: return cfg2task(eval(x)) + except: # noqa E722 + pass for m in model.modules(): if isinstance(m, Segment): diff --git a/ultralytics/solutions/ai_gym.py b/ultralytics/solutions/ai_gym.py index 349e46e8f0..26f22d7032 100644 --- a/ultralytics/solutions/ai_gym.py +++ b/ultralytics/solutions/ai_gym.py @@ -1,127 +1,79 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license -import cv2 - -from ultralytics.utils.checks import check_imshow +from ultralytics.solutions.solutions import BaseSolution # Import a parent class from ultralytics.utils.plotting import Annotator -class AIGym: +class AIGym(BaseSolution): """A class to manage the gym steps of people in a real-time video stream based on their poses.""" - def __init__( - self, - kpts_to_check, - line_thickness=2, - view_img=False, - pose_up_angle=145.0, - pose_down_angle=90.0, - pose_type="pullup", - ): + def __init__(self, **kwargs): + """Initialization function for AiGYM class, a child class of BaseSolution class, can be used for workouts + monitoring. """ - Initializes the AIGym class with the specified parameters. - - Args: - kpts_to_check (list): Indices of keypoints to check. - line_thickness (int, optional): Thickness of the lines drawn. Defaults to 2. - view_img (bool, optional): Flag to display the image. Defaults to False. - pose_up_angle (float, optional): Angle threshold for the 'up' pose. Defaults to 145.0. - pose_down_angle (float, optional): Angle threshold for the 'down' pose. Defaults to 90.0. - pose_type (str, optional): Type of pose to detect ('pullup', 'pushup', 'abworkout'). Defaults to "pullup". 
+ # Check if the model name ends with '-pose' + if "model" in kwargs and "-pose" not in kwargs["model"]: + kwargs["model"] = "yolo11n-pose.pt" + elif "model" not in kwargs: + kwargs["model"] = "yolo11n-pose.pt" + + super().__init__(**kwargs) + self.count = [] # List for counts, necessary where there are multiple objects in frame + self.angle = [] # List for angle, necessary where there are multiple objects in frame + self.stage = [] # List for stage, necessary where there are multiple objects in frame + + # Extract details from CFG single time for usage later + self.initial_stage = None + self.up_angle = float(self.CFG["up_angle"]) # Pose up predefined angle to consider up pose + self.down_angle = float(self.CFG["down_angle"]) # Pose down predefined angle to consider down pose + self.kpts = self.CFG["kpts"] # User selected kpts of workouts storage for further usage + self.lw = self.CFG["line_width"] # Store line_width for usage + + def monitor(self, im0): """ - # Image and line thickness - self.im0 = None - self.tf = line_thickness - - # Keypoints and count information - self.keypoints = None - self.poseup_angle = pose_up_angle - self.posedown_angle = pose_down_angle - self.threshold = 0.001 - - # Store stage, count and angle information - self.angle = None - self.count = None - self.stage = None - self.pose_type = pose_type - self.kpts_to_check = kpts_to_check - - # Visual Information - self.view_img = view_img - self.annotator = None - - # Check if environment supports imshow - self.env_check = check_imshow(warn=True) - self.count = [] - self.angle = [] - self.stage = [] - - def start_counting(self, im0, results): - """ - Function used to count the gym steps. + Monitor the workouts using Ultralytics YOLOv8 Pose Model: https://docs.ultralytics.com/tasks/pose/. Args: - im0 (ndarray): Current frame from the video stream. - results (list): Pose estimation data. 
+            im0 (ndarray): The input image that will be used for processing. +        Returns: +            im0 (ndarray): The processed image for further use. """ -        self.im0 = im0 - -        if not len(results[0]): -            return self.im0 - -        if len(results[0]) > len(self.count): -            new_human = len(results[0]) - len(self.count) -            self.count += [0] * new_human -            self.angle += [0] * new_human -            self.stage += ["-"] * new_human - -        self.keypoints = results[0].keypoints.data -        self.annotator = Annotator(im0, line_width=self.tf) - -        for ind, k in enumerate(reversed(self.keypoints)): -            # Estimate angle and draw specific points based on pose type -            if self.pose_type in {"pushup", "pullup", "abworkout", "squat"}: -                self.angle[ind] = self.annotator.estimate_pose_angle( -                    k[int(self.kpts_to_check[0])].cpu(), -                    k[int(self.kpts_to_check[1])].cpu(), -                    k[int(self.kpts_to_check[2])].cpu(), -                ) -                self.im0 = self.annotator.draw_specific_points(k, self.kpts_to_check, shape=(640, 640), radius=10) - -                # Check and update pose stages and counts based on angle -                if self.pose_type in {"abworkout", "pullup"}: -                    if self.angle[ind] > self.poseup_angle: -                        self.stage[ind] = "down" -                    if self.angle[ind] < self.posedown_angle and self.stage[ind] == "down": -                        self.stage[ind] = "up" -                        self.count[ind] += 1 - -                elif self.pose_type in {"pushup", "squat"}: -                    if self.angle[ind] > self.poseup_angle: -                        self.stage[ind] = "up" -                    if self.angle[ind] < self.posedown_angle and self.stage[ind] == "up": -                        self.stage[ind] = "down" -                        self.count[ind] += 1 +        # Extract tracks +        tracks = self.model.track(source=im0, persist=True, classes=self.CFG["classes"])[0] + +        if tracks.boxes.id is not None: +            # Extract and check keypoints +            if len(tracks) > len(self.count): +                new_human = len(tracks) - len(self.count) +                self.angle += [0] * new_human +                self.count += [0] * new_human +                self.stage += ["-"] * new_human + +            # Initialize annotator +            self.annotator = Annotator(im0, line_width=self.lw) + +            # Enumerate over keypoints +            for ind, k in enumerate(reversed(tracks.keypoints.data)): +                # Get keypoints and estimate the angle +                kpts = [k[int(self.kpts[i])].cpu() for i in range(3)] +                self.angle[ind] = self.annotator.estimate_pose_angle(*kpts) +                im0 = self.annotator.draw_specific_points(k, self.kpts, radius=self.lw * 3) + +                # Determine stage and count logic based on angle thresholds +                if self.angle[ind] < self.down_angle: +                    if self.stage[ind] == "up": +                        self.count[ind] += 1 +                    self.stage[ind] = "down" +                elif self.angle[ind] > self.up_angle: +                    self.stage[ind] = "up" +                # Display angle, count, and stage text self.annotator.plot_angle_and_count_and_stage( -                    angle_text=self.angle[ind], -                    count_text=self.count[ind], -                    stage_text=self.stage[ind], -                    center_kpt=k[int(self.kpts_to_check[1])], +                    angle_text=self.angle[ind],  # angle text for display +                    count_text=self.count[ind],  # count text for workouts +                    stage_text=self.stage[ind],  # stage position text +                    center_kpt=k[int(self.kpts[1])],  # center keypoint for display ) -            # Draw keypoints -            self.annotator.kpts(k, shape=(640, 640), radius=1, kpt_line=True) - -        # Display the image if environment supports it and view_img is True -        if self.env_check and self.view_img: -            cv2.imshow("Ultralytics YOLOv8 AI GYM", self.im0) -            if cv2.waitKey(1) & 0xFF == ord("q"): -                return - -        return self.im0 - - -if __name__ == "__main__": -    kpts_to_check = [0, 1, 2]  # example keypoints -    aigym = AIGym(kpts_to_check) +        self.display_output(im0)  # Display output image, if the environment supports display +        return im0  # return the processed image for writing or further use diff --git a/ultralytics/solutions/analytics.py 
b/ultralytics/solutions/analytics.py index c299009778..ade3431bf1 100644 --- a/ultralytics/solutions/analytics.py +++ b/ultralytics/solutions/analytics.py @@ -1,6 +1,5 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license -import warnings from itertools import cycle import cv2 @@ -9,299 +8,187 @@ import numpy as np from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas from matplotlib.figure import Figure +from ultralytics.solutions.solutions import BaseSolution # Import a parent class -class Analytics: + +class Analytics(BaseSolution): """A class to create and update various types of charts (line, bar, pie, area) for visual analytics.""" - def __init__( - self, - type, - writer, - im0_shape, - title="ultralytics", - x_label="x", - y_label="y", - bg_color="white", - fg_color="black", - line_color="yellow", - line_width=2, - points_width=10, - fontsize=13, - view_img=False, - save_img=True, - max_points=50, - ): - """ - Initialize the Analytics class with various chart types. + def __init__(self, **kwargs): + """Initialize the Analytics class with various chart types.""" + super().__init__(**kwargs) - Args: - type (str): Type of chart to initialize ('line', 'bar', 'pie', or 'area'). - writer (object): Video writer object to save the frames. - im0_shape (tuple): Shape of the input image (width, height). - title (str): Title of the chart. - x_label (str): Label for the x-axis. - y_label (str): Label for the y-axis. - bg_color (str): Background color of the chart. - fg_color (str): Foreground (text) color of the chart. - line_color (str): Line color for line charts. - line_width (int): Width of the lines in line charts. - points_width (int): Width of line points highlighter - fontsize (int): Font size for chart text. - view_img (bool): Whether to display the image. - save_img (bool): Whether to save the image. - max_points (int): Specifies when to remove the oldest points in a graph for multiple lines. 
- """ - self.bg_color = bg_color - self.fg_color = fg_color - self.view_img = view_img - self.save_img = save_img - self.title = title - self.writer = writer - self.max_points = max_points - self.line_color = line_color - self.x_label = x_label - self.y_label = y_label - self.points_width = points_width - self.line_width = line_width - self.fontsize = fontsize + self.type = self.CFG["analytics_type"] # extract type of analytics + self.x_label = "Classes" if self.type in {"bar", "pie"} else "Frame#" + self.y_label = "Total Counts" + + # Predefined data + self.bg_color = "#00F344" # background color of frame + self.fg_color = "#111E68" # foreground color of frame + self.title = "Ultralytics Solutions" # window name + self.max_points = 45 # maximum points to be drawn on window + self.fontsize = 25 # text font size for display + figsize = (19.2, 10.8) # Output image size of 1920 * 1080 pixels + self.color_cycle = cycle(["#DD00BA", "#042AFF", "#FF4447", "#7D24FF", "#BD00FF"]) - # Set figure size based on image shape - figsize = (im0_shape[0] / 100, im0_shape[1] / 100) + self.total_counts = 0 # stores total counts, used for the line chart + self.clswise_count = {} # dictionary for classwise counts - if type in {"line", "area"}: - # Initialize line or area plot + # Set up line or area chart + if self.type in {"line", "area"}: self.lines = {} self.fig = Figure(facecolor=self.bg_color, figsize=figsize) - self.canvas = FigureCanvas(self.fig) + self.canvas = FigureCanvas(self.fig) # Set common axis properties self.ax = self.fig.add_subplot(111, facecolor=self.bg_color) - if type == "line": - (self.line,) = self.ax.plot([], [], color=self.line_color, linewidth=self.line_width) - - elif type in {"bar", "pie"}: + if self.type == "line": + (self.line,) = self.ax.plot([], [], color="cyan", linewidth=self.line_width) + elif self.type in {"bar", "pie"}: # Initialize bar or pie plot self.fig, self.ax = plt.subplots(figsize=figsize, facecolor=self.bg_color) + self.canvas = FigureCanvas(self.fig) # Set common axis properties self.ax.set_facecolor(self.bg_color) - color_palette = [ - (31, 119, 180), - (255, 127, 14), - (44, 160, 44), - (214, 39, 40), - (148, 103, 189), - (140, 86, 75), - (227, 119, 194), - (127, 127, 127), - (188, 189, 34), - (23, 190, 207), - ] - self.color_palette = [(r / 255, g / 255, b / 255, 1) for r, g, b in color_palette] - self.color_cycle = cycle(self.color_palette) self.color_mapping = {} + self.ax.axis("equal") if self.type == "pie" else None # Ensure pie chart is circular - # Ensure pie chart is circular - self.ax.axis("equal") if type == "pie" else None - - # Set common axis properties - self.ax.set_title(self.title, color=self.fg_color, fontsize=self.fontsize) - self.ax.set_xlabel(x_label, color=self.fg_color, fontsize=self.fontsize - 3) - self.ax.set_ylabel(y_label, color=self.fg_color, fontsize=self.fontsize - 3) - self.ax.tick_params(axis="both", colors=self.fg_color) + def process_data(self, im0, frame_number): + """ + Process the image data and run object tracking. - def update_area(self, frame_number, counts_dict): + Args: + im0 (ndarray): Input image for processing. + frame_number (int): Video frame number, used for plotting the data.
+ """ + self.extract_tracks(im0) # Extract tracks + + if self.type == "line": + for box in self.boxes: + self.total_counts += 1 + im0 = self.update_graph(frame_number=frame_number) + self.total_counts = 0 + elif self.type == "pie" or self.type == "bar" or self.type == "area": + self.clswise_count = {} + for box, cls in zip(self.boxes, self.clss): + if self.names[int(cls)] in self.clswise_count: + self.clswise_count[self.names[int(cls)]] += 1 + else: + self.clswise_count[self.names[int(cls)]] = 1 + im0 = self.update_graph(frame_number=frame_number, count_dict=self.clswise_count, plot=self.type) + else: + raise ModuleNotFoundError(f"{self.type} chart is not supported ❌") + return im0 + + def update_graph(self, frame_number, count_dict=None, plot="line"): """ - Update the area graph with new data for multiple classes. + Update the graph (line or area) with new data for single or multiple classes. Args: frame_number (int): The current frame number. - counts_dict (dict): Dictionary with class names as keys and counts as values. + count_dict (dict, optional): Dictionary with class names as keys and counts as values for multiple classes. + If None, updates a single line graph. + plot (str): Type of the plot i.e. line, bar or area. """ - x_data = np.array([]) - y_data_dict = {key: np.array([]) for key in counts_dict.keys()} - - if self.ax.lines: - x_data = self.ax.lines[0].get_xdata() - for line, key in zip(self.ax.lines, counts_dict.keys()): - y_data_dict[key] = line.get_ydata() - - x_data = np.append(x_data, float(frame_number)) - max_length = len(x_data) - - for key in counts_dict.keys(): - y_data_dict[key] = np.append(y_data_dict[key], float(counts_dict[key])) - if len(y_data_dict[key]) < max_length: - y_data_dict[key] = np.pad(y_data_dict[key], (0, max_length - len(y_data_dict[key])), "constant") - - # Remove the oldest points if the number of points exceeds max_points - if len(x_data) > self.max_points: - x_data = x_data[1:] - for key in counts_dict.keys(): - y_data_dict[key] = y_data_dict[key][1:] - - self.ax.clear() - - colors = ["#E1FF25", "#0BDBEB", "#FF64DA", "#111F68", "#042AFF"] - color_cycle = cycle(colors) - - for key, y_data in y_data_dict.items(): - color = next(color_cycle) - self.ax.fill_between(x_data, y_data, color=color, alpha=0.6) - self.ax.plot( - x_data, - y_data, - color=color, - linewidth=self.line_width, - marker="o", - markersize=self.points_width, - label=f"{key} Data Points", - ) - + if count_dict is None: + # Single line update + x_data = np.append(self.line.get_xdata(), float(frame_number)) + y_data = np.append(self.line.get_ydata(), float(self.total_counts)) + + if len(x_data) > self.max_points: + x_data, y_data = x_data[-self.max_points :], y_data[-self.max_points :] + + self.line.set_data(x_data, y_data) + self.line.set_label("Counts") + self.line.set_color("#7b0068") # Pink color + self.line.set_marker("*") + self.line.set_markersize(self.line_width * 5) + else: + labels = list(count_dict.keys()) + counts = list(count_dict.values()) + if plot == "area": + color_cycle = cycle(["#DD00BA", "#042AFF", "#FF4447", "#7D24FF", "#BD00FF"]) + # Multiple lines or area update + x_data = self.ax.lines[0].get_xdata() if self.ax.lines else np.array([]) + y_data_dict = {key: np.array([]) for key in count_dict.keys()} + if self.ax.lines: + for line, key in zip(self.ax.lines, count_dict.keys()): + y_data_dict[key] = line.get_ydata() + + x_data = np.append(x_data, float(frame_number)) + max_length = len(x_data) + for key in count_dict.keys(): + y_data_dict[key] = 
np.append(y_data_dict[key], float(count_dict[key])) + if len(y_data_dict[key]) < max_length: + y_data_dict[key] = np.pad(y_data_dict[key], (0, max_length - len(y_data_dict[key])), "constant") + if len(x_data) > self.max_points: + x_data = x_data[1:] + for key in count_dict.keys(): + y_data_dict[key] = y_data_dict[key][1:] + + self.ax.clear() + for key, y_data in y_data_dict.items(): + color = next(color_cycle) + self.ax.fill_between(x_data, y_data, color=color, alpha=0.7) + self.ax.plot( + x_data, + y_data, + color=color, + linewidth=self.line_width, + marker="o", + markersize=self.line_width * 5, + label=f"{key} Data Points", + ) + if plot == "bar": + self.ax.clear() # clear bar data + for label in labels: # Map labels to colors + if label not in self.color_mapping: + self.color_mapping[label] = next(self.color_cycle) + colors = [self.color_mapping[label] for label in labels] + bars = self.ax.bar(labels, counts, color=colors) + for bar, count in zip(bars, counts): + self.ax.text( + bar.get_x() + bar.get_width() / 2, + bar.get_height(), + str(count), + ha="center", + va="bottom", + color=self.fg_color, + ) + # Create the legend using labels from the bars + for bar, label in zip(bars, labels): + bar.set_label(label) # Assign label to each bar + self.ax.legend(loc="upper left", fontsize=13, facecolor=self.fg_color, edgecolor=self.fg_color) + if plot == "pie": + total = sum(counts) + percentages = [size / total * 100 for size in counts] + start_angle = 90 + self.ax.clear() + + # Create pie chart and create legend labels with percentages + wedges, autotexts = self.ax.pie( + counts, labels=labels, startangle=start_angle, textprops={"color": self.fg_color}, autopct=None + ) + legend_labels = [f"{label} ({percentage:.1f}%)" for label, percentage in zip(labels, percentages)] + + # Assign the legend using the wedges and manually created labels + self.ax.legend(wedges, legend_labels, title="Classes", loc="center left", bbox_to_anchor=(1, 0, 0.5, 1)) + self.fig.subplots_adjust(left=0.1, right=0.75) # Adjust layout to fit the legend + + # Common plot settings + self.ax.set_facecolor("#f0f0f0") # Set to light gray or any other color you like self.ax.set_title(self.title, color=self.fg_color, fontsize=self.fontsize) self.ax.set_xlabel(self.x_label, color=self.fg_color, fontsize=self.fontsize - 3) self.ax.set_ylabel(self.y_label, color=self.fg_color, fontsize=self.fontsize - 3) - legend = self.ax.legend(loc="upper left", fontsize=13, facecolor=self.bg_color, edgecolor=self.fg_color) - # Set legend text color + # Add and format legend + legend = self.ax.legend(loc="upper left", fontsize=13, facecolor=self.bg_color, edgecolor=self.bg_color) for text in legend.get_texts(): text.set_color(self.fg_color) - self.canvas.draw() - im0 = np.array(self.canvas.renderer.buffer_rgba()) - self.write_and_display(im0) - - def update_line(self, frame_number, total_counts): - """ - Update the line graph with new data. - - Args: - frame_number (int): The current frame number. - total_counts (int): The total counts to plot. 
- """ - # Update line graph data - x_data = self.line.get_xdata() - y_data = self.line.get_ydata() - x_data = np.append(x_data, float(frame_number)) - y_data = np.append(y_data, float(total_counts)) - self.line.set_data(x_data, y_data) + # Redraw graph, update view, capture, and display the updated plot self.ax.relim() self.ax.autoscale_view() self.canvas.draw() im0 = np.array(self.canvas.renderer.buffer_rgba()) - self.write_and_display(im0) - - def update_multiple_lines(self, counts_dict, labels_list, frame_number): - """ - Update the line graph with multiple classes. - - Args: - counts_dict (int): Dictionary include each class counts. - labels_list (int): list include each classes names. - frame_number (int): The current frame number. - """ - warnings.warn("Display is not supported for multiple lines, output will be stored normally!") - for obj in labels_list: - if obj not in self.lines: - (line,) = self.ax.plot([], [], label=obj, marker="o", markersize=self.points_width) - self.lines[obj] = line - - x_data = self.lines[obj].get_xdata() - y_data = self.lines[obj].get_ydata() - - # Remove the initial point if the number of points exceeds max_points - if len(x_data) >= self.max_points: - x_data = np.delete(x_data, 0) - y_data = np.delete(y_data, 0) - - x_data = np.append(x_data, float(frame_number)) # Ensure frame_number is converted to float - y_data = np.append(y_data, float(counts_dict.get(obj, 0))) # Ensure total_count is converted to float - self.lines[obj].set_data(x_data, y_data) - - self.ax.relim() - self.ax.autoscale_view() - self.ax.legend() - self.canvas.draw() - - im0 = np.array(self.canvas.renderer.buffer_rgba()) - self.view_img = False # for multiple line view_img not supported yet, coming soon! - self.write_and_display(im0) - - def write_and_display(self, im0): - """ - Write and display the line graph - Args: - im0 (ndarray): Image for processing. - """ im0 = cv2.cvtColor(im0[:, :, :3], cv2.COLOR_RGBA2BGR) - cv2.imshow(self.title, im0) if self.view_img else None - self.writer.write(im0) if self.save_img else None - - def update_bar(self, count_dict): - """ - Update the bar graph with new data. - - Args: - count_dict (dict): Dictionary containing the count data to plot. - """ - # Update bar graph data - self.ax.clear() - self.ax.set_facecolor(self.bg_color) - labels = list(count_dict.keys()) - counts = list(count_dict.values()) - - # Map labels to colors - for label in labels: - if label not in self.color_mapping: - self.color_mapping[label] = next(self.color_cycle) - - colors = [self.color_mapping[label] for label in labels] - - bars = self.ax.bar(labels, counts, color=colors) - for bar, count in zip(bars, counts): - self.ax.text( - bar.get_x() + bar.get_width() / 2, - bar.get_height(), - str(count), - ha="center", - va="bottom", - color=self.fg_color, - ) - - # Display and save the updated graph - canvas = FigureCanvas(self.fig) - canvas.draw() - buf = canvas.buffer_rgba() - im0 = np.asarray(buf) - self.write_and_display(im0) - - def update_pie(self, classes_dict): - """ - Update the pie chart with new data. - - Args: - classes_dict (dict): Dictionary containing the class data to plot. 
- """ - # Update pie chart data - labels = list(classes_dict.keys()) - sizes = list(classes_dict.values()) - total = sum(sizes) - percentages = [size / total * 100 for size in sizes] - start_angle = 90 - self.ax.clear() - - # Create pie chart without labels inside the slices - wedges, autotexts = self.ax.pie(sizes, autopct=None, startangle=start_angle, textprops={"color": self.fg_color}) - - # Construct legend labels with percentages - legend_labels = [f"{label} ({percentage:.1f}%)" for label, percentage in zip(labels, percentages)] - self.ax.legend(wedges, legend_labels, title="Classes", loc="center left", bbox_to_anchor=(1, 0, 0.5, 1)) - - # Adjust layout to fit the legend - self.fig.tight_layout() - self.fig.subplots_adjust(left=0.1, right=0.75) - - # Display and save the updated chart - im0 = self.fig.canvas.draw() - im0 = np.array(self.fig.canvas.renderer.buffer_rgba()) - self.write_and_display(im0) - + self.display_output(im0) -if __name__ == "__main__": - Analytics("line", writer=None, im0_shape=None) + return im0 # Return the image diff --git a/ultralytics/solutions/heatmap.py b/ultralytics/solutions/heatmap.py index 728b167bc8..30d1817d76 100644 --- a/ultralytics/solutions/heatmap.py +++ b/ultralytics/solutions/heatmap.py @@ -1,259 +1,93 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license -from collections import defaultdict - import cv2 import numpy as np -from ultralytics.utils.checks import check_imshow, check_requirements +from ultralytics.solutions.object_counter import ObjectCounter # Import object counter class from ultralytics.utils.plotting import Annotator -check_requirements("shapely>=2.0.0") - -from shapely.geometry import LineString, Point, Polygon - -class Heatmap: +class Heatmap(ObjectCounter): """A class to draw heatmaps in real-time video stream based on their tracks.""" - def __init__( - self, - names, - imw=0, - imh=0, - colormap=cv2.COLORMAP_JET, - heatmap_alpha=0.5, - view_img=False, - view_in_counts=True, - view_out_counts=True, - count_reg_pts=None, - count_txt_color=(0, 0, 0), - count_bg_color=(255, 255, 255), - count_reg_color=(255, 0, 255), - region_thickness=5, - line_dist_thresh=15, - line_thickness=2, - decay_factor=0.99, - shape="circle", - ): - """Initializes the heatmap class with default values for Visual, Image, track, count and heatmap parameters.""" - # Visual information - self.annotator = None - self.view_img = view_img - self.shape = shape - - self.initialized = False - self.names = names # Classes names - - # Image information - self.imw = imw - self.imh = imh - self.im0 = None - self.tf = line_thickness - self.view_in_counts = view_in_counts - self.view_out_counts = view_out_counts - - # Heatmap colormap and heatmap np array - self.colormap = colormap - self.heatmap = None - self.heatmap_alpha = heatmap_alpha - - # Predict/track information - self.boxes = [] - self.track_ids = [] - self.clss = [] - self.track_history = defaultdict(list) - - # Region & Line Information - self.counting_region = None - self.line_dist_thresh = line_dist_thresh - self.region_thickness = region_thickness - self.region_color = count_reg_color - - # Object Counting Information - self.in_counts = 0 - self.out_counts = 0 - self.count_ids = [] - self.class_wise_count = {} - self.count_txt_color = count_txt_color - self.count_bg_color = count_bg_color - self.cls_txtdisplay_gap = 50 - - # Decay factor - self.decay_factor = decay_factor + def __init__(self, **kwargs): + """Initializes function for heatmap class with default values.""" + super().__init__(**kwargs) - # Check if 
environment supports imshow - self.env_check = check_imshow(warn=True) + self.initialized = False # bool variable for heatmap initialization + if self.region is not None: # check if user provided the region coordinates + self.initialize_region() - # Region and line selection - self.count_reg_pts = count_reg_pts - print(self.count_reg_pts) - if self.count_reg_pts is not None: - if len(self.count_reg_pts) == 2: - print("Line Counter Initiated.") - self.counting_region = LineString(self.count_reg_pts) - elif len(self.count_reg_pts) >= 3: - print("Polygon Counter Initiated.") - self.counting_region = Polygon(self.count_reg_pts) - else: - print("Invalid Region points provided, region_points must be 2 for lines or >= 3 for polygons.") - print("Using Line Counter Now") - self.counting_region = LineString(self.count_reg_pts) + # store colormap + self.colormap = cv2.COLORMAP_PARULA if self.CFG["colormap"] is None else self.CFG["colormap"] - # Shape of heatmap, if not selected - if self.shape not in {"circle", "rect"}: - print("Unknown shape value provided, 'circle' & 'rect' supported") - print("Using Circular shape now") - self.shape = "circle" - - def extract_results(self, tracks): + def heatmap_effect(self, box): """ - Extracts results from the provided data. + Efficient calculation of heatmap area and effect location for applying colormap. Args: - tracks (list): List of tracks obtained from the object tracking process. - """ - if tracks[0].boxes.id is not None: - self.boxes = tracks[0].boxes.xyxy.cpu() - self.clss = tracks[0].boxes.cls.tolist() - self.track_ids = tracks[0].boxes.id.int().tolist() - - def generate_heatmap(self, im0, tracks): + box (list): Bounding Box coordinates data [x0, y0, x1, y1] """ - Generate heatmap based on tracking data. + x0, y0, x1, y1 = map(int, box) + radius_squared = (min(x1 - x0, y1 - y0) // 2) ** 2 - Args: - im0 (nd array): Image - tracks (list): List of tracks obtained from the object tracking process. - """ - self.im0 = im0 + # Create a meshgrid with region of interest (ROI) for vectorized distance calculations + xv, yv = np.meshgrid(np.arange(x0, x1), np.arange(y0, y1)) - # Initialize heatmap only once - if not self.initialized: - self.heatmap = np.zeros((int(self.im0.shape[0]), int(self.im0.shape[1])), dtype=np.float32) - self.initialized = True + # Calculate squared distances from the center + dist_squared = (xv - ((x0 + x1) // 2)) ** 2 + (yv - ((y0 + y1) // 2)) ** 2 - self.heatmap *= self.decay_factor # decay factor + # Create a mask of points within the radius + within_radius = dist_squared <= radius_squared - self.extract_results(tracks) - self.annotator = Annotator(self.im0, self.tf, None) + # Update only the values within the bounding box in a single vectorized operation + self.heatmap[y0:y1, x0:x1][within_radius] += 2 - if self.track_ids: - # Draw counting region - if self.count_reg_pts is not None: - self.annotator.draw_region( - reg_pts=self.count_reg_pts, color=self.region_color, thickness=self.region_thickness - ) - - for box, cls, track_id in zip(self.boxes, self.clss, self.track_ids): - # Store class info - if self.names[cls] not in self.class_wise_count: - self.class_wise_count[self.names[cls]] = {"IN": 0, "OUT": 0} - - if self.shape == "circle": - center = (int((box[0] + box[2]) // 2), int((box[1] + box[3]) // 2)) - radius = min(int(box[2]) - int(box[0]), int(box[3]) - int(box[1])) // 2 + def generate_heatmap(self, im0): + """ + Generate heatmap for each frame using Ultralytics. 
- y, x = np.ogrid[0 : self.heatmap.shape[0], 0 : self.heatmap.shape[1]] - mask = (x - center[0]) ** 2 + (y - center[1]) ** 2 <= radius**2 + Args: + im0 (ndarray): Input image array for processing + Returns: + im0 (ndarray): Processed image for further usage + """ + self.heatmap = np.zeros_like(im0, dtype=np.float32) * 0.99 if not self.initialized else self.heatmap + self.initialized = True # Initialize heatmap only once - self.heatmap[int(box[1]) : int(box[3]), int(box[0]) : int(box[2])] += ( - 2 * mask[int(box[1]) : int(box[3]), int(box[0]) : int(box[2])] - ) + self.annotator = Annotator(im0, line_width=self.line_width) # Initialize annotator + self.extract_tracks(im0) # Extract tracks - else: - self.heatmap[int(box[1]) : int(box[3]), int(box[0]) : int(box[2])] += 2 + # Iterate over bounding boxes, track ids and classes index + for box, track_id, cls in zip(self.boxes, self.track_ids, self.clss): + # Draw bounding box and counting region + self.heatmap_effect(box) - # Store tracking hist - track_line = self.track_history[track_id] - track_line.append((float((box[0] + box[2]) / 2), float((box[1] + box[3]) / 2))) - if len(track_line) > 30: - track_line.pop(0) + if self.region is not None: + self.annotator.draw_region(reg_pts=self.region, color=(104, 0, 123), thickness=self.line_width * 2) + self.store_tracking_history(track_id, box) # Store track history + self.store_classwise_counts(cls) # store classwise counts in dict + # Store tracking previous position and perform object counting prev_position = self.track_history[track_id][-2] if len(self.track_history[track_id]) > 1 else None + self.count_objects(self.track_line, box, track_id, prev_position, cls) # Perform object counting - if self.count_reg_pts is not None: - # Count objects in any polygon - if len(self.count_reg_pts) >= 3: - is_inside = self.counting_region.contains(Point(track_line[-1])) - - if prev_position is not None and is_inside and track_id not in self.count_ids: - self.count_ids.append(track_id) - - if (box[0] - prev_position[0]) * (self.counting_region.centroid.x - prev_position[0]) > 0: - self.in_counts += 1 - self.class_wise_count[self.names[cls]]["IN"] += 1 - else: - self.out_counts += 1 - self.class_wise_count[self.names[cls]]["OUT"] += 1 - - # Count objects using line - elif len(self.count_reg_pts) == 2: - if prev_position is not None and track_id not in self.count_ids: - distance = Point(track_line[-1]).distance(self.counting_region) - if distance < self.line_dist_thresh and track_id not in self.count_ids: - self.count_ids.append(track_id) - - if (box[0] - prev_position[0]) * ( - self.counting_region.centroid.x - prev_position[0] - ) > 0: - self.in_counts += 1 - self.class_wise_count[self.names[cls]]["IN"] += 1 - else: - self.out_counts += 1 - self.class_wise_count[self.names[cls]]["OUT"] += 1 - - else: - for box, cls in zip(self.boxes, self.clss): - if self.shape == "circle": - center = (int((box[0] + box[2]) // 2), int((box[1] + box[3]) // 2)) - radius = min(int(box[2]) - int(box[0]), int(box[3]) - int(box[1])) // 2 - - y, x = np.ogrid[0 : self.heatmap.shape[0], 0 : self.heatmap.shape[1]] - mask = (x - center[0]) ** 2 + (y - center[1]) ** 2 <= radius**2 - - self.heatmap[int(box[1]) : int(box[3]), int(box[0]) : int(box[2])] += ( - 2 * mask[int(box[1]) : int(box[3]), int(box[0]) : int(box[2])] - ) - - else: - self.heatmap[int(box[1]) : int(box[3]), int(box[0]) : int(box[2])] += 2 - - if self.count_reg_pts is not None: - labels_dict = {} - - for key, value in self.class_wise_count.items(): - if value["IN"] != 0 or 
value["OUT"] != 0: - if not self.view_in_counts and not self.view_out_counts: - continue - elif not self.view_in_counts: - labels_dict[str.capitalize(key)] = f"OUT {value['OUT']}" - elif not self.view_out_counts: - labels_dict[str.capitalize(key)] = f"IN {value['IN']}" - else: - labels_dict[str.capitalize(key)] = f"IN {value['IN']} OUT {value['OUT']}" - - if labels_dict is not None: - self.annotator.display_analytics(self.im0, labels_dict, self.count_txt_color, self.count_bg_color, 10) + self.display_counts(im0) if self.region is not None else None # Display the counts on the frame # Normalize, apply colormap to heatmap and combine with original image - heatmap_normalized = cv2.normalize(self.heatmap, None, 0, 255, cv2.NORM_MINMAX) - heatmap_colored = cv2.applyColorMap(heatmap_normalized.astype(np.uint8), self.colormap) - self.im0 = cv2.addWeighted(self.im0, 1 - self.heatmap_alpha, heatmap_colored, self.heatmap_alpha, 0) - - if self.env_check and self.view_img: - self.display_frames() - - return self.im0 - - def display_frames(self): - """Display frame.""" - cv2.imshow("Ultralytics Heatmap", self.im0) - - if cv2.waitKey(1) & 0xFF == ord("q"): - return - - -if __name__ == "__main__": - classes_names = {0: "person", 1: "car"} # example class names - heatmap = Heatmap(classes_names) + im0 = ( + im0 + if self.track_data.id is None + else cv2.addWeighted( + im0, + 0.5, + cv2.applyColorMap( + cv2.normalize(self.heatmap, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8), self.colormap + ), + 0.5, + 0, + ) + ) + + self.display_output(im0) # display output with base class function + return im0 # return output image for more usage diff --git a/ultralytics/solutions/object_counter.py b/ultralytics/solutions/object_counter.py index cc7fe45946..7d9bb8c9f4 100644 --- a/ultralytics/solutions/object_counter.py +++ b/ultralytics/solutions/object_counter.py @@ -1,243 +1,131 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license -from collections import defaultdict +from shapely.geometry import LineString, Point -import cv2 - -from ultralytics.utils.checks import check_imshow, check_requirements +from ultralytics.solutions.solutions import BaseSolution # Import a parent class from ultralytics.utils.plotting import Annotator, colors -check_requirements("shapely>=2.0.0") -from shapely.geometry import LineString, Point, Polygon +class ObjectCounter(BaseSolution): + """A class to manage the counting of objects in a real-time video stream based on their tracks.""" + def __init__(self, **kwargs): + """Initialization function for Count class, a child class of BaseSolution class, can be used for counting the + objects. + """ + super().__init__(**kwargs) -class ObjectCounter: - """A class to manage the counting of objects in a real-time video stream based on their tracks.""" + self.in_count = 0 # Counter for objects moving inward + self.out_count = 0 # Counter for objects moving outward + self.counted_ids = [] # List of IDs of objects that have been counted + self.classwise_counts = {} # Dictionary for counts, categorized by object class + self.region_initialized = False # Bool variable for region initialization - def __init__( - self, - names, - reg_pts=None, - line_thickness=2, - view_img=False, - view_in_counts=True, - view_out_counts=True, - draw_tracks=False, - ): + self.show_in = self.CFG["show_in"] + self.show_out = self.CFG["show_out"] + + def count_objects(self, track_line, box, track_id, prev_position, cls): """ - Initializes the ObjectCounter with various tracking and counting parameters. 
+ Helper function to count objects within a polygonal or linear counting region. Args: - names (dict): Dictionary of class names. - reg_pts (list): List of points defining the counting region. - line_thickness (int): Line thickness for bounding boxes. - view_img (bool): Flag to control whether to display the video stream. - view_in_counts (bool): Flag to control whether to display the in counts on the video stream. - view_out_counts (bool): Flag to control whether to display the out counts on the video stream. - draw_tracks (bool): Flag to control whether to draw the object tracks. + track_line (list): Track history of centroid points for the last 30 frames + box (list): Bounding box data of the track in the current frame + track_id (int): Track ID of the object + prev_position (tuple): Position coordinates of the track in the previous frame + cls (int): Class index for class-wise count updates """ - # Mouse events - self.is_drawing = False - self.selected_point = None - - # Region & Line Information - self.reg_pts = [(20, 400), (1260, 400)] if reg_pts is None else reg_pts - self.counting_region = None - - # Image and annotation Information - self.im0 = None - self.tf = line_thickness - self.view_img = view_img - self.view_in_counts = view_in_counts - self.view_out_counts = view_out_counts - - self.names = names # Classes names - self.window_name = "Ultralytics YOLOv8 Object Counter" - - # Object counting Information - self.in_counts = 0 - self.out_counts = 0 - self.count_ids = [] - self.class_wise_count = {} - - # Tracks info - self.track_history = defaultdict(list) - self.draw_tracks = draw_tracks - - # Check if environment supports imshow - self.env_check = check_imshow(warn=True) - - # Initialize counting region - if len(self.reg_pts) == 2: - print("Line Counter Initiated.") - self.counting_region = LineString(self.reg_pts) - elif len(self.reg_pts) >= 3: - print("Polygon Counter Initiated.") - self.counting_region = Polygon(self.reg_pts) - else: - print("Invalid Region points provided, region_points must be 2 for lines or >= 3 for polygons.") - print("Using Line Counter Now") - self.counting_region = LineString(self.reg_pts) - - # Define the counting line segment - self.counting_line_segment = LineString( - [ - (self.reg_pts[0][0], self.reg_pts[0][1]), - (self.reg_pts[1][0], self.reg_pts[1][1]), - ] - ) - - def mouse_event_for_region(self, event, x, y, flags, params): + if prev_position is None or track_id in self.counted_ids: + return + + centroid = self.r_s.centroid + dx = (box[0] - prev_position[0]) * (centroid.x - prev_position[0]) + dy = (box[1] - prev_position[1]) * (centroid.y - prev_position[1]) + + if len(self.region) >= 3 and self.r_s.contains(Point(track_line[-1])): + self.counted_ids.append(track_id) + # For polygon region + if dx > 0: + self.in_count += 1 + self.classwise_counts[self.names[cls]]["IN"] += 1 + else: + self.out_count += 1 + self.classwise_counts[self.names[cls]]["OUT"] += 1 + + elif len(self.region) < 3 and LineString([prev_position, box[:2]]).intersects(self.l_s): + self.counted_ids.append(track_id) + # For linear region + if dx > 0 and dy > 0: + self.in_count += 1 + self.classwise_counts[self.names[cls]]["IN"] += 1 + else: + self.out_count += 1 + self.classwise_counts[self.names[cls]]["OUT"] += 1 + + def store_classwise_counts(self, cls): """ - Handles mouse events for defining and moving the counting region in a real-time video stream. + Initialize class-wise counts if not already present. Args: - event (int): The type of mouse event (e.g., cv2.EVENT_MOUSEMOVE, cv2.EVENT_LBUTTONDOWN, etc.).
- x (int): The x-coordinate of the mouse pointer. - y (int): The y-coordinate of the mouse pointer. - flags (int): Any associated event flags (e.g., cv2.EVENT_FLAG_CTRLKEY, cv2.EVENT_FLAG_SHIFTKEY, etc.). - params (dict): Additional parameters for the function. + cls (int): Class index for classwise count updates + """ + if self.names[cls] not in self.classwise_counts: + self.classwise_counts[self.names[cls]] = {"IN": 0, "OUT": 0} + + def display_counts(self, im0): """ - if event == cv2.EVENT_LBUTTONDOWN: - for i, point in enumerate(self.reg_pts): - if ( - isinstance(point, (tuple, list)) - and len(point) >= 2 - and (abs(x - point[0]) < 10 and abs(y - point[1]) < 10) - ): - self.selected_point = i - self.is_drawing = True - break - - elif event == cv2.EVENT_MOUSEMOVE: - if self.is_drawing and self.selected_point is not None: - self.reg_pts[self.selected_point] = (x, y) - self.counting_region = Polygon(self.reg_pts) - - elif event == cv2.EVENT_LBUTTONUP: - self.is_drawing = False - self.selected_point = None - - def extract_and_process_tracks(self, tracks): - """Extracts and processes tracks for object counting in a video stream.""" - # Annotator Init and region drawing - annotator = Annotator(self.im0, self.tf, self.names) - - # Draw region or line - annotator.draw_region(reg_pts=self.reg_pts, color=(104, 0, 123), thickness=self.tf * 2) - - # Extract tracks for OBB or object detection - track_data = tracks[0].obb or tracks[0].boxes - - if track_data and track_data.id is not None: - boxes = track_data.xyxy.cpu() - clss = track_data.cls.cpu().tolist() - track_ids = track_data.id.int().cpu().tolist() - - # Extract tracks - for box, track_id, cls in zip(boxes, track_ids, clss): - # Draw bounding box - annotator.box_label(box, label=self.names[cls], color=colors(int(track_id), True)) - - # Store class info - if self.names[cls] not in self.class_wise_count: - self.class_wise_count[self.names[cls]] = {"IN": 0, "OUT": 0} - - # Draw Tracks - track_line = self.track_history[track_id] - track_line.append((float((box[0] + box[2]) / 2), float((box[1] + box[3]) / 2))) - if len(track_line) > 30: - track_line.pop(0) - - # Draw track trails - if self.draw_tracks: - annotator.draw_centroid_and_tracks( - track_line, - color=colors(int(track_id), True), - track_thickness=self.tf, - ) - - prev_position = self.track_history[track_id][-2] if len(self.track_history[track_id]) > 1 else None - - # Count objects in any polygon - if len(self.reg_pts) >= 3: - is_inside = self.counting_region.contains(Point(track_line[-1])) - - if prev_position is not None and is_inside and track_id not in self.count_ids: - self.count_ids.append(track_id) - - if (box[0] - prev_position[0]) * (self.counting_region.centroid.x - prev_position[0]) > 0: - self.in_counts += 1 - self.class_wise_count[self.names[cls]]["IN"] += 1 - else: - self.out_counts += 1 - self.class_wise_count[self.names[cls]]["OUT"] += 1 - - # Count objects using line - elif len(self.reg_pts) == 2: - if ( - prev_position is not None - and track_id not in self.count_ids - and LineString([(prev_position[0], prev_position[1]), (box[0], box[1])]).intersects( - self.counting_line_segment - ) - ): - self.count_ids.append(track_id) - - # Determine the direction of movement (IN or OUT) - dx = (box[0] - prev_position[0]) * (self.counting_region.centroid.x - prev_position[0]) - dy = (box[1] - prev_position[1]) * (self.counting_region.centroid.y - prev_position[1]) - if dx > 0 and dy > 0: - self.in_counts += 1 - self.class_wise_count[self.names[cls]]["IN"] += 1 - else: - 
self.out_counts += 1 - self.class_wise_count[self.names[cls]]["OUT"] += 1 - - labels_dict = {} - - for key, value in self.class_wise_count.items(): - if value["IN"] != 0 or value["OUT"] != 0: - if not self.view_in_counts and not self.view_out_counts: - continue - elif not self.view_in_counts: - labels_dict[str.capitalize(key)] = f"OUT {value['OUT']}" - elif not self.view_out_counts: - labels_dict[str.capitalize(key)] = f"IN {value['IN']}" - else: - labels_dict[str.capitalize(key)] = f"IN {value['IN']} OUT {value['OUT']}" + Helper function to display object counts on the frame. + + Args: + im0 (ndarray): The input image or frame + """ + labels_dict = { + str.capitalize(key): f"{'IN ' + str(value['IN']) if self.show_in else ''} " + f"{'OUT ' + str(value['OUT']) if self.show_out else ''}".strip() + for key, value in self.classwise_counts.items() + if value["IN"] != 0 or value["OUT"] != 0 + } if labels_dict: - annotator.display_analytics(self.im0, labels_dict, (104, 31, 17), (255, 255, 255), 10) - - def display_frames(self): - """Displays the current frame with annotations and regions in a window.""" - if self.env_check: - cv2.namedWindow(self.window_name) - if len(self.reg_pts) == 4: # only add mouse event If user drawn region - cv2.setMouseCallback(self.window_name, self.mouse_event_for_region, {"region_points": self.reg_pts}) - cv2.imshow(self.window_name, self.im0) - # Break Window - if cv2.waitKey(1) & 0xFF == ord("q"): - return - - def start_counting(self, im0, tracks): + self.annotator.display_analytics(im0, labels_dict, (104, 31, 17), (255, 255, 255), 10) + + def count(self, im0): """ - Main function to start the object counting process. + Processes input data (frames or object tracks) and updates counts. Args: - im0 (ndarray): Current frame from the video stream. - tracks (list): List of tracks obtained from the object tracking process. 
+ im0 (ndarray): The input image that will be used for processing + Returns + im0 (ndarray): The processed image for more usage """ - self.im0 = im0 # store image - self.extract_and_process_tracks(tracks) # draw region even if no objects + if not self.region_initialized: + self.initialize_region() + self.region_initialized = True + + self.annotator = Annotator(im0, line_width=self.line_width) # Initialize annotator + self.extract_tracks(im0) # Extract tracks + + self.annotator.draw_region( + reg_pts=self.region, color=(104, 0, 123), thickness=self.line_width * 2 + ) # Draw region + + # Iterate over bounding boxes, track ids and classes index + for box, track_id, cls in zip(self.boxes, self.track_ids, self.clss): + # Draw bounding box and counting region + self.annotator.box_label(box, label=self.names[cls], color=colors(track_id, True)) + self.store_tracking_history(track_id, box) # Store track history + self.store_classwise_counts(cls) # store classwise counts in dict + + # Draw tracks of objects + self.annotator.draw_centroid_and_tracks( + self.track_line, color=colors(int(track_id), True), track_thickness=self.line_width + ) - if self.view_img: - self.display_frames() - return self.im0 + # store previous position of track for object counting + prev_position = self.track_history[track_id][-2] if len(self.track_history[track_id]) > 1 else None + self.count_objects(self.track_line, box, track_id, prev_position, cls) # Perform object counting + self.display_counts(im0) # Display the counts on the frame + self.display_output(im0) # display output with base class function -if __name__ == "__main__": - classes_names = {0: "person", 1: "car"} # example class names - ObjectCounter(classes_names) + return im0 # return output image for more usage diff --git a/ultralytics/solutions/queue_management.py b/ultralytics/solutions/queue_management.py index ef60150395..287f337dc5 100644 --- a/ultralytics/solutions/queue_management.py +++ b/ultralytics/solutions/queue_management.py @@ -1,127 +1,64 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license -from collections import defaultdict +from shapely.geometry import Point -import cv2 - -from ultralytics.utils.checks import check_imshow, check_requirements +from ultralytics.solutions.solutions import BaseSolution # Import a parent class from ultralytics.utils.plotting import Annotator, colors -check_requirements("shapely>=2.0.0") - -from shapely.geometry import Point, Polygon - -class QueueManager: +class QueueManager(BaseSolution): """A class to manage the queue in a real-time video stream based on object tracks.""" - def __init__( - self, - names, - reg_pts=None, - line_thickness=2, - view_img=False, - draw_tracks=False, - ): + def __init__(self, **kwargs): + """Initializes the QueueManager with specified parameters for tracking and counting objects.""" + super().__init__(**kwargs) + self.initialize_region() + self.counts = 0 # Queue counts Information + self.rect_color = (255, 255, 255) # Rectangle color + self.region_length = len(self.region) # Store region length for further usage + + def process_queue(self, im0): """ - Initializes the QueueManager with specified parameters for tracking and counting objects. + Main function to start the queue management process. Args: - names (dict): A dictionary mapping class IDs to class names. - reg_pts (list of tuples, optional): Points defining the counting region polygon. Defaults to a predefined - rectangle. - line_thickness (int, optional): Thickness of the annotation lines. Defaults to 2. 
- view_img (bool, optional): Whether to display the image frames. Defaults to False. - draw_tracks (bool, optional): Whether to draw tracks of the objects. Defaults to False. + im0 (ndarray): The input image that will be used for processing + Returns + im0 (ndarray): The processed image for more usage """ - # Region & Line Information - self.reg_pts = reg_pts if reg_pts is not None else [(20, 60), (20, 680), (1120, 680), (1120, 60)] - self.counting_region = ( - Polygon(self.reg_pts) if len(self.reg_pts) >= 3 else Polygon([(20, 60), (20, 680), (1120, 680), (1120, 60)]) - ) - - # annotation Information - self.tf = line_thickness - self.view_img = view_img - - self.names = names # Class names - - # Object counting Information - self.counts = 0 - - # Tracks info - self.track_history = defaultdict(list) - self.draw_tracks = draw_tracks - - # Check if environment supports imshow - self.env_check = check_imshow(warn=True) - - def extract_and_process_tracks(self, tracks, im0): - """Extracts and processes tracks for queue management in a video stream.""" - # Initialize annotator and draw the queue region - annotator = Annotator(im0, self.tf, self.names) self.counts = 0 # Reset counts every frame - if tracks[0].boxes.id is not None: - boxes = tracks[0].boxes.xyxy.cpu() - clss = tracks[0].boxes.cls.cpu().tolist() - track_ids = tracks[0].boxes.id.int().cpu().tolist() + self.annotator = Annotator(im0, line_width=self.line_width) # Initialize annotator + self.extract_tracks(im0) # Extract tracks - # Extract tracks - for box, track_id, cls in zip(boxes, track_ids, clss): - # Draw bounding box - annotator.box_label(box, label=self.names[cls], color=colors(int(track_id), True)) + self.annotator.draw_region( + reg_pts=self.region, color=self.rect_color, thickness=self.line_width * 2 + ) # Draw region - # Update track history - track_line = self.track_history[track_id] - track_line.append((float((box[0] + box[2]) / 2), float((box[1] + box[3]) / 2))) - if len(track_line) > 30: - track_line.pop(0) + for box, track_id, cls in zip(self.boxes, self.track_ids, self.clss): + # Draw bounding box and counting region + self.annotator.box_label(box, label=self.names[cls], color=colors(track_id, True)) + self.store_tracking_history(track_id, box) # Store track history - # Draw track trails if enabled - if self.draw_tracks: - annotator.draw_centroid_and_tracks( - track_line, - color=colors(int(track_id), True), - track_thickness=self.line_thickness, - ) - - prev_position = self.track_history[track_id][-2] if len(self.track_history[track_id]) > 1 else None - - # Check if the object is inside the counting region - if len(self.reg_pts) >= 3: - is_inside = self.counting_region.contains(Point(track_line[-1])) - if prev_position is not None and is_inside: - self.counts += 1 - - # Display queue counts - label = f"Queue Counts : {str(self.counts)}" - if label is not None: - annotator.queue_counts_display( - label, - points=self.reg_pts, - region_color=(255, 0, 255), - txt_color=(104, 31, 17), + # Draw tracks of objects + self.annotator.draw_centroid_and_tracks( + self.track_line, color=colors(int(track_id), True), track_thickness=self.line_width ) - if self.env_check and self.view_img: - annotator.draw_region(reg_pts=self.reg_pts, thickness=self.tf * 2, color=(255, 0, 255)) - cv2.imshow("Ultralytics YOLOv8 Queue Manager", im0) - # Close window on 'q' key press - if cv2.waitKey(1) & 0xFF == ord("q"): - return + # Cache frequently accessed attributes + track_history = self.track_history.get(track_id, []) - def process_queue(self, 
im0, tracks): - """ - Main function to start the queue management process. - - Args: - im0 (ndarray): Current frame from the video stream. - tracks (list): List of tracks obtained from the object tracking process. - """ - self.extract_and_process_tracks(tracks, im0) # Extract and process tracks - return im0 + # store previous position of track and check if the object is inside the counting region + prev_position = track_history[-2] if len(track_history) > 1 else None + if self.region_length >= 3 and prev_position and self.r_s.contains(Point(self.track_line[-1])): + self.counts += 1 + # Display queue counts + self.annotator.queue_counts_display( + f"Queue Counts : {str(self.counts)}", + points=self.region, + region_color=self.rect_color, + txt_color=(104, 31, 17), + ) + self.display_output(im0) # display output with base class function -if __name__ == "__main__": - classes_names = {0: "person", 1: "car"} # example class names - queue_manager = QueueManager(classes_names) + return im0 # return output image for more usage diff --git a/ultralytics/solutions/solutions.py b/ultralytics/solutions/solutions.py new file mode 100644 index 0000000000..71a92becfd --- /dev/null +++ b/ultralytics/solutions/solutions.py @@ -0,0 +1,95 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +from collections import defaultdict +from pathlib import Path + +import cv2 + +from ultralytics import YOLO +from ultralytics.utils import LOGGER, yaml_load +from ultralytics.utils.checks import check_imshow, check_requirements + +check_requirements("shapely>=2.0.0") +from shapely.geometry import LineString, Polygon + +DEFAULT_SOL_CFG_PATH = Path(__file__).resolve().parents[1] / "cfg/solutions/default.yaml" + + +class BaseSolution: + """A class to manage all the Ultralytics Solutions: https://docs.ultralytics.com/solutions/.""" + + def __init__(self, **kwargs): + """ + Base initializer for all solutions. + + Child classes should call this with necessary parameters. + """ + # Load config and update with args + self.CFG = yaml_load(DEFAULT_SOL_CFG_PATH) + self.CFG.update(kwargs) + LOGGER.info(f"Ultralytics Solutions: ✅ {self.CFG}") + + self.region = self.CFG["region"] # Store region data for other classes usage + self.line_width = self.CFG["line_width"] # Store line_width for usage + + # Load Model and store classes names + self.model = YOLO(self.CFG["model"]) + self.names = self.model.names + + # Initialize environment and region setup + self.env_check = check_imshow(warn=True) + self.track_history = defaultdict(list) + + def extract_tracks(self, im0): + """ + Apply object tracking and extract tracks. + + Args: + im0 (ndarray): The input image or frame + """ + self.tracks = self.model.track(source=im0, persist=True, classes=self.CFG["classes"]) + + # Extract tracks for OBB or object detection + self.track_data = self.tracks[0].obb or self.tracks[0].boxes + + if self.track_data and self.track_data.id is not None: + self.boxes = self.track_data.xyxy.cpu() + self.clss = self.track_data.cls.cpu().tolist() + self.track_ids = self.track_data.id.int().cpu().tolist() + else: + LOGGER.warning("WARNING ⚠️ no tracks found!") + self.boxes, self.clss, self.track_ids = [], [], [] + + def store_tracking_history(self, track_id, box): + """ + Store object tracking history. 
+ + Args: + track_id (int): The track ID of the object + box (list): Bounding box coordinates of the object + """ + # Store tracking history + self.track_line = self.track_history[track_id] + self.track_line.append(((box[0] + box[2]) / 2, (box[1] + box[3]) / 2)) + if len(self.track_line) > 30: + self.track_line.pop(0) + + def initialize_region(self): + """Initialize the counting region and line segment based on config.""" + self.region = [(20, 400), (1080, 404), (1080, 360), (20, 360)] if self.region is None else self.region + self.r_s = Polygon(self.region) if len(self.region) >= 3 else LineString(self.region) # region segment + self.l_s = LineString( + [(self.region[0][0], self.region[0][1]), (self.region[1][0], self.region[1][1])] + ) # line segment + + def display_output(self, im0): + """ + Display the results of the processing, which could involve showing frames, printing counts, or saving results. + + Args: + im0 (ndarray): The input image or frame + """ + if self.CFG.get("show") and self.env_check: + cv2.imshow("Ultralytics Solutions", im0) + if cv2.waitKey(1) & 0xFF == ord("q"): + return diff --git a/ultralytics/solutions/speed_estimation.py b/ultralytics/solutions/speed_estimation.py index 70964241fd..decd159b55 100644 --- a/ultralytics/solutions/speed_estimation.py +++ b/ultralytics/solutions/speed_estimation.py @@ -1,116 +1,76 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license -from collections import defaultdict from time import time -import cv2 import numpy as np -from ultralytics.utils.checks import check_imshow +from ultralytics.solutions.solutions import BaseSolution, LineString from ultralytics.utils.plotting import Annotator, colors -class SpeedEstimator: +class SpeedEstimator(BaseSolution): """A class to estimate the speed of objects in a real-time video stream based on their tracks.""" - def __init__(self, names, reg_pts=None, view_img=False, line_thickness=2, spdl_dist_thresh=10): - """ - Initializes the SpeedEstimator with the given parameters. - - Args: - names (dict): Dictionary of class names. - reg_pts (list, optional): List of region points for speed estimation. Defaults to [(20, 400), (1260, 400)]. - view_img (bool, optional): Whether to display the image with annotations. Defaults to False. - line_thickness (int, optional): Thickness of the lines for drawing boxes and tracks. Defaults to 2. - spdl_dist_thresh (int, optional): Distance threshold for speed calculation. Defaults to 10. - """ - # Region information - self.reg_pts = reg_pts if reg_pts is not None else [(20, 400), (1260, 400)] + def __init__(self, **kwargs): + """Initializes the SpeedEstimator with the given parameters.""" + super().__init__(**kwargs) - self.names = names # Classes names + self.initialize_region() # Initialize speed region - # Tracking information - self.trk_history = defaultdict(list) - - self.view_img = view_img # bool for displaying inference - self.tf = line_thickness # line thickness for annotator self.spd = {} # set for speed data self.trkd_ids = [] # list for already speed_estimated and tracked ID's - self.spdl = spdl_dist_thresh # Speed line distance threshold self.trk_pt = {} # set for tracks previous time self.trk_pp = {} # set for tracks previous point - # Check if the environment supports imshow - self.env_check = check_imshow(warn=True) - - def estimate_speed(self, im0, tracks): + def estimate_speed(self, im0): """ Estimates the speed of objects based on tracking data. Args: - im0 (ndarray): Image. - tracks (list): List of tracks obtained from the object tracking process. 
- - Returns: - (ndarray): The image with annotated boxes and tracks. + im0 (ndarray): The input image that will be used for processing. + Returns: + im0 (ndarray): The processed image for further use. """ - if tracks[0].boxes.id is None: - return im0 + self.annotator = Annotator(im0, line_width=self.line_width) # Initialize annotator + self.extract_tracks(im0) # Extract tracks - boxes = tracks[0].boxes.xyxy.cpu() - clss = tracks[0].boxes.cls.cpu().tolist() - t_ids = tracks[0].boxes.id.int().cpu().tolist() - annotator = Annotator(im0, line_width=self.tf) - annotator.draw_region(reg_pts=self.reg_pts, color=(255, 0, 255), thickness=self.tf * 2) + self.annotator.draw_region( + reg_pts=self.region, color=(104, 0, 123), thickness=self.line_width * 2 + ) # Draw region - for box, t_id, cls in zip(boxes, t_ids, clss): - track = self.trk_history[t_id] - bbox_center = (float((box[0] + box[2]) / 2), float((box[1] + box[3]) / 2)) - track.append(bbox_center) + for box, track_id, cls in zip(self.boxes, self.track_ids, self.clss): + self.store_tracking_history(track_id, box) # Store track history - if len(track) > 30: - track.pop(0) + # Initialize previous time and position for the track if not already stored + if track_id not in self.trk_pt: + self.trk_pt[track_id] = 0 + if track_id not in self.trk_pp: + self.trk_pp[track_id] = self.track_line[-1] - trk_pts = np.hstack(track).astype(np.int32).reshape((-1, 1, 2)) + speed_label = f"{int(self.spd[track_id])} km/h" if track_id in self.spd else self.names[int(cls)] + self.annotator.box_label(box, label=speed_label, color=colors(track_id, True)) # Draw bounding box - if t_id not in self.trk_pt: - self.trk_pt[t_id] = 0 + # Draw tracks of objects + self.annotator.draw_centroid_and_tracks( + self.track_line, color=colors(int(track_id), True), track_thickness=self.line_width + ) - speed_label = f"{int(self.spd[t_id])} km/h" if t_id in self.spd else self.names[int(cls)] - bbox_color = colors(int(t_id), True) - - annotator.box_label(box, speed_label, bbox_color) - cv2.polylines(im0, [trk_pts], isClosed=False, color=bbox_color, thickness=self.tf) - cv2.circle(im0, (int(track[-1][0]), int(track[-1][1])), self.tf * 2, bbox_color, -1) - - # Calculation of object speed - if not self.reg_pts[0][0] < track[-1][0] < self.reg_pts[1][0]: - return - if self.reg_pts[1][1] - self.spdl < track[-1][1] < self.reg_pts[1][1] + self.spdl: - direction = "known" - elif self.reg_pts[0][1] - self.spdl < track[-1][1] < self.reg_pts[0][1] + self.spdl: + # Calculate object speed and direction based on region intersection + if LineString([self.trk_pp[track_id], self.track_line[-1]]).intersects(self.l_s): direction = "known" else: direction = "unknown" - if self.trk_pt.get(t_id) != 0 and direction != "unknown" and t_id not in self.trkd_ids: - self.trkd_ids.append(t_id) - - time_difference = time() - self.trk_pt[t_id] + # Perform speed calculation and tracking updates if direction is valid + if direction == "known" and track_id not in self.trkd_ids: + self.trkd_ids.append(track_id) + time_difference = time() - self.trk_pt[track_id] if time_difference > 0: - self.spd[t_id] = np.abs(track[-1][1] - self.trk_pp[t_id][1]) / time_difference - - self.trk_pt[t_id] = time() - self.trk_pp[t_id] = track[-1] - - if self.view_img and self.env_check: - cv2.imshow("Ultralytics Speed Estimation", im0) - if cv2.waitKey(1) & 0xFF == ord("q"): - return + self.spd[track_id] = np.abs(self.track_line[-1][1] - self.trk_pp[track_id][1]) / time_difference - return im0 + self.trk_pt[track_id] = time() + self.trk_pp[track_id]
= self.track_line[-1] + self.display_output(im0) # display output with base class function -if __name__ == "__main__": - names = {0: "person", 1: "car"} # example class names - speed_estimator = SpeedEstimator(names) + return im0 # return output image for more usage diff --git a/ultralytics/utils/__init__.py b/ultralytics/utils/__init__.py index a7ab5d905c..0ae25a8980 100644 --- a/ultralytics/utils/__init__.py +++ b/ultralytics/utils/__init__.py @@ -61,8 +61,8 @@ HELP_MSG = """ from ultralytics import YOLO # Load a model - model = YOLO("yolov8n.yaml") # build a new model from scratch - model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n.yaml") # build a new model from scratch + model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training) # Use the model results = model.train(data="coco8.yaml", epochs=3) # train the model @@ -77,21 +77,21 @@ HELP_MSG = """ yolo TASK MODE ARGS Where TASK (optional) is one of [detect, segment, classify, pose, obb] - MODE (required) is one of [train, val, predict, export, benchmark] + MODE (required) is one of [train, val, predict, export, track, benchmark] ARGS (optional) are any number of custom "arg=value" pairs like "imgsz=320" that override defaults. See all ARGS at https://docs.ultralytics.com/usage/cfg or with "yolo cfg" - Train a detection model for 10 epochs with an initial learning_rate of 0.01 - yolo detect train data=coco8.yaml model=yolov8n.pt epochs=10 lr0=0.01 + yolo detect train data=coco8.yaml model=yolo11n.pt epochs=10 lr0=0.01 - Predict a YouTube video using a pretrained segmentation model at image size 320: - yolo segment predict model=yolov8n-seg.pt source='https://youtu.be/LNwODJXcvt4' imgsz=320 + yolo segment predict model=yolo11n-seg.pt source='https://youtu.be/LNwODJXcvt4' imgsz=320 - Val a pretrained detection model at batch-size 1 and image size 640: - yolo detect val model=yolov8n.pt data=coco8.yaml batch=1 imgsz=640 + yolo detect val model=yolo11n.pt data=coco8.yaml batch=1 imgsz=640 - - Export a YOLOv8n classification model to ONNX format at image size 224 by 128 (no TASK required) - yolo export model=yolov8n-cls.pt format=onnx imgsz=224,128 + - Export a YOLO11n classification model to ONNX format at image size 224 by 128 (no TASK required) + yolo export model=yolo11n-cls.pt format=onnx imgsz=224,128 - Run special commands: yolo help @@ -523,10 +523,11 @@ def read_device_model() -> str: Returns: (str): Model file contents if read successfully or empty string otherwise. """ - with contextlib.suppress(Exception): + try: with open("/proc/device-tree/model") as f: return f.read() - return "" + except: # noqa E722 + return "" def is_ubuntu() -> bool: @@ -536,10 +537,11 @@ def is_ubuntu() -> bool: Returns: (bool): True if OS is Ubuntu, False otherwise. """ - with contextlib.suppress(FileNotFoundError): + try: with open("/etc/os-release") as f: return "ID=ubuntu" in f.read() - return False + except FileNotFoundError: + return False def is_colab(): @@ -569,11 +571,7 @@ def is_jupyter(): Returns: (bool): True if running inside a Jupyter Notebook, False otherwise. """ - with contextlib.suppress(Exception): - from IPython import get_ipython - - return get_ipython() is not None - return False + return "get_ipython" in locals() def is_docker() -> bool: @@ -583,10 +581,11 @@ def is_docker() -> bool: Returns: (bool): True if the script is running inside a Docker container, False otherwise. 
""" - with contextlib.suppress(Exception): + try: with open("/proc/self/cgroup") as f: return "docker" in f.read() - return False + except: # noqa E722 + return False def is_raspberrypi() -> bool: @@ -617,14 +616,15 @@ def is_online() -> bool: Returns: (bool): True if connection is successful, False otherwise. """ - with contextlib.suppress(Exception): + try: assert str(os.getenv("YOLO_OFFLINE", "")).lower() != "true" # check if ENV var YOLO_OFFLINE="True" import socket for dns in ("1.1.1.1", "8.8.8.8"): # check Cloudflare and Google DNS socket.create_connection(address=(dns, 80), timeout=2.0).close() return True - return False + except: # noqa E722 + return False def is_pip_package(filepath: str = __name__) -> bool: @@ -711,9 +711,11 @@ def get_git_origin_url(): (str | None): The origin URL of the git repository or None if not git directory. """ if IS_GIT_DIR: - with contextlib.suppress(subprocess.CalledProcessError): + try: origin = subprocess.check_output(["git", "config", "--get", "remote.origin.url"]) return origin.decode().strip() + except subprocess.CalledProcessError: + return None def get_git_branch(): @@ -724,9 +726,11 @@ def get_git_branch(): (str | None): The current git branch name or None if not a git directory. """ if IS_GIT_DIR: - with contextlib.suppress(subprocess.CalledProcessError): + try: origin = subprocess.check_output(["git", "rev-parse", "--abbrev-ref", "HEAD"]) return origin.decode().strip() + except subprocess.CalledProcessError: + return None def get_default_args(func): @@ -751,9 +755,11 @@ def get_ubuntu_version(): (str): Ubuntu version or None if not an Ubuntu OS. """ if is_ubuntu(): - with contextlib.suppress(FileNotFoundError, AttributeError): + try: with open("/etc/os-release") as f: return re.search(r'VERSION_ID="(\d+\.\d+)"', f.read())[1] + except (FileNotFoundError, AttributeError): + return None def get_user_config_dir(sub_dir="Ultralytics"): @@ -989,55 +995,56 @@ def set_sentry(): Additionally, the function sets custom tags and user information for Sentry events. """ if ( - SETTINGS["sync"] - and RANK in {-1, 0} - and Path(ARGV[0]).name == "yolo" - and not TESTS_RUNNING - and ONLINE - and IS_PIP_PACKAGE - and not IS_GIT_DIR + not SETTINGS["sync"] + or RANK not in {-1, 0} + or Path(ARGV[0]).name != "yolo" + or TESTS_RUNNING + or not ONLINE + or not IS_PIP_PACKAGE + or IS_GIT_DIR ): - # If sentry_sdk package is not installed then return and do not use Sentry - try: - import sentry_sdk # noqa - except ImportError: - return - - def before_send(event, hint): - """ - Modify the event before sending it to Sentry based on specific exception types and messages. + return + # If sentry_sdk package is not installed then return and do not use Sentry + try: + import sentry_sdk # noqa + except ImportError: + return + + def before_send(event, hint): + """ + Modify the event before sending it to Sentry based on specific exception types and messages. - Args: - event (dict): The event dictionary containing information about the error. - hint (dict): A dictionary containing additional information about the error. + Args: + event (dict): The event dictionary containing information about the error. + hint (dict): A dictionary containing additional information about the error. - Returns: - dict: The modified event or None if the event should not be sent to Sentry. 
- """ - if "exc_info" in hint: - exc_type, exc_value, _ = hint["exc_info"] - if exc_type in {KeyboardInterrupt, FileNotFoundError} or "out of memory" in str(exc_value): - return None # do not send event - - event["tags"] = { - "sys_argv": ARGV[0], - "sys_argv_name": Path(ARGV[0]).name, - "install": "git" if IS_GIT_DIR else "pip" if IS_PIP_PACKAGE else "other", - "os": ENVIRONMENT, - } - return event - - sentry_sdk.init( - dsn="https://888e5a0778212e1d0314c37d4b9aae5d@o4504521589325824.ingest.us.sentry.io/4504521592406016", - debug=False, - auto_enabling_integrations=False, - traces_sample_rate=1.0, - release=__version__, - environment="production", # 'dev' or 'production' - before_send=before_send, - ignore_errors=[KeyboardInterrupt, FileNotFoundError], - ) - sentry_sdk.set_user({"id": SETTINGS["uuid"]}) # SHA-256 anonymized UUID hash + Returns: + dict: The modified event or None if the event should not be sent to Sentry. + """ + if "exc_info" in hint: + exc_type, exc_value, _ = hint["exc_info"] + if exc_type in {KeyboardInterrupt, FileNotFoundError} or "out of memory" in str(exc_value): + return None # do not send event + + event["tags"] = { + "sys_argv": ARGV[0], + "sys_argv_name": Path(ARGV[0]).name, + "install": "git" if IS_GIT_DIR else "pip" if IS_PIP_PACKAGE else "other", + "os": ENVIRONMENT, + } + return event + + sentry_sdk.init( + dsn="https://888e5a0778212e1d0314c37d4b9aae5d@o4504521589325824.ingest.us.sentry.io/4504521592406016", + debug=False, + auto_enabling_integrations=False, + traces_sample_rate=1.0, + release=__version__, + environment="production", # 'dev' or 'production' + before_send=before_send, + ignore_errors=[KeyboardInterrupt, FileNotFoundError], + ) + sentry_sdk.set_user({"id": SETTINGS["uuid"]}) # SHA-256 anonymized UUID hash class JSONDict(dict): diff --git a/ultralytics/utils/autobatch.py b/ultralytics/utils/autobatch.py index 2d09c5d894..6a0d9cbc29 100644 --- a/ultralytics/utils/autobatch.py +++ b/ultralytics/utils/autobatch.py @@ -1,6 +1,7 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license """Functions for estimating the best YOLO batch size to use a fraction of the available CUDA memory in PyTorch.""" +import os from copy import deepcopy import numpy as np @@ -57,7 +58,7 @@ def autobatch(model, imgsz=640, fraction=0.60, batch_size=DEFAULT_CFG.batch): # Inspect CUDA memory gb = 1 << 30 # bytes to GiB (1024 ** 3) - d = str(device).upper() # 'CUDA:0' + d = f"CUDA:{os.getenv('CUDA_VISIBLE_DEVICES', '0').strip()[0]}" # 'CUDA:0' properties = torch.cuda.get_device_properties(device) # device properties t = properties.total_memory / gb # GiB total r = torch.cuda.memory_reserved(device) / gb # GiB reserved @@ -66,10 +67,10 @@ def autobatch(model, imgsz=640, fraction=0.60, batch_size=DEFAULT_CFG.batch): LOGGER.info(f"{prefix}{d} ({properties.name}) {t:.2f}G total, {r:.2f}G reserved, {a:.2f}G allocated, {f:.2f}G free") # Profile batch sizes - batch_sizes = [1, 2, 4, 8, 16] + batch_sizes = [1, 2, 4, 8, 16] if t < 16 else [1, 2, 4, 8, 16, 32, 64] try: img = [torch.empty(b, 3, imgsz, imgsz) for b in batch_sizes] - results = profile(img, model, n=3, device=device) + results = profile(img, model, n=1, device=device) # Fit a solution y = [x[2] for x in results if x] # memory [2] @@ -89,3 +90,5 @@ def autobatch(model, imgsz=640, fraction=0.60, batch_size=DEFAULT_CFG.batch): except Exception as e: LOGGER.warning(f"{prefix}WARNING ⚠️ error detected: {e}, using default batch-size {batch_size}.") return batch_size + finally: + torch.cuda.empty_cache() diff --git 
a/ultralytics/utils/benchmarks.py b/ultralytics/utils/benchmarks.py index fe6e2a6513..653f48d3a9 100644 --- a/ultralytics/utils/benchmarks.py +++ b/ultralytics/utils/benchmarks.py @@ -47,7 +47,7 @@ from ultralytics.utils.torch_utils import get_cpu_info, select_device def benchmark( - model=WEIGHTS_DIR / "yolov8n.pt", + model=WEIGHTS_DIR / "yolo11n.pt", data=None, imgsz=160, half=False, @@ -76,7 +76,7 @@ def benchmark( Examples: Benchmark a YOLO model with default settings: >>> from ultralytics.utils.benchmarks import benchmark - >>> benchmark(model="yolov8n.pt", imgsz=640) + >>> benchmark(model="yolo11n.pt", imgsz=640) """ import pandas as pd # scope for faster 'import ultralytics' diff --git a/ultralytics/utils/callbacks/clearml.py b/ultralytics/utils/callbacks/clearml.py index e076e55fa7..05c518d0b6 100644 --- a/ultralytics/utils/callbacks/clearml.py +++ b/ultralytics/utils/callbacks/clearml.py @@ -68,9 +68,9 @@ def on_pretrain_routine_start(trainer): PatchedMatplotlib.update_current_task(None) else: task = Task.init( - project_name=trainer.args.project or "YOLOv8", + project_name=trainer.args.project or "Ultralytics", task_name=trainer.args.name, - tags=["YOLOv8"], + tags=["Ultralytics"], output_uri=True, reuse_last_task_id=False, auto_connect_frameworks={"pytorch": False, "matplotlib": False}, diff --git a/ultralytics/utils/callbacks/comet.py b/ultralytics/utils/callbacks/comet.py index 7e90a53863..3a217c3f25 100644 --- a/ultralytics/utils/callbacks/comet.py +++ b/ultralytics/utils/callbacks/comet.py @@ -15,7 +15,7 @@ try: # Ensures certain logging functions only run for supported tasks COMET_SUPPORTED_TASKS = ["detect"] - # Names of plots created by YOLOv8 that are logged to Comet + # Names of plots created by Ultralytics that are logged to Comet EVALUATION_PLOT_NAMES = "F1_curve", "P_curve", "R_curve", "PR_curve", "confusion_matrix" LABEL_PLOT_NAMES = "labels", "labels_correlogram" @@ -31,8 +31,8 @@ def _get_comet_mode(): def _get_comet_model_name(): - """Returns the model name for Comet from the environment variable 'COMET_MODEL_NAME' or defaults to 'YOLOv8'.""" - return os.getenv("COMET_MODEL_NAME", "YOLOv8") + """Returns the model name for Comet from the environment variable COMET_MODEL_NAME or defaults to 'Ultralytics'.""" + return os.getenv("COMET_MODEL_NAME", "Ultralytics") def _get_eval_batch_logging_interval(): @@ -110,7 +110,7 @@ def _fetch_trainer_metadata(trainer): def _scale_bounding_box_to_original_image_shape(box, resized_image_shape, original_image_shape, ratio_pad): """ - YOLOv8 resizes images during training and the label values are normalized based on this resized shape. + YOLO resizes images during training and the label values are normalized based on this resized shape. This function rescales the bounding box labels to the original image shape. 
""" diff --git a/ultralytics/utils/callbacks/mlflow.py b/ultralytics/utils/callbacks/mlflow.py index ef449b6609..8e2d30f903 100644 --- a/ultralytics/utils/callbacks/mlflow.py +++ b/ultralytics/utils/callbacks/mlflow.py @@ -71,7 +71,7 @@ def on_pretrain_routine_end(trainer): mlflow.set_tracking_uri(uri) # Set experiment and run names - experiment_name = os.environ.get("MLFLOW_EXPERIMENT_NAME") or trainer.args.project or "/Shared/YOLOv8" + experiment_name = os.environ.get("MLFLOW_EXPERIMENT_NAME") or trainer.args.project or "/Shared/Ultralytics" run_name = os.environ.get("MLFLOW_RUN") or trainer.args.name mlflow.set_experiment(experiment_name) diff --git a/ultralytics/utils/callbacks/neptune.py b/ultralytics/utils/callbacks/neptune.py index 6be8a821f5..978a445bd2 100644 --- a/ultralytics/utils/callbacks/neptune.py +++ b/ultralytics/utils/callbacks/neptune.py @@ -52,7 +52,11 @@ def on_pretrain_routine_start(trainer): """Callback function called before the training routine starts.""" try: global run - run = neptune.init_run(project=trainer.args.project or "YOLOv8", name=trainer.args.name, tags=["YOLOv8"]) + run = neptune.init_run( + project=trainer.args.project or "Ultralytics", + name=trainer.args.name, + tags=["Ultralytics"], + ) run["Configuration/Hyperparameters"] = {k: "" if v is None else v for k, v in vars(trainer.args).items()} except Exception as e: LOGGER.warning(f"WARNING ⚠️ NeptuneAI installed but not initialized correctly, not logging this run. {e}") diff --git a/ultralytics/utils/callbacks/tensorboard.py b/ultralytics/utils/callbacks/tensorboard.py index 2aa114b53b..f0ff02fa89 100644 --- a/ultralytics/utils/callbacks/tensorboard.py +++ b/ultralytics/utils/callbacks/tensorboard.py @@ -1,6 +1,5 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license -import contextlib from ultralytics.utils import LOGGER, SETTINGS, TESTS_RUNNING, colorstr @@ -45,26 +44,27 @@ def _log_tensorboard_graph(trainer): warnings.simplefilter("ignore", category=torch.jit.TracerWarning) # suppress jit trace warning # Try simple method first (YOLO) - with contextlib.suppress(Exception): + try: trainer.model.eval() # place in .eval() mode to avoid BatchNorm statistics changes WRITER.add_graph(torch.jit.trace(de_parallel(trainer.model), im, strict=False), []) LOGGER.info(f"{PREFIX}model graph visualization added ✅") return - # Fallback to TorchScript export steps (RTDETR) - try: - model = deepcopy(de_parallel(trainer.model)) - model.eval() - model = model.fuse(verbose=False) - for m in model.modules(): - if hasattr(m, "export"): # Detect, RTDETRDecoder (Segment and Pose use Detect base class) - m.export = True - m.format = "torchscript" - model(im) # dry run - WRITER.add_graph(torch.jit.trace(model, im, strict=False), []) - LOGGER.info(f"{PREFIX}model graph visualization added ✅") - except Exception as e: - LOGGER.warning(f"{PREFIX}WARNING ⚠️ TensorBoard graph visualization failure {e}") + except: # noqa E722 + # Fallback to TorchScript export steps (RTDETR) + try: + model = deepcopy(de_parallel(trainer.model)) + model.eval() + model = model.fuse(verbose=False) + for m in model.modules(): + if hasattr(m, "export"): # Detect, RTDETRDecoder (Segment and Pose use Detect base class) + m.export = True + m.format = "torchscript" + model(im) # dry run + WRITER.add_graph(torch.jit.trace(model, im, strict=False), []) + LOGGER.info(f"{PREFIX}model graph visualization added ✅") + except Exception as e: + LOGGER.warning(f"{PREFIX}WARNING ⚠️ TensorBoard graph visualization failure {e}") def on_pretrain_routine_start(trainer): diff --git 
a/ultralytics/utils/callbacks/wb.py b/ultralytics/utils/callbacks/wb.py index 7b69b7a45a..7b6d00cfc3 100644 --- a/ultralytics/utils/callbacks/wb.py +++ b/ultralytics/utils/callbacks/wb.py @@ -109,7 +109,7 @@ def _log_plots(plots, step): def on_pretrain_routine_start(trainer): """Initiate and start project if module is present.""" - wb.run or wb.init(project=trainer.args.project or "YOLOv8", name=trainer.args.name, config=vars(trainer.args)) + wb.run or wb.init(project=trainer.args.project or "Ultralytics", name=trainer.args.name, config=vars(trainer.args)) def on_fit_epoch_end(trainer): diff --git a/ultralytics/utils/checks.py b/ultralytics/utils/checks.py index 85eccf67e3..76455e2329 100644 --- a/ultralytics/utils/checks.py +++ b/ultralytics/utils/checks.py @@ -1,6 +1,5 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license -import contextlib import glob import inspect import math @@ -239,12 +238,14 @@ def check_version( c = parse_version(current) # '1.2.3' -> (1, 2, 3) for r in required.strip(",").split(","): op, version = re.match(r"([^0-9]*)([\d.]+)", r).groups() # split '>=22.04' -> ('>=', '22.04') + if not op: + op = ">=" # assume >= if no op passed v = parse_version(version) # '1.2.3' -> (1, 2, 3) if op == "==" and c != v: result = False elif op == "!=" and c == v: result = False - elif op in {">=", ""} and not (c >= v): # if no constraint passed assume '>=required' + elif op == ">=" and not (c >= v): result = False elif op == "<=" and not (c <= v): result = False @@ -271,11 +272,13 @@ def check_latest_pypi_version(package_name="ultralytics"): Returns: (str): The latest version of the package. """ - with contextlib.suppress(Exception): + try: requests.packages.urllib3.disable_warnings() # Disable the InsecureRequestWarning response = requests.get(f"https://pypi.org/pypi/{package_name}/json", timeout=3) if response.status_code == 200: return response.json()["info"]["version"] + except: # noqa E722 + return None def check_pip_update_available(): @@ -286,7 +289,7 @@ def check_pip_update_available(): (bool): True if an update is available, False otherwise. """ if ONLINE and IS_PIP_PACKAGE: - with contextlib.suppress(Exception): + try: from ultralytics import __version__ latest = check_latest_pypi_version() @@ -296,6 +299,8 @@ def check_pip_update_available(): f"Update with 'pip install -U ultralytics'" ) return True + except: # noqa E722 + pass return False @@ -330,18 +335,19 @@ def check_font(font="Arial.ttf"): return file -def check_python(minimum: str = "3.8.0", hard: bool = True) -> bool: +def check_python(minimum: str = "3.8.0", hard: bool = True, verbose: bool = True) -> bool: """ Check current python version against the required minimum version. Args: minimum (str): Required minimum version of python. hard (bool, optional): If True, raise an AssertionError if the requirement is not met. + verbose (bool, optional): If True, print warning message if requirement is not met. Returns: (bool): Whether the installed Python version meets the minimum constraints. 
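The `check_version` tweak above makes the implicit default explicit: a requirement without an operator (e.g. `"3.8"`) is normalized to `">=3.8"` before comparison. A simplified, tuple-based sketch of that parsing step (not the full function, which also handles names, warnings, and hard failures):

```python
import re


def parse_version(version: str) -> tuple:
    """Convert a version string such as '2.0.1+cpu' into a tuple of up to three integers."""
    return tuple(map(int, re.findall(r"\d+", version)[:3]))


def meets_requirement(current: str, required: str) -> bool:
    """Check 'current' against comma-separated constraints, assuming '>=' when no operator is given."""
    c = parse_version(current)
    for r in required.strip(",").split(","):
        op, version = re.match(r"([^0-9]*)([\d.]+)", r).groups()
        if not op:
            op = ">="  # bare versions like '3.8' are treated as minimums
        v = parse_version(version)
        if op == "==" and c != v:
            return False
        if op == "!=" and c == v:
            return False
        if op == ">=" and not c >= v:
            return False
        if op == "<=" and not c <= v:
            return False
        if op == ">" and not c > v:
            return False
        if op == "<" and not c < v:
            return False
    return True


print(meets_requirement("3.12.4", "3.8"))          # True: bare version read as >=3.8
print(meets_requirement("1.26.0", ">=1.22,<2.0"))  # True: both constraints satisfied
```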
""" - return check_version(PYTHON_VERSION, minimum, name="Python", hard=hard) + return check_version(PYTHON_VERSION, minimum, name="Python", hard=hard, verbose=verbose) @TryExcept() @@ -371,8 +377,6 @@ def check_requirements(requirements=ROOT.parent / "requirements.txt", exclude=() ``` """ prefix = colorstr("red", "bold", "requirements:") - check_python() # check python version - check_torchvision() # check torch-torchvision compatibility if isinstance(requirements, Path): # requirements.txt file file = requirements.resolve() assert file.exists(), f"{prefix} {file} not found, check failed." @@ -454,7 +458,7 @@ def check_torchvision(): ) -def check_suffix(file="yolov8n.pt", suffix=".pt", msg=""): +def check_suffix(file="yolo11n.pt", suffix=".pt", msg=""): """Check file(s) for acceptable suffix.""" if file and suffix: if isinstance(suffix, str): @@ -577,10 +581,12 @@ def check_yolo(verbose=True, device=""): ram = psutil.virtual_memory().total total, used, free = shutil.disk_usage("/") s = f"({os.cpu_count()} CPUs, {ram / gib:.1f} GB RAM, {(total - free) / gib:.1f}/{total / gib:.1f} GB disk)" - with contextlib.suppress(Exception): # clear display if ipython is installed + try: from IPython import display - display.clear_output() + display.clear_output() # clear display if notebook + except ImportError: + pass else: s = "" @@ -593,38 +599,54 @@ def collect_system_info(): import psutil from ultralytics.utils import ENVIRONMENT # scope to avoid circular import - from ultralytics.utils.torch_utils import get_cpu_info + from ultralytics.utils.torch_utils import get_cpu_info, get_gpu_info - ram_info = psutil.virtual_memory().total / (1024**3) # Convert bytes to GB + gib = 1 << 30 # bytes per GiB + cuda = torch and torch.cuda.is_available() check_yolo() - LOGGER.info( - f"\n{'OS':<20}{platform.platform()}\n" - f"{'Environment':<20}{ENVIRONMENT}\n" - f"{'Python':<20}{PYTHON_VERSION}\n" - f"{'Install':<20}{'git' if IS_GIT_DIR else 'pip' if IS_PIP_PACKAGE else 'other'}\n" - f"{'RAM':<20}{ram_info:.2f} GB\n" - f"{'CPU':<20}{get_cpu_info()}\n" - f"{'CUDA':<20}{torch.version.cuda if torch and torch.cuda.is_available() else None}\n" - ) + total, used, free = shutil.disk_usage("/") + + info_dict = { + "OS": platform.platform(), + "Environment": ENVIRONMENT, + "Python": PYTHON_VERSION, + "Install": "git" if IS_GIT_DIR else "pip" if IS_PIP_PACKAGE else "other", + "RAM": f"{psutil.virtual_memory().total / gib:.2f} GB", + "Disk": f"{(total - free) / gib:.1f}/{total / gib:.1f} GB", + "CPU": get_cpu_info(), + "CPU count": os.cpu_count(), + "GPU": get_gpu_info(index=0) if cuda else None, + "GPU count": torch.cuda.device_count() if cuda else None, + "CUDA": torch.version.cuda if cuda else None, + } + LOGGER.info("\n" + "\n".join(f"{k:<20}{v}" for k, v in info_dict.items()) + "\n") + package_info = {} for r in parse_requirements(package="ultralytics"): try: current = metadata.version(r.name) - is_met = "✅ " if check_version(current, str(r.specifier), hard=True) else "❌ " + is_met = "✅ " if check_version(current, str(r.specifier), name=r.name, hard=True) else "❌ " except metadata.PackageNotFoundError: current = "(not installed)" is_met = "❌ " - LOGGER.info(f"{r.name:<20}{is_met}{current}{r.specifier}") + package_info[r.name] = f"{is_met}{current}{r.specifier}" + LOGGER.info(f"{r.name:<20}{package_info[r.name]}") + + info_dict["Package Info"] = package_info if is_github_action_running(): - LOGGER.info( - f"\nRUNNER_OS: {os.getenv('RUNNER_OS')}\n" - f"GITHUB_EVENT_NAME: {os.getenv('GITHUB_EVENT_NAME')}\n" - 
f"GITHUB_WORKFLOW: {os.getenv('GITHUB_WORKFLOW')}\n" - f"GITHUB_ACTOR: {os.getenv('GITHUB_ACTOR')}\n" - f"GITHUB_REPOSITORY: {os.getenv('GITHUB_REPOSITORY')}\n" - f"GITHUB_REPOSITORY_OWNER: {os.getenv('GITHUB_REPOSITORY_OWNER')}\n" - ) + github_info = { + "RUNNER_OS": os.getenv("RUNNER_OS"), + "GITHUB_EVENT_NAME": os.getenv("GITHUB_EVENT_NAME"), + "GITHUB_WORKFLOW": os.getenv("GITHUB_WORKFLOW"), + "GITHUB_ACTOR": os.getenv("GITHUB_ACTOR"), + "GITHUB_REPOSITORY": os.getenv("GITHUB_REPOSITORY"), + "GITHUB_REPOSITORY_OWNER": os.getenv("GITHUB_REPOSITORY_OWNER"), + } + LOGGER.info("\n" + "\n".join(f"{k}: {v}" for k, v in github_info.items())) + info_dict["GitHub Info"] = github_info + + return info_dict def check_amp(model): @@ -691,9 +713,10 @@ def check_amp(model): def git_describe(path=ROOT): # path must be a directory """Return human-readable git description, i.e. v5.0-5-g3e25f1e https://git-scm.com/docs/git-describe.""" - with contextlib.suppress(Exception): + try: return subprocess.check_output(f"git -C {path} describe --tags --long --always", shell=True).decode()[:-1] - return "" + except: # noqa E722 + return "" def print_args(args: Optional[dict] = None, show_file=True, show_func=False): @@ -748,6 +771,8 @@ def cuda_is_available() -> bool: return cuda_device_count() > 0 -# Define constants +# Run checks and define constants +check_python("3.8", hard=False, verbose=True) # check python version +check_torchvision() # check torch-torchvision compatibility IS_PYTHON_MINIMUM_3_10 = check_python("3.10", hard=False) IS_PYTHON_3_12 = PYTHON_VERSION.startswith("3.12") diff --git a/ultralytics/utils/downloads.py b/ultralytics/utils/downloads.py index 5cbc868ab6..f356f47bb1 100644 --- a/ultralytics/utils/downloads.py +++ b/ultralytics/utils/downloads.py @@ -1,6 +1,5 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license -import contextlib import re import shutil import subprocess @@ -53,7 +52,7 @@ def is_url(url, check=False): valid = is_url("https://www.example.com") ``` """ - with contextlib.suppress(Exception): + try: url = str(url) result = parse.urlparse(url) assert all([result.scheme, result.netloc]) # check if is url @@ -61,7 +60,8 @@ def is_url(url, check=False): with request.urlopen(url) as response: return response.getcode() == 200 # check if exists online return True - return False + except: # noqa E722 + return False def delete_dsstore(path, files_to_delete=(".DS_Store", "__MACOSX")): @@ -425,7 +425,7 @@ def attempt_download_asset(file, repo="ultralytics/assets", release="v8.3.0", ** Example: ```python - file_path = attempt_download_asset("yolov8n.pt", repo="ultralytics/assets", release="latest") + file_path = attempt_download_asset("yolo11n.pt", repo="ultralytics/assets", release="latest") ``` """ from ultralytics.utils import SETTINGS # scoped for circular import diff --git a/ultralytics/utils/files.py b/ultralytics/utils/files.py index 29c68d48de..059e95886e 100644 --- a/ultralytics/utils/files.py +++ b/ultralytics/utils/files.py @@ -183,7 +183,7 @@ def get_latest_run(search_dir="."): return max(last_list, key=os.path.getctime) if last_list else "" -def update_models(model_names=("yolov8n.pt",), source_dir=Path("."), update_names=False): +def update_models(model_names=("yolo11n.pt",), source_dir=Path("."), update_names=False): """ Updates and re-saves specified YOLO models in an 'updated_models' subdirectory. 
@@ -195,7 +195,7 @@ def update_models(model_names=("yolov8n.pt",), source_dir=Path("."), update_name Examples: Update specified YOLO models and save them in 'updated_models' subdirectory: >>> from ultralytics.utils.files import update_models - >>> model_names = ("yolov8n.pt", "yolov8s.pt") + >>> model_names = ("yolo11n.pt", "yolov8s.pt") >>> update_models(model_names, source_dir=Path("/models"), update_names=True) """ from ultralytics import YOLO diff --git a/ultralytics/utils/plotting.py b/ultralytics/utils/plotting.py index 9d3051239c..b622bcc8cb 100644 --- a/ultralytics/utils/plotting.py +++ b/ultralytics/utils/plotting.py @@ -1,6 +1,5 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license -import contextlib import math import warnings from pathlib import Path @@ -13,8 +12,8 @@ import torch from PIL import Image, ImageDraw, ImageFont from PIL import __version__ as pil_version -from ultralytics.utils import IS_JUPYTER, LOGGER, TryExcept, ops, plt_settings, threaded -from ultralytics.utils.checks import check_font, check_requirements, check_version, is_ascii +from ultralytics.utils import IS_COLAB, IS_KAGGLE, LOGGER, TryExcept, ops, plt_settings, threaded +from ultralytics.utils.checks import check_font, check_version, is_ascii from ultralytics.utils.files import increment_path @@ -525,16 +524,12 @@ class Annotator: def show(self, title=None): """Show the annotated image.""" im = Image.fromarray(np.asarray(self.im)[..., ::-1]) # Convert numpy array to PIL Image with RGB to BGR - if IS_JUPYTER: - check_requirements("ipython") + if IS_COLAB or IS_KAGGLE: # can not use IS_JUPYTER as will run for all ipython environments try: - from IPython.display import display - - display(im) + display(im) # noqa - display() function only available in ipython environments except ImportError as e: LOGGER.warning(f"Unable to display image in Jupyter notebooks: {e}") else: - # Convert numpy array to PIL Image and show im.show(title=title) def save(self, filename="image.jpg"): @@ -697,14 +692,13 @@ class Annotator: angle = 360 - angle return angle - def draw_specific_points(self, keypoints, indices=None, shape=(640, 640), radius=2, conf_thres=0.25): + def draw_specific_points(self, keypoints, indices=None, radius=2, conf_thres=0.25): """ Draw specific keypoints for gym steps counting. Args: keypoints (list): Keypoints data to be plotted. indices (list, optional): Keypoint indices to be plotted. Defaults to [2, 5, 7]. - shape (tuple, optional): Image size for model inference. Defaults to (640, 640). radius (int, optional): Keypoint radius. Defaults to 2. conf_thres (float, optional): Confidence threshold for keypoints. Defaults to 0.25. @@ -715,90 +709,71 @@ class Annotator: Keypoint format: [x, y] or [x, y, confidence]. Modifies self.im in-place. 
""" - if indices is None: - indices = [2, 5, 7] - for i, k in enumerate(keypoints): - if i in indices: - x_coord, y_coord = k[0], k[1] - if x_coord % shape[1] != 0 and y_coord % shape[0] != 0: - if len(k) == 3: - conf = k[2] - if conf < conf_thres: - continue - cv2.circle(self.im, (int(x_coord), int(y_coord)), radius, (0, 255, 0), -1, lineType=cv2.LINE_AA) + indices = indices or [2, 5, 7] + points = [(int(k[0]), int(k[1])) for i, k in enumerate(keypoints) if i in indices and k[2] >= conf_thres] + + # Draw lines between consecutive points + for start, end in zip(points[:-1], points[1:]): + cv2.line(self.im, start, end, (0, 255, 0), 2, lineType=cv2.LINE_AA) + + # Draw circles for keypoints + for pt in points: + cv2.circle(self.im, pt, radius, (0, 0, 255), -1, lineType=cv2.LINE_AA) + return self.im - def plot_angle_and_count_and_stage( - self, angle_text, count_text, stage_text, center_kpt, color=(104, 31, 17), txt_color=(255, 255, 255) - ): + def plot_workout_information(self, display_text, position, color=(104, 31, 17), txt_color=(255, 255, 255)): """ - Plot the pose angle, count value and step stage. + Draw text with a background on the image. Args: - angle_text (str): angle value for workout monitoring - count_text (str): counts value for workout monitoring - stage_text (str): stage decision for workout monitoring - center_kpt (list): centroid pose index for workout monitoring - color (tuple): text background color for workout monitoring - txt_color (tuple): text foreground color for workout monitoring + display_text (str): The text to be displayed. + position (tuple): Coordinates (x, y) on the image where the text will be placed. + color (tuple, optional): Text background color + txt_color (tuple, optional): Text foreground color """ - angle_text, count_text, stage_text = (f" {angle_text:.2f}", f"Steps : {count_text}", f" {stage_text}") + (text_width, text_height), _ = cv2.getTextSize(display_text, 0, self.sf, self.tf) - # Draw angle - (angle_text_width, angle_text_height), _ = cv2.getTextSize(angle_text, 0, self.sf, self.tf) - angle_text_position = (int(center_kpt[0]), int(center_kpt[1])) - angle_background_position = (angle_text_position[0], angle_text_position[1] - angle_text_height - 5) - angle_background_size = (angle_text_width + 2 * 5, angle_text_height + 2 * 5 + (self.tf * 2)) + # Draw background rectangle cv2.rectangle( self.im, - angle_background_position, - ( - angle_background_position[0] + angle_background_size[0], - angle_background_position[1] + angle_background_size[1], - ), + (position[0], position[1] - text_height - 5), + (position[0] + text_width + 10, position[1] - text_height - 5 + text_height + 10 + self.tf), color, -1, ) - cv2.putText(self.im, angle_text, angle_text_position, 0, self.sf, txt_color, self.tf) - - # Draw Counts - (count_text_width, count_text_height), _ = cv2.getTextSize(count_text, 0, self.sf, self.tf) - count_text_position = (angle_text_position[0], angle_text_position[1] + angle_text_height + 20) - count_background_position = ( - angle_background_position[0], - angle_background_position[1] + angle_background_size[1] + 5, - ) - count_background_size = (count_text_width + 10, count_text_height + 10 + self.tf) + # Draw text + cv2.putText(self.im, display_text, position, 0, self.sf, txt_color, self.tf) - cv2.rectangle( - self.im, - count_background_position, - ( - count_background_position[0] + count_background_size[0], - count_background_position[1] + count_background_size[1], - ), - color, - -1, - ) - cv2.putText(self.im, count_text, 
count_text_position, 0, self.sf, txt_color, self.tf) + return text_height - # Draw Stage - (stage_text_width, stage_text_height), _ = cv2.getTextSize(stage_text, 0, self.sf, self.tf) - stage_text_position = (int(center_kpt[0]), int(center_kpt[1]) + angle_text_height + count_text_height + 40) - stage_background_position = (stage_text_position[0], stage_text_position[1] - stage_text_height - 5) - stage_background_size = (stage_text_width + 10, stage_text_height + 10) + def plot_angle_and_count_and_stage( + self, angle_text, count_text, stage_text, center_kpt, color=(104, 31, 17), txt_color=(255, 255, 255) + ): + """ + Plot the pose angle, count value, and step stage. - cv2.rectangle( - self.im, - stage_background_position, - ( - stage_background_position[0] + stage_background_size[0], - stage_background_position[1] + stage_background_size[1], - ), - color, - -1, + Args: + angle_text (str): Angle value for workout monitoring + count_text (str): Counts value for workout monitoring + stage_text (str): Stage decision for workout monitoring + center_kpt (list): Centroid pose index for workout monitoring + color (tuple, optional): Text background color + txt_color (tuple, optional): Text foreground color + """ + # Format text + angle_text, count_text, stage_text = f" {angle_text:.2f}", f"Steps : {count_text}", f" {stage_text}" + + # Draw angle, count and stage text + angle_height = self.plot_workout_information( + angle_text, (int(center_kpt[0]), int(center_kpt[1])), color, txt_color + ) + count_height = self.plot_workout_information( + count_text, (int(center_kpt[0]), int(center_kpt[1]) + angle_height + 20), color, txt_color + ) + self.plot_workout_information( + stage_text, (int(center_kpt[0]), int(center_kpt[1]) + angle_height + count_height + 40), color, txt_color ) - cv2.putText(self.im, stage_text, stage_text_position, 0, self.sf, txt_color, self.tf) def seg_bbox(self, mask, mask_color=(255, 0, 255), label=None, txt_color=(255, 255, 255)): """ @@ -1139,10 +1114,12 @@ def plot_images( mask = mask.astype(bool) else: mask = image_masks[j].astype(bool) - with contextlib.suppress(Exception): + try: im[y : y + h, x : x + w, :][mask] = ( im[y : y + h, x : x + w, :][mask] * 0.4 + np.array(color) * 0.6 ) + except: # noqa E722 + pass annotator.fromarray(im) if not save: return np.asarray(annotator.im) diff --git a/ultralytics/utils/torch_utils.py b/ultralytics/utils/torch_utils.py index 00176d3033..52e812757a 100644 --- a/ultralytics/utils/torch_utils.py +++ b/ultralytics/utils/torch_utils.py @@ -1,6 +1,5 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license -import contextlib import gc import math import os @@ -113,16 +112,24 @@ def get_cpu_info(): from ultralytics.utils import PERSISTENT_CACHE # avoid circular import error if "cpu_info" not in PERSISTENT_CACHE: - with contextlib.suppress(Exception): + try: import cpuinfo # pip install py-cpuinfo k = "brand_raw", "hardware_raw", "arch_string_raw" # keys sorted by preference info = cpuinfo.get_cpu_info() # info dict string = info.get(k[0] if k[0] in info else k[1] if k[1] in info else k[2], "unknown") PERSISTENT_CACHE["cpu_info"] = string.replace("(R)", "").replace("CPU ", "").replace("@ ", "") + except: # noqa E722 + pass return PERSISTENT_CACHE.get("cpu_info", "unknown") +def get_gpu_info(index): + """Return a string with system GPU information, i.e. 
'Tesla T4, 15102MiB'.""" + properties = torch.cuda.get_device_properties(index) + return f"{properties.name}, {properties.total_memory / (1 << 20):.0f}MiB" + + def select_device(device="", batch=0, newline=False, verbose=True): """ Selects the appropriate PyTorch device based on the provided arguments. @@ -208,8 +215,7 @@ def select_device(device="", batch=0, newline=False, verbose=True): ) space = " " * (len(s) + 1) for i, d in enumerate(devices): - p = torch.cuda.get_device_properties(i) - s += f"{'' if i == 0 else space}CUDA:{d} ({p.name}, {p.total_memory / (1 << 20):.0f}MiB)\n" # bytes to MB + s += f"{'' if i == 0 else space}CUDA:{d} ({get_gpu_info(i)})\n" # bytes to MB arg = "cuda:0" elif mps and TORCH_2_0 and torch.backends.mps.is_available(): # Prefer MPS if available @@ -638,7 +644,8 @@ def profile(input, ops, n=10, device=None): f"{'Params':>12s}{'GFLOPs':>12s}{'GPU_mem (GB)':>14s}{'forward (ms)':>14s}{'backward (ms)':>14s}" f"{'input':>24s}{'output':>24s}" ) - + gc.collect() # attempt to free unused memory + torch.cuda.empty_cache() for x in input if isinstance(input, list) else [input]: x = x.to(device) x.requires_grad = True @@ -672,8 +679,9 @@ def profile(input, ops, n=10, device=None): except Exception as e: LOGGER.info(e) results.append(None) - gc.collect() # attempt to free unused memory - torch.cuda.empty_cache() + finally: + gc.collect() # attempt to free unused memory + torch.cuda.empty_cache() return results diff --git a/ultralytics/utils/tuner.py b/ultralytics/utils/tuner.py index 1329bfe6ec..c60022c0b8 100644 --- a/ultralytics/utils/tuner.py +++ b/ultralytics/utils/tuner.py @@ -28,7 +28,7 @@ def run_ray_tune( from ultralytics import YOLO # Load a YOLOv8n model - model = YOLO("yolov8n.pt") + model = YOLO("yolo11n.pt") # Start tuning hyperparameters for YOLOv8n training on the COCO8 dataset result_grid = model.tune(data="coco8.yaml", use_ray=True)
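The `profile()` change above moves memory cleanup into a `finally` clause, so `gc.collect()` and `torch.cuda.empty_cache()` run even when a single benchmarked op raises. A minimal sketch of the same pattern (`profile_once` and the toy matmul trial are stand-ins, not part of the patch):

```python
import gc

import torch


def profile_once(run_trial, *args):
    """Run one profiling trial and always release memory afterwards, even on failure."""
    try:
        return run_trial(*args)
    except Exception as e:
        print(f"profiling step failed: {e}")
        return None
    finally:
        gc.collect()  # drop unreachable Python objects
        if torch.cuda.is_available():
            torch.cuda.empty_cache()  # return cached CUDA blocks to the driver


# Example trial: a tiny matmul whose result is reduced to a Python float
result = profile_once(lambda: (torch.randn(64, 64) @ torch.randn(64, 64)).sum().item())
print(result)
```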