commit
17c6800cc0
74 changed files with 2096 additions and 653 deletions
@ -0,0 +1,34 @@ |
||||
# Ultralytics YOLO 🚀, AGPL-3.0 license |
||||
# Builds ultralytics/ultralytics:latest-jupyter image on DockerHub https://hub.docker.com/r/ultralytics/ultralytics |
||||
# Image provides JupyterLab interface for interactive YOLO development and includes tutorial notebooks |
||||
|
||||
# Start from Python-based Ultralytics image for full Python environment |
||||
FROM ultralytics/ultralytics:latest-python |
||||
|
||||
# Install JupyterLab for interactive development |
||||
RUN /usr/local/bin/pip install jupyterlab |
||||
|
||||
# Create persistent data directory structure |
||||
RUN mkdir /data |
||||
|
||||
# Configure YOLO directory paths |
||||
RUN mkdir /data/datasets && /usr/local/bin/yolo settings datasets_dir="/data/datasets" |
||||
RUN mkdir /data/weights && /usr/local/bin/yolo settings weights_dir="/data/weights" |
||||
RUN mkdir /data/runs && /usr/local/bin/yolo settings runs_dir="/data/runs" |
||||
|
||||
# Start JupyterLab with tutorial notebook |
||||
ENTRYPOINT ["/usr/local/bin/jupyter", "lab", "--allow-root", "/ultralytics/examples/tutorial.ipynb"] |
||||
|
||||
# Usage Examples ------------------------------------------------------------------------------------------------------- |
||||
|
||||
# Build and Push |
||||
# t=ultralytics/ultralytics:latest-jupyter && sudo docker build -f docker/Dockerfile-jupyter -t $t . && sudo docker push $t |
||||
|
||||
# Run |
||||
# t=ultralytics/ultralytics:latest-jupyter && sudo docker run -it --ipc=host -p 8888:8888 $t |
||||
|
||||
# Pull and Run |
||||
# t=ultralytics/ultralytics:latest-jupyter && sudo docker pull $t && sudo docker run -it --ipc=host -p 8888:8888 $t |
||||
|
||||
# Pull and Run with local volume mounted |
||||
# t=ultralytics/ultralytics:latest-jupyter && sudo docker pull $t && sudo docker run -it --ipc=host -p 8888:8888 -v "$(pwd)"/datasets:/data/datasets $t |
@ -0,0 +1,160 @@ |
||||
--- |
||||
comments: true |
||||
description: Learn how to use Albumentations with YOLO11 to enhance data augmentation, improve model performance, and streamline your computer vision projects. |
||||
keywords: Albumentations, YOLO11, data augmentation, Ultralytics, computer vision, object detection, model training, image transformations, machine learning |
||||
--- |
||||
|
||||
# Enhance Your Dataset to Train YOLO11 Using Albumentations |
||||
|
||||
When you are building [computer vision models](../models/index.md), the quality and variety of your [training data](../datasets/index.md) can play a big role in how well your model performs. Albumentations offers a fast, flexible, and efficient way to apply a wide range of image transformations that can improve your model's ability to adapt to real-world scenarios. It easily integrates with [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics) and can help you create robust datasets for [object detection](../tasks/detect.md), [segmentation](../tasks/segment.md), and [classification](../tasks/classify.md) tasks. |
||||
|
||||
By using Albumentations, you can boost your YOLO11 training data with techniques like geometric transformations and color adjustments. In this article, we'll see how Albumentations can improve your [data augmentation](../guides/preprocessing_annotated_data.md) process and make your [YOLO11 projects](../solutions/index.md) even more impactful. Let's get started! |
||||
|
||||
## Albumentations for Image Augmentation |
||||
|
||||
[Albumentations](https://albumentations.ai/) is an open-source image augmentation library created in [June 2018](https://arxiv.org/pdf/1809.06839). It is designed to simplify and accelerate the image augmentation process in [computer vision](https://www.ultralytics.com/blog/exploring-image-processing-computer-vision-and-machine-vision). Created with [performance](https://www.ultralytics.com/blog/measuring-ai-performance-to-weigh-the-impact-of-your-innovations) and flexibility in mind, it supports many diverse augmentation techniques, ranging from simple transformations like rotations and flips to more complex adjustments like brightness and contrast changes. Albumentations helps developers generate rich, varied datasets for tasks like [image classification](https://www.youtube.com/watch?v=5BO0Il_YYAg), [object detection](https://www.youtube.com/watch?v=5ku7npMrW40&t=1s), and [segmentation](https://www.youtube.com/watch?v=o4Zd-IeMlSY). |
||||
|
||||
You can use Albumentations to easily apply augmentations to images, [segmentation masks](https://www.ultralytics.com/glossary/image-segmentation), [bounding boxes](https://www.ultralytics.com/glossary/bounding-box), and [key points](../datasets/pose/index.md), and make sure that all elements of your dataset are transformed together. It works seamlessly with popular deep learning frameworks like [PyTorch](../integrations/torchscript.md) and [TensorFlow](../integrations/tensorboard.md), making it accessible for a wide range of projects. |
||||
|
||||
Also, Albumentations is a great option for augmentation whether you're handling small datasets or large-scale [computer vision tasks](../tasks/index.md). It ensures fast and efficient processing, cutting down the time spent on data preparation. At the same time, it helps improve [model performance](../guides/yolo-performance-metrics.md), making your models more effective in real-world applications. |
||||
|
||||
## Key Features of Albumentations |
||||
|
||||
Albumentations offers many useful features that simplify complex image augmentations for a wide range of [computer vision applications](https://www.ultralytics.com/blog/exploring-how-the-applications-of-computer-vision-work). Here are some of the key features: |
||||
|
||||
- **Wide Range of Transformations**: Albumentations offers over [70 different transformations](https://github.com/albumentations-team/albumentations?tab=readme-ov-file#list-of-augmentations), including geometric changes (e.g., rotation, flipping), color adjustments (e.g., brightness, contrast), and noise addition (e.g., Gaussian noise). Having multiple options enables the creation of highly diverse and robust training datasets. |
||||
|
||||
<p align="center"> |
||||
<img width="100%" src="https://github.com/ultralytics/docs/releases/download/0/albumentations-augmentation.avif" alt="Example of Image Augmentations"> |
||||
</p> |
||||
|
||||
- **High Performance Optimization**: Built on OpenCV and NumPy, Albumentations uses advanced optimization techniques like SIMD (Single Instruction, Multiple Data), which processes multiple data points simultaneously to speed up processing. It handles large datasets quickly, making it one of the fastest options available for image augmentation. |
||||
|
||||
- **Three Levels of Augmentation**: Albumentations supports three levels of augmentation: pixel-level transformations, spatial-level transformations, and mixing-level transformations. Pixel-level transformations only affect the input images without altering masks, bounding boxes, or key points. Meanwhile, both the image and its elements, like masks and bounding boxes, are transformed using spatial-level transformations. Furthermore, mixing-level transformations are a unique way to augment data, as they combine multiple images into one. |
||||
|
||||
![Overview of the Different Levels of Augmentations](https://github.com/ultralytics/docs/releases/download/0/levels-of-augmentation.avif) |
||||
|
||||
- **[Benchmarking Results](https://albumentations.ai/docs/benchmarking_results/)**: When it comes to benchmarking, Albumentations consistently outperforms other libraries, especially with large datasets. |
||||
|
||||
## Why Should You Use Albumentations for Your Vision AI Projects? |
||||
|
||||
With respect to image augmentation, Albumentations stands out as a reliable tool for computer vision tasks. Here are a few key reasons why you should consider using it for your Vision AI projects: |
||||
|
||||
- **Easy-to-Use API**: Albumentations provides a single, straightforward API for applying a wide range of augmentations to images, masks, bounding boxes, and keypoints. It's designed to adapt easily to different datasets, making [data preparation](../guides/data-collection-and-annotation.md) simpler and more efficient. |
||||
|
||||
- **Rigorous Bug Testing**: Bugs in the augmentation pipeline can silently corrupt input data, often going unnoticed but ultimately degrading model performance. Albumentations addresses this with a thorough test suite that helps catch bugs early in development. |
||||
|
||||
- **Extensibility**: Albumentations can be used to easily add new augmentations and use them in computer vision pipelines through a single interface along with built-in transformations. |
||||
|
||||
## How to Use Albumentations to Augment Data for YOLO11 Training |
||||
|
||||
Now that we've covered what Albumentations is and what it can do, let's look at how to use it to augment your data for YOLO11 model training. It's easy to set up because it integrates directly into [Ultralytics' training mode](../modes/train.md) and applies automatically if you have the Albumentations package installed. |
||||
|
||||
### Installation |
||||
|
||||
To use Albumentations with YOLO11, start by making sure you have the necessary packages installed. If Albumentations isn't installed, the augmentations won't be applied during training. Once set up, you'll be ready to create an augmented dataset for training, with Albumentations integrated to enhance your model automatically. |
||||
|
||||
!!! tip "Installation" |
||||
|
||||
=== "CLI" |
||||
|
||||
```bash |
||||
# Install the required packages |
||||
pip install albumentations ultralytics |
||||
``` |
||||
|
||||
For detailed instructions and best practices related to the installation process, check our [Ultralytics Installation guide](../quickstart.md). While installing the required packages for YOLO11, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips. |
||||
|
||||
### Usage |
||||
|
||||
After installing the necessary packages, you're ready to start using Albumentations with YOLO11. When you train YOLO11, a set of augmentations is automatically applied through its integration with Albumentations, making it easy to enhance your model's performance. |
||||
|
||||
!!! example "Usage" |
||||
|
||||
=== "Python" |
||||
|
||||
```python |
||||
from ultralytics import YOLO |
||||
|
||||
# Load a pre-trained model |
||||
model = YOLO("yolo11n.pt") |
||||
|
||||
# Train the model |
||||
results = model.train(data="coco8.yaml", epochs=100, imgsz=640) |
||||
``` |
||||
|
||||
Next, let's take a closer look at the specific augmentations that are applied during training. |
||||
|
||||
### Blur |
||||
|
||||
The Blur transformation in Albumentations applies a simple blur effect to the image by averaging pixel values within a small square area, or kernel. This is done using OpenCV's `cv2.blur` function, which helps reduce noise in the image, though it also slightly reduces image details. |
||||
|
||||
Here are the parameters and values used in this integration: |
||||
|
||||
- **blur_limit**: This controls the size range of the blur effect. The default range is (3, 7), meaning the kernel size for the blur can vary between 3 and 7 pixels, with only odd numbers allowed to keep the blur centered. |
||||
|
||||
- **p**: The probability of applying the blur. In the integration, p=0.01, so there's a 1% chance that this blur will be applied to each image. The low probability allows for occasional blur effects, introducing a bit of variation to help the model generalize without over-blurring the images. |
||||
|
||||
<img width="776" alt="An Example of the Blur Augmentation" src="https://github.com/ultralytics/docs/releases/download/0/albumentations-blur.avif"> |
||||
|
||||
### Median Blur |
||||
|
||||
The MedianBlur transformation in Albumentations applies a median blur effect to the image, which is particularly useful for reducing noise while preserving edges. Unlike typical blurring methods, MedianBlur uses a median filter, which is especially effective at removing salt-and-pepper noise while maintaining sharpness around the edges. |
||||
|
||||
Here are the parameters and values used in this integration: |
||||
|
||||
- **blur_limit**: This parameter controls the maximum size of the blurring kernel. In this integration, it defaults to a range of (3, 7), meaning the kernel size for the blur is randomly chosen between 3 and 7 pixels, with only odd values allowed to ensure proper alignment. |
||||
|
||||
- **p**: Sets the probability of applying the median blur. Here, p=0.01, so the transformation has a 1% chance of being applied to each image. This low probability ensures that the median blur is used sparingly, helping the model generalize by occasionally seeing images with reduced noise and preserved edges. |
||||
|
||||
The image below shows an example of this augmentation applied to an image. |
||||
|
||||
<img width="764" alt="An Example of the MedianBlur Augmentation" src="https://github.com/ultralytics/docs/releases/download/0/albumentations-median-blur.avif"> |
||||
|
||||
### Grayscale |
||||
|
||||
The ToGray transformation in Albumentations converts an image to grayscale, reducing it to a single-channel format and optionally replicating this channel to match a specified number of output channels. Different methods can be used to adjust how grayscale brightness is calculated, ranging from simple averaging to more advanced techniques for realistic perception of contrast and brightness. |
||||
|
||||
Here are the parameters and values used in this integration: |
||||
|
||||
- **num_output_channels**: Sets the number of channels in the output image. If this value is more than 1, the single grayscale channel will be replicated to create a multi-channel grayscale image. By default, it's set to 3, giving a grayscale image with three identical channels. |
||||
|
||||
- **method**: Defines the grayscale conversion method. The default method, "weighted_average", applies a formula (0.299R + 0.587G + 0.114B) that closely aligns with human perception, providing a natural-looking grayscale effect. Other options, like "from_lab", "desaturation", "average", "max", and "pca", offer alternative ways to create grayscale images based on various needs for speed, brightness emphasis, or detail preservation. |
||||
|
||||
- **p**: Controls how often the grayscale transformation is applied. With p=0.01, there is a 1% chance of converting each image to grayscale, making it possible for a mix of color and grayscale images to help the model generalize better. |
||||
|
||||
The image below shows an example of this grayscale transformation applied. |
||||
|
||||
<img width="759" alt="An Example of the ToGray Augmentation" src="https://github.com/ultralytics/docs/releases/download/0/albumentations-grayscale.avif"> |
||||
|
||||
### Contrast Limited Adaptive Histogram Equalization (CLAHE) |
||||
|
||||
The CLAHE transformation in Albumentations applies Contrast Limited Adaptive Histogram Equalization (CLAHE), a technique that enhances image contrast by equalizing the histogram in localized regions (tiles) instead of across the whole image. CLAHE produces a balanced enhancement effect, avoiding the overly amplified contrast that can result from standard histogram equalization, especially in areas with initially low contrast. |
||||
|
||||
Here are the parameters and values used in this integration: |
||||
|
||||
- **clip_limit**: Controls the contrast enhancement range. Set to a default range of (1, 4), it determines the maximum contrast allowed in each tile. Higher values are used for more contrast but may also introduce noise. |
||||
|
||||
- **tile_grid_size**: Defines the size of the grid of tiles, typically as (rows, columns). The default value is (8, 8), meaning the image is divided into an 8x8 grid. Smaller tile sizes provide more localized adjustments, while larger ones create effects closer to global equalization. |
||||
|
||||
- **p**: The probability of applying CLAHE. Here, p=0.01 introduces the enhancement effect only 1% of the time, ensuring that contrast adjustments are applied sparingly for occasional variation in training images. |
||||
|
||||
The image below shows an example of the CLAHE transformation applied. |
||||
|
||||
<img width="760" alt="An Example of the CLAHE Augmentation" src="https://github.com/ultralytics/docs/releases/download/0/albumentations-CLAHE.avif"> |
||||
|
||||
## Keep Learning about Albumentations |
||||
|
||||
If you are interested in learning more about Albumentations, check out the following resources for more in-depth instructions and examples: |
||||
|
||||
- **[Albumentations Documentation](https://albumentations.ai/docs/)**: The official documentation provides a full range of supported transformations and advanced usage techniques. |
||||
|
||||
- **[Ultralytics Albumentations Guide](https://docs.ultralytics.com/reference/data/augment/?h=albumentation#ultralytics.data.augment.Albumentations)**: Get a closer look at the details of the function that facilitates this integration. |
||||
|
||||
- **[Albumentations GitHub Repository](https://github.com/albumentations-team/albumentations/)**: The repository includes examples, benchmarks, and discussions to help you get started with customizing augmentations. |
||||
|
||||
## Key Takeaways |
||||
|
||||
In this guide, we explored the key aspects of Albumentations, a great Python library for image augmentation. We discussed its wide range of transformations, optimized performance, and how you can use it in your next YOLO11 project. |
||||
|
||||
Also, if you'd like to know more about other Ultralytics YOLO11 integrations, visit our [integration guide page](../integrations/index.md). You'll find valuable resources and insights there. |
@ -0,0 +1,342 @@ |
||||
--- |
||||
comments: true |
||||
description: Optimize YOLO11 models for mobile and embedded devices by exporting to MNN format. |
||||
keywords: Ultralytics, YOLO11, MNN, model export, machine learning, deployment, mobile, embedded systems, deep learning, AI models |
||||
--- |
||||
|
||||
# MNN Export for YOLO11 Models and Deployment |
||||
|
||||
## MNN |
||||
|
||||
<p align="center"> |
||||
<img width="100%" src="https://mnn-docs.readthedocs.io/en/latest/_images/architecture.png" alt="MNN architecture"> |
||||
</p> |
||||
|
||||
[MNN](https://github.com/alibaba/MNN) is a highly efficient and lightweight deep learning framework. It supports inference and training of deep learning models and has industry-leading performance for inference and training on-device. At present, MNN has been integrated into more than 30 apps of Alibaba Inc, such as Taobao, Tmall, Youku, DingTalk, Xianyu, etc., covering more than 70 usage scenarios such as live broadcast, short video capture, search recommendation, product searching by image, interactive marketing, equity distribution, security risk control. In addition, MNN is also used on embedded devices, such as IoT. |
||||
|
||||
## Export to MNN: Converting Your YOLO11 Model |
||||
|
||||
You can expand model compatibility and deployment flexibility by converting YOLO11 models to MNN format. |
||||
|
||||
### Installation |
||||
|
||||
To install the required packages, run: |
||||
|
||||
!!! tip "Installation" |
||||
|
||||
=== "CLI" |
||||
|
||||
```bash |
||||
# Install the required package for YOLO11 and MNN |
||||
pip install ultralytics |
||||
pip install MNN |
||||
``` |
||||
|
||||
### Usage |
||||
|
||||
Before diving into the usage instructions, it's important to note that while all [Ultralytics YOLO11 models](../models/index.md) are available for exporting, you can ensure that the model you select supports export functionality [here](../modes/export.md). |
||||
|
||||
!!! example "Usage" |
||||
|
||||
=== "Python" |
||||
|
||||
```python |
||||
from ultralytics import YOLO |
||||
|
||||
# Load the YOLO11 model |
||||
model = YOLO("yolo11n.pt") |
||||
|
||||
# Export the model to MNN format |
||||
model.export(format="mnn") # creates 'yolo11n.mnn' |
||||
|
||||
# Load the exported MNN model |
||||
mnn_model = YOLO("yolo11n.mnn") |
||||
|
||||
# Run inference |
||||
results = mnn_model("https://ultralytics.com/images/bus.jpg") |
||||
``` |
||||
|
||||
=== "CLI" |
||||
|
||||
```bash |
||||
# Export a YOLO11n PyTorch model to MNN format |
||||
yolo export model=yolo11n.pt format=mnn # creates 'yolo11n.mnn' |
||||
|
||||
# Run inference with the exported model |
||||
yolo predict model='yolo11n.mnn' source='https://ultralytics.com/images/bus.jpg' |
||||
``` |
||||
|
||||
For more details about supported export options, visit the [Ultralytics documentation page on deployment options](../guides/model-deployment-options.md). |
||||
|
||||
### MNN-Only Inference |
||||
|
||||
The example below relies solely on MNN for YOLO11 preprocessing and inference. Both Python and C++ versions are provided for easy deployment in any scenario. |
||||
|
||||
!!! example "MNN" |
||||
|
||||
=== "Python" |
||||
|
||||
```python |
||||
import argparse |
||||
|
||||
import MNN |
||||
import MNN.cv as cv2 |
||||
import MNN.numpy as np |
||||
|
||||
|
||||
def inference(model, img, precision, backend, thread): |
||||
config = {} |
||||
config["precision"] = precision |
||||
config["backend"] = backend |
||||
config["numThread"] = thread |
||||
rt = MNN.nn.create_runtime_manager((config,)) |
||||
# net = MNN.nn.load_module_from_file(model, ['images'], ['output0'], runtime_manager=rt) |
||||
net = MNN.nn.load_module_from_file(model, [], [], runtime_manager=rt) |
||||
original_image = cv2.imread(img) |
||||
ih, iw, _ = original_image.shape |
||||
length = max((ih, iw)) |
||||
scale = length / 640 |
||||
image = np.pad(original_image, [[0, length - ih], [0, length - iw], [0, 0]], "constant") |
||||
image = cv2.resize( |
||||
image, (640, 640), 0.0, 0.0, cv2.INTER_LINEAR, -1, [0.0, 0.0, 0.0], [1.0 / 255.0, 1.0 / 255.0, 1.0 / 255.0] |
||||
) |
||||
input_var = np.expand_dims(image, 0) |
||||
input_var = MNN.expr.convert(input_var, MNN.expr.NC4HW4) |
||||
output_var = net.forward(input_var) |
||||
output_var = MNN.expr.convert(output_var, MNN.expr.NCHW) |
||||
output_var = output_var.squeeze() |
||||
# output_var shape: [84, 8400]; 84 means: [cx, cy, w, h, prob * 80] |
||||
cx = output_var[0] |
||||
cy = output_var[1] |
||||
w = output_var[2] |
||||
h = output_var[3] |
||||
probs = output_var[4:] |
||||
# [cx, cy, w, h] -> [x0, y0, x1, y1] |
||||
x0 = cx - w * 0.5 |
||||
y0 = cy - h * 0.5 |
||||
x1 = cx + w * 0.5 |
||||
y1 = cy + h * 0.5 |
||||
boxes = np.stack([x0, y0, x1, y1], axis=1) |
||||
# get max prob and idx |
||||
scores = np.max(probs, 0) |
||||
class_ids = np.argmax(probs, 0) |
||||
result_ids = MNN.expr.nms(boxes, scores, 100, 0.45, 0.25) |
||||
print(result_ids.shape) |
||||
# nms result box, score, ids |
||||
result_boxes = boxes[result_ids] |
||||
result_scores = scores[result_ids] |
||||
result_class_ids = class_ids[result_ids] |
||||
for i in range(len(result_boxes)): |
||||
x0, y0, x1, y1 = result_boxes[i].read_as_tuple() |
||||
y0 = int(y0 * scale) |
||||
y1 = int(y1 * scale) |
||||
x0 = int(x0 * scale) |
||||
x1 = int(x1 * scale) |
||||
print(result_class_ids[i]) |
||||
cv2.rectangle(original_image, (x0, y0), (x1, y1), (0, 0, 255), 2) |
||||
cv2.imwrite("res.jpg", original_image) |
||||
|
||||
|
||||
if __name__ == "__main__": |
||||
parser = argparse.ArgumentParser() |
||||
parser.add_argument("--model", type=str, required=True, help="the yolo11 model path") |
||||
parser.add_argument("--img", type=str, required=True, help="the input image path") |
||||
parser.add_argument("--precision", type=str, default="normal", help="inference precision: normal, low, high, lowBF") |
||||
parser.add_argument( |
||||
"--backend", |
||||
type=str, |
||||
default="CPU", |
||||
help="inference backend: CPU, OPENCL, OPENGL, NN, VULKAN, METAL, TRT, CUDA, HIAI", |
||||
) |
||||
parser.add_argument("--thread", type=int, default=4, help="inference using thread: int") |
||||
args = parser.parse_args() |
||||
inference(args.model, args.img, args.precision, args.backend, args.thread) |
||||
``` |
||||
|
||||
=== "CPP" |
||||
|
||||
```cpp |
||||
#include <stdio.h> |
||||
#include <MNN/ImageProcess.hpp> |
||||
#include <MNN/expr/Module.hpp> |
||||
#include <MNN/expr/Executor.hpp> |
||||
#include <MNN/expr/ExprCreator.hpp> |
||||
#include <MNN/expr/Executor.hpp> |
||||
|
||||
#include <cv/cv.hpp> |
||||
|
||||
using namespace MNN; |
||||
using namespace MNN::Express; |
||||
using namespace MNN::CV; |
||||
|
||||
int main(int argc, const char* argv[]) { |
||||
if (argc < 3) { |
||||
MNN_PRINT("Usage: ./yolo11_demo.out model.mnn input.jpg [forwardType] [precision] [thread]\n"); |
||||
return 0; |
||||
} |
||||
int thread = 4; |
||||
int precision = 0; |
||||
int forwardType = MNN_FORWARD_CPU; |
||||
if (argc >= 4) { |
||||
forwardType = atoi(argv[3]); |
||||
} |
||||
if (argc >= 5) { |
||||
precision = atoi(argv[4]); |
||||
} |
||||
if (argc >= 6) { |
||||
thread = atoi(argv[5]); |
||||
} |
||||
MNN::ScheduleConfig sConfig; |
||||
sConfig.type = static_cast<MNNForwardType>(forwardType); |
||||
sConfig.numThread = thread; |
||||
BackendConfig bConfig; |
||||
bConfig.precision = static_cast<BackendConfig::PrecisionMode>(precision); |
||||
sConfig.backendConfig = &bConfig; |
||||
std::shared_ptr<Executor::RuntimeManager> rtmgr = std::shared_ptr<Executor::RuntimeManager>(Executor::RuntimeManager::createRuntimeManager(sConfig)); |
||||
if(rtmgr == nullptr) { |
||||
MNN_ERROR("Empty RuntimeManger\n"); |
||||
return 0; |
||||
} |
||||
rtmgr->setCache(".cachefile"); |
||||
|
||||
std::shared_ptr<Module> net(Module::load(std::vector<std::string>{}, std::vector<std::string>{}, argv[1], rtmgr)); |
||||
auto original_image = imread(argv[2]); |
||||
auto dims = original_image->getInfo()->dim; |
||||
int ih = dims[0]; |
||||
int iw = dims[1]; |
||||
int len = ih > iw ? ih : iw; |
||||
float scale = len / 640.0; |
||||
std::vector<int> padvals { 0, len - ih, 0, len - iw, 0, 0 }; |
||||
auto pads = _Const(static_cast<void*>(padvals.data()), {3, 2}, NCHW, halide_type_of<int>()); |
||||
auto image = _Pad(original_image, pads, CONSTANT); |
||||
image = resize(image, Size(640, 640), 0, 0, INTER_LINEAR, -1, {0., 0., 0.}, {1./255., 1./255., 1./255.}); |
||||
auto input = _Unsqueeze(image, {0}); |
||||
input = _Convert(input, NC4HW4); |
||||
auto outputs = net->onForward({input}); |
||||
auto output = _Convert(outputs[0], NCHW); |
||||
output = _Squeeze(output); |
||||
// output shape: [84, 8400]; 84 means: [cx, cy, w, h, prob * 80] |
||||
auto cx = _Gather(output, _Scalar<int>(0)); |
||||
auto cy = _Gather(output, _Scalar<int>(1)); |
||||
auto w = _Gather(output, _Scalar<int>(2)); |
||||
auto h = _Gather(output, _Scalar<int>(3)); |
||||
std::vector<int> startvals { 4, 0 }; |
||||
auto start = _Const(static_cast<void*>(startvals.data()), {2}, NCHW, halide_type_of<int>()); |
||||
std::vector<int> sizevals { -1, -1 }; |
||||
auto size = _Const(static_cast<void*>(sizevals.data()), {2}, NCHW, halide_type_of<int>()); |
||||
auto probs = _Slice(output, start, size); |
||||
// [cx, cy, w, h] -> [x0, y0, x1, y1] |
||||
auto x0 = cx - w * _Const(0.5); |
||||
auto y0 = cy - h * _Const(0.5); |
||||
auto x1 = cx + w * _Const(0.5); |
||||
auto y1 = cy + h * _Const(0.5); |
||||
auto boxes = _Stack({x0, y0, x1, y1}, 1); |
||||
auto scores = _ReduceMax(probs, {0}); |
||||
auto ids = _ArgMax(probs, 0); |
||||
auto result_ids = _Nms(boxes, scores, 100, 0.45, 0.25); |
||||
auto result_ptr = result_ids->readMap<int>(); |
||||
auto box_ptr = boxes->readMap<float>(); |
||||
auto ids_ptr = ids->readMap<int>(); |
||||
auto score_ptr = scores->readMap<float>(); |
||||
for (int i = 0; i < 100; i++) { |
||||
auto idx = result_ptr[i]; |
||||
if (idx < 0) break; |
||||
auto x0 = box_ptr[idx * 4 + 0] * scale; |
||||
auto y0 = box_ptr[idx * 4 + 1] * scale; |
||||
auto x1 = box_ptr[idx * 4 + 2] * scale; |
||||
auto y1 = box_ptr[idx * 4 + 3] * scale; |
||||
auto class_idx = ids_ptr[idx]; |
||||
auto score = score_ptr[idx]; |
||||
rectangle(original_image, {x0, y0}, {x1, y1}, {0, 0, 255}, 2); |
||||
} |
||||
if (imwrite("res.jpg", original_image)) { |
||||
MNN_PRINT("result image write to `res.jpg`.\n"); |
||||
} |
||||
rtmgr->updateCache(); |
||||
return 0; |
||||
} |
||||
``` |
||||
|
||||
## Summary |
||||
|
||||
In this guide, we showed how to export an Ultralytics YOLO11 model to MNN format and how to use MNN for inference. |
||||
|
||||
For more usage details, please refer to the [MNN documentation](https://mnn-docs.readthedocs.io/en/latest). |
||||
|
||||
## FAQ |
||||
|
||||
### How do I export Ultralytics YOLO11 models to MNN format? |
||||
|
||||
To export your Ultralytics YOLO11 model to MNN format, follow these steps: |
||||
|
||||
!!! example "Export" |
||||
|
||||
=== "Python" |
||||
|
||||
```python |
||||
from ultralytics import YOLO |
||||
|
||||
# Load the YOLO11 model |
||||
model = YOLO("yolo11n.pt") |
||||
|
||||
# Export to MNN format |
||||
model.export(format="mnn") # creates 'yolo11n.mnn' with fp32 weight |
||||
model.export(format="mnn", half=True) # creates 'yolo11n.mnn' with fp16 weight |
||||
model.export(format="mnn", int8=True) # creates 'yolo11n.mnn' with int8 weight |
||||
``` |
||||
|
||||
=== "CLI" |
||||
|
||||
```bash |
||||
yolo export model=yolo11n.pt format=mnn # creates 'yolo11n.mnn' with fp32 weight |
||||
yolo export model=yolo11n.pt format=mnn half=True # creates 'yolo11n.mnn' with fp16 weight |
||||
yolo export model=yolo11n.pt format=mnn int8=True # creates 'yolo11n.mnn' with int8 weight |
||||
``` |
||||
|
||||
For detailed export options, check the [Export](../modes/export.md) page in the documentation. |
||||
|
||||
### How do I predict with an exported YOLO11 MNN model? |
||||
|
||||
To predict with an exported YOLO11 MNN model, use the `predict` function from the YOLO class. |
||||
|
||||
!!! example "Predict" |
||||
|
||||
=== "Python" |
||||
|
||||
```python |
||||
from ultralytics import YOLO |
||||
|
||||
# Load the YOLO11 MNN model |
||||
mnn_model = YOLO("yolo11n.mnn") |
||||
|
||||
# Run inference with the MNN model |
||||
results = mnn_model("https://ultralytics.com/images/bus.jpg") # predict with `fp32` |
||||
results = mnn_model("https://ultralytics.com/images/bus.jpg", half=True) # predict with `fp16` if the device supports it |
||||
|
||||
for result in results: |
||||
result.show() # display to screen |
||||
result.save(filename="result.jpg") # save to disk |
||||
``` |
||||
|
||||
=== "CLI" |
||||
|
||||
```bash |
||||
yolo predict model='yolo11n.mnn' source='https://ultralytics.com/images/bus.jpg' # predict with `fp32` |
||||
yolo predict model='yolo11n.mnn' source='https://ultralytics.com/images/bus.jpg' half=True # predict with `fp16` if the device supports it |
||||
``` |
||||
|
||||
### What platforms are supported for MNN? |
||||
|
||||
MNN is versatile and supports various platforms: |
||||
|
||||
- **Mobile**: Android, iOS, Harmony. |
||||
- **Embedded Systems and IoT Devices**: Devices like Raspberry Pi and NVIDIA Jetson. |
||||
- **Desktop and Servers**: Linux, Windows, and macOS. |
||||
|
||||
### How can I deploy Ultralytics YOLO11 MNN models on Mobile Devices? |
||||
|
||||
To deploy your YOLO11 models on mobile devices: |
||||
|
||||
1. **Build for Android**: Follow the instructions in the [MNN Android project](https://github.com/alibaba/MNN/tree/master/project/android). |
||||
2. **Build for iOS**: Follow the instructions in the [MNN iOS project](https://github.com/alibaba/MNN/tree/master/project/ios). |
||||
3. **Build for Harmony**: Follow the instructions in the [MNN Harmony project](https://github.com/alibaba/MNN/tree/master/project/harmony). |
@ -0,0 +1,12 @@ |
||||
[package] |
||||
name = "YOLO-ONNXRuntime-Rust" |
||||
version = "0.1.0" |
||||
edition = "2021" |
||||
authors = ["Jamjamjon <xxyydzml@outlook.com>"] |
||||
|
||||
[dependencies] |
||||
anyhow = "1.0.92" |
||||
clap = "4.5.20" |
||||
tracing = "0.1.40" |
||||
tracing-subscriber = "0.3.18" |
||||
usls = { version = "0.0.19", features = ["auto"] } |
@ -0,0 +1,94 @@ |
||||
# YOLO-Series ONNXRuntime Rust Demo for Core YOLO Tasks |
||||
|
||||
This repository provides a Rust demo for key YOLO-Series tasks such as `Classification`, `Segmentation`, `Detection`, `Pose Detection`, and `OBB` using ONNXRuntime. It supports various YOLO models (v5 - 11) across multiple vision tasks. |
||||
|
||||
## Introduction |
||||
|
||||
- This example leverages the latest versions of both ONNXRuntime and YOLO models. |
||||
- We utilize the [usls](https://github.com/jamjamjon/usls/tree/main) crate to streamline YOLO model inference, providing efficient data loading, visualization, and optimized inference performance. |
||||
|
||||
## Features |
||||
|
||||
- **Extensive Model Compatibility**: Supports `YOLOv5`, `YOLOv6`, `YOLOv7`, `YOLOv8`, `YOLOv9`, `YOLOv10`, `YOLO11`, `YOLO-world`, `RTDETR`, and others, covering a wide range of YOLO versions. |
||||
- **Versatile Task Coverage**: Includes `Classification`, `Segmentation`, `Detection`, `Pose`, and `OBB`. |
||||
- **Precision Flexibility**: Works with `FP16` and `FP32` ONNX models. |
||||
- **Execution Providers**: Accelerated support for `CPU`, `CUDA`, `CoreML`, and `TensorRT`. |
||||
- **Dynamic Input Shapes**: Dynamically adjusts to variable `batch`, `width`, and `height` dimensions for flexible model input. |
||||
- **Flexible Data Loading**: The `DataLoader` handles images, folders, videos, and video streams. |
||||
- **Real-Time Display and Video Export**: `Viewer` provides real-time frame visualization and video export functions, similar to OpenCV’s `imshow()` and `imwrite()`. |
||||
- **Enhanced Annotation and Visualization**: The `Annotator` facilitates comprehensive result rendering, with support for bounding boxes (HBB), oriented bounding boxes (OBB), polygons, masks, keypoints, and text labels. |
||||
|
||||
## Setup Instructions |
||||
|
||||
### 1. ONNXRuntime Linking |
||||
|
||||
<details> |
||||
<summary>You have two options to link the ONNXRuntime library:</summary> |
||||
|
||||
- **Option 1: Manual Linking** |
||||
|
||||
- For detailed setup, consult the [ONNX Runtime linking documentation](https://ort.pyke.io/setup/linking). |
||||
- **Linux or macOS**: |
||||
1. Download the ONNX Runtime package from the [Releases page](https://github.com/microsoft/onnxruntime/releases). |
||||
2. Set up the library path by exporting the `ORT_DYLIB_PATH` environment variable: |
||||
```shell |
||||
export ORT_DYLIB_PATH=/path/to/onnxruntime/lib/libonnxruntime.so.1.19.0 |
||||
``` |
||||
|
||||
- **Option 2: Automatic Download** |
||||
- Use the `--features auto` flag to handle downloading automatically: |
||||
```shell |
||||
cargo run -r --example yolo --features auto |
||||
``` |
||||
|
||||
</details> |
||||
|
||||
### 2. \[Optional\] Install CUDA, CuDNN, and TensorRT |
||||
|
||||
- The CUDA execution provider requires CUDA version `12.x`. |
||||
- The TensorRT execution provider requires both CUDA `12.x` and TensorRT `10.x`. |
||||
|
||||
### 3. \[Optional\] Install ffmpeg |
||||
|
||||
To view video frames and save video inferences, install `rust-ffmpeg`. For instructions, see: |
||||
[https://github.com/zmwangx/rust-ffmpeg/wiki/Notes-on-building#dependencies](https://github.com/zmwangx/rust-ffmpeg/wiki/Notes-on-building#dependencies) |
||||
|
||||
## Get Started |
||||
|
||||
```Shell |
||||
# customized |
||||
cargo run -r -- --task detect --ver v8 --nc 6 --model xxx.onnx # YOLOv8 |
||||
|
||||
# Classify |
||||
cargo run -r -- --task classify --ver v5 --scale s --width 224 --height 224 --nc 1000 # YOLOv5 |
||||
cargo run -r -- --task classify --ver v8 --scale n --width 224 --height 224 --nc 1000 # YOLOv8 |
||||
cargo run -r -- --task classify --ver v11 --scale n --width 224 --height 224 --nc 1000 # YOLO11 |
||||
|
||||
# Detect |
||||
cargo run -r -- --task detect --ver v5 --scale n # YOLOv5 |
||||
cargo run -r -- --task detect --ver v6 --scale n # YOLOv6 |
||||
cargo run -r -- --task detect --ver v7 --scale t # YOLOv7 |
||||
cargo run -r -- --task detect --ver v8 --scale n # YOLOv8 |
||||
cargo run -r -- --task detect --ver v9 --scale t # YOLOv9 |
||||
cargo run -r -- --task detect --ver v10 --scale n # YOLOv10 |
||||
cargo run -r -- --task detect --ver v11 --scale n # YOLO11 |
||||
cargo run -r -- --task detect --ver rtdetr --scale l # RTDETR |
||||
|
||||
# Pose |
||||
cargo run -r -- --task pose --ver v8 --scale n # YOLOv8-Pose |
||||
cargo run -r -- --task pose --ver v11 --scale n # YOLO11-Pose |
||||
|
||||
# Segment |
||||
cargo run -r -- --task segment --ver v5 --scale n # YOLOv5-Segment |
||||
cargo run -r -- --task segment --ver v8 --scale n # YOLOv8-Segment |
||||
cargo run -r -- --task segment --ver v11 --scale n # YOLO11-Segment |
||||
cargo run -r -- --task segment --ver v8 --model yolo/FastSAM-s-dyn-f16.onnx # FastSAM |
||||
|
||||
# OBB |
||||
cargo run -r -- --ver v8 --task obb --scale n --width 1024 --height 1024 --source images/dota.png # YOLOv8-Obb |
||||
cargo run -r -- --ver v11 --task obb --scale n --width 1024 --height 1024 --source images/dota.png # YOLO11-Obb |
||||
``` |
||||
|
||||
**Run `cargo run -- --help` to see all available options.** |
||||
|
||||
For more details, please refer to [usls-yolo](https://github.com/jamjamjon/usls/tree/main/examples/yolo). |
@ -0,0 +1,236 @@ |
||||
use anyhow::Result; |
||||
use clap::Parser; |
||||
|
||||
use usls::{ |
||||
models::YOLO, Annotator, DataLoader, Device, Options, Viewer, Vision, YOLOScale, YOLOTask, |
||||
YOLOVersion, COCO_SKELETONS_16, |
||||
}; |
||||
|
||||
// Command-line options for the YOLO ONNXRuntime demo, parsed with clap's derive API.
//
// NOTE(review): with clap derive, the `///` doc comments on the fields are used as the
// `--help` text, so they are kept verbatim; explanatory notes use plain `//` comments.
#[derive(Parser, Clone)]
#[command(author, version, about, long_about = None)]
pub struct Args {
    /// Path to the ONNX model
    // When omitted, `main` falls back to `yolo/<ver>-<scale>-<task>.onnx`.
    #[arg(long)]
    pub model: Option<String>,

    /// Input source path
    // Handed to `DataLoader::new` in `main`.
    #[arg(long, default_value_t = String::from("../../ultralytics/assets/bus.jpg"))]
    pub source: String,

    /// YOLO Task
    #[arg(long, value_enum, default_value_t = YOLOTask::Detect)]
    pub task: YOLOTask,

    /// YOLO Version
    #[arg(long, value_enum, default_value_t = YOLOVersion::V8)]
    pub ver: YOLOVersion,

    /// YOLO Scale
    // Only used to build the default model path / output name when `--model` is not given.
    #[arg(long, value_enum, default_value_t = YOLOScale::N)]
    pub scale: YOLOScale,

    /// Batch size
    // Used as the middle value of the `(1, batch_size, 4)` triple passed to `with_ixx(0, 0, ..)`.
    #[arg(long, default_value_t = 1)]
    pub batch_size: usize,

    // The three width values are passed together as `(width_min, width, width_max)`
    // to `with_ixx(0, 3, ..)` in `main`.

    /// Minimum input width
    #[arg(long, default_value_t = 224)]
    pub width_min: isize,

    /// Input width
    #[arg(long, default_value_t = 640)]
    pub width: isize,

    /// Maximum input width
    #[arg(long, default_value_t = 1024)]
    pub width_max: isize,

    // The three height values are passed together as `(height_min, height, height_max)`
    // to `with_ixx(0, 2, ..)` in `main`.

    /// Minimum input height
    #[arg(long, default_value_t = 224)]
    pub height_min: isize,

    /// Input height
    #[arg(long, default_value_t = 640)]
    pub height: isize,

    /// Maximum input height
    #[arg(long, default_value_t = 1024)]
    pub height_max: isize,

    /// Number of classes
    #[arg(long, default_value_t = 80)]
    pub nc: usize,

    /// Class confidence
    // When no value is supplied, `main` uses `[0.2, 0.15]` instead.
    #[arg(long)]
    pub confs: Vec<f32>,

    // Execution-provider switches. `main` checks them in the order
    // `cuda`, `trt`, `coreml`; if none is set the CPU device is used.

    /// Enable TensorRT support
    #[arg(long)]
    pub trt: bool,

    /// Enable CUDA support
    #[arg(long)]
    pub cuda: bool,

    /// Enable CoreML support
    #[arg(long)]
    pub coreml: bool,

    /// Use TensorRT half precision
    // Forwarded to `with_trt_fp16`.
    #[arg(long)]
    pub half: bool,

    /// Device ID to use
    // Passed to whichever `Device` variant is selected, including `Device::Cpu`.
    #[arg(long, default_value_t = 0)]
    pub device_id: usize,

    /// Enable performance profiling
    // Also turns on the INFO-level `tracing` subscriber in `main`.
    #[arg(long)]
    pub profile: bool,

    /// Disable contour drawing, for saving time
    // Inverted and forwarded to `with_find_contours`.
    #[arg(long)]
    pub no_contours: bool,

    /// Show result
    // Creates a `Viewer` window in `main`.
    #[arg(long)]
    pub view: bool,

    /// Do not save output
    // Disables both the annotator's saving and the viewer's video writing.
    #[arg(long)]
    pub nosave: bool,
}
||||
|
||||
fn main() -> Result<()> { |
||||
let args = Args::parse(); |
||||
|
||||
// logger
|
||||
if args.profile { |
||||
tracing_subscriber::fmt() |
||||
.with_max_level(tracing::Level::INFO) |
||||
.init(); |
||||
} |
||||
|
||||
// model path
|
||||
let path = match &args.model { |
||||
None => format!( |
||||
"yolo/{}-{}-{}.onnx", |
||||
args.ver.name(), |
||||
args.scale.name(), |
||||
args.task.name() |
||||
), |
||||
Some(x) => x.to_string(), |
||||
}; |
||||
|
||||
// saveout
|
||||
let saveout = match &args.model { |
||||
None => format!( |
||||
"{}-{}-{}", |
||||
args.ver.name(), |
||||
args.scale.name(), |
||||
args.task.name() |
||||
), |
||||
Some(x) => { |
||||
let p = std::path::PathBuf::from(&x); |
||||
p.file_stem().unwrap().to_str().unwrap().to_string() |
||||
} |
||||
}; |
||||
|
||||
// device
|
||||
let device = if args.cuda { |
||||
Device::Cuda(args.device_id) |
||||
} else if args.trt { |
||||
Device::Trt(args.device_id) |
||||
} else if args.coreml { |
||||
Device::CoreML(args.device_id) |
||||
} else { |
||||
Device::Cpu(args.device_id) |
||||
}; |
||||
|
||||
// build options
|
||||
let options = Options::new() |
||||
.with_model(&path)? |
||||
.with_yolo_version(args.ver) |
||||
.with_yolo_task(args.task) |
||||
.with_device(device) |
||||
.with_trt_fp16(args.half) |
||||
.with_ixx(0, 0, (1, args.batch_size as _, 4).into()) |
||||
.with_ixx(0, 2, (args.height_min, args.height, args.height_max).into()) |
||||
.with_ixx(0, 3, (args.width_min, args.width, args.width_max).into()) |
||||
.with_confs(if args.confs.is_empty() { |
||||
&[0.2, 0.15] |
||||
} else { |
||||
&args.confs |
||||
}) |
||||
.with_nc(args.nc) |
||||
.with_find_contours(!args.no_contours) // find contours or not
|
||||
// .with_names(&COCO_CLASS_NAMES_80) // detection class names
|
||||
// .with_names2(&COCO_KEYPOINTS_17) // keypoints class names
|
||||
// .exclude_classes(&[0])
|
||||
// .retain_classes(&[0, 5])
|
||||
.with_profile(args.profile); |
||||
|
||||
// build model
|
||||
let mut model = YOLO::new(options)?; |
||||
|
||||
// build dataloader
|
||||
let dl = DataLoader::new(&args.source)? |
||||
.with_batch(model.batch() as _) |
||||
.build()?; |
||||
|
||||
// build annotator
|
||||
let annotator = Annotator::default() |
||||
.with_skeletons(&COCO_SKELETONS_16) |
||||
.without_masks(true) // no masks plotting when doing segment task
|
||||
.with_bboxes_thickness(3) |
||||
.with_keypoints_name(false) // enable keypoints names
|
||||
.with_saveout_subs(&["YOLO"]) |
||||
.with_saveout(&saveout); |
||||
|
||||
// build viewer
|
||||
let mut viewer = if args.view { |
||||
Some(Viewer::new().with_delay(5).with_scale(1.).resizable(true)) |
||||
} else { |
||||
None |
||||
}; |
||||
|
||||
// run & annotate
|
||||
for (xs, _paths) in dl { |
||||
let ys = model.forward(&xs, args.profile)?; |
||||
let images_plotted = annotator.plot(&xs, &ys, !args.nosave)?; |
||||
|
||||
// show image
|
||||
match &mut viewer { |
||||
Some(viewer) => viewer.imshow(&images_plotted)?, |
||||
None => continue, |
||||
} |
||||
|
||||
// check out window and key event
|
||||
match &mut viewer { |
||||
Some(viewer) => { |
||||
if !viewer.is_open() || viewer.is_key_pressed(usls::Key::Escape) { |
||||
break; |
||||
} |
||||
} |
||||
None => continue, |
||||
} |
||||
|
||||
// write video
|
||||
if !args.nosave { |
||||
match &mut viewer { |
||||
Some(viewer) => viewer.write_batch(&images_plotted)?, |
||||
None => continue, |
||||
} |
||||
} |
||||
} |
||||
|
||||
// finish video write
|
||||
if !args.nosave { |
||||
if let Some(viewer) = &mut viewer { |
||||
viewer.finish_write()?; |
||||
} |
||||
} |
||||
|
||||
Ok(()) |
||||
} |
@ -1,65 +0,0 @@ |
||||
# YOLOv8 - Int8-TFLite Runtime |
||||
|
||||
This project runs efficient, optimized object detection with a YOLOv8 Int8 TFLite model. This README provides instructions for installing and using the implementation. |
||||
|
||||
## Installation |
||||
|
||||
Ensure a smooth setup by following these steps to install necessary dependencies. |
||||
|
||||
### Installing Required Dependencies |
||||
|
||||
Install all required dependencies with this simple command: |
||||
|
||||
```bash |
||||
pip install -r requirements.txt |
||||
``` |
||||
|
||||
### Installing `tflite-runtime` |
||||
|
||||
To load TFLite models, install the `tflite-runtime` package using: |
||||
|
||||
```bash |
||||
pip install tflite-runtime |
||||
``` |
||||
|
||||
### Installing `tensorflow-gpu` (For NVIDIA GPU Users) |
||||
|
||||
Leverage GPU acceleration with NVIDIA GPUs by installing `tensorflow-gpu`: |
||||
|
||||
```bash |
||||
pip install tensorflow-gpu |
||||
``` |
||||
|
||||
**Note:** Ensure you have compatible GPU drivers installed on your system. |
||||
|
||||
### Installing `tensorflow` (CPU Version) |
||||
|
||||
For CPU usage or non-NVIDIA GPUs, install TensorFlow with: |
||||
|
||||
```bash |
||||
pip install tensorflow |
||||
``` |
||||
|
||||
## Usage |
||||
|
||||
Follow these instructions to run YOLOv8 after successful installation. |
||||
|
||||
Convert the YOLOv8 model to Int8 TFLite format: |
||||
|
||||
```bash |
||||
yolo export model=yolov8n.pt imgsz=640 format=tflite int8 |
||||
``` |
||||
|
||||
Locate the Int8 TFLite model in `yolov8n_saved_model`. Choose `yolov8n_full_integer_quant.tflite`, or verify the quantization with [Netron](https://netron.app/). Then, execute the following in your terminal: |
||||
|
||||
```bash |
||||
python main.py --model yolov8n_full_integer_quant.tflite --img image.jpg --conf-thres 0.5 --iou-thres 0.5 |
||||
``` |
||||
|
||||
Replace `yolov8n_full_integer_quant.tflite` with your model file's path, `image.jpg` with your input image, and adjust the confidence (conf-thres) and IoU thresholds (iou-thres) as necessary. |
||||
|
||||
### Output |
||||
|
||||
The output is displayed as annotated images, showcasing the model's detection capabilities: |
||||
|
||||
![image](https://github.com/wamiqraza/Attribute-recognition-and-reidentification-Market1501-dataset/blob/main/img/bus.jpg) |
@ -1,308 +0,0 @@ |
||||
# Ultralytics YOLO 🚀, AGPL-3.0 license |
||||
|
||||
import argparse |
||||
|
||||
import cv2 |
||||
import numpy as np |
||||
from tflite_runtime import interpreter as tflite |
||||
|
||||
from ultralytics.utils import ASSETS, yaml_load |
||||
from ultralytics.utils.checks import check_yaml |
||||
|
||||
# Model input size, declared as module-level globals; update these to match the image size the model was trained/exported with |
||||
img_width = 640 |
||||
img_height = 640 |
||||
|
||||
|
||||
class LetterBox:
    """Aspect-ratio-preserving resize that pads the image up to a target shape for YOLO models."""

    def __init__(
        self, new_shape=(img_width, img_height), auto=False, scaleFill=False, scaleup=True, center=True, stride=32
    ):
        """Store the letterbox settings (target shape, padding mode, scaling policy and stride)."""
        self.new_shape, self.auto, self.scaleFill = new_shape, auto, scaleFill
        self.scaleup, self.stride = scaleup, stride
        # True: pad evenly on both sides; False: keep the image in the top-left corner
        self.center = center

    def __call__(self, labels=None, image=None):
        """Letterbox `image` (or `labels["img"]`) and return the image, or the updated labels dict if non-empty."""
        labels = {} if labels is None else labels
        img = image if image is not None else labels.get("img")
        src_h, src_w = img.shape[:2]  # current size

        new_shape = labels.pop("rect_shape", self.new_shape)
        if isinstance(new_shape, int):
            new_shape = (new_shape, new_shape)
        dst_h, dst_w = new_shape[0], new_shape[1]

        # Scale factor (new / old); optionally never enlarge (for better val mAP)
        r = min(dst_h / src_h, dst_w / src_w)
        if not self.scaleup:
            r = min(r, 1.0)

        # Resized (unpadded) size and the total padding still required
        ratio = (r, r)  # width, height ratios
        new_unpad = (int(round(src_w * r)), int(round(src_h * r)))
        dw = dst_w - new_unpad[0]
        dh = dst_h - new_unpad[1]
        if self.auto:
            # minimum rectangle: only pad up to the next stride multiple
            dw, dh = np.mod(dw, self.stride), np.mod(dh, self.stride)
        elif self.scaleFill:
            # stretch to the target, no padding at all
            dw, dh = 0.0, 0.0
            new_unpad = (dst_w, dst_h)
            ratio = (dst_w / src_w, dst_h / src_h)

        if self.center:
            # split the padding between the two sides
            dw, dh = dw / 2, dh / 2

        if (src_w, src_h) != new_unpad:
            img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)

        if self.center:
            top, left = int(round(dh - 0.1)), int(round(dw - 0.1))
        else:
            top, left = 0, 0
        bottom, right = int(round(dh + 0.1)), int(round(dw + 0.1))
        img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114))

        if labels.get("ratio_pad"):
            labels["ratio_pad"] = (labels["ratio_pad"], (left, top))  # for evaluation

        if len(labels) == 0:
            return img

        labels = self._update_labels(labels, ratio, dw, dh)
        labels["img"] = img
        labels["resized_shape"] = new_shape
        return labels

    def _update_labels(self, labels, ratio, padw, padh):
        """Convert the instances to pixel xyxy boxes, then apply the scale ratio and padding offsets."""
        instances = labels["instances"]
        instances.convert_bbox(format="xyxy")
        img_w, img_h = labels["img"].shape[:2][::-1]
        instances.denormalize(img_w, img_h)
        instances.scale(*ratio)
        instances.add_padding(padw, padh)
        return labels
||||
|
||||
|
||||
class Yolov8TFLite:
    """
    Object detection with a YOLOv8 model converted to TensorFlow Lite format.

    Typical use is `Yolov8TFLite(model, image, conf, iou).main()`, which loads the model, preprocesses the image,
    runs inference and returns the original image with the detections drawn on it.
    """

    def __init__(self, tflite_model, input_image, confidence_thres, iou_thres):
        """
        Initializes an instance of the Yolov8TFLite class.

        Args:
            tflite_model: Path to the TFLite model.
            input_image: Path to the input image.
            confidence_thres: Confidence threshold for filtering detections.
            iou_thres: IoU (Intersection over Union) threshold for non-maximum suppression.
        """
        self.tflite_model = tflite_model
        self.input_image = input_image
        self.confidence_thres = confidence_thres
        self.iou_thres = iou_thres

        # Class names are read from the `names` entry of coco8.yaml
        self.classes = yaml_load(check_yaml("coco8.yaml"))["names"]

        # One random color per class
        self.color_palette = np.random.uniform(0, 255, size=(len(self.classes), 3))

    def draw_detections(self, img, box, score, class_id):
        """
        Draws a bounding box and its label on the image (in place).

        Args:
            img: The image to draw on.
            box: Bounding box as (x1, y1, width, height), with (x1, y1) the top-left corner.
            score: Detection score shown in the label.
            class_id: Class ID used to look up the class name and color.

        Returns:
            None
        """
        x1, y1, w, h = box
        color = self.color_palette[class_id]

        # Bounding box
        cv2.rectangle(img, (int(x1), int(y1)), (int(x1 + w), int(y1 + h)), color, 2)

        # Label text and its rendered size
        label = f"{self.classes[class_id]}: {score:.2f}"
        (label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)

        # Put the label above the box when there is room, otherwise just below its top edge
        label_x = x1
        label_y = y1 - 10 if y1 - 10 > label_height else y1 + 10

        # Filled background behind the label text
        cv2.rectangle(
            img,
            (int(label_x), int(label_y - label_height)),
            (int(label_x + label_width), int(label_y + label_height)),
            color,
            cv2.FILLED,
        )
        cv2.putText(img, label, (int(label_x), int(label_y)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)

    def preprocess(self):
        """
        Reads and preprocesses the input image before performing inference.

        Stores the image in `self.img` and its size in `self.img_height` / `self.img_width`.

        Returns:
            image_data: Letterboxed image as a float32 array of shape (1, C, H, W), channels reversed, scaled by 1/255.

        Raises:
            FileNotFoundError: If the image cannot be read from `self.input_image`.
        """
        self.img = cv2.imread(self.input_image)
        if self.img is None:  # cv2.imread signals failure by returning None
            raise FileNotFoundError(f"Could not read image: {self.input_image}")

        # Size of the original image, needed to map boxes back in postprocess()
        self.img_height, self.img_width = self.img.shape[:2]

        # LetterBox interprets new_shape as (height, width)
        letterbox = LetterBox(new_shape=(img_height, img_width), auto=False, stride=32)
        image = letterbox(image=self.img)

        # Add the batch axis, reverse the channel order, then move channels first (N, C, H, W)
        image = image[None][..., ::-1].transpose((0, 3, 1, 2))
        image = np.ascontiguousarray(image).astype(np.float32)
        return image / 255

    def postprocess(self, input_image, output):
        """
        Extracts boxes, scores and class IDs from the model output, applies NMS and draws the kept detections.

        Requires `preprocess()` to have run, since it relies on `self.img_width` / `self.img_height`. Boxes are
        expected in letterboxed model-input pixels and are mapped back to the original image before drawing.

        Args:
            input_image (numpy.ndarray): The original image to draw on.
            output (numpy.ndarray): Model output, one (4 + num_classes, num_candidates) array per batch item.

        Returns:
            numpy.ndarray: The input image with detections drawn on it.
        """
        boxes, scores, class_ids = [], [], []
        for pred in output:
            pred = np.transpose(pred)  # -> (num_candidates, 4 + num_classes)
            x, y, w, h = pred[:, 0], pred[:, 1], pred[:, 2], pred[:, 3]
            boxes.extend(np.column_stack([x - w / 2, y - h / 2, w, h]))  # center xywh -> top-left xywh

            # Best class and its score for every candidate
            idx = np.argmax(pred[:, 4:], axis=1)
            scores.extend(pred[np.arange(pred.shape[0]), idx + 4])
            class_ids.extend(idx)

        # Inverse of the letterbox transform: model-input size (module globals) vs. original image size.
        # The local names must not shadow img_width/img_height, otherwise gain is always 1 and pad always 0.
        gain = min(img_width / self.img_width, img_height / self.img_height)
        pad_x = round((img_width - self.img_width * gain) / 2 - 0.1)
        pad_y = round((img_height - self.img_height * gain) / 2 - 0.1)

        # NMS also applies the confidence threshold, so no extra score filter is needed afterwards
        indices = cv2.dnn.NMSBoxes(boxes, scores, self.confidence_thres, self.iou_thres)

        # NMSBoxes may return an empty tuple when nothing is kept; normalise to a flat index array
        for i in np.asarray(indices, dtype=int).reshape(-1):
            x1, y1, w, h = boxes[i]
            box = [(x1 - pad_x) / gain, (y1 - pad_y) / gain, w / gain, h / gain]
            self.draw_detections(input_image, box, scores[i], class_ids[i])

        return input_image

    def main(self):
        """
        Performs inference using a TFLite model and returns the output image with drawn detections.

        Returns:
            output_img: The output image with drawn detections.
        """
        # Create the interpreter and allocate its tensors
        interpreter = tflite.Interpreter(model_path=self.tflite_model)
        self.model = interpreter
        interpreter.allocate_tensors()

        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()

        # The input tensor is fed as (N, H, W, C) below, so axis 1 is height and axis 2 is width
        input_shape = input_details[0]["shape"]
        self.input_height = input_shape[1]
        self.input_width = input_shape[2]

        # preprocess() returns (N, C, H, W); the interpreter is fed (N, H, W, C)
        img_data = self.preprocess().transpose((0, 2, 3, 1))

        # Quantize the input when the tensor is quantized (non-zero scale): q = round(x / scale) + zero_point,
        # clipped to the range of the tensor's integer dtype. Float models are fed as-is.
        scale, zero_point = input_details[0]["quantization"]
        in_dtype = input_details[0]["dtype"]
        if scale:
            limits = np.iinfo(in_dtype)
            img_data = np.clip(np.round(img_data / scale + zero_point), limits.min, limits.max)
        interpreter.set_tensor(input_details[0]["index"], img_data.astype(in_dtype))

        # Run inference
        interpreter.invoke()

        # Fetch the output and dequantize it when it is quantized
        output = interpreter.get_tensor(output_details[0]["index"]).astype(np.float32)
        scale, zero_point = output_details[0]["quantization"]
        if scale:
            output = (output - zero_point) * scale

        # Scale the box rows (x, w by width; y, h by height) to model-input pixels
        # NOTE(review): assumes the exported model emits normalized xywh — confirm for your export
        output[:, [0, 2]] *= img_width
        output[:, [1, 3]] *= img_height

        # Perform post-processing on the outputs to obtain output image.
        return self.postprocess(self.img, output)
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line interface: (flag, type, default, help) for each option
    cli = argparse.ArgumentParser()
    option_specs = (
        ("--model", str, "yolov8n_full_integer_quant.tflite", "Input your TFLite model."),
        ("--img", str, str(ASSETS / "bus.jpg"), "Path to input image."),
        ("--conf-thres", float, 0.5, "Confidence threshold"),
        ("--iou-thres", float, 0.5, "NMS IoU threshold"),
    )
    for flag, kind, fallback, description in option_specs:
        cli.add_argument(flag, type=kind, default=fallback, help=description)
    opts = cli.parse_args()

    # Build the detector from the parsed options and run it on the image
    detector = Yolov8TFLite(opts.model, opts.img, opts.conf_thres, opts.iou_thres)
    annotated = detector.main()

    # Show the annotated image and block until a key is pressed
    cv2.imshow("Output", annotated)
    cv2.waitKey(0)
@ -0,0 +1,55 @@ |
||||
# YOLOv8 - TFLite Runtime |
||||
|
||||
This example shows how to run inference with a YOLOv8 TFLite model. It supports FP32, FP16, and INT8 models. |
||||
|
||||
## Installation |
||||
|
||||
### Installing `tflite-runtime` |
||||
|
||||
To load TFLite models, install the `tflite-runtime` package using: |
||||
|
||||
```bash |
||||
pip install tflite-runtime |
||||
``` |
||||
|
||||
### Installing `tensorflow-gpu` (For NVIDIA GPU Users) |
||||
|
||||
Leverage GPU acceleration with NVIDIA GPUs by installing `tensorflow-gpu`: |
||||
|
||||
```bash |
||||
pip install tensorflow-gpu |
||||
``` |
||||
|
||||
**Note:** Ensure you have compatible GPU drivers installed on your system. |
||||
|
||||
### Installing `tensorflow` (CPU Version) |
||||
|
||||
For CPU usage or non-NVIDIA GPUs, install TensorFlow with: |
||||
|
||||
```bash |
||||
pip install tensorflow |
||||
``` |
||||
|
||||
## Usage |
||||
|
||||
Follow these instructions to run YOLOv8 after successful installation. |
||||
|
||||
Convert the YOLOv8 model to TFLite format: |
||||
|
||||
```bash |
||||
yolo export model=yolov8n.pt imgsz=640 format=tflite int8 |
||||
``` |
||||
|
||||
Locate the TFLite model in `yolov8n_saved_model`. Then, execute the following in your terminal: |
||||
|
||||
```bash |
||||
python main.py --model yolov8n_full_integer_quant.tflite --img image.jpg --conf 0.25 --iou 0.45 --metadata "metadata.yaml" |
||||
``` |
||||
|
||||
Replace `yolov8n_full_integer_quant.tflite` with the TFLite model path, `image.jpg` with the input image path, and `metadata.yaml` with the file generated by `ultralytics` during export, and adjust the confidence (conf) and IoU (iou) thresholds as necessary. |
||||
|
||||
### Output |
||||
|
||||
The output shows the detections along with the class label and confidence of each detected object. |
||||
|
||||
![image](https://github.com/wamiqraza/Attribute-recognition-and-reidentification-Market1501-dataset/blob/main/img/bus.jpg) |
@ -0,0 +1,221 @@ |
||||
# Ultralytics YOLO 🚀, AGPL-3.0 license |
||||
|
||||
import argparse |
||||
from typing import Tuple, Union |
||||
|
||||
import cv2 |
||||
import numpy as np |
||||
import tensorflow as tf |
||||
import yaml |
||||
|
||||
from ultralytics.utils import ASSETS |
||||
|
||||
try: |
||||
from tflite_runtime.interpreter import Interpreter |
||||
except ImportError: |
||||
import tensorflow as tf |
||||
|
||||
Interpreter = tf.lite.Interpreter |
||||
|
||||
|
||||
class YOLOv8TFLite: |
||||
""" |
||||
YOLOv8TFLite. |
||||
|
||||
A class for performing object detection using the YOLOv8 model with TensorFlow Lite. |
||||
|
||||
Attributes: |
||||
model (str): Path to the TensorFlow Lite model file. |
||||
conf (float): Confidence threshold for filtering detections. |
||||
iou (float): Intersection over Union threshold for non-maximum suppression. |
||||
metadata (Optional[str]): Path to the metadata file, if any. |
||||
|
||||
Methods: |
||||
detect(img_path: str) -> np.ndarray: |
||||
Performs inference and returns the output image with drawn detections. |
||||
""" |
||||
|
||||
def __init__(self, model: str, conf: float = 0.25, iou: float = 0.45, metadata: Union[str, None] = None): |
||||
""" |
||||
Initializes an instance of the YOLOv8TFLite class. |
||||
|
||||
Args: |
||||
model (str): Path to the TFLite model. |
||||
conf (float, optional): Confidence threshold for filtering detections. Defaults to 0.25. |
||||
iou (float, optional): IoU (Intersection over Union) threshold for non-maximum suppression. Defaults to 0.45. |
||||
metadata (Union[str, None], optional): Path to the metadata file or None if not used. Defaults to None. |
||||
""" |
||||
self.conf = conf |
||||
self.iou = iou |
||||
if metadata is None: |
||||
self.classes = {i: i for i in range(1000)} |
||||
else: |
||||
with open(metadata) as f: |
||||
self.classes = yaml.safe_load(f)["names"] |
||||
np.random.seed(42) |
||||
self.color_palette = np.random.uniform(128, 255, size=(len(self.classes), 3)) |
||||
|
||||
self.model = Interpreter(model_path=model) |
||||
self.model.allocate_tensors() |
||||
|
||||
input_details = self.model.get_input_details()[0] |
||||
|
||||
self.in_width, self.in_height = input_details["shape"][1:3] |
||||
self.in_index = input_details["index"] |
||||
self.in_scale, self.in_zero_point = input_details["quantization"] |
||||
self.int8 = input_details["dtype"] == np.int8 |
||||
|
||||
output_details = self.model.get_output_details()[0] |
||||
self.out_index = output_details["index"] |
||||
self.out_scale, self.out_zero_point = output_details["quantization"] |
||||
|
||||
def letterbox(self, img: np.ndarray, new_shape: Tuple = (640, 640)) -> Tuple[np.ndarray, Tuple[float, float]]: |
||||
"""Resizes and reshapes images while maintaining aspect ratio by adding padding, suitable for YOLO models.""" |
||||
shape = img.shape[:2] # current shape [height, width] |
||||
|
||||
# Scale ratio (new / old) |
||||
r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) |
||||
|
||||
# Compute padding |
||||
new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) |
||||
dw, dh = (new_shape[1] - new_unpad[0]) / 2, (new_shape[0] - new_unpad[1]) / 2 # wh padding |
||||
|
||||
if shape[::-1] != new_unpad: # resize |
||||
img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) |
||||
top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) |
||||
left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) |
||||
img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114)) |
||||
|
||||
return img, (top / img.shape[0], left / img.shape[1]) |
||||
|
||||
def draw_detections(self, img: np.ndarray, box: np.ndarray, score: np.float32, class_id: int) -> None:
    """
    Draw one detection (bounding box plus a filled label tag) onto the image in place.

    Args:
        img (np.ndarray): Image to draw on; it is modified in place.
        box (np.ndarray): Bounding box given as [x1, y1, width, height].
        score (np.float32): Confidence score shown in the label.
        class_id (int): Class index used to look up the class name and the box color.

    Returns:
        None
    """
    left, top, width, height = box
    box_color = self.color_palette[class_id]

    # Box outline
    top_left = (int(left), int(top))
    bottom_right = (int(left + width), int(top + height))
    cv2.rectangle(img, top_left, bottom_right, box_color, 2)

    # Label text and its rendered size
    caption = f"{self.classes[class_id]}: {score:.2f}"
    font = cv2.FONT_HERSHEY_SIMPLEX
    (text_w, text_h), _ = cv2.getTextSize(caption, font, 0.5, 1)

    # Place the label above the box unless that would run off the top of the image
    text_x = left
    if top - 10 > text_h:
        text_y = top - 10
    else:
        text_y = top + 10

    # Filled background behind the text, then the text itself in black
    tag_top_left = (int(text_x), int(text_y - text_h))
    tag_bottom_right = (int(text_x + text_w), int(text_y + text_h))
    cv2.rectangle(img, tag_top_left, tag_bottom_right, box_color, cv2.FILLED)
    cv2.putText(img, caption, (int(text_x), int(text_y)), font, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
||||
|
||||
def preprocess(self, img: np.ndarray) -> Tuple[np.ndarray, Tuple[float, float]]:
    """
    Turn an image into a batched float32 tensor scaled to [0, 1] for the interpreter.

    Args:
        img (np.ndarray): The image to prepare (channel order is reversed here, e.g. BGR -> RGB).

    Returns:
        Tuple[np.ndarray, Tuple[float, float]]: A tuple containing:
            - The letterboxed image as a contiguous float32 array with a leading batch axis, divided by 255.
            - The padding tuple exactly as returned by `letterbox`.
    """
    target_shape = (self.in_width, self.in_height)
    padded, pad = self.letterbox(img, target_shape)

    # Reverse the channel axis and prepend a batch axis -> N,H,W,C for TFLite
    batched = padded[..., ::-1][None]
    tensor = np.ascontiguousarray(batched).astype(np.float32)
    return tensor / 255, pad
||||
|
||||
def postprocess(self, img: np.ndarray, outputs: np.ndarray, pad: Tuple[float, float]) -> np.ndarray:
    """
    Convert raw model output into boxes, filter them with NMS and draw the survivors on the image.

    Args:
        img (numpy.ndarray): The original input image; detections are drawn on it in place.
        outputs (numpy.ndarray): Model output, indexed as (batch, channel, anchor) where channels 0-3 hold the
            box (center x, center y, width, height) and channels 4+ hold per-class scores. Modified in place.
        pad (Tuple[float, float]): (top, left) padding returned by `letterbox`.

    Returns:
        numpy.ndarray: The input image with detections drawn on it.
    """
    # Remove the letterbox offset from the box centers, then scale boxes by the longest image side.
    # NOTE(review): this presumes box coordinates are normalized to a square model input - confirm for other exports.
    outputs[:, 0] -= pad[1]
    outputs[:, 1] -= pad[0]
    outputs[:, :4] *= max(img.shape)

    # (batch, channel, anchor) -> (batch, anchor, channel), then center-based boxes -> top-left based boxes
    outputs = outputs.transpose(0, 2, 1)
    outputs[..., 0] -= outputs[..., 2] / 2
    outputs[..., 1] -= outputs[..., 3] / 2

    for out in outputs:
        scores = out[:, 4:].max(-1)
        keep = scores > self.conf
        if not keep.any():
            # Nothing above the confidence threshold: skip NMS entirely
            continue

        boxes = out[keep, :4]
        class_ids = out[keep, 4:].argmax(-1)
        scores = scores[keep]

        # NMSBoxes may return an ndarray or an empty tuple depending on the OpenCV version and result,
        # so normalize to a flat integer array before iterating.
        indices = cv2.dnn.NMSBoxes(boxes, scores, self.conf, self.iou)
        for i in np.asarray(indices, dtype=int).reshape(-1):
            self.draw_detections(img, boxes[i], scores[i], class_ids[i])

    return img
||||
|
||||
def detect(self, img_path: str) -> np.ndarray:
    """
    Run inference on an image file with the TFLite model and return the image with detections drawn.

    Args:
        img_path (str): The path to the input image file.

    Returns:
        np.ndarray: The image loaded from `img_path` with detections drawn on it.

    Raises:
        FileNotFoundError: If the image cannot be read from `img_path`.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising
        raise FileNotFoundError(f"Image not found or unreadable: {img_path}")

    x, pad = self.preprocess(img)
    if self.int8:
        # Quantize the float input: round to nearest and saturate so values cannot truncate or wrap around
        limits = np.iinfo(np.int8)
        x = np.clip(np.round(x / self.in_scale + self.in_zero_point), limits.min, limits.max).astype(np.int8)
    self.model.set_tensor(self.in_index, x)

    self.model.invoke()

    y = self.model.get_tensor(self.out_index)

    if self.int8:
        # Dequantize the output back to float before post-processing
        y = (y.astype(np.float32) - self.out_zero_point) * self.out_scale

    return self.postprocess(img, y, pad)
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: load the model, run detection on one image and show the result
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model",
        type=str,
        default="yolov8n_saved_model/yolov8n_full_integer_quant.tflite",
        help="Path to TFLite model.",
    )
    parser.add_argument("--img", type=str, default=str(ASSETS / "bus.jpg"), help="Path to input image")
    parser.add_argument("--conf", type=float, default=0.25, help="Confidence threshold")
    parser.add_argument("--iou", type=float, default=0.45, help="NMS IoU threshold")
    parser.add_argument("--metadata", type=str, default="yolov8n_saved_model/metadata.yaml", help="Metadata yaml")
    args = parser.parse_args()

    detector = YOLOv8TFLite(args.model, args.conf, args.iou, args.metadata)

    # Honor the --img argument. The returned image comes from cv2.imread (BGR), which is the channel order
    # cv2.imshow expects, so it is displayed without reversing the channels.
    result = detector.detect(args.img)

    cv2.imshow("Output", result)
    cv2.waitKey(0)
Loading…
Reference in new issue