From fcfc44ea9cf529685e0afaba6e62f7d2d40c84d8 Mon Sep 17 00:00:00 2001 From: Burhan <62214284+Burhan-Q@users.noreply.github.com> Date: Wed, 8 May 2024 10:05:57 -0400 Subject: [PATCH] `ultralytics 8.2.11` new TensorRT INT8 export feature (#10165) Co-authored-by: UltralyticsAssistant Co-authored-by: Glenn Jocher --- docs/en/hub/integrations.md | 30 +-- docs/en/integrations/index.md | 30 +-- docs/en/integrations/tensorrt.md | 333 +++++++++++++++++++++++++++++++ docs/en/modes/benchmark.md | 30 +-- docs/en/modes/export.md | 36 ++-- docs/en/tasks/classify.md | 30 +-- docs/en/tasks/detect.md | 30 +-- docs/en/tasks/obb.md | 30 +-- docs/en/tasks/pose.md | 30 +-- docs/en/tasks/segment.md | 30 +-- docs/en/usage/cli.md | 30 +-- examples/tutorial.ipynb | 30 +-- tests/test_cuda.py | 32 +++ ultralytics/__init__.py | 2 +- ultralytics/engine/exporter.py | 74 ++++++- 15 files changed, 601 insertions(+), 176 deletions(-) diff --git a/docs/en/hub/integrations.md b/docs/en/hub/integrations.md index 8ed38c1952..183d8c7c28 100644 --- a/docs/en/hub/integrations.md +++ b/docs/en/hub/integrations.md @@ -31,21 +31,21 @@ Welcome to the Integrations guide for [Ultralytics HUB](https://hub.ultralytics. Available export formats are in the table below. You can predict or validate directly on exported models using the `ultralytics` Python package, i.e. `yolo predict model=yolov8n.onnx`. 
-| Format | `format` Argument | Model | Metadata | Arguments | -|---------------------------------------------------|-------------------|---------------------------|----------|--------------------------------------------------------------| -| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - | -| [TorchScript](../integrations/torchscript.md) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize`, `batch` | -| [ONNX](../integrations/onnx.md) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset`, `batch` | -| [OpenVINO](../integrations/openvino.md) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` | -| [TensorRT](../integrations/tensorrt.md) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace`, `batch` | -| [CoreML](../integrations/coreml.md) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms`, `batch` | -| [TF SavedModel](../integrations/tf-savedmodel.md) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras`, `int8`, `batch` | -| [TF GraphDef](../integrations/tf-graphdef.md) | `pb` | `yolov8n.pb` | ❌ | `imgsz`, `batch` | -| [TF Lite](../integrations/tflite.md) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8`, `batch` | -| [TF Edge TPU](../integrations/edge-tpu.md) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz`, `batch` | -| [TF.js](../integrations/tfjs.md) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` | -| [PaddlePaddle](../integrations/paddlepaddle.md) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz`, `batch` | -| [NCNN](../integrations/ncnn.md) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half`, `batch` | +| Format | `format` Argument | Model | Metadata | Arguments | +|---------------------------------------------------|-------------------|---------------------------|----------|----------------------------------------------------------------------| +| 
[PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - | +| [TorchScript](../integrations/torchscript.md) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize`, `batch` | +| [ONNX](../integrations/onnx.md) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset`, `batch` | +| [OpenVINO](../integrations/openvino.md) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` | +| [TensorRT](../integrations/tensorrt.md) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace`, `int8`, `batch` | +| [CoreML](../integrations/coreml.md) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms`, `batch` | +| [TF SavedModel](../integrations/tf-savedmodel.md) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras`, `int8`, `batch` | +| [TF GraphDef](../integrations/tf-graphdef.md) | `pb` | `yolov8n.pb` | ❌ | `imgsz`, `batch` | +| [TF Lite](../integrations/tflite.md) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8`, `batch` | +| [TF Edge TPU](../integrations/edge-tpu.md) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz`, `batch` | +| [TF.js](../integrations/tfjs.md) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` | +| [PaddlePaddle](../integrations/paddlepaddle.md) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz`, `batch` | +| [NCNN](../integrations/ncnn.md) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half`, `batch` | ## Coming Soon diff --git a/docs/en/integrations/index.md b/docs/en/integrations/index.md index 91f5c33078..5d5aeb6bc7 100644 --- a/docs/en/integrations/index.md +++ b/docs/en/integrations/index.md @@ -85,21 +85,21 @@ Welcome to the Ultralytics Integrations page! This page provides an overview of We also support a variety of model export formats for deployment in different environments. 
Here are the available formats: -| Format | `format` Argument | Model | Metadata | Arguments | -|---------------------------------------------------|-------------------|---------------------------|----------|--------------------------------------------------------------| -| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - | -| [TorchScript](../integrations/torchscript.md) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize`, `batch` | -| [ONNX](../integrations/onnx.md) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset`, `batch` | -| [OpenVINO](../integrations/openvino.md) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` | -| [TensorRT](../integrations/tensorrt.md) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace`, `batch` | -| [CoreML](../integrations/coreml.md) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms`, `batch` | -| [TF SavedModel](../integrations/tf-savedmodel.md) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras`, `int8`, `batch` | -| [TF GraphDef](../integrations/tf-graphdef.md) | `pb` | `yolov8n.pb` | ❌ | `imgsz`, `batch` | -| [TF Lite](../integrations/tflite.md) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8`, `batch` | -| [TF Edge TPU](../integrations/edge-tpu.md) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz`, `batch` | -| [TF.js](../integrations/tfjs.md) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` | -| [PaddlePaddle](../integrations/paddlepaddle.md) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz`, `batch` | -| [NCNN](../integrations/ncnn.md) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half`, `batch` | +| Format | `format` Argument | Model | Metadata | Arguments | +|---------------------------------------------------|-------------------|---------------------------|----------|----------------------------------------------------------------------| 
+| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - | +| [TorchScript](../integrations/torchscript.md) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize`, `batch` | +| [ONNX](../integrations/onnx.md) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset`, `batch` | +| [OpenVINO](../integrations/openvino.md) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` | +| [TensorRT](../integrations/tensorrt.md) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace`, `int8`, `batch` | +| [CoreML](../integrations/coreml.md) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms`, `batch` | +| [TF SavedModel](../integrations/tf-savedmodel.md) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras`, `int8`, `batch` | +| [TF GraphDef](../integrations/tf-graphdef.md) | `pb` | `yolov8n.pb` | ❌ | `imgsz`, `batch` | +| [TF Lite](../integrations/tflite.md) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8`, `batch` | +| [TF Edge TPU](../integrations/edge-tpu.md) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz`, `batch` | +| [TF.js](../integrations/tfjs.md) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` | +| [PaddlePaddle](../integrations/paddlepaddle.md) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz`, `batch` | +| [NCNN](../integrations/ncnn.md) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half`, `batch` | Explore the links to learn more about each integration and how to get the most out of them with Ultralytics. See full `export` details in the [Export](../modes/export.md) page. 
diff --git a/docs/en/integrations/tensorrt.md b/docs/en/integrations/tensorrt.md index aa89675242..c568759b8e 100644 --- a/docs/en/integrations/tensorrt.md +++ b/docs/en/integrations/tensorrt.md @@ -109,16 +109,349 @@ Before diving into the usage instructions, be sure to check out the range of [YO For more details about the export process, visit the [Ultralytics documentation page on exporting](../modes/export.md). +### Exporting TensorRT with INT8 Quantization + +Exporting Ultralytics YOLO models using TensorRT with INT8 precision executes post-training quantization (PTQ). TensorRT uses calibration for PTQ, which measures the distribution of activations within each activation tensor as the YOLO model processes inference on representative input data, and then uses that distribution to estimate scale values for each tensor. Each activation tensor that is a candidate for quantization has an associated scale that is deduced by a calibration process. + +When processing implicitly quantized networks TensorRT uses INT8 opportunistically to optimize layer execution time. If a layer runs faster in INT8 and has assigned quantization scales on its data inputs and outputs, then a kernel with INT8 precision is assigned to that layer, otherwise TensorRT selects a precision of either FP32 or FP16 for the kernel based on whichever results in faster execution time for that layer. + +!!! tip + + It is **critical** to ensure that the same device that will use the TensorRT model weights for deployment is used for exporting with INT8 precision, as the calibration results can vary across devices. + +#### Configuring INT8 Export + +The arguments provided when using [export](../modes/export.md) for an Ultralytics YOLO model will **greatly** influence the performance of the exported model. 
They will also need to be selected based on the device resources available; however, the default arguments _should_ work for most [Ampere (or newer) NVIDIA discrete GPUs](https://developer.nvidia.com/blog/nvidia-ampere-architecture-in-depth/). The calibration algorithm used is `"ENTROPY_CALIBRATION_2"` and you can read more details about the options available [in the TensorRT Developer Guide](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#enable_int8_c). Ultralytics tests found that `"ENTROPY_CALIBRATION_2"` was the best choice and exports are fixed to using this algorithm. + + - `workspace` : Controls the size (in GiB) of the device memory allocation while converting the model weights. + + - Aim to use the minimum `workspace` value required as this prevents testing algorithms that require more `workspace` from being considered by the TensorRT builder. Setting a higher value for `workspace` may take **considerably longer** to calibrate and export. + + - Default is `workspace=4` (GiB); this value may need to be increased if calibration crashes (exits without warning). + + - TensorRT will report `UNSUPPORTED_STATE` during export if the value for `workspace` is larger than the memory available to the device, which means the value for `workspace` should be lowered. + + - If `workspace` is set to max value and calibration fails/crashes, consider reducing the values for `imgsz` and `batch` to reduce memory requirements. + + - Remember calibration for INT8 is specific to each device; borrowing a "high-end" GPU for calibration might result in poor performance when inference is run on another device. + + - `batch` : The maximum batch-size that will be used for inference. During inference smaller batches can be used, but inference will not accept batches any larger than what is specified. + + !!! note + + During calibration, twice the `batch` size provided will be used. Using small batches can lead to inaccurate scaling during calibration. 
This is because the process adjusts based on the data it sees. Small batches might not capture the full range of values, leading to issues with the final calibration, so the `batch` size is doubled automatically. If no batch size is specified, the default `batch=1` is used and calibration will be run at `batch=1 * 2` to reduce calibration scaling errors. + +Experimentation by NVIDIA led them to recommend using at least 500 calibration images that are representative of the data for your model, with INT8 quantization calibration. This is a guideline and not a _hard_ requirement, and **you will need to experiment with what is required to perform well for your dataset**. Since the calibration data is required for INT8 calibration with TensorRT, make certain to use the `data` argument when `int8=True` for TensorRT and use `data="my_dataset.yaml"`, which will use the images from [validation](../modes/val.md) to calibrate with. When no value is passed for `data` with export to TensorRT with INT8 quantization, the default will be to use one of the ["small" example datasets based on the model task](../datasets/index.md) instead of throwing an error. + +!!! example + + ```{ .py .annotate } + from ultralytics import YOLO + + model = YOLO("yolov8n.pt") + model.export( + format="engine", + dynamic=True, #(1)! + batch=8, #(2)! + workspace=4, #(3)! + int8=True, + data="coco.yaml", #(4)! + ) + + model = YOLO("yolov8n.engine", task="detect") # load the model + + ``` + + 1. Exports with dynamic axes; this will be enabled by default when exporting with `int8=True` even when not explicitly set. See [export arguments](../modes/export.md#arguments) for additional information. + 2. Sets max batch size of 8 for exported model, which calibrates with `batch = 2 × 8` to avoid scaling errors during calibration. + 3. Allocates 4 GiB of memory instead of allocating the entire device for conversion process. + 4. 
Uses [COCO dataset](../datasets/detect/coco.md) for calibration, specifically the images used for [validation](../modes/val.md) (5,000 total). + +???+ warning "Calibration Cache" + + TensorRT will generate a calibration `.cache` which can be re-used to speed up export of future model weights using the same data, but this may result in poor calibration when the data is vastly different or if the `batch` value is changed drastically. In these circumstances, the existing `.cache` should be renamed and moved to a different directory or deleted entirely. + +#### Advantages of using YOLO with TensorRT INT8 + +- **Reduced model size:** Quantization from FP32 to INT8 can reduce the model size by 4x (on disk or in memory), leading to faster download times, lower storage requirements, and reduced memory footprint when deploying a model. + +- **Lower power consumption:** Reduced precision operations for INT8 exported YOLO models can consume less power compared to FP32 models, especially for battery-powered devices. + +- **Improved inference speeds:** TensorRT optimizes the model for the target hardware, potentially leading to faster inference speeds on GPUs, embedded devices, and accelerators. + +??? note "Note on Inference Speeds" + + The first few inference calls with a model exported to TensorRT INT8 can be expected to have longer than usual preprocessing, inference, and/or postprocessing times. This may also occur when changing `imgsz` during inference, especially when `imgsz` is not the same as what was specified during export (export `imgsz` is set as TensorRT "optimal" profile). + +#### Drawbacks of using YOLO with TensorRT INT8 + +- **Decreases in evaluation metrics:** Using a lower precision will mean that `mAP`, `Precision`, `Recall` or any [other metric used to evaluate model performance](../guides/yolo-performance-metrics.md) is likely to be somewhat worse. 
See the [Performance results section](#ultralytics-yolo-tensorrt-export-performance) to compare the differences in `mAP50` and `mAP50-95` when exporting with INT8 on a small sample of various devices. + +- **Increased development times:** Finding the "optimal" settings for INT8 calibration for a given dataset and device can take a significant amount of testing. + +- **Hardware dependency:** Calibration and performance gains could be highly hardware dependent and model weights are less transferable. + +## Ultralytics YOLO TensorRT Export Performance + +### NVIDIA A100 + +!!! tip "Performance" + + Tested with Ubuntu 22.04.3 LTS, `python 3.10.12`, `ultralytics==8.2.4`, `tensorrt==8.6.1.post1` + + === "Detection (COCO)" + + See [Detection Docs](../tasks/detect.md) for usage examples with these models trained on [COCO](../datasets/detect/coco.md), which include 80 pre-trained classes. + + !!! note + Inference times shown for `mean`, `min` (fastest), and `max` (slowest) for each test using pre-trained weights `yolov8n.engine` + + | Precision | Eval test | mean
(ms) | min \| max
(ms) | mAPval
50(B) | mAPval
50-95(B) | `batch` | size
(pixels) | + |-----------|--------------|--------------|--------------------|----------------------|-------------------------|---------|-----------------------| + | FP32 | Predict | 0.52 | 0.51 \| 0.56 | | | 8 | 640 | + | FP32 | COCOval | 0.52 | | 0.52 | 0.37 | 1 | 640 | + | FP16 | Predict | 0.34 | 0.34 \| 0.41 | | | 8 | 640 | + | FP16 | COCOval | 0.33 | | 0.52 | 0.37 | 1 | 640 | + | INT8 | Predict | 0.28 | 0.27 \| 0.31 | | | 8 | 640 | + | INT8 | COCOval | 0.29 | | 0.47 | 0.33 | 1 | 640 | + + === "Segmentation (COCO)" + + See [Segmentation Docs](../tasks/segment.md) for usage examples with these models trained on [COCO](../datasets/segment/coco.md), which include 80 pre-trained classes. + + !!! note + Inference times shown for `mean`, `min` (fastest), and `max` (slowest) for each test using pre-trained weights `yolov8n-seg.engine` + + | Precision | Eval test | mean
(ms) | min \| max
(ms) | mAPval
50(B) | mAPval
50-95(B) | mAPval
50(M) | mAPval
50-95(M) | `batch` | size
(pixels) | + |-----------|--------------|--------------|--------------------|----------------------|-------------------------|----------------------|-------------------------|---------|-----------------------| + | FP32 | Predict | 0.62 | 0.61 \| 0.68 | | | | | 8 | 640 | + | FP32 | COCOval | 0.63 | | 0.52 | 0.36 | 0.49 | 0.31 | 1 | 640 | + | FP16 | Predict | 0.40 | 0.39 \| 0.44 | | | | | 8 | 640 | + | FP16 | COCOval | 0.43 | | 0.52 | 0.36 | 0.49 | 0.30 | 1 | 640 | + | INT8 | Predict | 0.34 | 0.33 \| 0.37 | | | | | 8 | 640 | + | INT8 | COCOval | 0.36 | | 0.46 | 0.32 | 0.43 | 0.27 | 1 | 640 | + + === "Classification (ImageNet)" + + See [Classification Docs](../tasks/classify.md) for usage examples with these models trained on [ImageNet](../datasets/classify/imagenet.md), which include 1000 pre-trained classes. + + !!! note + Inference times shown for `mean`, `min` (fastest), and `max` (slowest) for each test using pre-trained weights `yolov8n-cls.engine` + + | Precision | Eval test | mean
(ms) | min \| max
(ms) | top-1 | top-5 | `batch` | size
(pixels) | + |-----------|------------------|--------------|--------------------|-------|-------|---------|-----------------------| + | FP32 | Predict | 0.26 | 0.25 \| 0.28 | 0.35 | 0.61 | 8 | 640 | + | FP32 | ImageNetval | 0.26 | | | | 1 | 640 | + | FP16 | Predict | 0.18 | 0.17 \| 0.19 | 0.35 | 0.61 | 8 | 640 | + | FP16 | ImageNetval | 0.18 | | | | 1 | 640 | + | INT8 | Predict | 0.16 | 0.15 \| 0.57 | 0.32 | 0.59 | 8 | 640 | + | INT8 | ImageNetval | 0.15 | | | | 1 | 640 | + + === "Pose (COCO)" + + See [Pose Estimation Docs](../tasks/pose.md) for usage examples with these models trained on [COCO](../datasets/pose/coco.md), which include 1 pre-trained class, "person". + + !!! note + Inference times shown for `mean`, `min` (fastest), and `max` (slowest) for each test using pre-trained weights `yolov8n-pose.engine` + + | Precision | Eval test | mean
(ms) | min \| max
(ms) | mAPval
50(B) | mAPval
50-95(B) | mAPval
50(P) | mAPval
50-95(P) | `batch` | size
(pixels) | + |-----------|--------------|--------------|--------------------|----------------------|-------------------------|----------------------|-------------------------|---------|-----------------------| + | FP32 | Predict | 0.54 | 0.53 \| 0.58 | | | | | 8 | 640 | + | FP32 | COCOval | 0.55 | | 0.91 | 0.69 | 0.80 | 0.51 | 1 | 640 | + | FP16 | Predict | 0.37 | 0.35 \| 0.41 | | | | | 8 | 640 | + | FP16 | COCOval | 0.36 | | 0.91 | 0.69 | 0.80 | 0.51 | 1 | 640 | + | INT8 | Predict | 0.29 | 0.28 \| 0.33 | | | | | 8 | 640 | + | INT8 | COCOval | 0.30 | | 0.90 | 0.68 | 0.78 | 0.47 | 1 | 640 | + + === "OBB (DOTAv1)" + + See [Oriented Detection Docs](../tasks/obb.md) for usage examples with these models trained on [DOTAv1](../datasets/obb/dota-v2.md#dota-v10), which include 15 pre-trained classes. + + !!! note + Inference times shown for `mean`, `min` (fastest), and `max` (slowest) for each test using pre-trained weights `yolov8n-obb.engine` + + | Precision | Eval test | mean
(ms) | min \| max
(ms) | mAPval
50(B) | mAPval
50-95(B) | `batch` | size
(pixels) | + |-----------|----------------|--------------|--------------------|----------------------|-------------------------|---------|-----------------------| + | FP32 | Predict | 0.52 | 0.51 \| 0.59 | | | 8 | 640 | + | FP32 | DOTAv1val | 0.76 | | 0.50 | 0.36 | 1 | 640 | + | FP16 | Predict | 0.34 | 0.33 \| 0.42 | | | 8 | 640 | + | FP16 | DOTAv1val | 0.59 | | 0.50 | 0.36 | 1 | 640 | + | INT8 | Predict | 0.29 | 0.28 \| 0.33 | | | 8 | 640 | + | INT8 | DOTAv1val | 0.32 | | 0.45 | 0.32 | 1 | 640 | + +### Consumer GPUs + +!!! tip "Detection Performance (COCO)" + + === "RTX 3080 12 GB" + + Tested with Windows 10.0.19045, `python 3.10.9`, `ultralytics==8.2.4`, `tensorrt==10.0.0b6` + + !!! note + Inference times shown for `mean`, `min` (fastest), and `max` (slowest) for each test using pre-trained weights `yolov8n.engine` + + | Precision | Eval test | mean
(ms) | min \| max
(ms) | mAPval
50(B) | mAPval
50-95(B) | `batch` | size
(pixels) | + |-----------|--------------|--------------|--------------------|----------------------|-------------------------|---------|-----------------------| + | FP32 | Predict | 1.06 | 0.75 \| 1.88 | | | 8 | 640 | + | FP32 | COCOval | 1.37 | | 0.52 | 0.37 | 1 | 640 | + | FP16 | Predict | 0.62 | 0.75 \| 1.13 | | | 8 | 640 | + | FP16 | COCOval | 0.85 | | 0.52 | 0.37 | 1 | 640 | + | INT8 | Predict | 0.52 | 0.38 \| 1.00 | | | 8 | 640 | + | INT8 | COCOval | 0.74 | | 0.47 | 0.33 | 1 | 640 | + + === "RTX 3060 12 GB" + + Tested with Windows 10.0.22631, `python 3.11.9`, `ultralytics==8.2.4`, `tensorrt==10.0.1` + + !!! note + Inference times shown for `mean`, `min` (fastest), and `max` (slowest) for each test using pre-trained weights `yolov8n.engine` + + + | Precision | Eval test | mean
(ms) | min \| max
(ms) | mAPval
50(B) | mAPval
50-95(B) | `batch` | size
(pixels) | + |-----------|--------------|--------------|--------------------|----------------------|-------------------------|---------|-----------------------| + | FP32 | Predict | 1.76 | 1.69 \| 1.87 | | | 8 | 640 | + | FP32 | COCOval | 1.94 | | 0.52 | 0.37 | 1 | 640 | + | FP16 | Predict | 0.86 | 0.75 \| 1.00 | | | 8 | 640 | + | FP16 | COCOval | 1.43 | | 0.52 | 0.37 | 1 | 640 | + | INT8 | Predict | 0.80 | 0.75 \| 1.00 | | | 8 | 640 | + | INT8 | COCOval | 1.35 | | 0.47 | 0.33 | 1 | 640 | + + === "RTX 2060 6 GB" + + Tested with Pop!_OS 22.04 LTS, `python 3.10.12`, `ultralytics==8.2.4`, `tensorrt==8.6.1.post1` + + !!! note + Inference times shown for `mean`, `min` (fastest), and `max` (slowest) for each test using pre-trained weights `yolov8n.engine` + + | Precision | Eval test | mean
(ms) | min \| max
(ms) | mAPval
50(B) | mAPval
50-95(B) | `batch` | size
(pixels) | + |-----------|--------------|--------------|--------------------|----------------------|-------------------------|---------|-----------------------| + | FP32 | Predict | 2.84 | 2.84 \| 2.85 | | | 8 | 640 | + | FP32 | COCOval | 2.94 | | 0.52 | 0.37 | 1 | 640 | + | FP16 | Predict | 1.09 | 1.09 \| 1.10 | | | 8 | 640 | + | FP16 | COCOval | 1.20 | | 0.52 | 0.37 | 1 | 640 | + | INT8 | Predict | 0.75 | 0.74 \| 0.75 | | | 8 | 640 | + | INT8 | COCOval | 0.76 | | 0.47 | 0.33 | 1 | 640 | + +### Embedded Devices + +!!! tip "Detection Performance (COCO)" + + === "Jetson Orin NX 16GB" + + Tested with JetPack 5.1.3 (L4T 35.5.0) Ubuntu 20.04.6, `python 3.8.10`, `ultralytics==8.2.4`, `tensorrt==8.5.2.2` + + !!! note + Inference times shown for `mean`, `min` (fastest), and `max` (slowest) for each test using pre-trained weights `yolov8n.engine` + + | Precision | Eval test | mean
(ms) | min \| max
(ms) | mAPval
50(B) | mAPval
50-95(B) | `batch` | size
(pixels) | + |-----------|--------------|--------------|--------------------|----------------------|-------------------------|---------|-----------------------| + | FP32 | Predict | 6.90 | 6.89 \| 6.93 | | | 8 | 640 | + | FP32 | COCOval | 6.97 | | 0.52 | 0.37 | 1 | 640 | + | FP16 | Predict | 3.36 | 3.35 \| 3.39 | | | 8 | 640 | + | FP16 | COCOval | 3.39 | | 0.52 | 0.37 | 1 | 640 | + | INT8 | Predict | 2.32 | 2.32 \| 2.34 | | | 8 | 640 | + | INT8 | COCOval | 2.33 | | 0.47 | 0.33 | 1 | 640 | + +!!! info + + See our [quickstart guide on NVIDIA Jetson with Ultralytics YOLO](../guides/nvidia-jetson.md) to learn more about setup and configuration. + +#### Evaluation methods + +Expand sections below for information on how these models were exported and tested. + +??? example "Export configurations" + + See [export mode](../modes/export.md) for details regarding export configuration arguments. + + ```py + from ultralytics import YOLO + + model = YOLO("yolov8n.pt") + + # TensorRT FP32 + out = model.export( + format="engine", + imgsz=640, + dynamic=True, + verbose=False, + batch=8, + workspace=2 + ) + + # TensorRT FP16 + out = model.export( + format="engine", + imgsz=640, + dynamic=True, + verbose=False, + batch=8, + workspace=2, + half=True + ) + + # TensorRT INT8 + out = model.export( + format="engine", + imgsz=640, + dynamic=True, + verbose=False, + batch=8, + workspace=2, + int8=True, + data="data.yaml" # COCO, ImageNet, or DOTAv1 for appropriate model task + ) + ``` + +??? example "Predict loop" + + See [predict mode](../modes/predict.md) for additional information. + + ```py + import cv2 + from ultralytics import YOLO + + model = YOLO("yolov8n.engine") + img = cv2.imread("path/to/image.jpg") + + for _ in range(100): + result = model.predict( + [img] * 8, # batch=8 of the same image + verbose=False, + device="cuda" + ) + ``` + +??? example "Validation configuration" + + See [`val` mode](../modes/val.md) to learn more about validation configuration arguments. 
+ + ```py + from ultralytics import YOLO + + model = YOLO("yolov8n.engine") + results = model.val( + data="data.yaml", # COCO, ImageNet, or DOTAv1 for appropriate model task + batch=1, + imgsz=640, + verbose=False, + device="cuda" + ) + ``` + ## Deploying Exported YOLOv8 TensorRT Models Having successfully exported your Ultralytics YOLOv8 models to TensorRT format, you're now ready to deploy them. For in-depth instructions on deploying your TensorRT models in various settings, take a look at the following resources: +- **[Deploy Ultralytics with a Triton Server](../guides/triton-inference-server.md)**: Our guide on how to use NVIDIA's Triton Inference (formerly TensorRT Inference) Server specifically for use with Ultralytics YOLO models. + - **[Deploying Deep Neural Networks with NVIDIA TensorRT](https://developer.nvidia.com/blog/deploying-deep-learning-nvidia-tensorrt/)**: This article explains how to use NVIDIA TensorRT to deploy deep neural networks on GPU-based deployment platforms efficiently. - **[End-to-End AI for NVIDIA-Based PCs: NVIDIA TensorRT Deployment](https://developer.nvidia.com/blog/end-to-end-ai-for-nvidia-based-pcs-nvidia-tensorrt-deployment/)**: This blog post explains the use of NVIDIA TensorRT for optimizing and deploying AI models on NVIDIA-based PCs. - **[GitHub Repository for NVIDIA TensorRT:](https://github.com/NVIDIA/TensorRT)**: This is the official GitHub repository that contains the source code and documentation for NVIDIA TensorRT. + ## Summary In this guide, we focused on converting Ultralytics YOLOv8 models to NVIDIA's TensorRT model format. This conversion step is crucial for improving the efficiency and speed of YOLOv8 models, making them more effective and suitable for diverse deployment environments. 
diff --git a/docs/en/modes/benchmark.md b/docs/en/modes/benchmark.md index 199b397fbf..d33e543be9 100644 --- a/docs/en/modes/benchmark.md +++ b/docs/en/modes/benchmark.md @@ -87,20 +87,20 @@ Arguments such as `model`, `data`, `imgsz`, `half`, `device`, and `verbose` prov Benchmarks will attempt to run automatically on all possible export formats below. -| Format | `format` Argument | Model | Metadata | Arguments | -|---------------------------------------------------|-------------------|---------------------------|----------|--------------------------------------------------------------| -| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - | -| [TorchScript](../integrations/torchscript.md) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize`, `batch` | -| [ONNX](../integrations/onnx.md) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset`, `batch` | -| [OpenVINO](../integrations/openvino.md) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` | -| [TensorRT](../integrations/tensorrt.md) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace`, `batch` | -| [CoreML](../integrations/coreml.md) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms`, `batch` | -| [TF SavedModel](../integrations/tf-savedmodel.md) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras`, `int8`, `batch` | -| [TF GraphDef](../integrations/tf-graphdef.md) | `pb` | `yolov8n.pb` | ❌ | `imgsz`, `batch` | -| [TF Lite](../integrations/tflite.md) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8`, `batch` | -| [TF Edge TPU](../integrations/edge-tpu.md) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz`, `batch` | -| [TF.js](../integrations/tfjs.md) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` | -| [PaddlePaddle](../integrations/paddlepaddle.md) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz`, `batch` | -| 
[NCNN](../integrations/ncnn.md) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half`, `batch` | +| Format | `format` Argument | Model | Metadata | Arguments | +|---------------------------------------------------|-------------------|---------------------------|----------|----------------------------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - | +| [TorchScript](../integrations/torchscript.md) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize`, `batch` | +| [ONNX](../integrations/onnx.md) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset`, `batch` | +| [OpenVINO](../integrations/openvino.md) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` | +| [TensorRT](../integrations/tensorrt.md) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace`, `int8`, `batch` | +| [CoreML](../integrations/coreml.md) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms`, `batch` | +| [TF SavedModel](../integrations/tf-savedmodel.md) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras`, `int8`, `batch` | +| [TF GraphDef](../integrations/tf-graphdef.md) | `pb` | `yolov8n.pb` | ❌ | `imgsz`, `batch` | +| [TF Lite](../integrations/tflite.md) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8`, `batch` | +| [TF Edge TPU](../integrations/edge-tpu.md) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz`, `batch` | +| [TF.js](../integrations/tfjs.md) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` | +| [PaddlePaddle](../integrations/paddlepaddle.md) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz`, `batch` | +| [NCNN](../integrations/ncnn.md) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half`, `batch` | See full `export` details in the [Export](../modes/export.md) page. 
diff --git a/docs/en/modes/export.md b/docs/en/modes/export.md index 526eecd45b..df40121412 100644 --- a/docs/en/modes/export.md +++ b/docs/en/modes/export.md @@ -41,8 +41,8 @@ Here are some of the standout functionalities: !!! Tip "Tip" - * Export to ONNX or OpenVINO for up to 3x CPU speedup. - * Export to TensorRT for up to 5x GPU speedup. + * Export to [ONNX](../integrations/onnx.md) or [OpenVINO](../integrations/openvino.md) for up to 3x CPU speedup. + * Export to [TensorRT](../integrations/tensorrt.md) for up to 5x GPU speedup. ## Usage Examples @@ -85,7 +85,7 @@ This table details the configurations and options available for exporting YOLO m | `dynamic` | `bool` | `False` | Allows dynamic input sizes for ONNX and TensorRT exports, enhancing flexibility in handling varying image dimensions. | | `simplify` | `bool` | `False` | Simplifies the model graph for ONNX exports, potentially improving performance and compatibility. | | `opset` | `int` | `None` | Specifies the ONNX opset version for compatibility with different ONNX parsers and runtimes. If not set, uses the latest supported version. | -| `workspace` | `float` | `4.0` | Sets the maximum workspace size in GB for TensorRT optimizations, balancing memory usage and performance. | +| `workspace` | `float` | `4.0` | Sets the maximum workspace size in GiB for TensorRT optimizations, balancing memory usage and performance. | | `nms` | `bool` | `False` | Adds Non-Maximum Suppression (NMS) to the CoreML export, essential for accurate and efficient detection post-processing. | | `batch` | `int` | `1` | Specifies export model batch inference size or the max number of images the exported model will process concurrently in `predict` mode. | @@ -95,18 +95,18 @@ Adjusting these parameters allows for customization of the export process to fit Available YOLOv8 export formats are in the table below. You can export to any format using the `format` argument, i.e. `format='onnx'` or `format='engine'`. 
You can predict or validate directly on exported models, i.e. `yolo predict model=yolov8n.onnx`. Usage examples are shown for your model after export completes. -| Format | `format` Argument | Model | Metadata | Arguments | -|---------------------------------------------------|-------------------|---------------------------|----------|--------------------------------------------------------------| -| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - | -| [TorchScript](../integrations/torchscript.md) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize`, `batch` | -| [ONNX](../integrations/onnx.md) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset`, `batch` | -| [OpenVINO](../integrations/openvino.md) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` | -| [TensorRT](../integrations/tensorrt.md) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace`, `batch` | -| [CoreML](../integrations/coreml.md) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms`, `batch` | -| [TF SavedModel](../integrations/tf-savedmodel.md) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras`, `int8`, `batch` | -| [TF GraphDef](../integrations/tf-graphdef.md) | `pb` | `yolov8n.pb` | ❌ | `imgsz`, `batch` | -| [TF Lite](../integrations/tflite.md) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8`, `batch` | -| [TF Edge TPU](../integrations/edge-tpu.md) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz`, `batch` | -| [TF.js](../integrations/tfjs.md) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` | -| [PaddlePaddle](../integrations/paddlepaddle.md) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz`, `batch` | -| [NCNN](../integrations/ncnn.md) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half`, `batch` | +| Format | `format` Argument | Model | Metadata | Arguments | 
+|---------------------------------------------------|-------------------|---------------------------|----------|----------------------------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - | +| [TorchScript](../integrations/torchscript.md) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize`, `batch` | +| [ONNX](../integrations/onnx.md) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset`, `batch` | +| [OpenVINO](../integrations/openvino.md) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` | +| [TensorRT](../integrations/tensorrt.md) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace`, `int8`, `batch` | +| [CoreML](../integrations/coreml.md) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms`, `batch` | +| [TF SavedModel](../integrations/tf-savedmodel.md) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras`, `int8`, `batch` | +| [TF GraphDef](../integrations/tf-graphdef.md) | `pb` | `yolov8n.pb` | ❌ | `imgsz`, `batch` | +| [TF Lite](../integrations/tflite.md) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8`, `batch` | +| [TF Edge TPU](../integrations/edge-tpu.md) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz`, `batch` | +| [TF.js](../integrations/tfjs.md) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` | +| [PaddlePaddle](../integrations/paddlepaddle.md) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz`, `batch` | +| [NCNN](../integrations/ncnn.md) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half`, `batch` | diff --git a/docs/en/tasks/classify.md b/docs/en/tasks/classify.md index f2ef3a2eee..a527e25ea8 100644 --- a/docs/en/tasks/classify.md +++ b/docs/en/tasks/classify.md @@ -162,20 +162,20 @@ Export a YOLOv8n-cls model to a different format like ONNX, CoreML, etc. Available YOLOv8-cls export formats are in the table below. 
You can export to any format using the `format` argument, i.e. `format='onnx'` or `format='engine'`. You can predict or validate directly on exported models, i.e. `yolo predict model=yolov8n-cls.onnx`. Usage examples are shown for your model after export completes. -| Format | `format` Argument | Model | Metadata | Arguments | -|---------------------------------------------------|-------------------|-------------------------------|----------|--------------------------------------------------------------| -| [PyTorch](https://pytorch.org/) | - | `yolov8n-cls.pt` | ✅ | - | -| [TorchScript](../integrations/torchscript.md) | `torchscript` | `yolov8n-cls.torchscript` | ✅ | `imgsz`, `optimize`, `batch` | -| [ONNX](../integrations/onnx.md) | `onnx` | `yolov8n-cls.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset`, `batch` | -| [OpenVINO](../integrations/openvino.md) | `openvino` | `yolov8n-cls_openvino_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` | -| [TensorRT](../integrations/tensorrt.md) | `engine` | `yolov8n-cls.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace`, `batch` | -| [CoreML](../integrations/coreml.md) | `coreml` | `yolov8n-cls.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms`, `batch` | -| [TF SavedModel](../integrations/tf-savedmodel.md) | `saved_model` | `yolov8n-cls_saved_model/` | ✅ | `imgsz`, `keras`, `int8`, `batch` | -| [TF GraphDef](../integrations/tf-graphdef.md) | `pb` | `yolov8n-cls.pb` | ❌ | `imgsz`, `batch` | -| [TF Lite](../integrations/tflite.md) | `tflite` | `yolov8n-cls.tflite` | ✅ | `imgsz`, `half`, `int8`, `batch` | -| [TF Edge TPU](../integrations/edge-tpu.md) | `edgetpu` | `yolov8n-cls_edgetpu.tflite` | ✅ | `imgsz`, `batch` | -| [TF.js](../integrations/tfjs.md) | `tfjs` | `yolov8n-cls_web_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` | -| [PaddlePaddle](../integrations/paddlepaddle.md) | `paddle` | `yolov8n-cls_paddle_model/` | ✅ | `imgsz`, `batch` | -| [NCNN](../integrations/ncnn.md) | `ncnn` | 
`yolov8n-cls_ncnn_model/` | ✅ | `imgsz`, `half`, `batch` | +| Format | `format` Argument | Model | Metadata | Arguments | +|---------------------------------------------------|-------------------|-------------------------------|----------|----------------------------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n-cls.pt` | ✅ | - | +| [TorchScript](../integrations/torchscript.md) | `torchscript` | `yolov8n-cls.torchscript` | ✅ | `imgsz`, `optimize`, `batch` | +| [ONNX](../integrations/onnx.md) | `onnx` | `yolov8n-cls.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset`, `batch` | +| [OpenVINO](../integrations/openvino.md) | `openvino` | `yolov8n-cls_openvino_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` | +| [TensorRT](../integrations/tensorrt.md) | `engine` | `yolov8n-cls.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace`, `int8`, `batch` | +| [CoreML](../integrations/coreml.md) | `coreml` | `yolov8n-cls.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms`, `batch` | +| [TF SavedModel](../integrations/tf-savedmodel.md) | `saved_model` | `yolov8n-cls_saved_model/` | ✅ | `imgsz`, `keras`, `int8`, `batch` | +| [TF GraphDef](../integrations/tf-graphdef.md) | `pb` | `yolov8n-cls.pb` | ❌ | `imgsz`, `batch` | +| [TF Lite](../integrations/tflite.md) | `tflite` | `yolov8n-cls.tflite` | ✅ | `imgsz`, `half`, `int8`, `batch` | +| [TF Edge TPU](../integrations/edge-tpu.md) | `edgetpu` | `yolov8n-cls_edgetpu.tflite` | ✅ | `imgsz`, `batch` | +| [TF.js](../integrations/tfjs.md) | `tfjs` | `yolov8n-cls_web_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` | +| [PaddlePaddle](../integrations/paddlepaddle.md) | `paddle` | `yolov8n-cls_paddle_model/` | ✅ | `imgsz`, `batch` | +| [NCNN](../integrations/ncnn.md) | `ncnn` | `yolov8n-cls_ncnn_model/` | ✅ | `imgsz`, `half`, `batch` | See full `export` details in the [Export](../modes/export.md) page. 
diff --git a/docs/en/tasks/detect.md b/docs/en/tasks/detect.md index 41dea862fd..a6bae4e8c4 100644 --- a/docs/en/tasks/detect.md +++ b/docs/en/tasks/detect.md @@ -163,20 +163,20 @@ Export a YOLOv8n model to a different format like ONNX, CoreML, etc. Available YOLOv8 export formats are in the table below. You can export to any format using the `format` argument, i.e. `format='onnx'` or `format='engine'`. You can predict or validate directly on exported models, i.e. `yolo predict model=yolov8n.onnx`. Usage examples are shown for your model after export completes. -| Format | `format` Argument | Model | Metadata | Arguments | -|---------------------------------------------------|-------------------|---------------------------|----------|--------------------------------------------------------------| -| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - | -| [TorchScript](../integrations/torchscript.md) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize`, `batch` | -| [ONNX](../integrations/onnx.md) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset`, `batch` | -| [OpenVINO](../integrations/openvino.md) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` | -| [TensorRT](../integrations/tensorrt.md) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace`, `batch` | -| [CoreML](../integrations/coreml.md) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms`, `batch` | -| [TF SavedModel](../integrations/tf-savedmodel.md) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras`, `int8`, `batch` | -| [TF GraphDef](../integrations/tf-graphdef.md) | `pb` | `yolov8n.pb` | ❌ | `imgsz`, `batch` | -| [TF Lite](../integrations/tflite.md) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8`, `batch` | -| [TF Edge TPU](../integrations/edge-tpu.md) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz`, `batch` | -| [TF.js](../integrations/tfjs.md) 
| `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` | -| [PaddlePaddle](../integrations/paddlepaddle.md) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz`, `batch` | -| [NCNN](../integrations/ncnn.md) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half`, `batch` | +| Format | `format` Argument | Model | Metadata | Arguments | +|---------------------------------------------------|-------------------|---------------------------|----------|----------------------------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - | +| [TorchScript](../integrations/torchscript.md) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize`, `batch` | +| [ONNX](../integrations/onnx.md) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset`, `batch` | +| [OpenVINO](../integrations/openvino.md) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` | +| [TensorRT](../integrations/tensorrt.md) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace`, `int8`, `batch` | +| [CoreML](../integrations/coreml.md) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms`, `batch` | +| [TF SavedModel](../integrations/tf-savedmodel.md) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras`, `int8`, `batch` | +| [TF GraphDef](../integrations/tf-graphdef.md) | `pb` | `yolov8n.pb` | ❌ | `imgsz`, `batch` | +| [TF Lite](../integrations/tflite.md) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8`, `batch` | +| [TF Edge TPU](../integrations/edge-tpu.md) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz`, `batch` | +| [TF.js](../integrations/tfjs.md) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` | +| [PaddlePaddle](../integrations/paddlepaddle.md) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz`, `batch` | +| [NCNN](../integrations/ncnn.md) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | 
`imgsz`, `half`, `batch` | See full `export` details in the [Export](../modes/export.md) page. diff --git a/docs/en/tasks/obb.md b/docs/en/tasks/obb.md index fdc9ad9235..7755f45e21 100644 --- a/docs/en/tasks/obb.md +++ b/docs/en/tasks/obb.md @@ -184,20 +184,20 @@ Export a YOLOv8n-obb model to a different format like ONNX, CoreML, etc. Available YOLOv8-obb export formats are in the table below. You can export to any format using the `format` argument, i.e. `format='onnx'` or `format='engine'`. You can predict or validate directly on exported models, i.e. `yolo predict model=yolov8n-obb.onnx`. Usage examples are shown for your model after export completes. -| Format | `format` Argument | Model | Metadata | Arguments | -|---------------------------------------------------|-------------------|-------------------------------|----------|--------------------------------------------------------------| -| [PyTorch](https://pytorch.org/) | - | `yolov8n-obb.pt` | ✅ | - | -| [TorchScript](../integrations/torchscript.md) | `torchscript` | `yolov8n-obb.torchscript` | ✅ | `imgsz`, `optimize`, `batch` | -| [ONNX](../integrations/onnx.md) | `onnx` | `yolov8n-obb.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset`, `batch` | -| [OpenVINO](../integrations/openvino.md) | `openvino` | `yolov8n-obb_openvino_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` | -| [TensorRT](../integrations/tensorrt.md) | `engine` | `yolov8n-obb.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace`, `batch` | -| [CoreML](../integrations/coreml.md) | `coreml` | `yolov8n-obb.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms`, `batch` | -| [TF SavedModel](../integrations/tf-savedmodel.md) | `saved_model` | `yolov8n-obb_saved_model/` | ✅ | `imgsz`, `keras`, `int8`, `batch` | -| [TF GraphDef](../integrations/tf-graphdef.md) | `pb` | `yolov8n-obb.pb` | ❌ | `imgsz`, `batch` | -| [TF Lite](../integrations/tflite.md) | `tflite` | `yolov8n-obb.tflite` | ✅ | `imgsz`, `half`, `int8`, `batch` | -| [TF 
Edge TPU](../integrations/edge-tpu.md) | `edgetpu` | `yolov8n-obb_edgetpu.tflite` | ✅ | `imgsz`, `batch` | -| [TF.js](../integrations/tfjs.md) | `tfjs` | `yolov8n-obb_web_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` | -| [PaddlePaddle](../integrations/paddlepaddle.md) | `paddle` | `yolov8n-obb_paddle_model/` | ✅ | `imgsz`, `batch` | -| [NCNN](../integrations/ncnn.md) | `ncnn` | `yolov8n-obb_ncnn_model/` | ✅ | `imgsz`, `half`, `batch` | +| Format | `format` Argument | Model | Metadata | Arguments | +|---------------------------------------------------|-------------------|-------------------------------|----------|----------------------------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n-obb.pt` | ✅ | - | +| [TorchScript](../integrations/torchscript.md) | `torchscript` | `yolov8n-obb.torchscript` | ✅ | `imgsz`, `optimize`, `batch` | +| [ONNX](../integrations/onnx.md) | `onnx` | `yolov8n-obb.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset`, `batch` | +| [OpenVINO](../integrations/openvino.md) | `openvino` | `yolov8n-obb_openvino_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` | +| [TensorRT](../integrations/tensorrt.md) | `engine` | `yolov8n-obb.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace`, `int8`, `batch` | +| [CoreML](../integrations/coreml.md) | `coreml` | `yolov8n-obb.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms`, `batch` | +| [TF SavedModel](../integrations/tf-savedmodel.md) | `saved_model` | `yolov8n-obb_saved_model/` | ✅ | `imgsz`, `keras`, `int8`, `batch` | +| [TF GraphDef](../integrations/tf-graphdef.md) | `pb` | `yolov8n-obb.pb` | ❌ | `imgsz`, `batch` | +| [TF Lite](../integrations/tflite.md) | `tflite` | `yolov8n-obb.tflite` | ✅ | `imgsz`, `half`, `int8`, `batch` | +| [TF Edge TPU](../integrations/edge-tpu.md) | `edgetpu` | `yolov8n-obb_edgetpu.tflite` | ✅ | `imgsz`, `batch` | +| [TF.js](../integrations/tfjs.md) | `tfjs` | `yolov8n-obb_web_model/` | ✅ | `imgsz`, `half`, 
`int8`, `batch` | +| [PaddlePaddle](../integrations/paddlepaddle.md) | `paddle` | `yolov8n-obb_paddle_model/` | ✅ | `imgsz`, `batch` | +| [NCNN](../integrations/ncnn.md) | `ncnn` | `yolov8n-obb_ncnn_model/` | ✅ | `imgsz`, `half`, `batch` | See full `export` details in the [Export](../modes/export.md) page. diff --git a/docs/en/tasks/pose.md b/docs/en/tasks/pose.md index be6800c5f8..13fa05fe12 100644 --- a/docs/en/tasks/pose.md +++ b/docs/en/tasks/pose.md @@ -178,20 +178,20 @@ Export a YOLOv8n Pose model to a different format like ONNX, CoreML, etc. Available YOLOv8-pose export formats are in the table below. You can export to any format using the `format` argument, i.e. `format='onnx'` or `format='engine'`. You can predict or validate directly on exported models, i.e. `yolo predict model=yolov8n-pose.onnx`. Usage examples are shown for your model after export completes. -| Format | `format` Argument | Model | Metadata | Arguments | -|---------------------------------------------------|-------------------|--------------------------------|----------|--------------------------------------------------------------| -| [PyTorch](https://pytorch.org/) | - | `yolov8n-pose.pt` | ✅ | - | -| [TorchScript](../integrations/torchscript.md) | `torchscript` | `yolov8n-pose.torchscript` | ✅ | `imgsz`, `optimize`, `batch` | -| [ONNX](../integrations/onnx.md) | `onnx` | `yolov8n-pose.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset`, `batch` | -| [OpenVINO](../integrations/openvino.md) | `openvino` | `yolov8n-pose_openvino_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` | -| [TensorRT](../integrations/tensorrt.md) | `engine` | `yolov8n-pose.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace`, `batch` | -| [CoreML](../integrations/coreml.md) | `coreml` | `yolov8n-pose.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms`, `batch` | -| [TF SavedModel](../integrations/tf-savedmodel.md) | `saved_model` | `yolov8n-pose_saved_model/` | ✅ | `imgsz`, `keras`, `int8`, `batch` 
| -| [TF GraphDef](../integrations/tf-graphdef.md) | `pb` | `yolov8n-pose.pb` | ❌ | `imgsz`, `batch` | -| [TF Lite](../integrations/tflite.md) | `tflite` | `yolov8n-pose.tflite` | ✅ | `imgsz`, `half`, `int8`, `batch` | -| [TF Edge TPU](../integrations/edge-tpu.md) | `edgetpu` | `yolov8n-pose_edgetpu.tflite` | ✅ | `imgsz`, `batch` | -| [TF.js](../integrations/tfjs.md) | `tfjs` | `yolov8n-pose_web_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` | -| [PaddlePaddle](../integrations/paddlepaddle.md) | `paddle` | `yolov8n-pose_paddle_model/` | ✅ | `imgsz`, `batch` | -| [NCNN](../integrations/ncnn.md) | `ncnn` | `yolov8n-pose_ncnn_model/` | ✅ | `imgsz`, `half`, `batch` | +| Format | `format` Argument | Model | Metadata | Arguments | +|---------------------------------------------------|-------------------|--------------------------------|----------|----------------------------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n-pose.pt` | ✅ | - | +| [TorchScript](../integrations/torchscript.md) | `torchscript` | `yolov8n-pose.torchscript` | ✅ | `imgsz`, `optimize`, `batch` | +| [ONNX](../integrations/onnx.md) | `onnx` | `yolov8n-pose.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset`, `batch` | +| [OpenVINO](../integrations/openvino.md) | `openvino` | `yolov8n-pose_openvino_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` | +| [TensorRT](../integrations/tensorrt.md) | `engine` | `yolov8n-pose.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace`, `int8`, `batch` | +| [CoreML](../integrations/coreml.md) | `coreml` | `yolov8n-pose.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms`, `batch` | +| [TF SavedModel](../integrations/tf-savedmodel.md) | `saved_model` | `yolov8n-pose_saved_model/` | ✅ | `imgsz`, `keras`, `int8`, `batch` | +| [TF GraphDef](../integrations/tf-graphdef.md) | `pb` | `yolov8n-pose.pb` | ❌ | `imgsz`, `batch` | +| [TF Lite](../integrations/tflite.md) | `tflite` | `yolov8n-pose.tflite` | ✅ | `imgsz`, 
`half`, `int8`, `batch` | +| [TF Edge TPU](../integrations/edge-tpu.md) | `edgetpu` | `yolov8n-pose_edgetpu.tflite` | ✅ | `imgsz`, `batch` | +| [TF.js](../integrations/tfjs.md) | `tfjs` | `yolov8n-pose_web_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` | +| [PaddlePaddle](../integrations/paddlepaddle.md) | `paddle` | `yolov8n-pose_paddle_model/` | ✅ | `imgsz`, `batch` | +| [NCNN](../integrations/ncnn.md) | `ncnn` | `yolov8n-pose_ncnn_model/` | ✅ | `imgsz`, `half`, `batch` | See full `export` details in the [Export](../modes/export.md) page. diff --git a/docs/en/tasks/segment.md b/docs/en/tasks/segment.md index 8b22f207e6..ea7bfdb17e 100644 --- a/docs/en/tasks/segment.md +++ b/docs/en/tasks/segment.md @@ -168,20 +168,20 @@ Export a YOLOv8n-seg model to a different format like ONNX, CoreML, etc. Available YOLOv8-seg export formats are in the table below. You can export to any format using the `format` argument, i.e. `format='onnx'` or `format='engine'`. You can predict or validate directly on exported models, i.e. `yolo predict model=yolov8n-seg.onnx`. Usage examples are shown for your model after export completes. 
-| Format | `format` Argument | Model | Metadata | Arguments | -|---------------------------------------------------|-------------------|-------------------------------|----------|--------------------------------------------------------------| -| [PyTorch](https://pytorch.org/) | - | `yolov8n-seg.pt` | ✅ | - | -| [TorchScript](../integrations/torchscript.md) | `torchscript` | `yolov8n-seg.torchscript` | ✅ | `imgsz`, `optimize`, `batch` | -| [ONNX](../integrations/onnx.md) | `onnx` | `yolov8n-seg.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset`, `batch` | -| [OpenVINO](../integrations/openvino.md) | `openvino` | `yolov8n-seg_openvino_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` | -| [TensorRT](../integrations/tensorrt.md) | `engine` | `yolov8n-seg.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace`, `batch` | -| [CoreML](../integrations/coreml.md) | `coreml` | `yolov8n-seg.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms`, `batch` | -| [TF SavedModel](../integrations/tf-savedmodel.md) | `saved_model` | `yolov8n-seg_saved_model/` | ✅ | `imgsz`, `keras`, `int8`, `batch` | -| [TF GraphDef](../integrations/tf-graphdef.md) | `pb` | `yolov8n-seg.pb` | ❌ | `imgsz`, `batch` | -| [TF Lite](../integrations/tflite.md) | `tflite` | `yolov8n-seg.tflite` | ✅ | `imgsz`, `half`, `int8`, `batch` | -| [TF Edge TPU](../integrations/edge-tpu.md) | `edgetpu` | `yolov8n-seg_edgetpu.tflite` | ✅ | `imgsz`, `batch` | -| [TF.js](../integrations/tfjs.md) | `tfjs` | `yolov8n-seg_web_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` | -| [PaddlePaddle](../integrations/paddlepaddle.md) | `paddle` | `yolov8n-seg_paddle_model/` | ✅ | `imgsz`, `batch` | -| [NCNN](../integrations/ncnn.md) | `ncnn` | `yolov8n-seg_ncnn_model/` | ✅ | `imgsz`, `half`, `batch` | +| Format | `format` Argument | Model | Metadata | Arguments | 
+|---------------------------------------------------|-------------------|-------------------------------|----------|----------------------------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n-seg.pt` | ✅ | - | +| [TorchScript](../integrations/torchscript.md) | `torchscript` | `yolov8n-seg.torchscript` | ✅ | `imgsz`, `optimize`, `batch` | +| [ONNX](../integrations/onnx.md) | `onnx` | `yolov8n-seg.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset`, `batch` | +| [OpenVINO](../integrations/openvino.md) | `openvino` | `yolov8n-seg_openvino_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` | +| [TensorRT](../integrations/tensorrt.md) | `engine` | `yolov8n-seg.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace`, `int8`, `batch` | +| [CoreML](../integrations/coreml.md) | `coreml` | `yolov8n-seg.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms`, `batch` | +| [TF SavedModel](../integrations/tf-savedmodel.md) | `saved_model` | `yolov8n-seg_saved_model/` | ✅ | `imgsz`, `keras`, `int8`, `batch` | +| [TF GraphDef](../integrations/tf-graphdef.md) | `pb` | `yolov8n-seg.pb` | ❌ | `imgsz`, `batch` | +| [TF Lite](../integrations/tflite.md) | `tflite` | `yolov8n-seg.tflite` | ✅ | `imgsz`, `half`, `int8`, `batch` | +| [TF Edge TPU](../integrations/edge-tpu.md) | `edgetpu` | `yolov8n-seg_edgetpu.tflite` | ✅ | `imgsz`, `batch` | +| [TF.js](../integrations/tfjs.md) | `tfjs` | `yolov8n-seg_web_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` | +| [PaddlePaddle](../integrations/paddlepaddle.md) | `paddle` | `yolov8n-seg_paddle_model/` | ✅ | `imgsz`, `batch` | +| [NCNN](../integrations/ncnn.md) | `ncnn` | `yolov8n-seg_ncnn_model/` | ✅ | `imgsz`, `half`, `batch` | See full `export` details in the [Export](../modes/export.md) page. 
diff --git a/docs/en/usage/cli.md b/docs/en/usage/cli.md index 1ec5f3ece6..0df7828ff9 100644 --- a/docs/en/usage/cli.md +++ b/docs/en/usage/cli.md @@ -170,21 +170,21 @@ Export a YOLOv8n model to a different format like ONNX, CoreML, etc. Available YOLOv8 export formats are in the table below. You can export to any format using the `format` argument, i.e. `format='onnx'` or `format='engine'`. -| Format | `format` Argument | Model | Metadata | Arguments | -|---------------------------------------------------|-------------------|---------------------------|----------|--------------------------------------------------------------| -| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - | -| [TorchScript](../integrations/torchscript.md) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize`, `batch` | -| [ONNX](../integrations/onnx.md) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset`, `batch` | -| [OpenVINO](../integrations/openvino.md) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` | -| [TensorRT](../integrations/tensorrt.md) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace`, `batch` | -| [CoreML](../integrations/coreml.md) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms`, `batch` | -| [TF SavedModel](../integrations/tf-savedmodel.md) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras`, `int8`, `batch` | -| [TF GraphDef](../integrations/tf-graphdef.md) | `pb` | `yolov8n.pb` | ❌ | `imgsz`, `batch` | -| [TF Lite](../integrations/tflite.md) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8`, `batch` | -| [TF Edge TPU](../integrations/edge-tpu.md) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz`, `batch` | -| [TF.js](../integrations/tfjs.md) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` | -| [PaddlePaddle](../integrations/paddlepaddle.md) | `paddle` | `yolov8n_paddle_model/` | ✅ | 
`imgsz`, `batch` | -| [NCNN](../integrations/ncnn.md) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half`, `batch` | +| Format | `format` Argument | Model | Metadata | Arguments | +|---------------------------------------------------|-------------------|---------------------------|----------|----------------------------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - | +| [TorchScript](../integrations/torchscript.md) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize`, `batch` | +| [ONNX](../integrations/onnx.md) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset`, `batch` | +| [OpenVINO](../integrations/openvino.md) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` | +| [TensorRT](../integrations/tensorrt.md) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace`, `int8`, `batch` | +| [CoreML](../integrations/coreml.md) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms`, `batch` | +| [TF SavedModel](../integrations/tf-savedmodel.md) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras`, `int8`, `batch` | +| [TF GraphDef](../integrations/tf-graphdef.md) | `pb` | `yolov8n.pb` | ❌ | `imgsz`, `batch` | +| [TF Lite](../integrations/tflite.md) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8`, `batch` | +| [TF Edge TPU](../integrations/edge-tpu.md) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz`, `batch` | +| [TF.js](../integrations/tfjs.md) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` | +| [PaddlePaddle](../integrations/paddlepaddle.md) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz`, `batch` | +| [NCNN](../integrations/ncnn.md) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half`, `batch` | See full `export` details in the [Export](../modes/export.md) page. 
diff --git a/examples/tutorial.ipynb b/examples/tutorial.ipynb index 2a5f1d831e..1902c7ce04 100644 --- a/examples/tutorial.ipynb +++ b/examples/tutorial.ipynb @@ -355,21 +355,21 @@ "- 💡 ProTip: Export to [ONNX](https://docs.ultralytics.com/integrations/onnx/) or [OpenVINO](https://docs.ultralytics.com/integrations/openvino/) for up to 3x CPU speedup. \n", "- 💡 ProTip: Export to [TensorRT](https://docs.ultralytics.com/integrations/tensorrt/) for up to 5x GPU speedup.\n", "\n", - "| Format | `format` Argument | Model | Metadata | Arguments |\n", - "|--------------------------------------------------------------------------|-------------------|---------------------------|----------|--------------------------------------------------------------|\n", - "| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - |\n", - "| [TorchScript](https://docs.ultralytics.com/integrations/torchscript) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize`, `batch` |\n", - "| [ONNX](https://docs.ultralytics.com/integrations/onnx) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset`, `batch` |\n", - "| [OpenVINO](https://docs.ultralytics.com/integrations/openvino) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` |\n", - "| [TensorRT](https://docs.ultralytics.com/integrations/tensorrt) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace`, `batch` |\n", - "| [CoreML](https://docs.ultralytics.com/integrations/coreml) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms`, `batch` |\n", - "| [TF SavedModel](https://docs.ultralytics.com/integrations/tf-savedmodel) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras`, `int8`, `batch` |\n", - "| [TF GraphDef](https://docs.ultralytics.com/integrations/tf-graphdef) | `pb` | `yolov8n.pb` | ❌ | `imgsz`, `batch` |\n", - "| [TF Lite](https://docs.ultralytics.com/integrations/tflite) | `tflite` | `yolov8n.tflite` | ✅ 
| `imgsz`, `half`, `int8`, `batch` |\n", - "| [TF Edge TPU](https://docs.ultralytics.com/integrations/edge-tpu) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz`, `batch` |\n", - "| [TF.js](https://docs.ultralytics.com/integrations/tfjs) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` |\n", - "| [PaddlePaddle](https://docs.ultralytics.com/integrations/paddlepaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz`, `batch` |\n", - "| [NCNN](https://docs.ultralytics.com/integrations/ncnn) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half`, `batch` |" + "| Format | `format` Argument | Model | Metadata | Arguments |\n", + "|--------------------------------------------------------------------------|-------------------|---------------------------|----------|----------------------------------------------------------------------|\n", + "| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - |\n", + "| [TorchScript](https://docs.ultralytics.com/integrations/torchscript) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize`, `batch` |\n", + "| [ONNX](https://docs.ultralytics.com/integrations/onnx) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset`, `batch` |\n", + "| [OpenVINO](https://docs.ultralytics.com/integrations/openvino) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` |\n", + "| [TensorRT](https://docs.ultralytics.com/integrations/tensorrt) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace`, `int8`, `batch` |\n", + "| [CoreML](https://docs.ultralytics.com/integrations/coreml) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms`, `batch` |\n", + "| [TF SavedModel](https://docs.ultralytics.com/integrations/tf-savedmodel) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras`, `int8`, `batch` |\n", + "| [TF GraphDef](https://docs.ultralytics.com/integrations/tf-graphdef) | `pb` | `yolov8n.pb` | ❌ | 
`imgsz`, `batch` |\n", + "| [TF Lite](https://docs.ultralytics.com/integrations/tflite) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8`, `batch` |\n", + "| [TF Edge TPU](https://docs.ultralytics.com/integrations/edge-tpu) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz`, `batch` |\n", + "| [TF.js](https://docs.ultralytics.com/integrations/tfjs) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` |\n", + "| [PaddlePaddle](https://docs.ultralytics.com/integrations/paddlepaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz`, `batch` |\n", + "| [NCNN](https://docs.ultralytics.com/integrations/ncnn) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half`, `batch` |" ], "metadata": { "id": "nPZZeNrLCQG6" diff --git a/tests/test_cuda.py b/tests/test_cuda.py index 11e8f4e582..8b29966ce4 100644 --- a/tests/test_cuda.py +++ b/tests/test_cuda.py @@ -1,10 +1,14 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license +from pathlib import Path +from itertools import product + import pytest import torch from ultralytics import YOLO from ultralytics.utils import ASSETS, WEIGHTS_DIR +from ultralytics.cfg import TASK2DATA, TASK2MODEL, TASKS from . 
import CUDA_DEVICE_COUNT, CUDA_IS_AVAILABLE, MODEL, SOURCE @@ -23,6 +27,34 @@ def test_export_engine(): YOLO(f)(SOURCE, device=0) +@pytest.mark.slow +@pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason="CUDA is not available") +@pytest.mark.parametrize( + "task, dynamic, int8, half, batch", + [ # generate all combinations but exclude those where both int8 and half are True + (task, dynamic, int8, half, batch) + # Note: tests reduced below pending compute availability expansion as GPU CI runner utilization is high + # for task, dynamic, int8, half, batch in product(TASKS, [True, False], [True, False], [True, False], [1, 2]) + for task, dynamic, int8, half, batch in product(TASKS, [True], [True], [False], [2]) + if not (int8 and half) # exclude cases where both int8 and half are True + ], +) +def test_export_engine_matrix(task, dynamic, int8, half, batch): + """Test YOLO exports to TensorRT format.""" + file = YOLO(TASK2MODEL[task]).export( + format="engine", + imgsz=32, + dynamic=dynamic, + int8=int8, + half=half, + batch=batch, + data=TASK2DATA[task], + ) + YOLO(file)([SOURCE] * batch, imgsz=64 if dynamic else 32) # exported model inference + Path(file).unlink() # cleanup + Path(file).with_suffix(".cache").unlink() if int8 else None # cleanup INT8 cache + + @pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason="CUDA is not available") def test_train(): """Test model training on a minimal dataset.""" diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py index 153bb770fe..a7fcaf75a5 100644 --- a/ultralytics/__init__.py +++ b/ultralytics/__init__.py @@ -1,6 +1,6 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license -__version__ = "8.2.10" +__version__ = "8.2.11" from ultralytics.data.explorer.explorer import Explorer from ultralytics.models import RTDETR, SAM, YOLO, YOLOWorld diff --git a/ultralytics/engine/exporter.py b/ultralytics/engine/exporter.py index 4b8dd002ca..fa681099ba 100644 --- a/ultralytics/engine/exporter.py +++ b/ultralytics/engine/exporter.py @@ -200,6 +200,8 
@@ class Exporter: self.args.half = False assert not self.args.dynamic, "half=True not compatible with dynamic=True, i.e. use only one." self.imgsz = check_imgsz(self.args.imgsz, stride=model.stride, min_dim=2) # check image size + if self.args.int8 and engine: + self.args.dynamic = True # enforce dynamic to export TensorRT INT8; ensures ONNX is dynamic if self.args.optimize: assert not ncnn, "optimize=True not compatible with format='ncnn', i.e. use optimize=False" assert self.device.type == "cpu", "optimize=True not compatible with cuda devices, i.e. use device='cpu'" @@ -349,12 +351,12 @@ class Exporter: task=self.model.task, imgsz=self.imgsz[0], augment=False, - batch_size=self.args.batch, + batch_size=self.args.batch * 2, # NOTE TensorRT INT8 calibration should use 2x batch size ) n = len(dataset) if n < 300: LOGGER.warning(f"{prefix} WARNING ⚠️ >300 images recommended for INT8 calibration, found {n} images.") - return build_dataloader(dataset, batch=self.args.batch, workers=0) # required for batch loading + return build_dataloader(dataset, batch=self.args.batch * 2, workers=0) # required for batch loading @try_export def export_torchscript(self, prefix=colorstr("TorchScript:")): @@ -679,6 +681,7 @@ class Exporter: import tensorrt as trt # noqa check_version(trt.__version__, "7.0.0", hard=True) # require tensorrt>=7.0.0 + # Setup and checks LOGGER.info(f"\n{prefix} starting export with TensorRT {trt.__version__}...") is_trt10 = int(trt.__version__.split(".")[0]) >= 10 # is TensorRT >= 10 assert Path(f_onnx).exists(), f"failed to export ONNX file: {f_onnx}" @@ -687,6 +690,7 @@ class Exporter: if self.args.verbose: logger.min_severity = trt.Logger.Severity.VERBOSE + # Engine builder builder = trt.Builder(logger) config = builder.create_builder_config() workspace = int(self.args.workspace * (1 << 30)) @@ -696,10 +700,14 @@ class Exporter: config.max_workspace_size = workspace flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) network = 
builder.create_network(flag) + half = builder.platform_has_fast_fp16 and self.args.half + int8 = builder.platform_has_fast_int8 and self.args.int8 + # Read ONNX file parser = trt.OnnxParser(network, logger) if not parser.parse_from_file(f_onnx): raise RuntimeError(f"failed to load ONNX file: {f_onnx}") + # Network inputs inputs = [network.get_input(i) for i in range(network.num_inputs)] outputs = [network.get_output(i) for i in range(network.num_outputs)] for inp in inputs: @@ -713,15 +721,67 @@ class Exporter: LOGGER.warning(f"{prefix} WARNING ⚠️ 'dynamic=True' model requires max batch size, i.e. 'batch=16'") profile = builder.create_optimization_profile() min_shape = (1, shape[1], 32, 32) # minimum input shape - opt_shape = (max(1, shape[0] // 2), *shape[1:]) # optimal input shape max_shape = (*shape[:2], *(max(1, self.args.workspace) * d for d in shape[2:])) # max input shape for inp in inputs: - profile.set_shape(inp.name, min_shape, opt_shape, max_shape) + profile.set_shape(inp.name, min=min_shape, opt=shape, max=max_shape) config.add_optimization_profile(profile) - half = builder.platform_has_fast_fp16 and self.args.half - LOGGER.info(f"{prefix} building FP{16 if half else 32} engine as {f}") - if half: + LOGGER.info(f"{prefix} building {'INT8' if int8 else 'FP' + ('16' if half else '32')} engine as {f}") + if int8: + config.set_flag(trt.BuilderFlag.INT8) + config.set_calibration_profile(profile) + config.profiling_verbosity = trt.ProfilingVerbosity.DETAILED + + class EngineCalibrator(trt.IInt8Calibrator): + def __init__( + self, + dataset, # ultralytics.data.build.InfiniteDataLoader + batch: int, + cache: str = "", + ) -> None: + trt.IInt8Calibrator.__init__(self) + self.dataset = dataset + self.data_iter = iter(dataset) + self.algo = trt.CalibrationAlgoType.ENTROPY_CALIBRATION_2 + self.batch = batch + self.cache = Path(cache) + + def get_algorithm(self) -> trt.CalibrationAlgoType: + """Get the calibration algorithm to use.""" + return self.algo + + def 
get_batch_size(self) -> int: + """Get the batch size to use for calibration.""" + return self.batch or 1 + + def get_batch(self, names) -> list: + """Get the next batch to use for calibration, as a list of device memory pointers.""" + try: + im0s = next(self.data_iter)["img"] / 255.0 + im0s = im0s.to("cuda") if im0s.device.type == "cpu" else im0s + return [int(im0s.data_ptr())] + except StopIteration: + # Return [] or None, signal to TensorRT there is no calibration data remaining + return None + + def read_calibration_cache(self) -> bytes: + """Use existing cache instead of calibrating again, otherwise, implicitly return None.""" + if self.cache.exists() and self.cache.suffix == ".cache": + return self.cache.read_bytes() + + def write_calibration_cache(self, cache) -> None: + """Write calibration cache to disk.""" + _ = self.cache.write_bytes(cache) + + # Load dataset w/ builder (for batching) and calibrate + dataset = self.get_int8_calibration_dataloader(prefix) + config.int8_calibrator = EngineCalibrator( + dataset=dataset, + batch=2 * self.args.batch, + cache=self.file.with_suffix(".cache"), + ) + + elif half: config.set_flag(trt.BuilderFlag.FP16) # Free CUDA memory