Merge branch 'main' into torch-prof

3 months ago · fa9f251ad0
parent 4c71b3d889 cece2ee2cf
commit fa9f251ad0
28 changed files with 361 additions and 524 deletions
--- a/.github/workflows/docker.yaml
+++ b/.github/workflows/docker.yaml
@@ -172,7 +172,7 @@ jobs:
          fi
          if [[ "${{ matrix.tags }}" == "latest-python" ]]; then
            t=ultralytics/ultralytics:latest-jupyter
-            v=ultralytics/ultralytics:${{ steps.get_version.outputs.version_tag }}-jupyter
+            v=ultralytics/ultralytics:${{ steps.get_version.outputs.version }}-jupyter
            docker build -f docker/Dockerfile-jupyter -t $t -t $v .
            docker push $t
            if [[ "${{ steps.check_tag.outputs.new_release }}" == "true" ]]; then
--- a/.gitignore
+++ b/.gitignore
@@ -163,6 +163,7 @@ weights/
 *_openvino_model/
 *_paddle_model/
 *_ncnn_model/
+*_imx_model/
 pnnx*

 # Autogenerated files for tests
--- a/docs/en/guides/analytics.md
+++ b/docs/en/guides/analytics.md
@@ -45,126 +45,15 @@ This guide provides a comprehensive overview of three fundamental types of [data

        # generate the pie chart
        yolo solutions analytics analytics_type="pie" show=True
-        ```
-
-    === "Python"
-
-        ```python
-        import cv2
-
-        from ultralytics import solutions
-
-        cap = cv2.VideoCapture("Path/to/video/file.mp4")
-        assert cap.isOpened(), "Error reading video file"
-
-        w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
-
-        out = cv2.VideoWriter(
-            "ultralytics_analytics.avi",
-            cv2.VideoWriter_fourcc(*"MJPG"),
-            fps,
-            (1920, 1080),  # This is fixed
-        )

-        analytics = solutions.Analytics(
-            analytics_type="line",
-            show=True,
-        )
+        # generate the bar plots
+        yolo solutions analytics analytics_type="bar" show=True

-        frame_count = 0
-        while cap.isOpened():
-            success, im0 = cap.read()
-            if success:
-                frame_count += 1
-                im0 = analytics.process_data(im0, frame_count)  # update analytics graph every frame
-                out.write(im0)  # write the video file
-            else:
-                break
-
-        cap.release()
-        out.release()
-        cv2.destroyAllWindows()
+        # generate the area plots
+        yolo solutions analytics analytics_type="area" show=True
        ```

-    === "Pie Chart"
-
-        ```python
-        import cv2
-
-        from ultralytics import solutions
-
-        cap = cv2.VideoCapture("Path/to/video/file.mp4")
-        assert cap.isOpened(), "Error reading video file"
-
-        w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
-
-        out = cv2.VideoWriter(
-            "ultralytics_analytics.avi",
-            cv2.VideoWriter_fourcc(*"MJPG"),
-            fps,
-            (1920, 1080),  # This is fixed
-        )
-
-        analytics = solutions.Analytics(
-            analytics_type="pie",
-            show=True,
-        )
-
-        frame_count = 0
-        while cap.isOpened():
-            success, im0 = cap.read()
-            if success:
-                frame_count += 1
-                im0 = analytics.process_data(im0, frame_count)  # update analytics graph every frame
-                out.write(im0)  # write the video file
-            else:
-                break
-
-        cap.release()
-        out.release()
-        cv2.destroyAllWindows()
-        ```
-
-    === "Bar Plot"
-
-        ```python
-        import cv2
-
-        from ultralytics import solutions
-
-        cap = cv2.VideoCapture("Path/to/video/file.mp4")
-        assert cap.isOpened(), "Error reading video file"
-
-        w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
-
-        out = cv2.VideoWriter(
-            "ultralytics_analytics.avi",
-            cv2.VideoWriter_fourcc(*"MJPG"),
-            fps,
-            (1920, 1080),  # This is fixed
-        )
-
-        analytics = solutions.Analytics(
-            analytics_type="bar",
-            show=True,
-        )
-
-        frame_count = 0
-        while cap.isOpened():
-            success, im0 = cap.read()
-            if success:
-                frame_count += 1
-                im0 = analytics.process_data(im0, frame_count)  # update analytics graph every frame
-                out.write(im0)  # write the video file
-            else:
-                break
-
-        cap.release()
-        out.release()
-        cv2.destroyAllWindows()
-        ```
-
-    === "Area chart"
+    === "Python"

        ```python
        import cv2
@@ -173,9 +62,9 @@ This guide provides a comprehensive overview of three fundamental types of [data

        cap = cv2.VideoCapture("Path/to/video/file.mp4")
        assert cap.isOpened(), "Error reading video file"
-
        w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))

+        # Video writer
        out = cv2.VideoWriter(
            "ultralytics_analytics.avi",
            cv2.VideoWriter_fourcc(*"MJPG"),
@@ -183,11 +72,15 @@ This guide provides a comprehensive overview of three fundamental types of [data
            (1920, 1080),  # This is fixed
        )

+        # Init analytics
        analytics = solutions.Analytics(
-            analytics_type="area",
-            show=True,
+            show=True,  # Display the output
+            analytics_type="line",  # Pass the analytics type, could be "pie", "bar" or "area".
+            model="yolo11n.pt",  # Path to the YOLO11 model file
+            # classes=[0, 2],  # If you want to count specific classes i.e person and car with COCO pretrained model.
        )

+        # Process video
        frame_count = 0
        while cap.isOpened():
            success, im0 = cap.read()
--- a/docs/en/guides/distance-calculation.md
+++ b/docs/en/guides/distance-calculation.md
@@ -55,6 +55,7 @@ Measuring the gap between two objects is known as distance calculation within a
        # Init distance-calculation obj
        distance = solutions.DistanceCalculation(model="yolo11n.pt", show=True)

+        # Process video
        while cap.isOpened():
            success, im0 = cap.read()
            if not success:
--- a/docs/en/guides/heatmaps.md
+++ b/docs/en/guides/heatmaps.md
@@ -47,119 +47,12 @@ A heatmap generated with [Ultralytics YOLO11](https://github.com/ultralytics/ult

        # Pass a custom colormap
        yolo solutions heatmap colormap=cv2.COLORMAP_INFERNO
-        ```
-
-    === "Python"
-
-        ```python
-        import cv2
-
-        from ultralytics import solutions
-
-        cap = cv2.VideoCapture("Path/to/video/file.mp4")
-        assert cap.isOpened(), "Error reading video file"
-        w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
-
-        # Video writer
-        video_writer = cv2.VideoWriter("heatmap_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
-
-        # Init heatmap
-        heatmap = solutions.Heatmap(
-            show=True,
-            model="yolo11n.pt",
-            colormap=cv2.COLORMAP_PARULA,
-        )
-
-        while cap.isOpened():
-            success, im0 = cap.read()
-            if not success:
-                print("Video frame is empty or video processing has been successfully completed.")
-                break
-            im0 = heatmap.generate_heatmap(im0)
-            video_writer.write(im0)
-
-        cap.release()
-        video_writer.release()
-        cv2.destroyAllWindows()
-        ```
-
-    === "Line Counting"
-
-        ```python
-        import cv2
-
-        from ultralytics import solutions
-
-        cap = cv2.VideoCapture("Path/to/video/file.mp4")
-        assert cap.isOpened(), "Error reading video file"
-        w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
-
-        # Video writer
-        video_writer = cv2.VideoWriter("heatmap_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))

-        # line for object counting
-        line_points = [(20, 400), (1080, 404)]
-
-        # Init heatmap
-        heatmap = solutions.Heatmap(
-            show=True,
-            model="yolo11n.pt",
-            colormap=cv2.COLORMAP_PARULA,
-            region=line_points,
-        )
-
-        while cap.isOpened():
-            success, im0 = cap.read()
-            if not success:
-                print("Video frame is empty or video processing has been successfully completed.")
-                break
-            im0 = heatmap.generate_heatmap(im0)
-            video_writer.write(im0)
-
-        cap.release()
-        video_writer.release()
-        cv2.destroyAllWindows()
+        # Heatmaps + object counting
+        yolo solutions heatmap region=[(20, 400), (1080, 404), (1080, 360), (20, 360)]
        ```

-    === "Polygon Counting"
-
-        ```python
-        import cv2
-
-        from ultralytics import solutions
-
-        cap = cv2.VideoCapture("Path/to/video/file.mp4")
-        assert cap.isOpened(), "Error reading video file"
-        w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
-
-        # Video writer
-        video_writer = cv2.VideoWriter("heatmap_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
-
-        # Define polygon points
-        region_points = [(20, 400), (1080, 404), (1080, 360), (20, 360), (20, 400)]
-
-        # Init heatmap
-        heatmap = solutions.Heatmap(
-            show=True,
-            model="yolo11n.pt",
-            colormap=cv2.COLORMAP_PARULA,
-            region=region_points,
-        )
-
-        while cap.isOpened():
-            success, im0 = cap.read()
-            if not success:
-                print("Video frame is empty or video processing has been successfully completed.")
-                break
-            im0 = heatmap.generate_heatmap(im0)
-            video_writer.write(im0)
-
-        cap.release()
-        video_writer.release()
-        cv2.destroyAllWindows()
-        ```
-
-    === "Region Counting"
+    === "Python"

        ```python
        import cv2
@@ -173,51 +66,24 @@ A heatmap generated with [Ultralytics YOLO11](https://github.com/ultralytics/ult
        # Video writer
        video_writer = cv2.VideoWriter("heatmap_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))

-        # Define region points
-        region_points = [(20, 400), (1080, 404), (1080, 360), (20, 360)]
-
-        # Init heatmap
-        heatmap = solutions.Heatmap(
-            show=True,
-            model="yolo11n.pt",
-            colormap=cv2.COLORMAP_PARULA,
-            region=region_points,
-        )
-
-        while cap.isOpened():
-            success, im0 = cap.read()
-            if not success:
-                print("Video frame is empty or video processing has been successfully completed.")
-                break
-            im0 = heatmap.generate_heatmap(im0)
-            video_writer.write(im0)
-
-        cap.release()
-        video_writer.release()
-        cv2.destroyAllWindows()
-        ```
-
-    === "Specific Classes"
-
-        ```python
-        import cv2
-
-        from ultralytics import solutions
-
-        cap = cv2.VideoCapture("Path/to/video/file.mp4")
-        assert cap.isOpened(), "Error reading video file"
-        w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
-
-        # Video writer
-        video_writer = cv2.VideoWriter("heatmap_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
+        # In case you want to apply object counting + heatmaps, you can pass region points.
+        # region_points = [(20, 400), (1080, 404)]  # Define line points
+        # region_points = [(20, 400), (1080, 404), (1080, 360), (20, 360)]  # Define region points
+        # region_points = [(20, 400), (1080, 404), (1080, 360), (20, 360), (20, 400)]  # Define polygon points

        # Init heatmap
        heatmap = solutions.Heatmap(
-            show=True,
-            model="yolo11n.pt",
-            classes=[0, 2],
+            show=True,  # Display the output
+            model="yolo11n.pt",  # Path to the YOLO11 model file
+            colormap=cv2.COLORMAP_PARULA,  # Colormap of heatmap
+            # region=region_points,  # If you want to do object counting with heatmaps, you can pass region_points
+            # classes=[0, 2],  # If you want to generate heatmap for specific classes i.e person and car.
+            # show_in=True,  # Display in counts
+            # show_out=True,  # Display out counts
+            # line_width=2,  # Adjust the line width for bounding boxes and text display
        )

+        # Process video
        while cap.isOpened():
            success, im0 = cap.read()
            if not success:
--- a/docs/en/guides/object-counting.md
+++ b/docs/en/guides/object-counting.md
@@ -73,165 +73,22 @@ Object counting with [Ultralytics YOLO11](https://github.com/ultralytics/ultraly
        w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))

        # Define region points
-        region_points = [(20, 400), (1080, 404), (1080, 360), (20, 360)]
+        # region_points = [(20, 400), (1080, 400)]  # For line counting
+        region_points = [(20, 400), (1080, 404), (1080, 360), (20, 360)]  # For rectangle region counting
+        # region_points = [(20, 400), (1080, 404), (1080, 360), (20, 360), (20, 400)]  # For polygon region counting

        # Video writer
        video_writer = cv2.VideoWriter("object_counting_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))

        # Init Object Counter
        counter = solutions.ObjectCounter(
-            show=True,
-            region=region_points,
-            model="yolo11n.pt",
-        )
-
-        # Process video
-        while cap.isOpened():
-            success, im0 = cap.read()
-            if not success:
-                print("Video frame is empty or video processing has been successfully completed.")
-                break
-            im0 = counter.count(im0)
-            video_writer.write(im0)
-
-        cap.release()
-        video_writer.release()
-        cv2.destroyAllWindows()
-        ```
-
-    === "OBB Object Counting"
-
-        ```python
-        import cv2
-
-        from ultralytics import solutions
-
-        cap = cv2.VideoCapture("path/to/video/file.mp4")
-        assert cap.isOpened(), "Error reading video file"
-        w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
-
-        # line or region points
-        line_points = [(20, 400), (1080, 400)]
-
-        # Video writer
-        video_writer = cv2.VideoWriter("object_counting_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
-
-        # Init Object Counter
-        counter = solutions.ObjectCounter(
-            show=True,
-            region=line_points,
-            model="yolo11n-obb.pt",
-        )
-
-        # Process video
-        while cap.isOpened():
-            success, im0 = cap.read()
-            if not success:
-                print("Video frame is empty or video processing has been successfully completed.")
-                break
-            im0 = counter.count(im0)
-            video_writer.write(im0)
-
-        cap.release()
-        video_writer.release()
-        cv2.destroyAllWindows()
-        ```
-
-    === "Count in Polygon"
-
-        ```python
-        import cv2
-
-        from ultralytics import solutions
-
-        cap = cv2.VideoCapture("path/to/video/file.mp4")
-        assert cap.isOpened(), "Error reading video file"
-        w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
-
-        # Define region points
-        region_points = [(20, 400), (1080, 404), (1080, 360), (20, 360), (20, 400)]
-
-        # Video writer
-        video_writer = cv2.VideoWriter("object_counting_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
-
-        # Init Object Counter
-        counter = solutions.ObjectCounter(
-            show=True,
-            region=region_points,
-            model="yolo11n.pt",
-        )
-
-        # Process video
-        while cap.isOpened():
-            success, im0 = cap.read()
-            if not success:
-                print("Video frame is empty or video processing has been successfully completed.")
-                break
-            im0 = counter.count(im0)
-            video_writer.write(im0)
-
-        cap.release()
-        video_writer.release()
-        cv2.destroyAllWindows()
-        ```
-
-    === "Count in Line"
-
-        ```python
-        import cv2
-
-        from ultralytics import solutions
-
-        cap = cv2.VideoCapture("path/to/video/file.mp4")
-        assert cap.isOpened(), "Error reading video file"
-        w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
-
-        # Define region points
-        line_points = [(20, 400), (1080, 400)]
-
-        # Video writer
-        video_writer = cv2.VideoWriter("object_counting_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
-
-        # Init Object Counter
-        counter = solutions.ObjectCounter(
-            show=True,
-            region=line_points,
-            model="yolo11n.pt",
-        )
-
-        # Process video
-        while cap.isOpened():
-            success, im0 = cap.read()
-            if not success:
-                print("Video frame is empty or video processing has been successfully completed.")
-                break
-            im0 = counter.count(im0)
-            video_writer.write(im0)
-
-        cap.release()
-        video_writer.release()
-        cv2.destroyAllWindows()
-        ```
-
-    === "Specific Classes"
-
-        ```python
-        import cv2
-
-        from ultralytics import solutions
-
-        cap = cv2.VideoCapture("path/to/video/file.mp4")
-        assert cap.isOpened(), "Error reading video file"
-        w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
-
-        # Video writer
-        video_writer = cv2.VideoWriter("object_counting_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
-
-        # Init Object Counter
-        counter = solutions.ObjectCounter(
-            show=True,
-            model="yolo11n.pt",
-            classes=[0, 1],
+            show=True,  # Display the output
+            region=region_points,  # Pass region points
+            model="yolo11n.pt",  # model="yolo11n-obb.pt" for object counting using YOLO11 OBB model.
+            # classes=[0, 2],  # If you want to count specific classes i.e person and car with COCO pretrained model.
+            # show_in=True,  # Display in counts
+            # show_out=True,  # Display out counts
+            # line_width=2,  # Adjust the line width for bounding boxes and text display
        )

        # Process video
--- a/docs/en/guides/queue-management.md
+++ b/docs/en/guides/queue-management.md
@@ -60,53 +60,23 @@ Queue management using [Ultralytics YOLO11](https://github.com/ultralytics/ultra
        assert cap.isOpened(), "Error reading video file"
        w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))

+        # Video writer
        video_writer = cv2.VideoWriter("queue_management.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))

-        queue_region = [(20, 400), (1080, 404), (1080, 360), (20, 360)]
+        # Define queue region points
+        queue_region = [(20, 400), (1080, 404), (1080, 360), (20, 360)]  # Define queue region points
+        # queue_region = [(20, 400), (1080, 404), (1080, 360), (20, 360), (20, 400)]  # Define queue polygon points

+        # Init Queue Manager
        queue = solutions.QueueManager(
-            model="yolo11n.pt",
-            region=queue_region,
-        )
-
-        while cap.isOpened():
-            success, im0 = cap.read()
-
-            if success:
-                out = queue.process_queue(im0)
-                video_writer.write(im0)
-                if cv2.waitKey(1) & 0xFF == ord("q"):
-                    break
-                continue
-
-            print("Video frame is empty or video processing has been successfully completed.")
-            break
-
-        cap.release()
-        cv2.destroyAllWindows()
-        ```
-
-    === "Queue Manager Specific Classes"
-
-        ```python
-        import cv2
-
-        from ultralytics import solutions
-
-        cap = cv2.VideoCapture("Path/to/video/file.mp4")
-
-        assert cap.isOpened(), "Error reading video file"
-        w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
-
-        video_writer = cv2.VideoWriter("queue_management.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
-
-        queue_region = [(20, 400), (1080, 404), (1080, 360), (20, 360)]
-
-        queue = solutions.QueueManager(
-            model="yolo11n.pt",
-            classes=3,
+            show=True,  # Display the output
+            model="yolo11n.pt",  # Path to the YOLO11 model file
+            region=queue_region,  # Pass queue region points
+            # classes=[0, 2],  # If you want to count specific classes i.e person and car with COCO pretrained model.
+            # line_width=2,  # Adjust the line width for bounding boxes and text display
        )

+        # Process video
        while cap.isOpened():
            success, im0 = cap.read()

--- a/docs/en/guides/speed-estimation.md
+++ b/docs/en/guides/speed-estimation.md
@@ -61,16 +61,24 @@ keywords: Ultralytics YOLO11, speed estimation, object tracking, computer vision
        from ultralytics import solutions

        cap = cv2.VideoCapture("Path/to/video/file.mp4")
-
        assert cap.isOpened(), "Error reading video file"
        w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))

+        # Video writer
        video_writer = cv2.VideoWriter("speed_management.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))

+        # Define speed region points
        speed_region = [(20, 400), (1080, 404), (1080, 360), (20, 360)]

-        speed = solutions.SpeedEstimator(model="yolo11n.pt", region=speed_region, show=True)
+        speed = solutions.SpeedEstimator(
+            show=True,  # Display the output
+            model="yolo11n-pose.pt",  # Path to the YOLO11 model file.
+            region=speed_region,  # Pass region points
+            # classes=[0, 2],  # If you want to estimate speed of specific classes.
+            # line_width=2,  # Adjust the line width for bounding boxes and text display
+        )

+        # Process video
        while cap.isOpened():
            success, im0 = cap.read()

--- a/docs/en/guides/streamlit-live-inference.md
+++ b/docs/en/guides/streamlit-live-inference.md
@@ -40,6 +40,12 @@ Streamlit makes it simple to build and deploy interactive web applications. Comb

 !!! example "Streamlit Application"

+    === "CLI"
+
+        ```bash
+        yolo streamlit-predict
+        ```
+
    === "Python"

        ```python
@@ -50,12 +56,6 @@ Streamlit makes it simple to build and deploy interactive web applications. Comb
        ### Make sure to run the file using command `streamlit run <file-name.py>`
        ```

-    === "CLI"
-
-        ```bash
-        yolo streamlit-predict
-        ```
-
 This will launch the Streamlit application in your default web browser. You will see the main title, subtitle, and the sidebar with configuration options. Select your desired YOLO11 model, set the confidence and NMS thresholds, and click the "Start" button to begin the real-time object detection.

 You can optionally supply a specific model in Python:
--- a/docs/en/guides/workouts-monitoring.md
+++ b/docs/en/guides/workouts-monitoring.md
@@ -60,40 +60,18 @@ Monitoring workouts through pose estimation with [Ultralytics YOLO11](https://gi
        assert cap.isOpened(), "Error reading video file"
        w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))

-        gym = solutions.AIGym(
-            model="yolo11n-pose.pt",
-            show=True,
-            kpts=[6, 8, 10],
-        )
-
-        while cap.isOpened():
-            success, im0 = cap.read()
-            if not success:
-                print("Video frame is empty or video processing has been successfully completed.")
-                break
-            im0 = gym.monitor(im0)
-
-        cv2.destroyAllWindows()
-        ```
-
-    === "Workouts Monitoring with Save Output"
-
-        ```python
-        import cv2
-
-        from ultralytics import solutions
-
-        cap = cv2.VideoCapture("path/to/video/file.mp4")
-        assert cap.isOpened(), "Error reading video file"
-        w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
-
+        # Video writer
        video_writer = cv2.VideoWriter("workouts.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))

+        # Init AIGym
        gym = solutions.AIGym(
-            show=True,
-            kpts=[6, 8, 10],
+            show=True,  # Display the frame
+            kpts=[6, 8, 10],  # keypoints index of person for monitoring specific exercise, by default it's for pushup
+            model="yolo11n-pose.pt",  # Path to the YOLO11 pose estimation model file
+            # line_width=2,  # Adjust the line width for bounding boxes and text display
        )

+        # Process video
        while cap.isOpened():
            success, im0 = cap.read()
            if not success:
--- a/docs/en/integrations/index.md
+++ b/docs/en/integrations/index.md
@@ -61,6 +61,8 @@ Welcome to the Ultralytics Integrations page! This page provides an overview of

 - [Albumentations](albumentations.md): Enhance your Ultralytics models with powerful image augmentations to improve model robustness and generalization.

+- [SONY IMX500](sony-imx500.md): Optimize and deploy [Ultralytics YOLOv8](https://docs.ultralytics.com/models/yolov8/) models on Raspberry Pi AI Cameras with the IMX500 sensor for fast, low-power performance.
+
 ## Deployment Integrations

 - [CoreML](coreml.md): CoreML, developed by [Apple](https://www.apple.com/), is a framework designed for efficiently integrating machine learning models into applications across iOS, macOS, watchOS, and tvOS, using Apple's hardware for effective and secure [model deployment](https://www.ultralytics.com/glossary/model-deployment).
--- a/docs/en/integrations/sony-imx500.md
+++ b/docs/en/integrations/sony-imx500.md
@@ -4,7 +4,7 @@ description: Learn to export Ultralytics YOLOv8 models to Sony's IMX500 format t
 keywords: Sony, IMX500, IMX 500, Atrios, MCT, model export, quantization, pruning, deep learning optimization, Raspberry Pi AI Camera, edge AI, PyTorch, IMX
 ---

-# IMX500 Export for Ultralytics YOLOv8
+# Sony IMX500 Export for Ultralytics YOLOv8

 This guide covers exporting and deploying Ultralytics YOLOv8 models to Raspberry Pi AI Cameras that feature the Sony IMX500 sensor.

--- a/docs/en/macros/export-table.md
+++ b/docs/en/macros/export-table.md
@@ -14,3 +14,4 @@
 | [PaddlePaddle](../integrations/paddlepaddle.md)   | `paddle`          | `{{ model_name or "yolo11n" }}_paddle_model/`   | ✅       | `imgsz`, `batch`                                                     |
 | [MNN](../integrations/mnn.md)                     | `mnn`             | `{{ model_name or "yolo11n" }}.mnn`             | ✅       | `imgsz`, `batch`, `int8`, `half`                                     |
 | [NCNN](../integrations/ncnn.md)                   | `ncnn`            | `{{ model_name or "yolo11n" }}_ncnn_model/`     | ✅       | `imgsz`, `half`, `batch`                                             |
+| [IMX500](../integrations/sony-imx500.md)          | `imx`             | `{{ model_name or "yolo11n" }}_imx_model/`      | ✅       | `imgsz`, `int8`                                                      |
--- a/docs/en/reference/utils/torch_utils.md
+++ b/docs/en/reference/utils/torch_utils.md
@@ -19,6 +19,10 @@ keywords: Ultralytics, torch utils, model optimization, device selection, infere

 <br><br><hr><br>

+## ::: ultralytics.utils.torch_utils.FXModel
+
+<br><br><hr><br>
+
 ## ::: ultralytics.utils.torch_utils.torch_distributed_zero_first

 <br><br><hr><br>
--- a/docs/mkdocs_github_authors.yaml
+++ b/docs/mkdocs_github_authors.yaml
@@ -109,6 +109,9 @@ chr043416@gmail.com:
 davis.justin@mssm.org:
  avatar: https://avatars.githubusercontent.com/u/23462437?v=4
  username: justincdavis
+francesco.mttl@gmail.com:
+  avatar: https://avatars.githubusercontent.com/u/3855193?v=4
+  username: ambitious-octopus
 glenn.jocher@ultralytics.com:
  avatar: https://avatars.githubusercontent.com/u/26833433?v=4
  username: glenn-jocher
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -412,12 +412,14 @@ nav:
      - TF.js: integrations/tfjs.md
      - TFLite: integrations/tflite.md
      - TFLite Edge TPU: integrations/edge-tpu.md
+      - Sony IMX500: integrations/sony-imx500.md
      - TensorBoard: integrations/tensorboard.md
      - TensorRT: integrations/tensorrt.md
      - TorchScript: integrations/torchscript.md
      - VS Code: integrations/vscode.md
      - Weights & Biases: integrations/weights-biases.md
      - Albumentations: integrations/albumentations.md
+      - SONY IMX500: integrations/sony-imx500.md
  - HUB:
      - hub/index.md
      - Web:
@@ -559,7 +561,6 @@ nav:
              - utils: reference/nn/modules/utils.md
          - tasks: reference/nn/tasks.md
      - solutions:
-          - solutions: reference/solutions/solutions.md
          - ai_gym: reference/solutions/ai_gym.md
          - analytics: reference/solutions/analytics.md
          - distance_calculation: reference/solutions/distance_calculation.md
@@ -567,6 +568,7 @@ nav:
          - object_counter: reference/solutions/object_counter.md
          - parking_management: reference/solutions/parking_management.md
          - queue_management: reference/solutions/queue_management.md
+          - solutions: reference/solutions/solutions.md
          - speed_estimation: reference/solutions/speed_estimation.md
          - streamlit_inference: reference/solutions/streamlit_inference.md
      - trackers:
--- a/tests/test_exports.py
+++ b/tests/test_exports.py
@@ -205,3 +205,12 @@ def test_export_ncnn():
    """Test YOLO exports to NCNN format."""
    file = YOLO(MODEL).export(format="ncnn", imgsz=32)
    YOLO(file)(SOURCE, imgsz=32)  # exported model inference
+
+
+@pytest.mark.skipif(True, reason="Test disabled as keras and tensorflow version conflicts with tflite export.")
+@pytest.mark.skipif(not LINUX or MACOS, reason="Skipping test on Windows and Macos")
+def test_export_imx():
+    """Test YOLOv8n exports to IMX format."""
+    model = YOLO("yolov8n.pt")
+    file = model.export(format="imx", imgsz=32)
+    YOLO(file)(SOURCE, imgsz=32)
--- a/ultralytics/__init__.py
+++ b/ultralytics/__init__.py
@@ -1,6 +1,6 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license

-__version__ = "8.3.28"
+__version__ = "8.3.29"

 import os

--- a/ultralytics/engine/exporter.py
+++ b/ultralytics/engine/exporter.py
@ -18,6 +18,7 @@ TensorFlow.js           | `tfjs`                    | yolo11n_web_model/
 PaddlePaddle            | `paddle`                  | yolo11n_paddle_model/
 MNN                     | `mnn`                     | yolo11n.mnn
 NCNN                    | `ncnn`                    | yolo11n_ncnn_model/
+IMX                     | `imx`                     | yolo11n_imx_model/

 Requirements:
    $ pip install "ultralytics[export]"
@ -44,6 +45,7 @@ Inference:
                         yolo11n_paddle_model       # PaddlePaddle
                         yolo11n.mnn                # MNN
                         yolo11n_ncnn_model         # NCNN
+                         yolo11n_imx_model          # IMX

 TensorFlow.js:
    $ cd .. && git clone https://github.com/zldrobit/tfjs-yolov5-example.git && cd tfjs-yolov5-example
@ -94,7 +96,7 @@ from ultralytics.utils.checks import check_imgsz, check_is_path_safe, check_requ
 from ultralytics.utils.downloads import attempt_download_asset, get_github_assets, safe_download
 from ultralytics.utils.files import file_size, spaces_in_path
 from ultralytics.utils.ops import Profile
-from ultralytics.utils.torch_utils import TORCH_1_13, get_latest_opset, select_device, smart_inference_mode
+from ultralytics.utils.torch_utils import TORCH_1_13, get_latest_opset, select_device


 def export_formats():
@ -114,6 +116,7 @@ def export_formats():
        ["PaddlePaddle", "paddle", "_paddle_model", True, True],
        ["MNN", "mnn", ".mnn", True, True],
        ["NCNN", "ncnn", "_ncnn_model", True, True],
+        ["IMX", "imx", "_imx_model", True, True],
    ]
    return dict(zip(["Format", "Argument", "Suffix", "CPU", "GPU"], zip(*x)))

@ -171,7 +174,6 @@ class Exporter:
        self.callbacks = _callbacks or callbacks.get_default_callbacks()
        callbacks.add_integration_callbacks(self)

-    @smart_inference_mode()
    def __call__(self, model=None) -> str:
        """Returns list of exported files/dirs after running callbacks."""
        self.run_callbacks("on_export_start")
@ -194,9 +196,22 @@ class Exporter:
        flags = [x == fmt for x in fmts]
        if sum(flags) != 1:
            raise ValueError(f"Invalid export format='{fmt}'. Valid formats are {fmts}")
-        jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle, mnn, ncnn = (
-            flags  # export booleans
-        )
+        (
+            jit,
+            onnx,
+            xml,
+            engine,
+            coreml,
+            saved_model,
+            pb,
+            tflite,
+            edgetpu,
+            tfjs,
+            paddle,
+            mnn,
+            ncnn,
+            imx,
+        ) = flags  # export booleans
        is_tf_format = any((saved_model, pb, tflite, edgetpu, tfjs))

        # Device
@ -210,6 +225,9 @@ class Exporter:
        self.device = select_device("cpu" if self.args.device is None else self.args.device)

        # Checks
+        if imx and not self.args.int8:
+            LOGGER.warning("WARNING ⚠️ IMX only supports int8 export, setting int8=True.")
+            self.args.int8 = True
        if not hasattr(model, "names"):
            model.names = default_class_names()
        model.names = check_class_names(model.names)
@ -249,6 +267,7 @@ class Exporter:
            )
        if mnn and (IS_RASPBERRYPI or IS_JETSON):
            raise SystemError("MNN export not supported on Raspberry Pi and NVIDIA Jetson")
+
        # Input
        im = torch.zeros(self.args.batch, 3, *self.imgsz).to(self.device)
        file = Path(
@ -264,6 +283,11 @@ class Exporter:
        model.eval()
        model.float()
        model = model.fuse()
+
+        if imx:
+            from ultralytics.utils.torch_utils import FXModel
+
+            model = FXModel(model)
        for m in model.modules():
            if isinstance(m, (Detect, RTDETRDecoder)):  # includes all Detect subclasses like Segment, Pose, OBB
                m.dynamic = self.args.dynamic
@ -273,6 +297,15 @@ class Exporter:
            elif isinstance(m, C2f) and not is_tf_format:
                # EdgeTPU does not support FlexSplitV while split provides cleaner ONNX graph
                m.forward = m.forward_split
+            if isinstance(m, Detect) and imx:
+                from ultralytics.utils.tal import make_anchors
+
+                m.anchors, m.strides = (
+                    x.transpose(0, 1)
+                    for x in make_anchors(
+                        torch.cat([s / m.stride.unsqueeze(-1) for s in self.imgsz], dim=1), m.stride, 0.5
+                    )
+                )

        y = None
        for _ in range(2):
@ -347,6 +380,8 @@ class Exporter:
            f[11], _ = self.export_mnn()
        if ncnn:  # NCNN
            f[12], _ = self.export_ncnn()
+        if imx:
+            f[13], _ = self.export_imx()

        # Finish
        f = [str(x) for x in f if x]  # filter out '' and None
@ -1068,6 +1103,137 @@ class Exporter:
        yaml_save(Path(f) / "metadata.yaml", self.metadata)  # add metadata.yaml
        return f, None

+    @try_export
+    def export_imx(self, prefix=colorstr("IMX:")):
+        """
+        Export a YOLOv8 detection model to the Sony IMX500 format.
+
+        The model is quantized with Model Compression Toolkit, wrapped with the `multiclass_nms` layer from
+        sony_custom_layers, exported to ONNX (with `self.metadata` attached as metadata props) and finally
+        converted with the `imxconv-pt` command-line tool. A `labels.txt` file with one class name per line is
+        written next to the converted model.
+
+        Args:
+            prefix (str): Prefix passed on to the INT8 calibration dataloader.
+
+        Returns:
+            (tuple): The `*_imx_model` output directory and None (before `try_export` post-processing).
+
+        Raises:
+            AssertionError: If not running on Linux.
+            ValueError: If the model is end2end or its string representation contains no `C2f` module.
+        """
+        gptq = False  # hard-coded switch: False selects plain PTQ, True selects the GPTQ branch below
+        assert LINUX, "export only supported on Linux. See https://developer.aitrios.sony-semicon.com/en/raspberrypi-ai-camera/documentation/imx500-converter"
+        if getattr(self.model, "end2end", False):
+            raise ValueError("IMX export is not supported for end2end models.")
+        if "C2f" not in self.model.__str__():
+            raise ValueError("IMX export is only supported for YOLOv8 detection models")
+        check_requirements(("model-compression-toolkit==2.1.1", "sony-custom-layers==0.2.0", "tensorflow==2.12.0"))
+        check_requirements("imx500-converter[pt]==3.14.3")  # Separate requirements for imx500-converter
+
+        import model_compression_toolkit as mct
+        import onnx
+        from sony_custom_layers.pytorch.object_detection.nms import multiclass_nms
+
+        try:
+            out = subprocess.run(
+                ["java", "--version"], check=True, capture_output=True
+            )  # Java 17 is required for imx500-converter
+            if b"openjdk 17" not in out.stdout:
+                raise FileNotFoundError
+        except (FileNotFoundError, subprocess.CalledProcessError):
+            # Reached when java is missing, is not OpenJDK 17, or exits non-zero (e.g. a JVM that rejects
+            # `--version`); previously the non-zero exit escaped as an uncaught CalledProcessError.
+            subprocess.run(["sudo", "apt", "install", "-y", "openjdk-17-jdk", "openjdk-17-jre"], check=True)
+
+        # The default argument is evaluated once, so the calibration dataloader is built a single time
+        # and reused by every call of the generator.
+        def representative_dataset_gen(dataloader=self.get_int8_calibration_dataloader(prefix)):
+            for batch in dataloader:
+                img = batch["img"]
+                img = img / 255.0
+                yield [img]
+
+        tpc = mct.get_target_platform_capabilities(
+            fw_name="pytorch", target_platform_name="imx500", target_platform_version="v1"
+        )
+
+        config = mct.core.CoreConfig(
+            mixed_precision_config=mct.core.MixedPrecisionQuantizationConfig(num_of_images=10),
+            quantization_config=mct.core.QuantizationConfig(concat_threshold_update=True),
+        )
+
+        resource_utilization = mct.core.ResourceUtilization(weights_memory=3146176 * 0.76)
+
+        quant_model = (
+            mct.gptq.pytorch_gradient_post_training_quantization(  # Perform Gradient-Based Post Training Quantization
+                model=self.model,
+                representative_data_gen=representative_dataset_gen,
+                target_resource_utilization=resource_utilization,
+                gptq_config=mct.gptq.get_pytorch_gptq_config(n_epochs=1000, use_hessian_based_weights=False),
+                core_config=config,
+                target_platform_capabilities=tpc,
+            )[0]
+            if gptq
+            else mct.ptq.pytorch_post_training_quantization(  # Perform post training quantization
+                in_module=self.model,
+                representative_data_gen=representative_dataset_gen,
+                target_resource_utilization=resource_utilization,
+                core_config=config,
+                target_platform_capabilities=tpc,
+            )[0]
+        )
+
+        class NMSWrapper(torch.nn.Module):
+            def __init__(
+                self,
+                model: torch.nn.Module,
+                score_threshold: float = 0.001,
+                iou_threshold: float = 0.7,
+                max_detections: int = 300,
+            ):
+                """
+                Wrapping PyTorch Module with multiclass_nms layer from sony_custom_layers.
+
+                Args:
+                    model (nn.Module): Model instance.
+                    score_threshold (float): Score threshold for non-maximum suppression.
+                    iou_threshold (float): Intersection over union threshold for non-maximum suppression.
+                    max_detections (int): The number of detections to return.
+                """
+                super().__init__()
+                self.model = model
+                self.score_threshold = score_threshold
+                self.iou_threshold = iou_threshold
+                self.max_detections = max_detections
+
+            def forward(self, images):
+                """Run the wrapped model and apply multiclass NMS to its first two outputs (boxes, scores)."""
+                outputs = self.model(images)
+
+                boxes = outputs[0]
+                scores = outputs[1]
+                nms = multiclass_nms(
+                    boxes=boxes,
+                    scores=scores,
+                    score_threshold=self.score_threshold,
+                    iou_threshold=self.iou_threshold,
+                    max_detections=self.max_detections,
+                )
+                return nms
+
+        quant_model = NMSWrapper(
+            model=quant_model,
+            score_threshold=self.args.conf or 0.001,
+            iou_threshold=self.args.iou,
+            max_detections=self.args.max_det,
+        ).to(self.device)
+
+        # Build both paths from the stem so only the trailing suffix is replaced and the ONNX file always lands
+        # inside the output directory, even when self.file has parent directories or is absolute.
+        f = self.file.with_name(f"{self.file.stem}_imx_model")
+        f.mkdir(exist_ok=True)
+        onnx_model = f / f"{self.file.stem}_imx.onnx"
+        mct.exporter.pytorch_export_model(
+            model=quant_model, save_model_path=onnx_model, repr_dataset=representative_dataset_gen
+        )
+
+        model_onnx = onnx.load(onnx_model)  # load onnx model
+        for k, v in self.metadata.items():
+            meta = model_onnx.metadata_props.add()
+            meta.key, meta.value = k, str(v)
+
+        onnx.save(model_onnx, onnx_model)
+
+        subprocess.run(
+            ["imxconv-pt", "-i", str(onnx_model), "-o", str(f), "--no-input-persistency", "--overwrite-output"],
+            check=True,
+        )
+
+        # Needed for imx models.
+        with open(f / "labels.txt", "w") as file:
+            file.writelines(f"{name}\n" for name in self.model.names.values())
+
+        return f, None
+
    def _add_tflite_metadata(self, file):
        """Add metadata to *.tflite models per https://www.tensorflow.org/lite/models/convert/metadata."""
        import flatbuffers
--- a/ultralytics/models/yolo/detect/val.py
+++ b/ultralytics/models/yolo/detect/val.py
@ -155,8 +155,8 @@ class DetectionValidator(BaseValidator):
            # Evaluate
            if nl:
                stat["tp"] = self._process_batch(predn, bbox, cls)
-                if self.args.plots:
-                    self.confusion_matrix.process_batch(predn, bbox, cls)
+            if self.args.plots:
+                self.confusion_matrix.process_batch(predn, bbox, cls)
            for k in self.stats.keys():
                self.stats[k].append(stat[k])

--- a/ultralytics/models/yolo/pose/val.py
+++ b/ultralytics/models/yolo/pose/val.py
@ -138,8 +138,8 @@ class PoseValidator(DetectionValidator):
            if nl:
                stat["tp"] = self._process_batch(predn, bbox, cls)
                stat["tp_p"] = self._process_batch(predn, bbox, cls, pred_kpts, pbatch["kpts"])
-                if self.args.plots:
-                    self.confusion_matrix.process_batch(predn, bbox, cls)
+            if self.args.plots:
+                self.confusion_matrix.process_batch(predn, bbox, cls)

            for k in self.stats.keys():
                self.stats[k].append(stat[k])
--- a/ultralytics/models/yolo/segment/val.py
+++ b/ultralytics/models/yolo/segment/val.py
@ -135,8 +135,8 @@ class SegmentationValidator(DetectionValidator):
                stat["tp_m"] = self._process_batch(
                    predn, bbox, cls, pred_masks, gt_masks, self.args.overlap_mask, masks=True
                )
-                if self.args.plots:
-                    self.confusion_matrix.process_batch(predn, bbox, cls)
+            if self.args.plots:
+                self.confusion_matrix.process_batch(predn, bbox, cls)

            for k in self.stats.keys():
                self.stats[k].append(stat[k])
--- a/ultralytics/nn/autobackend.py
+++ b/ultralytics/nn/autobackend.py
@ -123,6 +123,7 @@ class AutoBackend(nn.Module):
            paddle,
            mnn,
            ncnn,
+            imx,
            triton,
        ) = self._model_type(w)
        fp16 &= pt or jit or onnx or xml or engine or nn_module or triton  # FP16
@ -182,8 +183,8 @@ class AutoBackend(nn.Module):
            check_requirements("opencv-python>=4.5.4")
            net = cv2.dnn.readNetFromONNX(w)

-        # ONNX Runtime
-        elif onnx:
+        # ONNX Runtime and IMX
+        elif onnx or imx:
            LOGGER.info(f"Loading {w} for ONNX Runtime inference...")
            check_requirements(("onnx", "onnxruntime-gpu" if cuda else "onnxruntime"))
            if IS_RASPBERRYPI or IS_JETSON:
@ -199,7 +200,22 @@ class AutoBackend(nn.Module):
                device = torch.device("cpu")
                cuda = False
            LOGGER.info(f"Preferring ONNX Runtime {providers[0]}")
-            session = onnxruntime.InferenceSession(w, providers=providers)
+            if onnx:
+                session = onnxruntime.InferenceSession(w, providers=providers)
+            else:
+                check_requirements(
+                    ["model-compression-toolkit==2.1.1", "sony-custom-layers[torch]==0.2.0", "onnxruntime-extensions"]
+                )
+                w = next(Path(w).glob("*.onnx"))
+                LOGGER.info(f"Loading {w} for ONNX IMX inference...")
+                import mct_quantizers as mctq
+                from sony_custom_layers.pytorch.object_detection import nms_ort  # noqa
+
+                session = onnxruntime.InferenceSession(
+                    w, mctq.get_ort_session_options(), providers=["CPUExecutionProvider"]
+                )
+                task = "detect"
+
            output_names = [x.name for x in session.get_outputs()]
            metadata = session.get_modelmeta().custom_metadata_map
            dynamic = isinstance(session.get_outputs()[0].shape[0], str)
@ -520,7 +536,7 @@ class AutoBackend(nn.Module):
            y = self.net.forward()

        # ONNX Runtime
-        elif self.onnx:
+        elif self.onnx or self.imx:
            if self.dynamic:
                im = im.cpu().numpy()  # torch to numpy
                y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im})
@ -537,6 +553,9 @@ class AutoBackend(nn.Module):
                )
                self.session.run_with_iobinding(self.io)
                y = self.bindings
+            if self.imx:
+                # boxes, conf, cls
+                y = np.concatenate([y[0], y[1][:, :, None], y[2][:, :, None]], axis=-1)

        # OpenVINO
        elif self.xml:
--- a/ultralytics/nn/modules/block.py
+++ b/ultralytics/nn/modules/block.py
@ -240,7 +240,8 @@ class C2f(nn.Module):

    def forward_split(self, x):
        """Forward pass using split() instead of chunk()."""
-        y = list(self.cv1(x).split((self.c, self.c), 1))
+        y = self.cv1(x).split((self.c, self.c), 1)
+        y = [y[0], y[1]]
        y.extend(m(y[-1]) for m in self.m)
        return self.cv2(torch.cat(y, 1))

--- a/ultralytics/nn/modules/head.py
+++ b/ultralytics/nn/modules/head.py
@ -23,6 +23,7 @@ class Detect(nn.Module):

    dynamic = False  # force grid reconstruction
    export = False  # export mode
+    format = None  # export format
    end2end = False  # end2end
    max_det = 300  # max_det
    shape = None
@ -101,7 +102,7 @@ class Detect(nn.Module):
        # Inference path
        shape = x[0].shape  # BCHW
        x_cat = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2)
-        if self.dynamic or self.shape != shape:
+        if self.format != "imx" and (self.dynamic or self.shape != shape):
            self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5))
            self.shape = shape

@ -119,6 +120,11 @@ class Detect(nn.Module):
            grid_size = torch.tensor([grid_w, grid_h, grid_w, grid_h], device=box.device).reshape(1, 4, 1)
            norm = self.strides / (self.stride[0] * grid_size)
            dbox = self.decode_bboxes(self.dfl(box) * norm, self.anchors.unsqueeze(0) * norm[:, :2])
+        elif self.export and self.format == "imx":
+            dbox = self.decode_bboxes(
+                self.dfl(box) * self.strides, self.anchors.unsqueeze(0) * self.strides, xywh=False
+            )
+            return dbox.transpose(1, 2), cls.sigmoid().permute(0, 2, 1)
        else:
            dbox = self.decode_bboxes(self.dfl(box), self.anchors.unsqueeze(0)) * self.strides

@ -137,9 +143,9 @@ class Detect(nn.Module):
                a[-1].bias.data[:] = 1.0  # box
                b[-1].bias.data[: m.nc] = math.log(5 / m.nc / (640 / s) ** 2)  # cls (.01 objects, 80 classes, 640 img)

-    def decode_bboxes(self, bboxes, anchors):
+    def decode_bboxes(self, bboxes, anchors, xywh=True):
        """Decode bounding boxes."""
-        return dist2bbox(bboxes, anchors, xywh=not self.end2end, dim=1)
+        return dist2bbox(bboxes, anchors, xywh=xywh and (not self.end2end), dim=1)

    @staticmethod
    def postprocess(preds: torch.Tensor, max_det: int, nc: int = 80):
--- a/ultralytics/utils/benchmarks.py
+++ b/ultralytics/utils/benchmarks.py
@ -118,6 +118,11 @@ def benchmark(
                assert not IS_JETSON, "MNN export not supported on NVIDIA Jetson"
            if i == 13:  # NCNN
                assert not isinstance(model, YOLOWorld), "YOLOWorldv2 NCNN exports not supported yet"
+            if i == 14:  # IMX
+                assert not is_end2end
+                assert not isinstance(model, YOLOWorld), "YOLOWorldv2 IMX exports not supported"
+                assert model.task == "detect", "IMX only supported for detection task"
+                assert "C2f" in model.__str__(), "IMX only supported for YOLOv8"
            if "cpu" in device.type:
                assert cpu, "inference not supported on CPU"
            if "cuda" in device.type:
--- a/ultralytics/utils/tal.py
+++ b/ultralytics/utils/tal.py
@ -306,7 +306,7 @@ def make_anchors(feats, strides, grid_cell_offset=0.5):
    assert feats is not None
    dtype, device = feats[0].dtype, feats[0].device
    for i, stride in enumerate(strides):
-        _, _, h, w = feats[i].shape
+        h, w = feats[i].shape[2:] if isinstance(feats, list) else (int(feats[i][0]), int(feats[i][1]))
        sx = torch.arange(end=w, device=device, dtype=dtype) + grid_cell_offset  # shift x
        sy = torch.arange(end=h, device=device, dtype=dtype) + grid_cell_offset  # shift y
        sy, sx = torch.meshgrid(sy, sx, indexing="ij") if TORCH_1_10 else torch.meshgrid(sy, sx)
--- a/ultralytics/utils/torch_utils.py
+++ b/ultralytics/utils/torch_utils.py
@ -730,3 +730,48 @@ class EarlyStopping:
                f"i.e. `patience=300` or use `patience=0` to disable EarlyStopping."
            )
        return stop
+
+
+class FXModel(nn.Module):
+    """
+    Wrapper that re-expresses a model's forward pass as a plain layer loop for torch.fx.
+
+    The wrapper copies the attributes of an existing model via `copy_attr` and then assigns the `model`
+    attribute explicitly, so the layer sequence of the original is reused as-is.
+
+    Args:
+        model (torch.nn.Module): The original model to wrap for torch.fx compatibility.
+    """
+
+    def __init__(self, model):
+        """
+        Build the wrapper around an existing model.
+
+        Args:
+            model (torch.nn.Module): The original model to wrap for torch.fx compatibility.
+        """
+        super().__init__()
+        copy_attr(self, model)
+        # `copy_attr` does not carry `model` over, so the layer sequence is assigned by hand.
+        self.model = model.model
+
+    def forward(self, x):
+        """
+        Run the input through every layer in order, routing inputs according to each layer's `f` attribute.
+
+        Every layer output is kept so that later layers can consume the result of any earlier layer.
+
+        Args:
+            x (torch.Tensor): The input tensor to the model.
+
+        Returns:
+            (torch.Tensor): The output of the last layer.
+        """
+        outputs = []  # output of every layer, in execution order
+        for layer in self.model:
+            source = layer.f
+            if source != -1:  # input does not come (only) from the previous layer
+                if isinstance(source, int):
+                    x = outputs[source]
+                else:
+                    # -1 stands for the current running value, any other index for a saved output
+                    x = [x if idx == -1 else outputs[idx] for idx in source]
+            x = layer(x)  # run
+            outputs.append(x)  # save output
+        return x