From 5b09fff3cd7621626b8a76a8b0604c5ce6e40e3b Mon Sep 17 00:00:00 2001
From: triple-Mu <gpu@163.com>
Date: Tue, 11 Jul 2023 16:21:31 +0800
Subject: [PATCH 1/3] Support yolov8 original pose model.

---
 config.py                   |  15 +++++
 infer-det-without-torch.py  |   4 ++
 infer-det.py                |   4 ++
 infer-pose-without-torch.py | 116 ++++++++++++++++++++++++++++++++++++
 infer-pose.py               | 112 ++++++++++++++++++++++++++++++++++
 infer-seg-without-torch.py  |   4 ++
 infer-seg.py                |   7 +--
 models/torch_utils.py       |  38 ++++++++++--
 models/utils.py             |  36 +++++++++++
 9 files changed, 327 insertions(+), 9 deletions(-)
 create mode 100644 infer-pose-without-torch.py
 create mode 100644 infer-pose.py

diff --git a/config.py b/config.py
index e5b4e7b..6a0e315 100644
--- a/config.py
+++ b/config.py
@@ -36,5 +36,20 @@ MASK_COLORS = np.array([(255, 56, 56), (255, 157, 151), (255, 112, 31),
                         (255, 149, 200), (255, 55, 199)],
                        dtype=np.float32) / 255.
 
+KPS_COLORS = [[0, 255, 0], [0, 255, 0], [0, 255, 0], [0, 255, 0], [0, 255, 0],
+              [255, 128, 0], [255, 128, 0], [255, 128, 0], [255, 128, 0],
+              [255, 128, 0], [255, 128, 0], [51, 153, 255], [51, 153, 255],
+              [51, 153, 255], [51, 153, 255], [51, 153, 255], [51, 153, 255]]
+
+SKELETON = [[16, 14], [14, 12], [17, 15], [15, 13], [12, 13], [6, 12], [7, 13],
+            [6, 7], [6, 8], [7, 9], [8, 10], [9, 11], [2, 3], [1, 2], [1, 3],
+            [2, 4], [3, 5], [4, 6], [5, 7]]
+
+LIMB_COLORS = [[51, 153, 255], [51, 153, 255], [51, 153, 255], [51, 153, 255],
+               [255, 51, 255], [255, 51, 255], [255, 51, 255], [255, 128, 0],
+               [255, 128, 0], [255, 128, 0], [255, 128, 0], [255, 128, 0],
+               [0, 255, 0], [0, 255, 0], [0, 255, 0], [0, 255, 0], [0, 255, 0],
+               [0, 255, 0], [0, 255, 0]]
+
 # alpha for segment masks
 ALPHA = 0.5
diff --git a/infer-det-without-torch.py b/infer-det-without-torch.py
index a38eecd..81055ce 100644
--- a/infer-det-without-torch.py
+++ b/infer-det-without-torch.py
@@ -38,6 +38,10 @@ def main(args: argparse.Namespace) -> None:
         data = Engine(tensor)
 
         bboxes, scores, labels = det_postprocess(data)
+        if bboxes.size == 0:
+            # if no bounding box
+            print(f'{image}: no object!')
+            continue
         bboxes -= dwdh
         bboxes /= ratio
 
diff --git a/infer-det.py b/infer-det.py
index 55c50e9..ab9586d 100644
--- a/infer-det.py
+++ b/infer-det.py
@@ -37,6 +37,10 @@ def main(args: argparse.Namespace) -> None:
         data = Engine(tensor)
 
         bboxes, scores, labels = det_postprocess(data)
+        if bboxes.numel() == 0:
+            # if no bounding box
+            print(f'{image}: no object!')
+            continue
         bboxes -= dwdh
         bboxes /= ratio
 
diff --git a/infer-pose-without-torch.py b/infer-pose-without-torch.py
new file mode 100644
index 0000000..7b89a84
--- /dev/null
+++ b/infer-pose-without-torch.py
@@ -0,0 +1,116 @@
+import argparse
+from pathlib import Path
+
+import cv2
+import numpy as np
+
+from config import COLORS, KPS_COLORS, LIMB_COLORS, SKELETON
+from models.utils import blob, letterbox, path_to_list, pose_postprocess
+
+
+def main(args: argparse.Namespace) -> None:
+    if args.method == 'cudart':
+        from models.cudart_api import TRTEngine
+    elif args.method == 'pycuda':
+        from models.pycuda_api import TRTEngine
+    else:
+        raise NotImplementedError
+
+    Engine = TRTEngine(args.engine)
+    H, W = Engine.inp_info[0].shape[-2:]
+
+    images = path_to_list(args.imgs)
+    save_path = Path(args.out_dir)
+
+    if not args.show and not save_path.exists():
+        save_path.mkdir(parents=True, exist_ok=True)
+
+    for image in images:
+        save_image = save_path / image.name
+        bgr = cv2.imread(str(image))
+        draw = bgr.copy()
+        bgr, ratio, dwdh = letterbox(bgr, (W, H))
+        dw, dh = int(dwdh[0]), int(dwdh[1])
+        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
+        tensor = blob(rgb, return_seg=False)
+        dwdh = np.array(dwdh * 2, dtype=np.float32)
+        tensor = np.ascontiguousarray(tensor)
+        # inference
+        data = Engine(tensor)
+
+        bboxes, scores, kpts = pose_postprocess(data, args.conf_thres,
+                                                args.iou_thres)
+        if bboxes.size == 0:
+            # if no bounding box
+            print(f'{image}: no object!')
+            continue
+        bboxes -= dwdh
+        bboxes /= ratio
+
+        for (bbox, score, kpt) in zip(bboxes, scores, kpts):
+            bbox = bbox.round().astype(np.int32).tolist()
+            color = COLORS['person']
+            cv2.rectangle(draw, bbox[:2], bbox[2:], color, 2)
+            cv2.putText(draw,
+                        f'person:{score:.3f}', (bbox[0], bbox[1] - 2),
+                        cv2.FONT_HERSHEY_SIMPLEX,
+                        0.75, [225, 255, 255],
+                        thickness=2)
+            for i in range(19):
+                if i < 17:
+                    px, py, ps = kpt[i]
+                    if ps > 0.5:
+                        kcolor = KPS_COLORS[i]
+                        px = round(float(px - dw) / ratio)
+                        py = round(float(py - dh) / ratio)
+                        cv2.circle(draw, (px, py), 5, kcolor, -1)
+                xi, yi = SKELETON[i]
+                pos1_s = kpt[xi - 1][2]
+                pos2_s = kpt[yi - 1][2]
+                if pos1_s > 0.5 and pos2_s > 0.5:
+                    limb_color = LIMB_COLORS[i]
+                    pos1_x = round(float(kpt[xi - 1][0] - dw) / ratio)
+                    pos1_y = round(float(kpt[xi - 1][1] - dh) / ratio)
+
+                    pos2_x = round(float(kpt[yi - 1][0] - dw) / ratio)
+                    pos2_y = round(float(kpt[yi - 1][1] - dh) / ratio)
+
+                    cv2.line(draw, (pos1_x, pos1_y), (pos2_x, pos2_y),
+                             limb_color, 2)
+        if args.show:
+            cv2.imshow('result', draw)
+            cv2.waitKey(0)
+        else:
+            cv2.imwrite(str(save_image), draw)
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--engine', type=str, help='Engine file')
+    parser.add_argument('--imgs', type=str, help='Images file')
+    parser.add_argument('--show',
+                        action='store_true',
+                        help='Show the detection results')
+    parser.add_argument('--out-dir',
+                        type=str,
+                        default='./output',
+                        help='Path to output file')
+    parser.add_argument('--conf-thres',
+                        type=float,
+                        default=0.25,
+                        help='Confidence threshold')
+    parser.add_argument('--iou-thres',
+                        type=float,
+                        default=0.65,
+                        help='Confidence threshold')
+    parser.add_argument('--method',
+                        type=str,
+                        default='cudart',
+                        help='CUDART pipeline')
+    args = parser.parse_args()
+    return args
+
+
+if __name__ == '__main__':
+    args = parse_args()
+    main(args)
diff --git a/infer-pose.py b/infer-pose.py
new file mode 100644
index 0000000..0ba0679
--- /dev/null
+++ b/infer-pose.py
@@ -0,0 +1,112 @@
+from models import TRTModule  # isort:skip
+import argparse
+from pathlib import Path
+
+import cv2
+import torch
+
+from config import COLORS, KPS_COLORS, LIMB_COLORS, SKELETON
+from models.torch_utils import pose_postprocess
+from models.utils import blob, letterbox, path_to_list
+
+
+def main(args: argparse.Namespace) -> None:
+    device = torch.device(args.device)
+    Engine = TRTModule(args.engine, device)
+    H, W = Engine.inp_info[0].shape[-2:]
+
+    images = path_to_list(args.imgs)
+    save_path = Path(args.out_dir)
+
+    if not args.show and not save_path.exists():
+        save_path.mkdir(parents=True, exist_ok=True)
+
+    for image in images:
+        save_image = save_path / image.name
+        bgr = cv2.imread(str(image))
+        draw = bgr.copy()
+        bgr, ratio, dwdh = letterbox(bgr, (W, H))
+        dw, dh = int(dwdh[0]), int(dwdh[1])
+        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
+        tensor = blob(rgb, return_seg=False)
+        dwdh = torch.asarray(dwdh * 2, dtype=torch.float32, device=device)
+        tensor = torch.asarray(tensor, device=device)
+        # inference
+        data = Engine(tensor)
+
+        bboxes, scores, kpts = pose_postprocess(data, args.conf_thres,
+                                                args.iou_thres)
+        if bboxes.numel() == 0:
+            # if no bounding box
+            print(f'{image}: no object!')
+            continue
+        bboxes -= dwdh
+        bboxes /= ratio
+
+        for (bbox, score, kpt) in zip(bboxes, scores, kpts):
+            bbox = bbox.round().int().tolist()
+            color = COLORS['person']
+            cv2.rectangle(draw, bbox[:2], bbox[2:], color, 2)
+            cv2.putText(draw,
+                        f'person:{score:.3f}', (bbox[0], bbox[1] - 2),
+                        cv2.FONT_HERSHEY_SIMPLEX,
+                        0.75, [225, 255, 255],
+                        thickness=2)
+            for i in range(19):
+                if i < 17:
+                    px, py, ps = kpt[i]
+                    if ps > 0.5:
+                        kcolor = KPS_COLORS[i]
+                        px = round(float(px - dw) / ratio)
+                        py = round(float(py - dh) / ratio)
+                        cv2.circle(draw, (px, py), 5, kcolor, -1)
+                xi, yi = SKELETON[i]
+                pos1_s = kpt[xi - 1][2]
+                pos2_s = kpt[yi - 1][2]
+                if pos1_s > 0.5 and pos2_s > 0.5:
+                    limb_color = LIMB_COLORS[i]
+                    pos1_x = round(float(kpt[xi - 1][0] - dw) / ratio)
+                    pos1_y = round(float(kpt[xi - 1][1] - dh) / ratio)
+
+                    pos2_x = round(float(kpt[yi - 1][0] - dw) / ratio)
+                    pos2_y = round(float(kpt[yi - 1][1] - dh) / ratio)
+
+                    cv2.line(draw, (pos1_x, pos1_y), (pos2_x, pos2_y),
+                             limb_color, 2)
+        if args.show:
+            cv2.imshow('result', draw)
+            cv2.waitKey(0)
+        else:
+            cv2.imwrite(str(save_image), draw)
+
+
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--engine', type=str, help='Engine file')
+    parser.add_argument('--imgs', type=str, help='Images file')
+    parser.add_argument('--show',
+                        action='store_true',
+                        help='Show the detection results')
+    parser.add_argument('--out-dir',
+                        type=str,
+                        default='./output',
+                        help='Path to output file')
+    parser.add_argument('--conf-thres',
+                        type=float,
+                        default=0.25,
+                        help='Confidence threshold')
+    parser.add_argument('--iou-thres',
+                        type=float,
+                        default=0.65,
+                        help='Confidence threshold')
+    parser.add_argument('--device',
+                        type=str,
+                        default='cuda:0',
+                        help='TensorRT infer device')
+    args = parser.parse_args()
+    return args
+
+
+if __name__ == '__main__':
+    args = parse_args()
+    main(args)
diff --git a/infer-seg-without-torch.py b/infer-seg-without-torch.py
index f1923e3..fa08b62 100644
--- a/infer-seg-without-torch.py
+++ b/infer-seg-without-torch.py
@@ -41,6 +41,10 @@ def main(args: argparse.Namespace) -> None:
         seg_img = seg_img[dh:H - dh, dw:W - dw, [2, 1, 0]]
         bboxes, scores, labels, masks = seg_postprocess(
             data, bgr.shape[:2], args.conf_thres, args.iou_thres)
+        if bboxes.size == 0:
+            # if no bounding box
+            print(f'{image}: no object!')
+            continue
         masks = masks[:, dh:H - dh, dw:W - dw, :]
         mask_colors = MASK_COLORS[labels % len(MASK_COLORS)]
         mask_colors = mask_colors.reshape(-1, 1, 1, 3) * ALPHA
diff --git a/infer-seg.py b/infer-seg.py
index de77316..0284244 100644
--- a/infer-seg.py
+++ b/infer-seg.py
@@ -42,10 +42,9 @@ def main(args: argparse.Namespace) -> None:
                                 device=device)
         bboxes, scores, labels, masks = seg_postprocess(
             data, bgr.shape[:2], args.conf_thres, args.iou_thres)
-        if bboxes is None:
-            # if no bounding box or others save original image
-            if not args.show:
-                cv2.imwrite(str(save_image), draw)
+        if bboxes.numel() == 0:
+            # if no bounding box
+            print(f'{image}: no object!')
             continue
         masks = masks[:, dh:H - dh, dw:W - dw, :]
         indices = (labels % len(MASK_COLORS)).long()
diff --git a/models/torch_utils.py b/models/torch_utils.py
index 1bf7d39..36d0255 100644
--- a/models/torch_utils.py
+++ b/models/torch_utils.py
@@ -3,7 +3,7 @@ from typing import List, Tuple, Union
 import torch
 import torch.nn.functional as F
 from torch import Tensor
-from torchvision.ops import batched_nms
+from torchvision.ops import batched_nms, nms
 
 
 def seg_postprocess(
@@ -14,12 +14,13 @@ def seg_postprocess(
         -> Tuple[Tensor, Tensor, Tensor, Tensor]:
     assert len(data) == 2
     h, w = shape[0] // 4, shape[1] // 4  # 4x downsampling
-    outputs, proto = (i[0] for i in data)
+    outputs, proto = data[0][0], data[1][0]
     bboxes, scores, labels, maskconf = outputs.split([4, 1, 1, 32], 1)
     scores, labels = scores.squeeze(), labels.squeeze()
     idx = scores > conf_thres
-    if idx.sum() == 0:  # no bounding boxes or seg were created
-        return None, None, None, None
+    if not idx.any():  # no bounding boxes or seg were created
+        return bboxes.new_zeros((0, 4)), scores.new_zeros(
+            (0, )), labels.new_zeros((0, )), bboxes.new_zeros((0, 0, 0, 0))
     bboxes, scores, labels, maskconf = \
         bboxes[idx], scores[idx], labels[idx], maskconf[idx]
     idx = batched_nms(bboxes, scores, labels, iou_thres)
@@ -35,10 +36,37 @@ def seg_postprocess(
     return bboxes, scores, labels, masks
 
 
+def pose_postprocess(
+        data: Union[Tuple, Tensor],
+        conf_thres: float = 0.25,
+        iou_thres: float = 0.65) \
+        -> Tuple[Tensor, Tensor, Tensor]:
+    if isinstance(data, tuple):
+        assert len(data) == 1
+        data = data[0]
+    outputs = torch.transpose(data[0], 0, 1).contiguous()
+    bboxes, scores, kpts = outputs.split([4, 1, 51], 1)
+    scores, kpts = scores.squeeze(), kpts.squeeze()
+    idx = scores > conf_thres
+    if not idx.any():  # no bounding boxes or seg were created
+        return bboxes.new_zeros((0, 4)), scores.new_zeros(
+            (0, )), bboxes.new_zeros((0, 0, 0))
+    bboxes, scores, kpts = bboxes[idx], scores[idx], kpts[idx]
+    xycenter, wh = bboxes.chunk(2, -1)
+    bboxes = torch.cat([xycenter - 0.5 * wh, xycenter + 0.5 * wh], -1)
+    idx = nms(bboxes, scores, iou_thres)
+    bboxes, scores, kpts = bboxes[idx], scores[idx], kpts[idx]
+    return bboxes, scores, kpts.reshape(idx.shape[0], -1, 3)
+
+
 def det_postprocess(data: Tuple[Tensor, Tensor, Tensor, Tensor]):
     assert len(data) == 4
-    num_dets, bboxes, scores, labels = (i[0] for i in data)
+    num_dets, bboxes, scores, labels = data[0][0], data[1][0], data[2][
+        0], data[3][0]
     nums = num_dets.item()
+    if nums == 0:
+        return bboxes.new_zeros((0, 4)), scores.new_zeros(
+            (0, )), labels.new_zeros((0, ))
     bboxes = bboxes[:nums]
     scores = scores[:nums]
     labels = labels[:nums]
diff --git a/models/utils.py b/models/utils.py
index 01c5034..c0d78ae 100644
--- a/models/utils.py
+++ b/models/utils.py
@@ -45,6 +45,7 @@ def letterbox(im: ndarray,
 
 
 def blob(im: ndarray, return_seg: bool = False) -> Union[ndarray, Tuple]:
+    seg = None
     if return_seg:
         seg = im.astype(np.float32) / 255
     im = im.transpose([2, 0, 1])
@@ -88,6 +89,9 @@ def det_postprocess(data: Tuple[ndarray, ndarray, ndarray, ndarray]):
     assert len(data) == 4
     num_dets, bboxes, scores, labels = (i[0] for i in data)
     nums = num_dets.item()
+    if nums == 0:
+        return np.empty((0, 4), dtype=np.float32), np.empty(
+            (0, ), dtype=np.float32), np.empty((0, ), dtype=np.int32)
     bboxes = bboxes[:nums]
     scores = scores[:nums]
     labels = labels[:nums]
@@ -106,6 +110,12 @@ def seg_postprocess(
     bboxes, scores, labels, maskconf = np.split(outputs, [4, 5, 6], 1)
     scores, labels = scores.squeeze(), labels.squeeze()
     idx = scores > conf_thres
+    if not idx.any():  # no bounding boxes or seg were created
+        return np.empty((0, 4), dtype=np.float32), \
+            np.empty((0,), dtype=np.float32), \
+            np.empty((0,), dtype=np.int32), \
+            np.empty((0, 0, 0, 0), dtype=np.int32)
+
     bboxes, scores, labels, maskconf = \
         bboxes[idx], scores[idx], labels[idx], maskconf[idx]
     cvbboxes = np.concatenate([bboxes[:, :2], bboxes[:, 2:] - bboxes[:, :2]],
@@ -128,3 +138,29 @@ def seg_postprocess(
     masks = masks.transpose(2, 0, 1)
     masks = np.ascontiguousarray((masks > 0.5)[..., None], dtype=np.float32)
     return bboxes, scores, labels, masks
+
+
+def pose_postprocess(
+        data: Union[Tuple, ndarray],
+        conf_thres: float = 0.25,
+        iou_thres: float = 0.65) \
+        -> Tuple[ndarray, ndarray, ndarray]:
+    if isinstance(data, tuple):
+        assert len(data) == 1
+        data = data[0]
+    outputs = np.transpose(data[0], (1, 0))
+    bboxes, scores, kpts = np.split(outputs, [4, 5], 1)
+    scores, kpts = scores.squeeze(), kpts.squeeze()
+    idx = scores > conf_thres
+    if not idx.any():  # no bounding boxes or seg were created
+        return np.empty((0, 4), dtype=np.float32), np.empty(
+            (0, ), dtype=np.float32), np.empty((0, 0, 0), dtype=np.float32)
+    bboxes, scores, kpts = bboxes[idx], scores[idx], kpts[idx]
+    xycenter, wh = np.split(bboxes, [
+        2,
+    ], -1)
+    cvbboxes = np.concatenate([xycenter - 0.5 * wh, wh], -1)
+    idx = cv2.dnn.NMSBoxes(cvbboxes, scores, conf_thres, iou_thres)
+    cvbboxes, scores, kpts = cvbboxes[idx], scores[idx], kpts[idx]
+    cvbboxes[:, 2:] += cvbboxes[:, :2]
+    return cvbboxes, scores, kpts.reshape(idx.shape[0], -1, 3)

From b9a4bbbaa1ecd5a07a96c594d8c9432a452eefbc Mon Sep 17 00:00:00 2001
From: triple-Mu <gpu@163.com>
Date: Tue, 11 Jul 2023 16:35:34 +0800
Subject: [PATCH 2/3] Add docs

---
 docs/Jetson.md  | 18 ++++++++-----
 docs/Normal.md  | 48 ++++++++++++++++++++++++++-------
 docs/Pose.md    | 72 +++++++++++++++++++++++++++++++++++++++++++------
 docs/Segment.md | 55 +++++++++++++++++++++++++++++--------
 4 files changed, 158 insertions(+), 35 deletions(-)

diff --git a/docs/Jetson.md b/docs/Jetson.md
index e10b613..b804279 100644
--- a/docs/Jetson.md
+++ b/docs/Jetson.md
@@ -3,6 +3,7 @@
 Only test on `Jetson-NX 4GB`
 
 ENVS:
+
 - Jetpack 4.6.3
 - CUDA-10.2
 - CUDNN-8.2.1
@@ -20,7 +21,8 @@ If you have other environment-related issues, please discuss in issue.
 `yolov8s.pt` is your trained pytorch model, or the official pre-trained model.
 
 Do not use any model other than pytorch model.
-Do not use [`build.py`](../build.py) to export engine if you don't know how to install pytorch and other environments on jetson.
+Do not use [`build.py`](../build.py) to export engine if you don't know how to install pytorch and other environments on
+jetson.
 
 ***!!! Please use the PC to execute the following script !!!***
 
@@ -79,7 +81,8 @@ Usage:
 `yolov8s-seg.pt` is your trained pytorch model, or the official pre-trained model.
 
 Do not use any model other than pytorch model.
-Do not use [`build.py`](../build.py) to export engine if you don't know how to install pytorch and other environments on jetson.
+Do not use [`build.py`](../build.py) to export engine if you don't know how to install pytorch and other environments on
+jetson.
 
 ***!!! Please use the PC to execute the following script !!!***
 
@@ -106,7 +109,8 @@ Here is a demo: [`csrc/jetson/segment`](../csrc/jetson/segment) .
 
 #### Build:
 
-Please modify `CLASS_NAMES` and `COLORS` and postprocess parameters in [`main.cpp`](../csrc/jetson/segment/main.cpp) for yourself.
+Please modify `CLASS_NAMES` and `COLORS` and postprocess parameters in [`main.cpp`](../csrc/jetson/segment/main.cpp) for
+yourself.
 
 ```c++
 int topk = 100;
@@ -140,8 +144,6 @@ Usage:
 ./yolov8-seg yolov8s-seg.engine data/test.mp4 # the video path
 ```
 
-
-
 ## Normal Posture
 
 ### 1. Export Posture Normal ONNX
@@ -149,7 +151,8 @@ Usage:
 `yolov8s-pose.pt` is your trained pytorch model, or the official pre-trained model.
 
 Do not use any model other than pytorch model.
-Do not use [`build.py`](../build.py) to export engine if you don't know how to install pytorch and other environments on jetson.
+Do not use [`build.py`](../build.py) to export engine if you don't know how to install pytorch and other environments on
+jetson.
 
 ***!!! Please use the PC to execute the following script !!!***
 
@@ -176,7 +179,8 @@ Here is a demo: [`csrc/jetson/pose`](../csrc/jetson/pose) .
 
 #### Build:
 
-Please modify `KPS_COLORS` and `SKELETON` and `LIMB_COLORS` and postprocess parameters in [`main.cpp`](../csrc/jetson/pose/main.cpp) for yourself.
+Please modify `KPS_COLORS` and `SKELETON` and `LIMB_COLORS` and postprocess parameters
+in [`main.cpp`](../csrc/jetson/pose/main.cpp) for yourself.
 
 ```c++
 int topk = 100;
diff --git a/docs/Normal.md b/docs/Normal.md
index 1b9eede..0435f5b 100644
--- a/docs/Normal.md
+++ b/docs/Normal.md
@@ -2,23 +2,39 @@
 
 ## Export TensorRT Engine
 
-### 1. Python script
+### 1. ONNX -> TensorRT
 
-Usage:
+You can export your onnx model by `ultralytics` API.
 
-```python
+``` shell
+yolo export model=yolov8s.pt format=onnx opset=11 simplify=True
+```
 
+or run this python script:
+
+```python
 from ultralytics import YOLO
 
 # Load a model
 model = YOLO("yolov8s.pt")  # load a pretrained model (recommended for training)
-success = model.export(format="engine", device=0)  # export the model to engine format
+success = model.export(format="onnx", opset=11, simplify=True)  # export the model to onnx format
 assert success
 ```
 
-After executing the above script, you will get an engine named `yolov8s.engine` .
+Then build engine by Trtexec Tools.
+
+You can export TensorRT engine by [`trtexec`](https://github.com/NVIDIA/TensorRT/tree/main/samples/trtexec) tools.
+
+Usage:
+
+``` shell
+/usr/src/tensorrt/bin/trtexec \
+--onnx=yolov8s.onnx \
+--saveEngine=yolov8s.engine \
+--fp16
+```
 
-### 2. CLI tools
+### 2. Direct to TensorRT (NOT RECOMMAND!!)
 
 Usage:
 
@@ -26,7 +42,19 @@ Usage:
 yolo export model=yolov8s.pt format=engine device=0
 ```
 
-After executing the above command, you will get an engine named `yolov8s.engine` too.
+or run python script:
+
+```python
+
+from ultralytics import YOLO
+
+# Load a model
+model = YOLO("yolov8s.pt")  # load a pretrained model (recommended for training)
+success = model.export(format="engine", device=0)  # export the model to engine format
+assert success
+```
+
+After executing the above script, you will get an engine named `yolov8s.engine` .
 
 ## Inference with c++
 
@@ -34,9 +62,11 @@ You can infer with c++ in [`csrc/detect/normal`](../csrc/detect/normal) .
 
 ### Build:
 
-Please set you own librarys in [`CMakeLists.txt`](../csrc/detect/normal/CMakeLists.txt) and modify `CLASS_NAMES` and `COLORS` in [`main.cpp`](../csrc/detect/normal/main.cpp).
+Please set you own librarys in [`CMakeLists.txt`](../csrc/detect/normal/CMakeLists.txt) and modify `CLASS_NAMES`
+and `COLORS` in [`main.cpp`](../csrc/detect/normal/main.cpp).
 
-Besides, you can modify the postprocess parameters such as `num_labels` and `score_thres` and `iou_thres` and `topk` in [`main.cpp`](../csrc/detect/normal/main.cpp).
+Besides, you can modify the postprocess parameters such as `num_labels` and `score_thres` and `iou_thres` and `topk`
+in [`main.cpp`](../csrc/detect/normal/main.cpp).
 
 ```c++
 int num_labels = 80;
diff --git a/docs/Pose.md b/docs/Pose.md
index 61d7fbc..45ea899 100644
--- a/docs/Pose.md
+++ b/docs/Pose.md
@@ -1,7 +1,7 @@
 # YOLOv8-pose Model with TensorRT
 
 The yolov8-pose model conversion route is :
-    YOLOv8 PyTorch model -> ONNX -> TensorRT Engine
+YOLOv8 PyTorch model -> ONNX -> TensorRT Engine
 
 ***Notice !!!*** We don't support TensorRT API building !!!
 
@@ -9,10 +9,48 @@ The yolov8-pose model conversion route is :
 
 You can leave this repo and use the original `ultralytics` repo for onnx export.
 
-### 1. Python script
+### 1. ONNX -> TensorRT
+
+You can export your onnx model by `ultralytics` API.
+
+``` shell
+yolo export model=yolov8s-pose.pt format=onnx opset=11 simplify=True
+```
+
+or run this python script:
+
+```python
+from ultralytics import YOLO
+
+# Load a model
+model = YOLO("yolov8s-pose.pt")  # load a pretrained model (recommended for training)
+success = model.export(format="onnx", opset=11, simplify=True)  # export the model to onnx format
+assert success
+```
+
+Then build engine by Trtexec Tools.
+
+You can export TensorRT engine by [`trtexec`](https://github.com/NVIDIA/TensorRT/tree/main/samples/trtexec) tools.
+
+Usage:
+
+``` shell
+/usr/src/tensorrt/bin/trtexec \
+--onnx=yolov8s-pose.onnx \
+--saveEngine=yolov8s-pose.engine \
+--fp16
+```
+
+### 2. Direct to TensorRT (NOT RECOMMAND!!)
 
 Usage:
 
+```shell
+yolo export model=yolov8s-pose.pt format=engine device=0
+```
+
+or run python script:
+
 ```python
 from ultralytics import YOLO
 
@@ -24,15 +62,31 @@ assert success
 
 After executing the above script, you will get an engine named `yolov8s-pose.engine` .
 
-### 2. CLI tools
+# Inference
+
+## Infer with python script
+
+You can infer images with the engine by [`infer-pose.py`](../infer-pose.py) .
 
 Usage:
 
-```shell
-yolo export model=yolov8s-pose.pt format=engine device=0
+``` shell
+python3 infer-pose.py \
+--engine yolov8s-pose.engine \
+--imgs data \
+--show \
+--out-dir outputs \
+--device cuda:0
 ```
 
-After executing the above command, you will get an engine named `yolov8s-pose.engine` too.
+#### Description of all arguments
+
+- `--engine` : The Engine you export.
+- `--imgs` : The images path you want to detect.
+- `--show` : Whether to show detection results.
+- `--out-dir` : Where to save detection results images. It will not work when use `--show` flag.
+- `--device` : The CUDA deivce you use.
+- `--profile` : Profile the TensorRT engine.
 
 ## Inference with c++
 
@@ -40,9 +94,11 @@ You can infer with c++ in [`csrc/pose/normal`](../csrc/pose/normal) .
 
 ### Build:
 
-Please set you own librarys in [`CMakeLists.txt`](../csrc/pose/normal/CMakeLists.txt) and modify `KPS_COLORS` and `SKELETON` and  `LIMB_COLORS`  in [`main.cpp`](../csrc/pose/normal/main.cpp).
+Please set you own librarys in [`CMakeLists.txt`](../csrc/pose/normal/CMakeLists.txt) and modify `KPS_COLORS`
+and `SKELETON` and  `LIMB_COLORS`  in [`main.cpp`](../csrc/pose/normal/main.cpp).
 
-Besides, you can modify the postprocess parameters such as `score_thres` and `iou_thres` and `topk` in [`main.cpp`](../csrc/pose/normal/main.cpp).
+Besides, you can modify the postprocess parameters such as `score_thres` and `iou_thres` and `topk`
+in [`main.cpp`](../csrc/pose/normal/main.cpp).
 
 ```c++
 int topk = 100;
diff --git a/docs/Segment.md b/docs/Segment.md
index a41b44c..cb335be 100644
--- a/docs/Segment.md
+++ b/docs/Segment.md
@@ -1,7 +1,7 @@
 # YOLOv8-seg Model with TensorRT
 
 The yolov8-seg model conversion route is :
-    YOLOv8 PyTorch model -> ONNX -> TensorRT Engine
+YOLOv8 PyTorch model -> ONNX -> TensorRT Engine
 
 ***Notice !!!*** We don't support TensorRT API building !!!
 
@@ -96,7 +96,9 @@ You can infer segment engine with c++ in [`csrc/segment/simple`](../csrc/segment
 
 ### Build:
 
-Please set you own librarys in [`CMakeLists.txt`](../csrc/segment/simple/CMakeLists.txt) and modify you own config in [`main.cpp`](../csrc/segment/simple/main.cpp) such as `CLASS_NAMES`, `COLORS`, `MASK_COLORS` and postprocess parameters .
+Please set you own librarys in [`CMakeLists.txt`](../csrc/segment/simple/CMakeLists.txt) and modify you own config
+in [`main.cpp`](../csrc/segment/simple/main.cpp) such as `CLASS_NAMES`, `COLORS`, `MASK_COLORS` and postprocess
+parameters .
 
 ```c++
 int topk = 100;
@@ -119,7 +121,8 @@ cd ${root}
 
 ***Notice !!!***
 
-If you have build OpenCV(>=4.7.0) by yourself, it provides a new api [`cv::dnn::NMSBoxesBatched`](https://docs.opencv.org/4.x/d6/d0f/group__dnn.html#ga977aae09fbf7c804e003cfea1d4e928c) .
+If you have build OpenCV(>=4.7.0) by yourself, it provides a new
+api [`cv::dnn::NMSBoxesBatched`](https://docs.opencv.org/4.x/d6/d0f/group__dnn.html#ga977aae09fbf7c804e003cfea1d4e928c) .
 It is a gread api about efficient in-class nms . It will be used by default!
 
 ***!!!***
@@ -139,22 +142,39 @@ Usage:
 
 You can leave this repo and use the original `ultralytics` repo for onnx export.
 
-### 1. Python script
+### 1. ONNX -> TensorRT
 
-Usage:
+You can export your onnx model by `ultralytics` API.
+
+``` shell
+yolo export model=yolov8s-seg.pt format=onnx opset=11 simplify=True
+```
+
+or run this python script:
 
 ```python
 from ultralytics import YOLO
 
 # Load a model
 model = YOLO("yolov8s-seg.pt")  # load a pretrained model (recommended for training)
-success = model.export(format="engine", device=0)  # export the model to engine format
+success = model.export(format="onnx", opset=11, simplify=True)  # export the model to onnx format
 assert success
 ```
 
-After executing the above script, you will get an engine named `yolov8s-seg.engine` .
+Then build engine by Trtexec Tools.
 
-### 2. CLI tools
+You can export TensorRT engine by [`trtexec`](https://github.com/NVIDIA/TensorRT/tree/main/samples/trtexec) tools.
+
+Usage:
+
+``` shell
+/usr/src/tensorrt/bin/trtexec \
+--onnx=yolov8s-seg.onnx \
+--saveEngine=yolov8s-seg.engine \
+--fp16
+```
+
+### 2. Direct to TensorRT (NOT RECOMMAND!!)
 
 Usage:
 
@@ -162,7 +182,18 @@ Usage:
 yolo export model=yolov8s-seg.pt format=engine device=0
 ```
 
-After executing the above command, you will get an engine named `yolov8s-seg.engine` too.
+or run python script:
+
+```python
+from ultralytics import YOLO
+
+# Load a model
+model = YOLO("yolov8s-seg.pt")  # load a pretrained model (recommended for training)
+success = model.export(format="engine", device=0)  # export the model to engine format
+assert success
+```
+
+After executing the above script, you will get an engine named `yolov8s-seg.engine` .
 
 ## Inference with c++
 
@@ -170,9 +201,11 @@ You can infer with c++ in [`csrc/segment/normal`](../csrc/segment/normal) .
 
 ### Build:
 
-Please set you own librarys in [`CMakeLists.txt`](../csrc/segment/normal/CMakeLists.txt) and modify `CLASS_NAMES` and `COLORS` in [`main.cpp`](../csrc/segment/normal/main.cpp).
+Please set you own librarys in [`CMakeLists.txt`](../csrc/segment/normal/CMakeLists.txt) and modify `CLASS_NAMES`
+and `COLORS` in [`main.cpp`](../csrc/segment/normal/main.cpp).
 
-Besides, you can modify the postprocess parameters such as `num_labels` and `score_thres` and `iou_thres` and `topk` in [`main.cpp`](../csrc/segment/normal/main.cpp).
+Besides, you can modify the postprocess parameters such as `num_labels` and `score_thres` and `iou_thres` and `topk`
+in [`main.cpp`](../csrc/segment/normal/main.cpp).
 
 ```c++
 int topk = 100;

From 963a56d0041f24ee9d13ef857b27c1cac3df69d1 Mon Sep 17 00:00:00 2001
From: triple-Mu <gpu@163.com>
Date: Tue, 11 Jul 2023 16:39:51 +0800
Subject: [PATCH 3/3] Format cpp code style by clangformat

---
 .../nvdsparsebbox_yoloV8.cpp                  | 134 ++-
 csrc/detect/end2end/include/common.hpp        | 213 ++---
 csrc/detect/end2end/include/yolov8.hpp        | 615 +++++--------
 csrc/detect/end2end/main.cpp                  | 237 ++---
 csrc/detect/normal/include/common.hpp         | 213 ++---
 csrc/detect/normal/include/yolov8.hpp         | 718 ++++++---------
 csrc/detect/normal/main.cpp                   | 245 +++--
 csrc/jetson/detect/include/common.hpp         | 213 ++---
 csrc/jetson/detect/include/yolov8.hpp         | 613 +++++--------
 csrc/jetson/detect/main.cpp                   | 233 ++---
 csrc/jetson/pose/include/common.hpp           | 215 ++---
 csrc/jetson/pose/include/yolov8-pose.hpp      | 457 ++++------
 csrc/jetson/pose/main.cpp                     | 143 ++-
 csrc/jetson/segment/include/common.hpp        | 215 ++---
 csrc/jetson/segment/include/yolov8-seg.hpp    | 801 +++++++---------
 csrc/jetson/segment/main.cpp                  | 260 +++---
 csrc/pose/normal/include/common.hpp           | 215 ++---
 csrc/pose/normal/include/yolov8-pose.hpp      | 458 ++++------
 csrc/pose/normal/main.cpp                     | 143 ++-
 csrc/segment/normal/include/common.hpp        | 215 ++---
 csrc/segment/normal/include/yolov8-seg.hpp    | 860 +++++++-----------
 csrc/segment/normal/main.cpp                  | 260 +++---
 csrc/segment/simple/include/common.hpp        | 215 ++---
 csrc/segment/simple/include/yolov8-seg.hpp    | 805 +++++++---------
 csrc/segment/simple/main.cpp                  | 260 +++---
 25 files changed, 3610 insertions(+), 5346 deletions(-)

diff --git a/csrc/deepstream/custom_bbox_parser/nvdsparsebbox_yoloV8.cpp b/csrc/deepstream/custom_bbox_parser/nvdsparsebbox_yoloV8.cpp
index 80b0fc5..e66802e 100644
--- a/csrc/deepstream/custom_bbox_parser/nvdsparsebbox_yoloV8.cpp
+++ b/csrc/deepstream/custom_bbox_parser/nvdsparsebbox_yoloV8.cpp
@@ -18,95 +18,85 @@
  */
 
 // This is just the function prototype. The definition is written at the end of the file.
-extern "C" bool NvDsInferParseCustomYoloV8(
-	std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
-	NvDsInferNetworkInfo const& networkInfo,
-	NvDsInferParseDetectionParams const& detectionParams,
-	std::vector<NvDsInferParseObjectInfo>& objectList);
+extern "C" bool NvDsInferParseCustomYoloV8(std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
+                                           NvDsInferNetworkInfo const&            networkInfo,
+                                           NvDsInferParseDetectionParams const&   detectionParams,
+                                           std::vector<NvDsInferParseObjectInfo>& objectList);
 
 static __inline__ float bbox_clip(const float& val, const float& minVal = 0.f, const float& maxVal = 1280.f)
 {
-	assert(minVal <= maxVal);
-	return std::max(std::min(val, (maxVal - 1)), minVal);
+    assert(minVal <= maxVal);
+    return std::max(std::min(val, (maxVal - 1)), minVal);
 }
 
-static std::vector<NvDsInferParseObjectInfo> decodeYoloV8Tensor(
-	const int* num_dets,
-	const float* bboxes,
-	const float* scores,
-	const int* labels,
-	const unsigned int& img_w,
-	const unsigned int& img_h
-)
+static std::vector<NvDsInferParseObjectInfo> decodeYoloV8Tensor(const int*          num_dets,
+                                                                const float*        bboxes,
+                                                                const float*        scores,
+                                                                const int*          labels,
+                                                                const unsigned int& img_w,
+                                                                const unsigned int& img_h)
 {
-	std::vector<NvDsInferParseObjectInfo> bboxInfo;
-	size_t nums = num_dets[0];
-	for (size_t i = 0; i < nums; i++)
-	{
-		float x0 = (bboxes[i * 4]);
-		float y0 = (bboxes[i * 4 + 1]);
-		float x1 = (bboxes[i * 4 + 2]);
-		float y1 = (bboxes[i * 4 + 3]);
-		x0 = bbox_clip(x0, 0.f, img_w);
-		y0 = bbox_clip(y0, 0.f, img_h);
-		x1 = bbox_clip(x1, 0.f, img_w);
-		y1 = bbox_clip(y1, 0.f, img_h);
-		NvDsInferParseObjectInfo obj;
-		obj.left = x0;
-		obj.top = y0;
-		obj.width = x1 - x0;
-		obj.height = y1 - y0;
-		obj.detectionConfidence = scores[i];
-		obj.classId = labels[i];
-		bboxInfo.push_back(obj);
-	}
+    std::vector<NvDsInferParseObjectInfo> bboxInfo;
+    size_t                                nums = num_dets[0];
+    for (size_t i = 0; i < nums; i++) {
+        float x0 = (bboxes[i * 4]);
+        float y0 = (bboxes[i * 4 + 1]);
+        float x1 = (bboxes[i * 4 + 2]);
+        float y1 = (bboxes[i * 4 + 3]);
+        x0       = bbox_clip(x0, 0.f, img_w);
+        y0       = bbox_clip(y0, 0.f, img_h);
+        x1       = bbox_clip(x1, 0.f, img_w);
+        y1       = bbox_clip(y1, 0.f, img_h);
+        NvDsInferParseObjectInfo obj;
+        obj.left                = x0;
+        obj.top                 = y0;
+        obj.width               = x1 - x0;
+        obj.height              = y1 - y0;
+        obj.detectionConfidence = scores[i];
+        obj.classId             = labels[i];
+        bboxInfo.push_back(obj);
+    }
 
-	return bboxInfo;
+    return bboxInfo;
 }
 
 /* C-linkage to prevent name-mangling */
-extern "C" bool NvDsInferParseCustomYoloV8(
-	std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
-	NvDsInferNetworkInfo const& networkInfo,
-	NvDsInferParseDetectionParams const& detectionParams,
-	std::vector<NvDsInferParseObjectInfo>& objectList)
+extern "C" bool NvDsInferParseCustomYoloV8(std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
+                                           NvDsInferNetworkInfo const&            networkInfo,
+                                           NvDsInferParseDetectionParams const&   detectionParams,
+                                           std::vector<NvDsInferParseObjectInfo>& objectList)
 {
 
-	// Some assertions and error checking.
+    // Some assertions and error checking.
 
-	if (outputLayersInfo.empty() || outputLayersInfo.size() != 4)
-	{
-		std::cerr << "Could not find output layer in bbox parsing" << std::endl;
-		return false;
-	}
+    if (outputLayersInfo.empty() || outputLayersInfo.size() != 4) {
+        std::cerr << "Could not find output layer in bbox parsing" << std::endl;
+        return false;
+    }
 
-	// Obtaining the output layer.
-	const NvDsInferLayerInfo& num_dets = outputLayersInfo[0];
-	const NvDsInferLayerInfo& bboxes = outputLayersInfo[1];
-	const NvDsInferLayerInfo& scores = outputLayersInfo[2];
-	const NvDsInferLayerInfo& labels = outputLayersInfo[3];
+    // Obtaining the output layer.
+    const NvDsInferLayerInfo& num_dets = outputLayersInfo[0];
+    const NvDsInferLayerInfo& bboxes   = outputLayersInfo[1];
+    const NvDsInferLayerInfo& scores   = outputLayersInfo[2];
+    const NvDsInferLayerInfo& labels   = outputLayersInfo[3];
 
-	// num_dets(int) bboxes(float) scores(float) labels(int)
-	assert (num_dets.dims.numDims == 2);
-	assert (bboxes.dims.numDims == 3);
-	assert (scores.dims.numDims == 2);
-	assert (labels.dims.numDims == 2);
+    // num_dets(int) bboxes(float) scores(float) labels(int)
+    assert(num_dets.dims.numDims == 2);
+    assert(bboxes.dims.numDims == 3);
+    assert(scores.dims.numDims == 2);
+    assert(labels.dims.numDims == 2);
 
+    // Decoding the output tensor of YOLOv8 to the NvDsInferParseObjectInfo format.
+    std::vector<NvDsInferParseObjectInfo> objects = decodeYoloV8Tensor((const int*)(num_dets.buffer),
+                                                                       (const float*)(bboxes.buffer),
+                                                                       (const float*)(scores.buffer),
+                                                                       (const int*)(labels.buffer),
+                                                                       networkInfo.width,
+                                                                       networkInfo.height);
 
-	// Decoding the output tensor of YOLOv8 to the NvDsInferParseObjectInfo format.
-	std::vector<NvDsInferParseObjectInfo> objects =
-		decodeYoloV8Tensor(
-			(const int*)(num_dets.buffer),
-			(const float*)(bboxes.buffer),
-			(const float*)(scores.buffer),
-			(const int*)(labels.buffer),
-			networkInfo.width,
-			networkInfo.height
-		);
-
-	objectList.clear();
-	objectList = objects;
-	return true;
+    objectList.clear();
+    objectList = objects;
+    return true;
 }
 
 /* Check that the custom function has been defined correctly */
diff --git a/csrc/detect/end2end/include/common.hpp b/csrc/detect/end2end/include/common.hpp
index b27deb3..9ad8d73 100644
--- a/csrc/detect/end2end/include/common.hpp
+++ b/csrc/detect/end2end/include/common.hpp
@@ -4,153 +4,138 @@
 
 #ifndef DETECT_END2END_COMMON_HPP
 #define DETECT_END2END_COMMON_HPP
+#include "NvInfer.h"
 #include "opencv2/opencv.hpp"
 #include <sys/stat.h>
 #include <unistd.h>
-#include "NvInfer.h"
 
-#define CHECK(call)                                   \
-do                                                    \
-{                                                     \
-    const cudaError_t error_code = call;              \
-    if (error_code != cudaSuccess)                    \
-    {                                                 \
-        printf("CUDA Error:\n");                      \
-        printf("    File:       %s\n", __FILE__);     \
-        printf("    Line:       %d\n", __LINE__);     \
-        printf("    Error code: %d\n", error_code);   \
-        printf("    Error text: %s\n",                \
-            cudaGetErrorString(error_code));          \
-        exit(1);                                      \
-    }                                                 \
-} while (0)
+#define CHECK(call)                                                                                                    \
+    do {                                                                                                               \
+        const cudaError_t error_code = call;                                                                           \
+        if (error_code != cudaSuccess) {                                                                               \
+            printf("CUDA Error:\n");                                                                                   \
+            printf("    File:       %s\n", __FILE__);                                                                  \
+            printf("    Line:       %d\n", __LINE__);                                                                  \
+            printf("    Error code: %d\n", error_code);                                                                \
+            printf("    Error text: %s\n", cudaGetErrorString(error_code));                                            \
+            exit(1);                                                                                                   \
+        }                                                                                                              \
+    } while (0)
 
-class Logger : public nvinfer1::ILogger
-{
+class Logger: public nvinfer1::ILogger {
 public:
-	nvinfer1::ILogger::Severity reportableSeverity;
+    nvinfer1::ILogger::Severity reportableSeverity;
 
-	explicit Logger(nvinfer1::ILogger::Severity severity = nvinfer1::ILogger::Severity::kINFO) :
-		reportableSeverity(severity)
-	{
-	}
+    explicit Logger(nvinfer1::ILogger::Severity severity = nvinfer1::ILogger::Severity::kINFO):
+        reportableSeverity(severity)
+    {
+    }
 
-	void log(nvinfer1::ILogger::Severity severity, const char* msg) noexcept override
-	{
-		if (severity > reportableSeverity)
-		{
-			return;
-		}
-		switch (severity)
-		{
-		case nvinfer1::ILogger::Severity::kINTERNAL_ERROR:
-			std::cerr << "INTERNAL_ERROR: ";
-			break;
-		case nvinfer1::ILogger::Severity::kERROR:
-			std::cerr << "ERROR: ";
-			break;
-		case nvinfer1::ILogger::Severity::kWARNING:
-			std::cerr << "WARNING: ";
-			break;
-		case nvinfer1::ILogger::Severity::kINFO:
-			std::cerr << "INFO: ";
-			break;
-		default:
-			std::cerr << "VERBOSE: ";
-			break;
-		}
-		std::cerr << msg << std::endl;
-	}
+    void log(nvinfer1::ILogger::Severity severity, const char* msg) noexcept override
+    {
+        if (severity > reportableSeverity) {
+            return;
+        }
+        switch (severity) {
+            case nvinfer1::ILogger::Severity::kINTERNAL_ERROR:
+                std::cerr << "INTERNAL_ERROR: ";
+                break;
+            case nvinfer1::ILogger::Severity::kERROR:
+                std::cerr << "ERROR: ";
+                break;
+            case nvinfer1::ILogger::Severity::kWARNING:
+                std::cerr << "WARNING: ";
+                break;
+            case nvinfer1::ILogger::Severity::kINFO:
+                std::cerr << "INFO: ";
+                break;
+            default:
+                std::cerr << "VERBOSE: ";
+                break;
+        }
+        std::cerr << msg << std::endl;
+    }
 };
 
 inline int get_size_by_dims(const nvinfer1::Dims& dims)
 {
-	int size = 1;
-	for (int i = 0; i < dims.nbDims; i++)
-	{
-		size *= dims.d[i];
-	}
-	return size;
+    int size = 1;
+    for (int i = 0; i < dims.nbDims; i++) {
+        size *= dims.d[i];
+    }
+    return size;
 }
 
 inline int type_to_size(const nvinfer1::DataType& dataType)
 {
-	switch (dataType)
-	{
-	case nvinfer1::DataType::kFLOAT:
-		return 4;
-	case nvinfer1::DataType::kHALF:
-		return 2;
-	case nvinfer1::DataType::kINT32:
-		return 4;
-	case nvinfer1::DataType::kINT8:
-		return 1;
-	case nvinfer1::DataType::kBOOL:
-		return 1;
-	default:
-		return 4;
-	}
+    switch (dataType) {
+        case nvinfer1::DataType::kFLOAT:
+            return 4;
+        case nvinfer1::DataType::kHALF:
+            return 2;
+        case nvinfer1::DataType::kINT32:
+            return 4;
+        case nvinfer1::DataType::kINT8:
+            return 1;
+        case nvinfer1::DataType::kBOOL:
+            return 1;
+        default:
+            return 4;
+    }
 }
 
 inline static float clamp(float val, float min, float max)
 {
-	return val > min ? (val < max ? val : max) : min;
+    return val > min ? (val < max ? val : max) : min;
 }
 
 inline bool IsPathExist(const std::string& path)
 {
-	if (access(path.c_str(), 0) == F_OK)
-	{
-		return true;
-	}
-	return false;
+    if (access(path.c_str(), 0) == F_OK) {
+        return true;
+    }
+    return false;
 }
 
 inline bool IsFile(const std::string& path)
 {
-	if (!IsPathExist(path))
-	{
-		printf("%s:%d %s not exist\n", __FILE__, __LINE__, path.c_str());
-		return false;
-	}
-	struct stat buffer;
-	return (stat(path.c_str(), &buffer) == 0 && S_ISREG(buffer.st_mode));
+    if (!IsPathExist(path)) {
+        printf("%s:%d %s not exist\n", __FILE__, __LINE__, path.c_str());
+        return false;
+    }
+    struct stat buffer;
+    return (stat(path.c_str(), &buffer) == 0 && S_ISREG(buffer.st_mode));
 }
 
 inline bool IsFolder(const std::string& path)
 {
-	if (!IsPathExist(path))
-	{
-		return false;
-	}
-	struct stat buffer;
-	return (stat(path.c_str(), &buffer) == 0 && S_ISDIR(buffer.st_mode));
+    if (!IsPathExist(path)) {
+        return false;
+    }
+    struct stat buffer;
+    return (stat(path.c_str(), &buffer) == 0 && S_ISDIR(buffer.st_mode));
 }
 
-namespace det
-{
-	struct Binding
-	{
-		size_t size = 1;
-		size_t dsize = 1;
-		nvinfer1::Dims dims;
-		std::string name;
-	};
+namespace det {
+struct Binding {
+    size_t         size  = 1;
+    size_t         dsize = 1;
+    nvinfer1::Dims dims;
+    std::string    name;
+};
 
-	struct Object
-	{
-		cv::Rect_<float> rect;
-		int label = 0;
-		float prob = 0.0;
-	};
+struct Object {
+    cv::Rect_<float> rect;
+    int              label = 0;
+    float            prob  = 0.0;
+};
 
-	struct PreParam
-	{
-		float ratio = 1.0f;
-		float dw = 0.0f;
-		float dh = 0.0f;
-		float height = 0;
-		float width = 0;
-	};
-}
-#endif //DETECT_END2END_COMMON_HPP
+struct PreParam {
+    float ratio  = 1.0f;
+    float dw     = 0.0f;
+    float dh     = 0.0f;
+    float height = 0;
+    float width  = 0;
+};
+}  // namespace det
+#endif  // DETECT_END2END_COMMON_HPP
diff --git a/csrc/detect/end2end/include/yolov8.hpp b/csrc/detect/end2end/include/yolov8.hpp
index c08a8e9..22ea548 100644
--- a/csrc/detect/end2end/include/yolov8.hpp
+++ b/csrc/detect/end2end/include/yolov8.hpp
@@ -3,424 +3,281 @@
 //
 #ifndef DETECT_END2END_YOLOV8_HPP
 #define DETECT_END2END_YOLOV8_HPP
-#include "fstream"
-#include "common.hpp"
 #include "NvInferPlugin.h"
+#include "common.hpp"
+#include "fstream"
 using namespace det;
 
-class YOLOv8
-{
+class YOLOv8 {
 public:
-	explicit YOLOv8(const std::string& engine_file_path);
-	~YOLOv8();
-
-	void make_pipe(bool warmup = true);
-	void copy_from_Mat(const cv::Mat& image);
-	void copy_from_Mat(const cv::Mat& image, cv::Size& size);
-	void letterbox(
-		const cv::Mat& image,
-		cv::Mat& out,
-		cv::Size& size
-	);
-	void infer();
-	void postprocess(std::vector<Object>& objs);
-	static void draw_objects(
-		const cv::Mat& image,
-		cv::Mat& res,
-		const std::vector<Object>& objs,
-		const std::vector<std::string>& CLASS_NAMES,
-		const std::vector<std::vector<unsigned int>>& COLORS
-	);
-	int num_bindings;
-	int num_inputs = 0;
-	int num_outputs = 0;
-	std::vector<Binding> input_bindings;
-	std::vector<Binding> output_bindings;
-	std::vector<void*> host_ptrs;
-	std::vector<void*> device_ptrs;
-
-	PreParam pparam;
-private:
-	nvinfer1::ICudaEngine* engine = nullptr;
-	nvinfer1::IRuntime* runtime = nullptr;
-	nvinfer1::IExecutionContext* context = nullptr;
-	cudaStream_t stream = nullptr;
-	Logger gLogger{ nvinfer1::ILogger::Severity::kERROR };
+    explicit YOLOv8(const std::string& engine_file_path);
+    ~YOLOv8();
+
+    void                 make_pipe(bool warmup = true);
+    void                 copy_from_Mat(const cv::Mat& image);
+    void                 copy_from_Mat(const cv::Mat& image, cv::Size& size);
+    void                 letterbox(const cv::Mat& image, cv::Mat& out, cv::Size& size);
+    void                 infer();
+    void                 postprocess(std::vector<Object>& objs);
+    static void          draw_objects(const cv::Mat&                                image,
+                                      cv::Mat&                                      res,
+                                      const std::vector<Object>&                    objs,
+                                      const std::vector<std::string>&               CLASS_NAMES,
+                                      const std::vector<std::vector<unsigned int>>& COLORS);
+    int                  num_bindings;
+    int                  num_inputs  = 0;
+    int                  num_outputs = 0;
+    std::vector<Binding> input_bindings;
+    std::vector<Binding> output_bindings;
+    std::vector<void*>   host_ptrs;
+    std::vector<void*>   device_ptrs;
+
+    PreParam pparam;
 
+private:
+    nvinfer1::ICudaEngine*       engine  = nullptr;
+    nvinfer1::IRuntime*          runtime = nullptr;
+    nvinfer1::IExecutionContext* context = nullptr;
+    cudaStream_t                 stream  = nullptr;
+    Logger                       gLogger{nvinfer1::ILogger::Severity::kERROR};
 };
 
 YOLOv8::YOLOv8(const std::string& engine_file_path)
 {
-	std::ifstream file(engine_file_path, std::ios::binary);
-	assert(file.good());
-	file.seekg(0, std::ios::end);
-	auto size = file.tellg();
-	file.seekg(0, std::ios::beg);
-	char* trtModelStream = new char[size];
-	assert(trtModelStream);
-	file.read(trtModelStream, size);
-	file.close();
-	initLibNvInferPlugins(&this->gLogger, "");
-	this->runtime = nvinfer1::createInferRuntime(this->gLogger);
-	assert(this->runtime != nullptr);
-
-	this->engine = this->runtime->deserializeCudaEngine(trtModelStream, size);
-	assert(this->engine != nullptr);
-	delete[] trtModelStream;
-	this->context = this->engine->createExecutionContext();
-
-	assert(this->context != nullptr);
-	cudaStreamCreate(&this->stream);
-	this->num_bindings = this->engine->getNbBindings();
-
-	for (int i = 0; i < this->num_bindings; ++i)
-	{
-		Binding binding;
-		nvinfer1::Dims dims;
-		nvinfer1::DataType dtype = this->engine->getBindingDataType(i);
-		std::string name = this->engine->getBindingName(i);
-		binding.name = name;
-		binding.dsize = type_to_size(dtype);
-
-		bool IsInput = engine->bindingIsInput(i);
-		if (IsInput)
-		{
-			this->num_inputs += 1;
-			dims = this->engine->getProfileDimensions(
-				i,
-				0,
-				nvinfer1::OptProfileSelector::kMAX);
-			binding.size = get_size_by_dims(dims);
-			binding.dims = dims;
-			this->input_bindings.push_back(binding);
-			// set max opt shape
-			this->context->setBindingDimensions(i, dims);
-
-		}
-		else
-		{
-			dims = this->context->getBindingDimensions(i);
-			binding.size = get_size_by_dims(dims);
-			binding.dims = dims;
-			this->output_bindings.push_back(binding);
-			this->num_outputs += 1;
-		}
-	}
-
+    std::ifstream file(engine_file_path, std::ios::binary);
+    assert(file.good());
+    file.seekg(0, std::ios::end);
+    auto size = file.tellg();
+    file.seekg(0, std::ios::beg);
+    char* trtModelStream = new char[size];
+    assert(trtModelStream);
+    file.read(trtModelStream, size);
+    file.close();
+    initLibNvInferPlugins(&this->gLogger, "");
+    this->runtime = nvinfer1::createInferRuntime(this->gLogger);
+    assert(this->runtime != nullptr);
+
+    this->engine = this->runtime->deserializeCudaEngine(trtModelStream, size);
+    assert(this->engine != nullptr);
+    delete[] trtModelStream;
+    this->context = this->engine->createExecutionContext();
+
+    assert(this->context != nullptr);
+    cudaStreamCreate(&this->stream);
+    this->num_bindings = this->engine->getNbBindings();
+
+    for (int i = 0; i < this->num_bindings; ++i) {
+        Binding            binding;
+        nvinfer1::Dims     dims;
+        nvinfer1::DataType dtype = this->engine->getBindingDataType(i);
+        std::string        name  = this->engine->getBindingName(i);
+        binding.name             = name;
+        binding.dsize            = type_to_size(dtype);
+
+        bool IsInput = engine->bindingIsInput(i);
+        if (IsInput) {
+            this->num_inputs += 1;
+            dims         = this->engine->getProfileDimensions(i, 0, nvinfer1::OptProfileSelector::kMAX);
+            binding.size = get_size_by_dims(dims);
+            binding.dims = dims;
+            this->input_bindings.push_back(binding);
+            // set max opt shape
+            this->context->setBindingDimensions(i, dims);
+        }
+        else {
+            dims         = this->context->getBindingDimensions(i);
+            binding.size = get_size_by_dims(dims);
+            binding.dims = dims;
+            this->output_bindings.push_back(binding);
+            this->num_outputs += 1;
+        }
+    }
 }
 
 YOLOv8::~YOLOv8()
 {
-	this->context->destroy();
-	this->engine->destroy();
-	this->runtime->destroy();
-	cudaStreamDestroy(this->stream);
-	for (auto& ptr : this->device_ptrs)
-	{
-		CHECK(cudaFree(ptr));
-	}
-
-	for (auto& ptr : this->host_ptrs)
-	{
-		CHECK(cudaFreeHost(ptr));
-	}
-
+    this->context->destroy();
+    this->engine->destroy();
+    this->runtime->destroy();
+    cudaStreamDestroy(this->stream);
+    for (auto& ptr : this->device_ptrs) {
+        CHECK(cudaFree(ptr));
+    }
+
+    for (auto& ptr : this->host_ptrs) {
+        CHECK(cudaFreeHost(ptr));
+    }
 }
 void YOLOv8::make_pipe(bool warmup)
 {
 
-	for (auto& bindings : this->input_bindings)
-	{
-		void* d_ptr;
-		CHECK(cudaMallocAsync(
-			&d_ptr,
-			bindings.size * bindings.dsize,
-			this->stream)
-		);
-		this->device_ptrs.push_back(d_ptr);
-	}
-
-	for (auto& bindings : this->output_bindings)
-	{
-		void* d_ptr, * h_ptr;
-		size_t size = bindings.size * bindings.dsize;
-		CHECK(cudaMallocAsync(
-			&d_ptr,
-			size,
-			this->stream)
-		);
-		CHECK(cudaHostAlloc(
-			&h_ptr,
-			size,
-			0)
-		);
-		this->device_ptrs.push_back(d_ptr);
-		this->host_ptrs.push_back(h_ptr);
-	}
-
-	if (warmup)
-	{
-		for (int i = 0; i < 10; i++)
-		{
-			for (auto& bindings : this->input_bindings)
-			{
-				size_t size = bindings.size * bindings.dsize;
-				void* h_ptr = malloc(size);
-				memset(h_ptr, 0, size);
-				CHECK(cudaMemcpyAsync(
-					this->device_ptrs[0],
-					h_ptr,
-					size,
-					cudaMemcpyHostToDevice,
-					this->stream)
-				);
-				free(h_ptr);
-			}
-			this->infer();
-		}
-		printf("model warmup 10 times\n");
-
-	}
+    for (auto& bindings : this->input_bindings) {
+        void* d_ptr;
+        CHECK(cudaMallocAsync(&d_ptr, bindings.size * bindings.dsize, this->stream));
+        this->device_ptrs.push_back(d_ptr);
+    }
+
+    for (auto& bindings : this->output_bindings) {
+        void * d_ptr, *h_ptr;
+        size_t size = bindings.size * bindings.dsize;
+        CHECK(cudaMallocAsync(&d_ptr, size, this->stream));
+        CHECK(cudaHostAlloc(&h_ptr, size, 0));
+        this->device_ptrs.push_back(d_ptr);
+        this->host_ptrs.push_back(h_ptr);
+    }
+
+    if (warmup) {
+        for (int i = 0; i < 10; i++) {
+            for (auto& bindings : this->input_bindings) {
+                size_t size  = bindings.size * bindings.dsize;
+                void*  h_ptr = malloc(size);
+                memset(h_ptr, 0, size);
+                CHECK(cudaMemcpyAsync(this->device_ptrs[0], h_ptr, size, cudaMemcpyHostToDevice, this->stream));
+                free(h_ptr);
+            }
+            this->infer();
+        }
+        printf("model warmup 10 times\n");
+    }
 }
 
 void YOLOv8::letterbox(const cv::Mat& image, cv::Mat& out, cv::Size& size)
 {
-	const float inp_h = size.height;
-	const float inp_w = size.width;
-	float height = image.rows;
-	float width = image.cols;
-
-	float r = std::min(inp_h / height, inp_w / width);
-	int padw = std::round(width * r);
-	int padh = std::round(height * r);
-
-	cv::Mat tmp;
-	if ((int)width != padw || (int)height != padh)
-	{
-		cv::resize(
-			image,
-			tmp,
-			cv::Size(padw, padh)
-		);
-	}
-	else
-	{
-		tmp = image.clone();
-	}
-
-	float dw = inp_w - padw;
-	float dh = inp_h - padh;
-
-	dw /= 2.0f;
-	dh /= 2.0f;
-	int top = int(std::round(dh - 0.1f));
-	int bottom = int(std::round(dh + 0.1f));
-	int left = int(std::round(dw - 0.1f));
-	int right = int(std::round(dw + 0.1f));
-
-	cv::copyMakeBorder(
-		tmp,
-		tmp,
-		top,
-		bottom,
-		left,
-		right,
-		cv::BORDER_CONSTANT,
-		{ 114, 114, 114 }
-	);
-
-	cv::dnn::blobFromImage(tmp,
-		out,
-		1 / 255.f,
-		cv::Size(),
-		cv::Scalar(0, 0, 0),
-		true,
-		false,
-		CV_32F
-	);
-	this->pparam.ratio = 1 / r;
-	this->pparam.dw = dw;
-	this->pparam.dh = dh;
-	this->pparam.height = height;
-	this->pparam.width = width;;
+    const float inp_h  = size.height;
+    const float inp_w  = size.width;
+    float       height = image.rows;
+    float       width  = image.cols;
+
+    float r    = std::min(inp_h / height, inp_w / width);
+    int   padw = std::round(width * r);
+    int   padh = std::round(height * r);
+
+    cv::Mat tmp;
+    if ((int)width != padw || (int)height != padh) {
+        cv::resize(image, tmp, cv::Size(padw, padh));
+    }
+    else {
+        tmp = image.clone();
+    }
+
+    float dw = inp_w - padw;
+    float dh = inp_h - padh;
+
+    dw /= 2.0f;
+    dh /= 2.0f;
+    int top    = int(std::round(dh - 0.1f));
+    int bottom = int(std::round(dh + 0.1f));
+    int left   = int(std::round(dw - 0.1f));
+    int right  = int(std::round(dw + 0.1f));
+
+    cv::copyMakeBorder(tmp, tmp, top, bottom, left, right, cv::BORDER_CONSTANT, {114, 114, 114});
+
+    cv::dnn::blobFromImage(tmp, out, 1 / 255.f, cv::Size(), cv::Scalar(0, 0, 0), true, false, CV_32F);
+    this->pparam.ratio  = 1 / r;
+    this->pparam.dw     = dw;
+    this->pparam.dh     = dh;
+    this->pparam.height = height;
+    this->pparam.width  = width;
+    ;
 }
 
 void YOLOv8::copy_from_Mat(const cv::Mat& image)
 {
-	cv::Mat nchw;
-	auto& in_binding = this->input_bindings[0];
-	auto width = in_binding.dims.d[3];
-	auto height = in_binding.dims.d[2];
-	cv::Size size{ width, height };
-	this->letterbox(
-		image,
-		nchw,
-		size
-	);
-
-	this->context->setBindingDimensions(
-		0,
-		nvinfer1::Dims
-			{
-				4,
-				{ 1, 3, height, width }
-			}
-	);
-
-	CHECK(cudaMemcpyAsync(
-		this->device_ptrs[0],
-		nchw.ptr<float>(),
-		nchw.total() * nchw.elemSize(),
-		cudaMemcpyHostToDevice,
-		this->stream)
-	);
+    cv::Mat  nchw;
+    auto&    in_binding = this->input_bindings[0];
+    auto     width      = in_binding.dims.d[3];
+    auto     height     = in_binding.dims.d[2];
+    cv::Size size{width, height};
+    this->letterbox(image, nchw, size);
+
+    this->context->setBindingDimensions(0, nvinfer1::Dims{4, {1, 3, height, width}});
+
+    CHECK(cudaMemcpyAsync(
+        this->device_ptrs[0], nchw.ptr<float>(), nchw.total() * nchw.elemSize(), cudaMemcpyHostToDevice, this->stream));
 }
 
 void YOLOv8::copy_from_Mat(const cv::Mat& image, cv::Size& size)
 {
-	cv::Mat nchw;
-	this->letterbox(
-		image,
-		nchw,
-		size
-	);
-	this->context->setBindingDimensions(
-		0,
-		nvinfer1::Dims
-			{ 4,
-			  { 1, 3, size.height, size.width }
-			}
-	);
-	CHECK(cudaMemcpyAsync(
-		this->device_ptrs[0],
-		nchw.ptr<float>(),
-		nchw.total() * nchw.elemSize(),
-		cudaMemcpyHostToDevice,
-		this->stream)
-	);
+    cv::Mat nchw;
+    this->letterbox(image, nchw, size);
+    this->context->setBindingDimensions(0, nvinfer1::Dims{4, {1, 3, size.height, size.width}});
+    CHECK(cudaMemcpyAsync(
+        this->device_ptrs[0], nchw.ptr<float>(), nchw.total() * nchw.elemSize(), cudaMemcpyHostToDevice, this->stream));
 }
 
 void YOLOv8::infer()
 {
 
-	this->context->enqueueV2(
-		this->device_ptrs.data(),
-		this->stream,
-		nullptr
-	);
-	for (int i = 0; i < this->num_outputs; i++)
-	{
-		size_t osize = this->output_bindings[i].size * this->output_bindings[i].dsize;
-		CHECK(cudaMemcpyAsync(this->host_ptrs[i],
-			this->device_ptrs[i + this->num_inputs],
-			osize,
-			cudaMemcpyDeviceToHost,
-			this->stream)
-		);
-
-	}
-	cudaStreamSynchronize(this->stream);
-
+    this->context->enqueueV2(this->device_ptrs.data(), this->stream, nullptr);
+    for (int i = 0; i < this->num_outputs; i++) {
+        size_t osize = this->output_bindings[i].size * this->output_bindings[i].dsize;
+        CHECK(cudaMemcpyAsync(
+            this->host_ptrs[i], this->device_ptrs[i + this->num_inputs], osize, cudaMemcpyDeviceToHost, this->stream));
+    }
+    cudaStreamSynchronize(this->stream);
 }
 
 void YOLOv8::postprocess(std::vector<Object>& objs)
 {
-	objs.clear();
-	int* num_dets = static_cast<int*>(this->host_ptrs[0]);
-	auto* boxes = static_cast<float*>(this->host_ptrs[1]);
-	auto* scores = static_cast<float*>(this->host_ptrs[2]);
-	int* labels = static_cast<int*>(this->host_ptrs[3]);
-	auto& dw = this->pparam.dw;
-	auto& dh = this->pparam.dh;
-	auto& width = this->pparam.width;
-	auto& height = this->pparam.height;
-	auto& ratio = this->pparam.ratio;
-	for (int i = 0; i < num_dets[0]; i++)
-	{
-		float* ptr = boxes + i * 4;
-
-		float x0 = *ptr++ - dw;
-		float y0 = *ptr++ - dh;
-		float x1 = *ptr++ - dw;
-		float y1 = *ptr - dh;
-
-		x0 = clamp(x0 * ratio, 0.f, width);
-		y0 = clamp(y0 * ratio, 0.f, height);
-		x1 = clamp(x1 * ratio, 0.f, width);
-		y1 = clamp(y1 * ratio, 0.f, height);
-		Object obj;
-		obj.rect.x = x0;
-		obj.rect.y = y0;
-		obj.rect.width = x1 - x0;
-		obj.rect.height = y1 - y0;
-		obj.prob = *(scores + i);
-		obj.label = *(labels + i);
-		objs.push_back(obj);
-	}
+    objs.clear();
+    int*  num_dets = static_cast<int*>(this->host_ptrs[0]);
+    auto* boxes    = static_cast<float*>(this->host_ptrs[1]);
+    auto* scores   = static_cast<float*>(this->host_ptrs[2]);
+    int*  labels   = static_cast<int*>(this->host_ptrs[3]);
+    auto& dw       = this->pparam.dw;
+    auto& dh       = this->pparam.dh;
+    auto& width    = this->pparam.width;
+    auto& height   = this->pparam.height;
+    auto& ratio    = this->pparam.ratio;
+    for (int i = 0; i < num_dets[0]; i++) {
+        float* ptr = boxes + i * 4;
+
+        float x0 = *ptr++ - dw;
+        float y0 = *ptr++ - dh;
+        float x1 = *ptr++ - dw;
+        float y1 = *ptr - dh;
+
+        x0 = clamp(x0 * ratio, 0.f, width);
+        y0 = clamp(y0 * ratio, 0.f, height);
+        x1 = clamp(x1 * ratio, 0.f, width);
+        y1 = clamp(y1 * ratio, 0.f, height);
+        Object obj;
+        obj.rect.x      = x0;
+        obj.rect.y      = y0;
+        obj.rect.width  = x1 - x0;
+        obj.rect.height = y1 - y0;
+        obj.prob        = *(scores + i);
+        obj.label       = *(labels + i);
+        objs.push_back(obj);
+    }
 }
 
-void YOLOv8::draw_objects(
-	const cv::Mat& image,
-	cv::Mat& res,
-	const std::vector<Object>& objs,
-	const std::vector<std::string>& CLASS_NAMES,
-	const std::vector<std::vector<unsigned int>>& COLORS
-)
+void YOLOv8::draw_objects(const cv::Mat&                                image,
+                          cv::Mat&                                      res,
+                          const std::vector<Object>&                    objs,
+                          const std::vector<std::string>&               CLASS_NAMES,
+                          const std::vector<std::vector<unsigned int>>& COLORS)
 {
-	res = image.clone();
-	for (auto& obj : objs)
-	{
-		cv::Scalar color = cv::Scalar(
-			COLORS[obj.label][0],
-			COLORS[obj.label][1],
-			COLORS[obj.label][2]
-		);
-		cv::rectangle(
-			res,
-			obj.rect,
-			color,
-			2
-		);
-
-		char text[256];
-		sprintf(
-			text,
-			"%s %.1f%%",
-			CLASS_NAMES[obj.label].c_str(),
-			obj.prob * 100
-		);
-
-		int baseLine = 0;
-		cv::Size label_size = cv::getTextSize(
-			text,
-			cv::FONT_HERSHEY_SIMPLEX,
-			0.4,
-			1,
-			&baseLine
-		);
-
-		int x = (int)obj.rect.x;
-		int y = (int)obj.rect.y + 1;
-
-		if (y > res.rows)
-			y = res.rows;
-
-		cv::rectangle(
-			res,
-			cv::Rect(x, y, label_size.width, label_size.height + baseLine),
-			{ 0, 0, 255 },
-			-1
-		);
-
-		cv::putText(
-			res,
-			text,
-			cv::Point(x, y + label_size.height),
-			cv::FONT_HERSHEY_SIMPLEX,
-			0.4,
-			{ 255, 255, 255 },
-			1
-		);
-	}
+    res = image.clone();
+    for (auto& obj : objs) {
+        cv::Scalar color = cv::Scalar(COLORS[obj.label][0], COLORS[obj.label][1], COLORS[obj.label][2]);
+        cv::rectangle(res, obj.rect, color, 2);
+
+        char text[256];
+        sprintf(text, "%s %.1f%%", CLASS_NAMES[obj.label].c_str(), obj.prob * 100);
+
+        int      baseLine   = 0;
+        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine);
+
+        int x = (int)obj.rect.x;
+        int y = (int)obj.rect.y + 1;
+
+        if (y > res.rows)
+            y = res.rows;
+
+        cv::rectangle(res, cv::Rect(x, y, label_size.width, label_size.height + baseLine), {0, 0, 255}, -1);
+
+        cv::putText(res, text, cv::Point(x, y + label_size.height), cv::FONT_HERSHEY_SIMPLEX, 0.4, {255, 255, 255}, 1);
+    }
 }
-#endif //DETECT_END2END_YOLOV8_HPP
+#endif  // DETECT_END2END_YOLOV8_HPP
diff --git a/csrc/detect/end2end/main.cpp b/csrc/detect/end2end/main.cpp
index 6730c58..aab2156 100644
--- a/csrc/detect/end2end/main.cpp
+++ b/csrc/detect/end2end/main.cpp
@@ -2,160 +2,119 @@
 // Created by ubuntu on 1/20/23.
 //
 #include "chrono"
-#include "yolov8.hpp"
 #include "opencv2/opencv.hpp"
+#include "yolov8.hpp"
 
 const std::vector<std::string> CLASS_NAMES = {
-	"person", "bicycle", "car", "motorcycle", "airplane", "bus",
-	"train", "truck", "boat", "traffic light", "fire hydrant",
-	"stop sign", "parking meter", "bench", "bird", "cat",
-	"dog", "horse", "sheep", "cow", "elephant",
-	"bear", "zebra", "giraffe", "backpack", "umbrella",
-	"handbag", "tie", "suitcase", "frisbee", "skis",
-	"snowboard", "sports ball", "kite", "baseball bat", "baseball glove",
-	"skateboard", "surfboard", "tennis racket", "bottle", "wine glass",
-	"cup", "fork", "knife", "spoon", "bowl",
-	"banana", "apple", "sandwich", "orange", "broccoli",
-	"carrot", "hot dog", "pizza", "donut", "cake",
-	"chair", "couch", "potted plant", "bed", "dining table",
-	"toilet", "tv", "laptop", "mouse", "remote",
-	"keyboard", "cell phone", "microwave", "oven",
-	"toaster", "sink", "refrigerator", "book", "clock", "vase",
-	"scissors", "teddy bear", "hair drier", "toothbrush" };
+    "person",         "bicycle",    "car",           "motorcycle",    "airplane",     "bus",           "train",
+    "truck",          "boat",       "traffic light", "fire hydrant",  "stop sign",    "parking meter", "bench",
+    "bird",           "cat",        "dog",           "horse",         "sheep",        "cow",           "elephant",
+    "bear",           "zebra",      "giraffe",       "backpack",      "umbrella",     "handbag",       "tie",
+    "suitcase",       "frisbee",    "skis",          "snowboard",     "sports ball",  "kite",          "baseball bat",
+    "baseball glove", "skateboard", "surfboard",     "tennis racket", "bottle",       "wine glass",    "cup",
+    "fork",           "knife",      "spoon",         "bowl",          "banana",       "apple",         "sandwich",
+    "orange",         "broccoli",   "carrot",        "hot dog",       "pizza",        "donut",         "cake",
+    "chair",          "couch",      "potted plant",  "bed",           "dining table", "toilet",        "tv",
+    "laptop",         "mouse",      "remote",        "keyboard",      "cell phone",   "microwave",     "oven",
+    "toaster",        "sink",       "refrigerator",  "book",          "clock",        "vase",          "scissors",
+    "teddy bear",     "hair drier", "toothbrush"};
 
 const std::vector<std::vector<unsigned int>> COLORS = {
-	{ 0, 114, 189 }, { 217, 83, 25 }, { 237, 177, 32 },
-	{ 126, 47, 142 }, { 119, 172, 48 }, { 77, 190, 238 },
-	{ 162, 20, 47 }, { 76, 76, 76 }, { 153, 153, 153 },
-	{ 255, 0, 0 }, { 255, 128, 0 }, { 191, 191, 0 },
-	{ 0, 255, 0 }, { 0, 0, 255 }, { 170, 0, 255 },
-	{ 85, 85, 0 }, { 85, 170, 0 }, { 85, 255, 0 },
-	{ 170, 85, 0 }, { 170, 170, 0 }, { 170, 255, 0 },
-	{ 255, 85, 0 }, { 255, 170, 0 }, { 255, 255, 0 },
-	{ 0, 85, 128 }, { 0, 170, 128 }, { 0, 255, 128 },
-	{ 85, 0, 128 }, { 85, 85, 128 }, { 85, 170, 128 },
-	{ 85, 255, 128 }, { 170, 0, 128 }, { 170, 85, 128 },
-	{ 170, 170, 128 }, { 170, 255, 128 }, { 255, 0, 128 },
-	{ 255, 85, 128 }, { 255, 170, 128 }, { 255, 255, 128 },
-	{ 0, 85, 255 }, { 0, 170, 255 }, { 0, 255, 255 },
-	{ 85, 0, 255 }, { 85, 85, 255 }, { 85, 170, 255 },
-	{ 85, 255, 255 }, { 170, 0, 255 }, { 170, 85, 255 },
-	{ 170, 170, 255 }, { 170, 255, 255 }, { 255, 0, 255 },
-	{ 255, 85, 255 }, { 255, 170, 255 }, { 85, 0, 0 },
-	{ 128, 0, 0 }, { 170, 0, 0 }, { 212, 0, 0 },
-	{ 255, 0, 0 }, { 0, 43, 0 }, { 0, 85, 0 },
-	{ 0, 128, 0 }, { 0, 170, 0 }, { 0, 212, 0 },
-	{ 0, 255, 0 }, { 0, 0, 43 }, { 0, 0, 85 },
-	{ 0, 0, 128 }, { 0, 0, 170 }, { 0, 0, 212 },
-	{ 0, 0, 255 }, { 0, 0, 0 }, { 36, 36, 36 },
-	{ 73, 73, 73 }, { 109, 109, 109 }, { 146, 146, 146 },
-	{ 182, 182, 182 }, { 219, 219, 219 }, { 0, 114, 189 },
-	{ 80, 183, 189 }, { 128, 128, 0 }
-};
+    {0, 114, 189},   {217, 83, 25},   {237, 177, 32},  {126, 47, 142},  {119, 172, 48},  {77, 190, 238},
+    {162, 20, 47},   {76, 76, 76},    {153, 153, 153}, {255, 0, 0},     {255, 128, 0},   {191, 191, 0},
+    {0, 255, 0},     {0, 0, 255},     {170, 0, 255},   {85, 85, 0},     {85, 170, 0},    {85, 255, 0},
+    {170, 85, 0},    {170, 170, 0},   {170, 255, 0},   {255, 85, 0},    {255, 170, 0},   {255, 255, 0},
+    {0, 85, 128},    {0, 170, 128},   {0, 255, 128},   {85, 0, 128},    {85, 85, 128},   {85, 170, 128},
+    {85, 255, 128},  {170, 0, 128},   {170, 85, 128},  {170, 170, 128}, {170, 255, 128}, {255, 0, 128},
+    {255, 85, 128},  {255, 170, 128}, {255, 255, 128}, {0, 85, 255},    {0, 170, 255},   {0, 255, 255},
+    {85, 0, 255},    {85, 85, 255},   {85, 170, 255},  {85, 255, 255},  {170, 0, 255},   {170, 85, 255},
+    {170, 170, 255}, {170, 255, 255}, {255, 0, 255},   {255, 85, 255},  {255, 170, 255}, {85, 0, 0},
+    {128, 0, 0},     {170, 0, 0},     {212, 0, 0},     {255, 0, 0},     {0, 43, 0},      {0, 85, 0},
+    {0, 128, 0},     {0, 170, 0},     {0, 212, 0},     {0, 255, 0},     {0, 0, 43},      {0, 0, 85},
+    {0, 0, 128},     {0, 0, 170},     {0, 0, 212},     {0, 0, 255},     {0, 0, 0},       {36, 36, 36},
+    {73, 73, 73},    {109, 109, 109}, {146, 146, 146}, {182, 182, 182}, {219, 219, 219}, {0, 114, 189},
+    {80, 183, 189},  {128, 128, 0}};
 
 int main(int argc, char** argv)
 {
-	// cuda:0
-	cudaSetDevice(0);
+    // cuda:0
+    cudaSetDevice(0);
 
-	const std::string engine_file_path{ argv[1] };
-	const std::string path{ argv[2] };
+    const std::string engine_file_path{argv[1]};
+    const std::string path{argv[2]};
 
-	std::vector<std::string> imagePathList;
-	bool isVideo{ false };
+    std::vector<std::string> imagePathList;
+    bool                     isVideo{false};
 
-	assert(argc == 3);
+    assert(argc == 3);
 
-	auto yolov8 = new YOLOv8(engine_file_path);
-	yolov8->make_pipe(true);
+    auto yolov8 = new YOLOv8(engine_file_path);
+    yolov8->make_pipe(true);
 
-	if (IsFile(path))
-	{
-		std::string suffix = path.substr(path.find_last_of('.') + 1);
-		if (
-			suffix == "jpg" ||
-				suffix == "jpeg" ||
-				suffix == "png"
-			)
-		{
-			imagePathList.push_back(path);
-		}
-		else if (
-			suffix == "mp4" ||
-				suffix == "avi" ||
-				suffix == "m4v" ||
-				suffix == "mpeg" ||
-				suffix == "mov" ||
-				suffix == "mkv"
-			)
-		{
-			isVideo = true;
-		}
-		else
-		{
-			printf("suffix %s is wrong !!!\n", suffix.c_str());
-			std::abort();
-		}
-	}
-	else if (IsFolder(path))
-	{
-		cv::glob(path + "/*.jpg", imagePathList);
-	}
+    if (IsFile(path)) {
+        std::string suffix = path.substr(path.find_last_of('.') + 1);
+        if (suffix == "jpg" || suffix == "jpeg" || suffix == "png") {
+            imagePathList.push_back(path);
+        }
+        else if (suffix == "mp4" || suffix == "avi" || suffix == "m4v" || suffix == "mpeg" || suffix == "mov"
+                 || suffix == "mkv") {
+            isVideo = true;
+        }
+        else {
+            printf("suffix %s is wrong !!!\n", suffix.c_str());
+            std::abort();
+        }
+    }
+    else if (IsFolder(path)) {
+        cv::glob(path + "/*.jpg", imagePathList);
+    }
 
-	cv::Mat res, image;
-	cv::Size size = cv::Size{ 640, 640 };
-	std::vector<Object> objs;
+    cv::Mat             res, image;
+    cv::Size            size = cv::Size{640, 640};
+    std::vector<Object> objs;
 
-	cv::namedWindow("result", cv::WINDOW_AUTOSIZE);
+    cv::namedWindow("result", cv::WINDOW_AUTOSIZE);
 
-	if (isVideo)
-	{
-		cv::VideoCapture cap(path);
+    if (isVideo) {
+        cv::VideoCapture cap(path);
 
-		if (!cap.isOpened())
-		{
-			printf("can not open %s\n", path.c_str());
-			return -1;
-		}
-		while (cap.read(image))
-		{
-			objs.clear();
-			yolov8->copy_from_Mat(image, size);
-			auto start = std::chrono::system_clock::now();
-			yolov8->infer();
-			auto end = std::chrono::system_clock::now();
-			yolov8->postprocess(objs);
-			yolov8->draw_objects(image, res, objs, CLASS_NAMES, COLORS);
-			auto tc = (double)
-				std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.;
-			printf("cost %2.4lf ms\n", tc);
-			cv::imshow("result", res);
-			if (cv::waitKey(10) == 'q')
-			{
-				break;
-			}
-		}
-	}
-	else
-	{
-		for (auto& path : imagePathList)
-		{
-			objs.clear();
-			image = cv::imread(path);
-			yolov8->copy_from_Mat(image, size);
-			auto start = std::chrono::system_clock::now();
-			yolov8->infer();
-			auto end = std::chrono::system_clock::now();
-			yolov8->postprocess(objs);
-			yolov8->draw_objects(image, res, objs, CLASS_NAMES, COLORS);
-			auto tc = (double)
-				std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.;
-			printf("cost %2.4lf ms\n", tc);
-			cv::imshow("result", res);
-			cv::waitKey(0);
-		}
-	}
-	cv::destroyAllWindows();
-	delete yolov8;
-	return 0;
+        if (!cap.isOpened()) {
+            printf("can not open %s\n", path.c_str());
+            return -1;
+        }
+        while (cap.read(image)) {
+            objs.clear();
+            yolov8->copy_from_Mat(image, size);
+            auto start = std::chrono::system_clock::now();
+            yolov8->infer();
+            auto end = std::chrono::system_clock::now();
+            yolov8->postprocess(objs);
+            yolov8->draw_objects(image, res, objs, CLASS_NAMES, COLORS);
+            auto tc = (double)std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.;
+            printf("cost %2.4lf ms\n", tc);
+            cv::imshow("result", res);
+            if (cv::waitKey(10) == 'q') {
+                break;
+            }
+        }
+    }
+    else {
+        for (auto& path : imagePathList) {
+            objs.clear();
+            image = cv::imread(path);
+            yolov8->copy_from_Mat(image, size);
+            auto start = std::chrono::system_clock::now();
+            yolov8->infer();
+            auto end = std::chrono::system_clock::now();
+            yolov8->postprocess(objs);
+            yolov8->draw_objects(image, res, objs, CLASS_NAMES, COLORS);
+            auto tc = (double)std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.;
+            printf("cost %2.4lf ms\n", tc);
+            cv::imshow("result", res);
+            cv::waitKey(0);
+        }
+    }
+    cv::destroyAllWindows();
+    delete yolov8;
+    return 0;
 }
diff --git a/csrc/detect/normal/include/common.hpp b/csrc/detect/normal/include/common.hpp
index 4ad0ae6..e590da2 100644
--- a/csrc/detect/normal/include/common.hpp
+++ b/csrc/detect/normal/include/common.hpp
@@ -4,153 +4,138 @@
 
 #ifndef DETECT_NORMAL_COMMON_HPP
 #define DETECT_NORMAL_COMMON_HPP
+#include "NvInfer.h"
 #include "opencv2/opencv.hpp"
 #include <sys/stat.h>
 #include <unistd.h>
-#include "NvInfer.h"
 
-#define CHECK(call)                                   \
-do                                                    \
-{                                                     \
-    const cudaError_t error_code = call;              \
-    if (error_code != cudaSuccess)                    \
-    {                                                 \
-        printf("CUDA Error:\n");                      \
-        printf("    File:       %s\n", __FILE__);     \
-        printf("    Line:       %d\n", __LINE__);     \
-        printf("    Error code: %d\n", error_code);   \
-        printf("    Error text: %s\n",                \
-            cudaGetErrorString(error_code));          \
-        exit(1);                                      \
-    }                                                 \
-} while (0)
+#define CHECK(call)                                                                                                    \
+    do {                                                                                                               \
+        const cudaError_t error_code = call;                                                                           \
+        if (error_code != cudaSuccess) {                                                                               \
+            printf("CUDA Error:\n");                                                                                   \
+            printf("    File:       %s\n", __FILE__);                                                                  \
+            printf("    Line:       %d\n", __LINE__);                                                                  \
+            printf("    Error code: %d\n", error_code);                                                                \
+            printf("    Error text: %s\n", cudaGetErrorString(error_code));                                            \
+            exit(1);                                                                                                   \
+        }                                                                                                              \
+    } while (0)
 
-class Logger : public nvinfer1::ILogger
-{
+class Logger: public nvinfer1::ILogger {
 public:
-	nvinfer1::ILogger::Severity reportableSeverity;
+    nvinfer1::ILogger::Severity reportableSeverity;
 
-	explicit Logger(nvinfer1::ILogger::Severity severity = nvinfer1::ILogger::Severity::kINFO) :
-		reportableSeverity(severity)
-	{
-	}
+    explicit Logger(nvinfer1::ILogger::Severity severity = nvinfer1::ILogger::Severity::kINFO):
+        reportableSeverity(severity)
+    {
+    }
 
-	void log(nvinfer1::ILogger::Severity severity, const char* msg) noexcept override
-	{
-		if (severity > reportableSeverity)
-		{
-			return;
-		}
-		switch (severity)
-		{
-		case nvinfer1::ILogger::Severity::kINTERNAL_ERROR:
-			std::cerr << "INTERNAL_ERROR: ";
-			break;
-		case nvinfer1::ILogger::Severity::kERROR:
-			std::cerr << "ERROR: ";
-			break;
-		case nvinfer1::ILogger::Severity::kWARNING:
-			std::cerr << "WARNING: ";
-			break;
-		case nvinfer1::ILogger::Severity::kINFO:
-			std::cerr << "INFO: ";
-			break;
-		default:
-			std::cerr << "VERBOSE: ";
-			break;
-		}
-		std::cerr << msg << std::endl;
-	}
+    void log(nvinfer1::ILogger::Severity severity, const char* msg) noexcept override
+    {
+        if (severity > reportableSeverity) {
+            return;
+        }
+        switch (severity) {
+            case nvinfer1::ILogger::Severity::kINTERNAL_ERROR:
+                std::cerr << "INTERNAL_ERROR: ";
+                break;
+            case nvinfer1::ILogger::Severity::kERROR:
+                std::cerr << "ERROR: ";
+                break;
+            case nvinfer1::ILogger::Severity::kWARNING:
+                std::cerr << "WARNING: ";
+                break;
+            case nvinfer1::ILogger::Severity::kINFO:
+                std::cerr << "INFO: ";
+                break;
+            default:
+                std::cerr << "VERBOSE: ";
+                break;
+        }
+        std::cerr << msg << std::endl;
+    }
 };
 
 inline int get_size_by_dims(const nvinfer1::Dims& dims)
 {
-	int size = 1;
-	for (int i = 0; i < dims.nbDims; i++)
-	{
-		size *= dims.d[i];
-	}
-	return size;
+    int size = 1;
+    for (int i = 0; i < dims.nbDims; i++) {
+        size *= dims.d[i];
+    }
+    return size;
 }
 
 inline int type_to_size(const nvinfer1::DataType& dataType)
 {
-	switch (dataType)
-	{
-	case nvinfer1::DataType::kFLOAT:
-		return 4;
-	case nvinfer1::DataType::kHALF:
-		return 2;
-	case nvinfer1::DataType::kINT32:
-		return 4;
-	case nvinfer1::DataType::kINT8:
-		return 1;
-	case nvinfer1::DataType::kBOOL:
-		return 1;
-	default:
-		return 4;
-	}
+    switch (dataType) {
+        case nvinfer1::DataType::kFLOAT:
+            return 4;
+        case nvinfer1::DataType::kHALF:
+            return 2;
+        case nvinfer1::DataType::kINT32:
+            return 4;
+        case nvinfer1::DataType::kINT8:
+            return 1;
+        case nvinfer1::DataType::kBOOL:
+            return 1;
+        default:
+            return 4;
+    }
 }
 
 inline static float clamp(float val, float min, float max)
 {
-	return val > min ? (val < max ? val : max) : min;
+    return val > min ? (val < max ? val : max) : min;
 }
 
 inline bool IsPathExist(const std::string& path)
 {
-	if (access(path.c_str(), 0) == F_OK)
-	{
-		return true;
-	}
-	return false;
+    if (access(path.c_str(), 0) == F_OK) {
+        return true;
+    }
+    return false;
 }
 
 inline bool IsFile(const std::string& path)
 {
-	if (!IsPathExist(path))
-	{
-		printf("%s:%d %s not exist\n", __FILE__, __LINE__, path.c_str());
-		return false;
-	}
-	struct stat buffer;
-	return (stat(path.c_str(), &buffer) == 0 && S_ISREG(buffer.st_mode));
+    if (!IsPathExist(path)) {
+        printf("%s:%d %s not exist\n", __FILE__, __LINE__, path.c_str());
+        return false;
+    }
+    struct stat buffer;
+    return (stat(path.c_str(), &buffer) == 0 && S_ISREG(buffer.st_mode));
 }
 
 inline bool IsFolder(const std::string& path)
 {
-	if (!IsPathExist(path))
-	{
-		return false;
-	}
-	struct stat buffer;
-	return (stat(path.c_str(), &buffer) == 0 && S_ISDIR(buffer.st_mode));
+    if (!IsPathExist(path)) {
+        return false;
+    }
+    struct stat buffer;
+    return (stat(path.c_str(), &buffer) == 0 && S_ISDIR(buffer.st_mode));
 }
 
-namespace det
-{
-	struct Binding
-	{
-		size_t size = 1;
-		size_t dsize = 1;
-		nvinfer1::Dims dims;
-		std::string name;
-	};
+namespace det {
+struct Binding {
+    size_t         size  = 1;
+    size_t         dsize = 1;
+    nvinfer1::Dims dims;
+    std::string    name;
+};
 
-	struct Object
-	{
-		cv::Rect_<float> rect;
-		int label = 0;
-		float prob = 0.0;
-	};
+struct Object {
+    cv::Rect_<float> rect;
+    int              label = 0;
+    float            prob  = 0.0;
+};
 
-	struct PreParam
-	{
-		float ratio = 1.0f;
-		float dw = 0.0f;
-		float dh = 0.0f;
-		float height = 0;
-		float width = 0;
-	};
-}
-#endif //DETECT_NORMAL_COMMON_HPP
+struct PreParam {
+    float ratio  = 1.0f;
+    float dw     = 0.0f;
+    float dh     = 0.0f;
+    float height = 0;
+    float width  = 0;
+};
+}  // namespace det
+#endif  // DETECT_NORMAL_COMMON_HPP
diff --git a/csrc/detect/normal/include/yolov8.hpp b/csrc/detect/normal/include/yolov8.hpp
index 26510d0..a445a0f 100644
--- a/csrc/detect/normal/include/yolov8.hpp
+++ b/csrc/detect/normal/include/yolov8.hpp
@@ -3,495 +3,319 @@
 //
 #ifndef DETECT_NORMAL_YOLOV8_HPP
 #define DETECT_NORMAL_YOLOV8_HPP
-#include "fstream"
-#include "common.hpp"
 #include "NvInferPlugin.h"
+#include "common.hpp"
+#include "fstream"
 using namespace det;
 
-class YOLOv8
-{
+class YOLOv8 {
 public:
-	explicit YOLOv8(const std::string& engine_file_path);
-	~YOLOv8();
-
-	void make_pipe(bool warmup = true);
-	void copy_from_Mat(const cv::Mat& image);
-	void copy_from_Mat(const cv::Mat& image, cv::Size& size);
-	void letterbox(
-		const cv::Mat& image,
-		cv::Mat& out,
-		cv::Size& size
-	);
-	void infer();
-	void postprocess(
-		std::vector<Object>& objs,
-		float score_thres = 0.25f,
-		float iou_thres = 0.65f,
-		int topk = 100,
-		int num_labels = 80
-	);
-	static void draw_objects(
-		const cv::Mat& image,
-		cv::Mat& res,
-		const std::vector<Object>& objs,
-		const std::vector<std::string>& CLASS_NAMES,
-		const std::vector<std::vector<unsigned int>>& COLORS
-	);
-	int num_bindings;
-	int num_inputs = 0;
-	int num_outputs = 0;
-	std::vector<Binding> input_bindings;
-	std::vector<Binding> output_bindings;
-	std::vector<void*> host_ptrs;
-	std::vector<void*> device_ptrs;
-
-	PreParam pparam;
-private:
-	nvinfer1::ICudaEngine* engine = nullptr;
-	nvinfer1::IRuntime* runtime = nullptr;
-	nvinfer1::IExecutionContext* context = nullptr;
-	cudaStream_t stream = nullptr;
-	Logger gLogger{ nvinfer1::ILogger::Severity::kERROR };
+    explicit YOLOv8(const std::string& engine_file_path);
+    ~YOLOv8();
+
+    void                 make_pipe(bool warmup = true);
+    void                 copy_from_Mat(const cv::Mat& image);
+    void                 copy_from_Mat(const cv::Mat& image, cv::Size& size);
+    void                 letterbox(const cv::Mat& image, cv::Mat& out, cv::Size& size);
+    void                 infer();
+    void                 postprocess(std::vector<Object>& objs,
+                                     float                score_thres = 0.25f,
+                                     float                iou_thres   = 0.65f,
+                                     int                  topk        = 100,
+                                     int                  num_labels  = 80);
+    static void          draw_objects(const cv::Mat&                                image,
+                                      cv::Mat&                                      res,
+                                      const std::vector<Object>&                    objs,
+                                      const std::vector<std::string>&               CLASS_NAMES,
+                                      const std::vector<std::vector<unsigned int>>& COLORS);
+    int                  num_bindings;
+    int                  num_inputs  = 0;
+    int                  num_outputs = 0;
+    std::vector<Binding> input_bindings;
+    std::vector<Binding> output_bindings;
+    std::vector<void*>   host_ptrs;
+    std::vector<void*>   device_ptrs;
+
+    PreParam pparam;
 
+private:
+    nvinfer1::ICudaEngine*       engine  = nullptr;
+    nvinfer1::IRuntime*          runtime = nullptr;
+    nvinfer1::IExecutionContext* context = nullptr;
+    cudaStream_t                 stream  = nullptr;
+    Logger                       gLogger{nvinfer1::ILogger::Severity::kERROR};
 };
 
 YOLOv8::YOLOv8(const std::string& engine_file_path)
 {
-	std::ifstream file(engine_file_path, std::ios::binary);
-	assert(file.good());
-	file.seekg(0, std::ios::end);
-	auto size = file.tellg();
-	file.seekg(0, std::ios::beg);
-	char* trtModelStream = new char[size];
-	assert(trtModelStream);
-	file.read(trtModelStream, size);
-	file.close();
-	initLibNvInferPlugins(&this->gLogger, "");
-	this->runtime = nvinfer1::createInferRuntime(this->gLogger);
-	assert(this->runtime != nullptr);
-
-	this->engine = this->runtime->deserializeCudaEngine(trtModelStream, size);
-	assert(this->engine != nullptr);
-	delete[] trtModelStream;
-	this->context = this->engine->createExecutionContext();
-
-	assert(this->context != nullptr);
-	cudaStreamCreate(&this->stream);
-	this->num_bindings = this->engine->getNbBindings();
-
-	for (int i = 0; i < this->num_bindings; ++i)
-	{
-		Binding binding;
-		nvinfer1::Dims dims;
-		nvinfer1::DataType dtype = this->engine->getBindingDataType(i);
-		std::string name = this->engine->getBindingName(i);
-		binding.name = name;
-		binding.dsize = type_to_size(dtype);
-
-		bool IsInput = engine->bindingIsInput(i);
-		if (IsInput)
-		{
-			this->num_inputs += 1;
-			dims = this->engine->getProfileDimensions(
-				i,
-				0,
-				nvinfer1::OptProfileSelector::kMAX);
-			binding.size = get_size_by_dims(dims);
-			binding.dims = dims;
-			this->input_bindings.push_back(binding);
-			// set max opt shape
-			this->context->setBindingDimensions(i, dims);
-
-		}
-		else
-		{
-			dims = this->context->getBindingDimensions(i);
-			binding.size = get_size_by_dims(dims);
-			binding.dims = dims;
-			this->output_bindings.push_back(binding);
-			this->num_outputs += 1;
-		}
-	}
-
+    std::ifstream file(engine_file_path, std::ios::binary);
+    assert(file.good());
+    file.seekg(0, std::ios::end);
+    auto size = file.tellg();
+    file.seekg(0, std::ios::beg);
+    char* trtModelStream = new char[size];
+    assert(trtModelStream);
+    file.read(trtModelStream, size);
+    file.close();
+    initLibNvInferPlugins(&this->gLogger, "");
+    this->runtime = nvinfer1::createInferRuntime(this->gLogger);
+    assert(this->runtime != nullptr);
+
+    this->engine = this->runtime->deserializeCudaEngine(trtModelStream, size);
+    assert(this->engine != nullptr);
+    delete[] trtModelStream;
+    this->context = this->engine->createExecutionContext();
+
+    assert(this->context != nullptr);
+    cudaStreamCreate(&this->stream);
+    this->num_bindings = this->engine->getNbBindings();
+
+    for (int i = 0; i < this->num_bindings; ++i) {
+        Binding            binding;
+        nvinfer1::Dims     dims;
+        nvinfer1::DataType dtype = this->engine->getBindingDataType(i);
+        std::string        name  = this->engine->getBindingName(i);
+        binding.name             = name;
+        binding.dsize            = type_to_size(dtype);
+
+        bool IsInput = engine->bindingIsInput(i);
+        if (IsInput) {
+            this->num_inputs += 1;
+            dims         = this->engine->getProfileDimensions(i, 0, nvinfer1::OptProfileSelector::kMAX);
+            binding.size = get_size_by_dims(dims);
+            binding.dims = dims;
+            this->input_bindings.push_back(binding);
+            // set max opt shape
+            this->context->setBindingDimensions(i, dims);
+        }
+        else {
+            dims         = this->context->getBindingDimensions(i);
+            binding.size = get_size_by_dims(dims);
+            binding.dims = dims;
+            this->output_bindings.push_back(binding);
+            this->num_outputs += 1;
+        }
+    }
 }
 
 YOLOv8::~YOLOv8()
 {
-	this->context->destroy();
-	this->engine->destroy();
-	this->runtime->destroy();
-	cudaStreamDestroy(this->stream);
-	for (auto& ptr : this->device_ptrs)
-	{
-		CHECK(cudaFree(ptr));
-	}
-
-	for (auto& ptr : this->host_ptrs)
-	{
-		CHECK(cudaFreeHost(ptr));
-	}
-
+    this->context->destroy();
+    this->engine->destroy();
+    this->runtime->destroy();
+    cudaStreamDestroy(this->stream);
+    for (auto& ptr : this->device_ptrs) {
+        CHECK(cudaFree(ptr));
+    }
+
+    for (auto& ptr : this->host_ptrs) {
+        CHECK(cudaFreeHost(ptr));
+    }
 }
 void YOLOv8::make_pipe(bool warmup)
 {
 
-	for (auto& bindings : this->input_bindings)
-	{
-		void* d_ptr;
-		CHECK(cudaMallocAsync(
-			&d_ptr,
-			bindings.size * bindings.dsize,
-			this->stream)
-		);
-		this->device_ptrs.push_back(d_ptr);
-	}
-
-	for (auto& bindings : this->output_bindings)
-	{
-		void* d_ptr, * h_ptr;
-		size_t size = bindings.size * bindings.dsize;
-		CHECK(cudaMallocAsync(
-			&d_ptr,
-			size,
-			this->stream)
-		);
-		CHECK(cudaHostAlloc(
-			&h_ptr,
-			size,
-			0)
-		);
-		this->device_ptrs.push_back(d_ptr);
-		this->host_ptrs.push_back(h_ptr);
-	}
-
-	if (warmup)
-	{
-		for (int i = 0; i < 10; i++)
-		{
-			for (auto& bindings : this->input_bindings)
-			{
-				size_t size = bindings.size * bindings.dsize;
-				void* h_ptr = malloc(size);
-				memset(h_ptr, 0, size);
-				CHECK(cudaMemcpyAsync(
-					this->device_ptrs[0],
-					h_ptr,
-					size,
-					cudaMemcpyHostToDevice,
-					this->stream)
-				);
-				free(h_ptr);
-			}
-			this->infer();
-		}
-		printf("model warmup 10 times\n");
-
-	}
+    for (auto& bindings : this->input_bindings) {
+        void* d_ptr;
+        CHECK(cudaMallocAsync(&d_ptr, bindings.size * bindings.dsize, this->stream));
+        this->device_ptrs.push_back(d_ptr);
+    }
+
+    for (auto& bindings : this->output_bindings) {
+        void * d_ptr, *h_ptr;
+        size_t size = bindings.size * bindings.dsize;
+        CHECK(cudaMallocAsync(&d_ptr, size, this->stream));
+        CHECK(cudaHostAlloc(&h_ptr, size, 0));
+        this->device_ptrs.push_back(d_ptr);
+        this->host_ptrs.push_back(h_ptr);
+    }
+
+    if (warmup) {
+        for (int i = 0; i < 10; i++) {
+            for (auto& bindings : this->input_bindings) {
+                size_t size  = bindings.size * bindings.dsize;
+                void*  h_ptr = malloc(size);
+                memset(h_ptr, 0, size);
+                CHECK(cudaMemcpyAsync(this->device_ptrs[0], h_ptr, size, cudaMemcpyHostToDevice, this->stream));
+                free(h_ptr);
+            }
+            this->infer();
+        }
+        printf("model warmup 10 times\n");
+    }
 }
 
-void YOLOv8::letterbox(
-	const cv::Mat& image,
-	cv::Mat& out,
-	cv::Size& size
-)
+void YOLOv8::letterbox(const cv::Mat& image, cv::Mat& out, cv::Size& size)
 {
-	const float inp_h = size.height;
-	const float inp_w = size.width;
-	float height = image.rows;
-	float width = image.cols;
-
-	float r = std::min(inp_h / height, inp_w / width);
-	int padw = std::round(width * r);
-	int padh = std::round(height * r);
-
-	cv::Mat tmp;
-	if ((int)width != padw || (int)height != padh)
-	{
-		cv::resize(
-			image,
-			tmp,
-			cv::Size(padw, padh)
-		);
-	}
-	else
-	{
-		tmp = image.clone();
-	}
-
-	float dw = inp_w - padw;
-	float dh = inp_h - padh;
-
-	dw /= 2.0f;
-	dh /= 2.0f;
-	int top = int(std::round(dh - 0.1f));
-	int bottom = int(std::round(dh + 0.1f));
-	int left = int(std::round(dw - 0.1f));
-	int right = int(std::round(dw + 0.1f));
-
-	cv::copyMakeBorder(
-		tmp,
-		tmp,
-		top,
-		bottom,
-		left,
-		right,
-		cv::BORDER_CONSTANT,
-		{ 114, 114, 114 }
-	);
-
-	cv::dnn::blobFromImage(tmp,
-		out,
-		1 / 255.f,
-		cv::Size(),
-		cv::Scalar(0, 0, 0),
-		true,
-		false,
-		CV_32F
-	);
-	this->pparam.ratio = 1 / r;
-	this->pparam.dw = dw;
-	this->pparam.dh = dh;
-	this->pparam.height = height;
-	this->pparam.width = width;;
+    const float inp_h  = size.height;
+    const float inp_w  = size.width;
+    float       height = image.rows;
+    float       width  = image.cols;
+
+    float r    = std::min(inp_h / height, inp_w / width);
+    int   padw = std::round(width * r);
+    int   padh = std::round(height * r);
+
+    cv::Mat tmp;
+    if ((int)width != padw || (int)height != padh) {
+        cv::resize(image, tmp, cv::Size(padw, padh));
+    }
+    else {
+        tmp = image.clone();
+    }
+
+    float dw = inp_w - padw;
+    float dh = inp_h - padh;
+
+    dw /= 2.0f;
+    dh /= 2.0f;
+    int top    = int(std::round(dh - 0.1f));
+    int bottom = int(std::round(dh + 0.1f));
+    int left   = int(std::round(dw - 0.1f));
+    int right  = int(std::round(dw + 0.1f));
+
+    cv::copyMakeBorder(tmp, tmp, top, bottom, left, right, cv::BORDER_CONSTANT, {114, 114, 114});
+
+    cv::dnn::blobFromImage(tmp, out, 1 / 255.f, cv::Size(), cv::Scalar(0, 0, 0), true, false, CV_32F);
+    this->pparam.ratio  = 1 / r;
+    this->pparam.dw     = dw;
+    this->pparam.dh     = dh;
+    this->pparam.height = height;
+    this->pparam.width  = width;
+    ;
 }
 
 void YOLOv8::copy_from_Mat(const cv::Mat& image)
 {
-	cv::Mat nchw;
-	auto& in_binding = this->input_bindings[0];
-	auto width = in_binding.dims.d[3];
-	auto height = in_binding.dims.d[2];
-	cv::Size size{ width, height };
-	this->letterbox(
-		image,
-		nchw,
-		size
-	);
-
-	this->context->setBindingDimensions(
-		0,
-		nvinfer1::Dims
-			{
-				4,
-				{ 1, 3, height, width }
-			}
-	);
-
-	CHECK(cudaMemcpyAsync(
-		this->device_ptrs[0],
-		nchw.ptr<float>(),
-		nchw.total() * nchw.elemSize(),
-		cudaMemcpyHostToDevice,
-		this->stream)
-	);
+    cv::Mat  nchw;
+    auto&    in_binding = this->input_bindings[0];
+    auto     width      = in_binding.dims.d[3];
+    auto     height     = in_binding.dims.d[2];
+    cv::Size size{width, height};
+    this->letterbox(image, nchw, size);
+
+    this->context->setBindingDimensions(0, nvinfer1::Dims{4, {1, 3, height, width}});
+
+    CHECK(cudaMemcpyAsync(
+        this->device_ptrs[0], nchw.ptr<float>(), nchw.total() * nchw.elemSize(), cudaMemcpyHostToDevice, this->stream));
 }
 
 void YOLOv8::copy_from_Mat(const cv::Mat& image, cv::Size& size)
 {
-	cv::Mat nchw;
-	this->letterbox(
-		image,
-		nchw,
-		size
-	);
-	this->context->setBindingDimensions(
-		0,
-		nvinfer1::Dims
-			{ 4,
-			  { 1, 3, size.height, size.width }
-			}
-	);
-	CHECK(cudaMemcpyAsync(
-		this->device_ptrs[0],
-		nchw.ptr<float>(),
-		nchw.total() * nchw.elemSize(),
-		cudaMemcpyHostToDevice,
-		this->stream)
-	);
+    cv::Mat nchw;
+    this->letterbox(image, nchw, size);
+    this->context->setBindingDimensions(0, nvinfer1::Dims{4, {1, 3, size.height, size.width}});
+    CHECK(cudaMemcpyAsync(
+        this->device_ptrs[0], nchw.ptr<float>(), nchw.total() * nchw.elemSize(), cudaMemcpyHostToDevice, this->stream));
 }
 
 void YOLOv8::infer()
 {
 
-	this->context->enqueueV2(
-		this->device_ptrs.data(),
-		this->stream,
-		nullptr
-	);
-	for (int i = 0; i < this->num_outputs; i++)
-	{
-		size_t osize = this->output_bindings[i].size * this->output_bindings[i].dsize;
-		CHECK(cudaMemcpyAsync(this->host_ptrs[i],
-			this->device_ptrs[i + this->num_inputs],
-			osize,
-			cudaMemcpyDeviceToHost,
-			this->stream)
-		);
-
-	}
-	cudaStreamSynchronize(this->stream);
-
+    this->context->enqueueV2(this->device_ptrs.data(), this->stream, nullptr);
+    for (int i = 0; i < this->num_outputs; i++) {
+        size_t osize = this->output_bindings[i].size * this->output_bindings[i].dsize;
+        CHECK(cudaMemcpyAsync(
+            this->host_ptrs[i], this->device_ptrs[i + this->num_inputs], osize, cudaMemcpyDeviceToHost, this->stream));
+    }
+    cudaStreamSynchronize(this->stream);
 }
 
-void YOLOv8::postprocess(
-	std::vector<Object>& objs,
-	float score_thres,
-	float iou_thres,
-	int topk,
-	int num_labels
-)
+void YOLOv8::postprocess(std::vector<Object>& objs, float score_thres, float iou_thres, int topk, int num_labels)
 {
-	objs.clear();
-	auto num_channels = this->output_bindings[0].dims.d[1];
-	auto num_anchors = this->output_bindings[0].dims.d[2];
-
-	auto& dw = this->pparam.dw;
-	auto& dh = this->pparam.dh;
-	auto& width = this->pparam.width;
-	auto& height = this->pparam.height;
-	auto& ratio = this->pparam.ratio;
-
-	std::vector<cv::Rect> bboxes;
-	std::vector<float> scores;
-	std::vector<int> labels;
-	std::vector<int> indices;
-
-	cv::Mat output = cv::Mat(
-		num_channels,
-		num_anchors,
-		CV_32F,
-		static_cast<float*>(this->host_ptrs[0])
-	);
-	output = output.t();
-	for (int i = 0; i < num_anchors; i++)
-	{
-		auto row_ptr = output.row(i).ptr<float>();
-		auto bboxes_ptr = row_ptr;
-		auto scores_ptr = row_ptr + 4;
-		auto max_s_ptr = std::max_element(scores_ptr, scores_ptr + num_labels);
-		float score = *max_s_ptr;
-		if (score > score_thres)
-		{
-			float x = *bboxes_ptr++ - dw;
-			float y = *bboxes_ptr++ - dh;
-			float w = *bboxes_ptr++;
-			float h = *bboxes_ptr;
-
-			float x0 = clamp((x - 0.5f * w) * ratio, 0.f, width);
-			float y0 = clamp((y - 0.5f * h) * ratio, 0.f, height);
-			float x1 = clamp((x + 0.5f * w) * ratio, 0.f, width);
-			float y1 = clamp((y + 0.5f * h) * ratio, 0.f, height);
-
-			int label = max_s_ptr - scores_ptr;
-			cv::Rect_<float> bbox;
-			bbox.x = x0;
-			bbox.y = y0;
-			bbox.width = x1 - x0;
-			bbox.height = y1 - y0;
-
-			bboxes.push_back(bbox);
-			labels.push_back(label);
-			scores.push_back(score);
-		}
-	}
+    objs.clear();
+    auto num_channels = this->output_bindings[0].dims.d[1];
+    auto num_anchors  = this->output_bindings[0].dims.d[2];
+
+    auto& dw     = this->pparam.dw;
+    auto& dh     = this->pparam.dh;
+    auto& width  = this->pparam.width;
+    auto& height = this->pparam.height;
+    auto& ratio  = this->pparam.ratio;
+
+    std::vector<cv::Rect> bboxes;
+    std::vector<float>    scores;
+    std::vector<int>      labels;
+    std::vector<int>      indices;
+
+    cv::Mat output = cv::Mat(num_channels, num_anchors, CV_32F, static_cast<float*>(this->host_ptrs[0]));
+    output         = output.t();
+    for (int i = 0; i < num_anchors; i++) {
+        auto  row_ptr    = output.row(i).ptr<float>();
+        auto  bboxes_ptr = row_ptr;
+        auto  scores_ptr = row_ptr + 4;
+        auto  max_s_ptr  = std::max_element(scores_ptr, scores_ptr + num_labels);
+        float score      = *max_s_ptr;
+        if (score > score_thres) {
+            float x = *bboxes_ptr++ - dw;
+            float y = *bboxes_ptr++ - dh;
+            float w = *bboxes_ptr++;
+            float h = *bboxes_ptr;
+
+            float x0 = clamp((x - 0.5f * w) * ratio, 0.f, width);
+            float y0 = clamp((y - 0.5f * h) * ratio, 0.f, height);
+            float x1 = clamp((x + 0.5f * w) * ratio, 0.f, width);
+            float y1 = clamp((y + 0.5f * h) * ratio, 0.f, height);
+
+            int              label = max_s_ptr - scores_ptr;
+            cv::Rect_<float> bbox;
+            bbox.x      = x0;
+            bbox.y      = y0;
+            bbox.width  = x1 - x0;
+            bbox.height = y1 - y0;
+
+            bboxes.push_back(bbox);
+            labels.push_back(label);
+            scores.push_back(score);
+        }
+    }
 
 #ifdef BATCHED_NMS
-	cv::dnn::NMSBoxesBatched(
-		bboxes,
-		scores,
-		labels,
-		score_thres,
-		iou_thres,
-		indices
-	);
+    cv::dnn::NMSBoxesBatched(bboxes, scores, labels, score_thres, iou_thres, indices);
 #else
-	cv::dnn::NMSBoxes(
-		bboxes,
-		scores,
-		score_thres,
-		iou_thres,
-		indices
-	);
+    cv::dnn::NMSBoxes(bboxes, scores, score_thres, iou_thres, indices);
 #endif
 
-	int cnt = 0;
-	for (auto& i : indices)
-	{
-		if (cnt >= topk)
-		{
-			break;
-		}
-		Object obj;
-		obj.rect = bboxes[i];
-		obj.prob = scores[i];
-		obj.label = labels[i];
-		objs.push_back(obj);
-		cnt += 1;
-	}
+    int cnt = 0;
+    for (auto& i : indices) {
+        if (cnt >= topk) {
+            break;
+        }
+        Object obj;
+        obj.rect  = bboxes[i];
+        obj.prob  = scores[i];
+        obj.label = labels[i];
+        objs.push_back(obj);
+        cnt += 1;
+    }
 }
 
-void YOLOv8::draw_objects(
-	const cv::Mat& image,
-	cv::Mat& res,
-	const std::vector<Object>& objs,
-	const std::vector<std::string>& CLASS_NAMES,
-	const std::vector<std::vector<unsigned int>>& COLORS
-)
+void YOLOv8::draw_objects(const cv::Mat&                                image,
+                          cv::Mat&                                      res,
+                          const std::vector<Object>&                    objs,
+                          const std::vector<std::string>&               CLASS_NAMES,
+                          const std::vector<std::vector<unsigned int>>& COLORS)
 {
-	res = image.clone();
-	for (auto& obj : objs)
-	{
-		cv::Scalar color = cv::Scalar(
-			COLORS[obj.label][0],
-			COLORS[obj.label][1],
-			COLORS[obj.label][2]
-		);
-		cv::rectangle(
-			res,
-			obj.rect,
-			color,
-			2
-		);
-
-		char text[256];
-		sprintf(
-			text,
-			"%s %.1f%%",
-			CLASS_NAMES[obj.label].c_str(),
-			obj.prob * 100
-		);
-
-		int baseLine = 0;
-		cv::Size label_size = cv::getTextSize(
-			text,
-			cv::FONT_HERSHEY_SIMPLEX,
-			0.4,
-			1,
-			&baseLine
-		);
-
-		int x = (int)obj.rect.x;
-		int y = (int)obj.rect.y + 1;
-
-		if (y > res.rows)
-			y = res.rows;
-
-		cv::rectangle(
-			res,
-			cv::Rect(x, y, label_size.width, label_size.height + baseLine),
-			{ 0, 0, 255 },
-			-1
-		);
-
-		cv::putText(
-			res,
-			text,
-			cv::Point(x, y + label_size.height),
-			cv::FONT_HERSHEY_SIMPLEX,
-			0.4,
-			{ 255, 255, 255 },
-			1
-		);
-	}
+    res = image.clone();
+    for (auto& obj : objs) {
+        cv::Scalar color = cv::Scalar(COLORS[obj.label][0], COLORS[obj.label][1], COLORS[obj.label][2]);
+        cv::rectangle(res, obj.rect, color, 2);
+
+        char text[256];
+        sprintf(text, "%s %.1f%%", CLASS_NAMES[obj.label].c_str(), obj.prob * 100);
+
+        int      baseLine   = 0;
+        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine);
+
+        int x = (int)obj.rect.x;
+        int y = (int)obj.rect.y + 1;
+
+        if (y > res.rows)
+            y = res.rows;
+
+        cv::rectangle(res, cv::Rect(x, y, label_size.width, label_size.height + baseLine), {0, 0, 255}, -1);
+
+        cv::putText(res, text, cv::Point(x, y + label_size.height), cv::FONT_HERSHEY_SIMPLEX, 0.4, {255, 255, 255}, 1);
+    }
 }
-#endif //DETECT_NORMAL_YOLOV8_HPP
+#endif  // DETECT_NORMAL_YOLOV8_HPP
diff --git a/csrc/detect/normal/main.cpp b/csrc/detect/normal/main.cpp
index b1226a5..e0f2f22 100644
--- a/csrc/detect/normal/main.cpp
+++ b/csrc/detect/normal/main.cpp
@@ -2,165 +2,124 @@
 // Created by ubuntu on 1/20/23.
 //
 #include "chrono"
-#include "yolov8.hpp"
 #include "opencv2/opencv.hpp"
+#include "yolov8.hpp"
 
 const std::vector<std::string> CLASS_NAMES = {
-	"person", "bicycle", "car", "motorcycle", "airplane", "bus",
-	"train", "truck", "boat", "traffic light", "fire hydrant",
-	"stop sign", "parking meter", "bench", "bird", "cat",
-	"dog", "horse", "sheep", "cow", "elephant",
-	"bear", "zebra", "giraffe", "backpack", "umbrella",
-	"handbag", "tie", "suitcase", "frisbee", "skis",
-	"snowboard", "sports ball", "kite", "baseball bat", "baseball glove",
-	"skateboard", "surfboard", "tennis racket", "bottle", "wine glass",
-	"cup", "fork", "knife", "spoon", "bowl",
-	"banana", "apple", "sandwich", "orange", "broccoli",
-	"carrot", "hot dog", "pizza", "donut", "cake",
-	"chair", "couch", "potted plant", "bed", "dining table",
-	"toilet", "tv", "laptop", "mouse", "remote",
-	"keyboard", "cell phone", "microwave", "oven",
-	"toaster", "sink", "refrigerator", "book", "clock", "vase",
-	"scissors", "teddy bear", "hair drier", "toothbrush" };
+    "person",         "bicycle",    "car",           "motorcycle",    "airplane",     "bus",           "train",
+    "truck",          "boat",       "traffic light", "fire hydrant",  "stop sign",    "parking meter", "bench",
+    "bird",           "cat",        "dog",           "horse",         "sheep",        "cow",           "elephant",
+    "bear",           "zebra",      "giraffe",       "backpack",      "umbrella",     "handbag",       "tie",
+    "suitcase",       "frisbee",    "skis",          "snowboard",     "sports ball",  "kite",          "baseball bat",
+    "baseball glove", "skateboard", "surfboard",     "tennis racket", "bottle",       "wine glass",    "cup",
+    "fork",           "knife",      "spoon",         "bowl",          "banana",       "apple",         "sandwich",
+    "orange",         "broccoli",   "carrot",        "hot dog",       "pizza",        "donut",         "cake",
+    "chair",          "couch",      "potted plant",  "bed",           "dining table", "toilet",        "tv",
+    "laptop",         "mouse",      "remote",        "keyboard",      "cell phone",   "microwave",     "oven",
+    "toaster",        "sink",       "refrigerator",  "book",          "clock",        "vase",          "scissors",
+    "teddy bear",     "hair drier", "toothbrush"};
 
 const std::vector<std::vector<unsigned int>> COLORS = {
-	{ 0, 114, 189 }, { 217, 83, 25 }, { 237, 177, 32 },
-	{ 126, 47, 142 }, { 119, 172, 48 }, { 77, 190, 238 },
-	{ 162, 20, 47 }, { 76, 76, 76 }, { 153, 153, 153 },
-	{ 255, 0, 0 }, { 255, 128, 0 }, { 191, 191, 0 },
-	{ 0, 255, 0 }, { 0, 0, 255 }, { 170, 0, 255 },
-	{ 85, 85, 0 }, { 85, 170, 0 }, { 85, 255, 0 },
-	{ 170, 85, 0 }, { 170, 170, 0 }, { 170, 255, 0 },
-	{ 255, 85, 0 }, { 255, 170, 0 }, { 255, 255, 0 },
-	{ 0, 85, 128 }, { 0, 170, 128 }, { 0, 255, 128 },
-	{ 85, 0, 128 }, { 85, 85, 128 }, { 85, 170, 128 },
-	{ 85, 255, 128 }, { 170, 0, 128 }, { 170, 85, 128 },
-	{ 170, 170, 128 }, { 170, 255, 128 }, { 255, 0, 128 },
-	{ 255, 85, 128 }, { 255, 170, 128 }, { 255, 255, 128 },
-	{ 0, 85, 255 }, { 0, 170, 255 }, { 0, 255, 255 },
-	{ 85, 0, 255 }, { 85, 85, 255 }, { 85, 170, 255 },
-	{ 85, 255, 255 }, { 170, 0, 255 }, { 170, 85, 255 },
-	{ 170, 170, 255 }, { 170, 255, 255 }, { 255, 0, 255 },
-	{ 255, 85, 255 }, { 255, 170, 255 }, { 85, 0, 0 },
-	{ 128, 0, 0 }, { 170, 0, 0 }, { 212, 0, 0 },
-	{ 255, 0, 0 }, { 0, 43, 0 }, { 0, 85, 0 },
-	{ 0, 128, 0 }, { 0, 170, 0 }, { 0, 212, 0 },
-	{ 0, 255, 0 }, { 0, 0, 43 }, { 0, 0, 85 },
-	{ 0, 0, 128 }, { 0, 0, 170 }, { 0, 0, 212 },
-	{ 0, 0, 255 }, { 0, 0, 0 }, { 36, 36, 36 },
-	{ 73, 73, 73 }, { 109, 109, 109 }, { 146, 146, 146 },
-	{ 182, 182, 182 }, { 219, 219, 219 }, { 0, 114, 189 },
-	{ 80, 183, 189 }, { 128, 128, 0 }
-};
+    {0, 114, 189},   {217, 83, 25},   {237, 177, 32},  {126, 47, 142},  {119, 172, 48},  {77, 190, 238},
+    {162, 20, 47},   {76, 76, 76},    {153, 153, 153}, {255, 0, 0},     {255, 128, 0},   {191, 191, 0},
+    {0, 255, 0},     {0, 0, 255},     {170, 0, 255},   {85, 85, 0},     {85, 170, 0},    {85, 255, 0},
+    {170, 85, 0},    {170, 170, 0},   {170, 255, 0},   {255, 85, 0},    {255, 170, 0},   {255, 255, 0},
+    {0, 85, 128},    {0, 170, 128},   {0, 255, 128},   {85, 0, 128},    {85, 85, 128},   {85, 170, 128},
+    {85, 255, 128},  {170, 0, 128},   {170, 85, 128},  {170, 170, 128}, {170, 255, 128}, {255, 0, 128},
+    {255, 85, 128},  {255, 170, 128}, {255, 255, 128}, {0, 85, 255},    {0, 170, 255},   {0, 255, 255},
+    {85, 0, 255},    {85, 85, 255},   {85, 170, 255},  {85, 255, 255},  {170, 0, 255},   {170, 85, 255},
+    {170, 170, 255}, {170, 255, 255}, {255, 0, 255},   {255, 85, 255},  {255, 170, 255}, {85, 0, 0},
+    {128, 0, 0},     {170, 0, 0},     {212, 0, 0},     {255, 0, 0},     {0, 43, 0},      {0, 85, 0},
+    {0, 128, 0},     {0, 170, 0},     {0, 212, 0},     {0, 255, 0},     {0, 0, 43},      {0, 0, 85},
+    {0, 0, 128},     {0, 0, 170},     {0, 0, 212},     {0, 0, 255},     {0, 0, 0},       {36, 36, 36},
+    {73, 73, 73},    {109, 109, 109}, {146, 146, 146}, {182, 182, 182}, {219, 219, 219}, {0, 114, 189},
+    {80, 183, 189},  {128, 128, 0}};
 
 int main(int argc, char** argv)
 {
-	// cuda:0
-	cudaSetDevice(0);
+    // cuda:0
+    cudaSetDevice(0);
 
-	const std::string engine_file_path{ argv[1] };
-	const std::string path{ argv[2] };
+    const std::string engine_file_path{argv[1]};
+    const std::string path{argv[2]};
 
-	std::vector<std::string> imagePathList;
-	bool isVideo{ false };
+    std::vector<std::string> imagePathList;
+    bool                     isVideo{false};
 
-	assert(argc == 3);
+    assert(argc == 3);
 
-	auto yolov8 = new YOLOv8(engine_file_path);
-	yolov8->make_pipe(true);
+    auto yolov8 = new YOLOv8(engine_file_path);
+    yolov8->make_pipe(true);
 
-	if (IsFile(path))
-	{
-		std::string suffix = path.substr(path.find_last_of('.') + 1);
-		if (
-			suffix == "jpg" ||
-				suffix == "jpeg" ||
-				suffix == "png"
-			)
-		{
-			imagePathList.push_back(path);
-		}
-		else if (
-			suffix == "mp4" ||
-				suffix == "avi" ||
-				suffix == "m4v" ||
-				suffix == "mpeg" ||
-				suffix == "mov" ||
-				suffix == "mkv"
-			)
-		{
-			isVideo = true;
-		}
-		else
-		{
-			printf("suffix %s is wrong !!!\n", suffix.c_str());
-			std::abort();
-		}
-	}
-	else if (IsFolder(path))
-	{
-		cv::glob(path + "/*.jpg", imagePathList);
-	}
+    if (IsFile(path)) {
+        std::string suffix = path.substr(path.find_last_of('.') + 1);
+        if (suffix == "jpg" || suffix == "jpeg" || suffix == "png") {
+            imagePathList.push_back(path);
+        }
+        else if (suffix == "mp4" || suffix == "avi" || suffix == "m4v" || suffix == "mpeg" || suffix == "mov"
+                 || suffix == "mkv") {
+            isVideo = true;
+        }
+        else {
+            printf("suffix %s is wrong !!!\n", suffix.c_str());
+            std::abort();
+        }
+    }
+    else if (IsFolder(path)) {
+        cv::glob(path + "/*.jpg", imagePathList);
+    }
 
-	cv::Mat res, image;
-	cv::Size size = cv::Size{ 640, 640 };
-	int num_labels = 80;
-	int topk = 100;
-	float score_thres = 0.25f;
-	float iou_thres = 0.65f;
+    cv::Mat  res, image;
+    cv::Size size        = cv::Size{640, 640};
+    int      num_labels  = 80;
+    int      topk        = 100;
+    float    score_thres = 0.25f;
+    float    iou_thres   = 0.65f;
 
-	std::vector<Object> objs;
+    std::vector<Object> objs;
 
-	cv::namedWindow("result", cv::WINDOW_AUTOSIZE);
+    cv::namedWindow("result", cv::WINDOW_AUTOSIZE);
 
-	if (isVideo)
-	{
-		cv::VideoCapture cap(path);
+    if (isVideo) {
+        cv::VideoCapture cap(path);
 
-		if (!cap.isOpened())
-		{
-			printf("can not open %s\n", path.c_str());
-			return -1;
-		}
-		while (cap.read(image))
-		{
-			objs.clear();
-			yolov8->copy_from_Mat(image, size);
-			auto start = std::chrono::system_clock::now();
-			yolov8->infer();
-			auto end = std::chrono::system_clock::now();
-			yolov8->postprocess(objs, score_thres, iou_thres, topk, num_labels);
-			yolov8->draw_objects(image, res, objs, CLASS_NAMES, COLORS);
-			auto tc = (double)
-				std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.;
-			printf("cost %2.4lf ms\n", tc);
-			cv::imshow("result", res);
-			if (cv::waitKey(10) == 'q')
-			{
-				break;
-			}
-		}
-	}
-	else
-	{
-		for (auto& path : imagePathList)
-		{
-			objs.clear();
-			image = cv::imread(path);
-			yolov8->copy_from_Mat(image, size);
-			auto start = std::chrono::system_clock::now();
-			yolov8->infer();
-			auto end = std::chrono::system_clock::now();
-			yolov8->postprocess(objs, score_thres, iou_thres, topk, num_labels);
-			yolov8->draw_objects(image, res, objs, CLASS_NAMES, COLORS);
-			auto tc = (double)
-				std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.;
-			printf("cost %2.4lf ms\n", tc);
-			cv::imshow("result", res);
-			cv::waitKey(0);
-		}
-	}
-	cv::destroyAllWindows();
-	delete yolov8;
-	return 0;
+        if (!cap.isOpened()) {
+            printf("can not open %s\n", path.c_str());
+            return -1;
+        }
+        while (cap.read(image)) {
+            objs.clear();
+            yolov8->copy_from_Mat(image, size);
+            auto start = std::chrono::system_clock::now();
+            yolov8->infer();
+            auto end = std::chrono::system_clock::now();
+            yolov8->postprocess(objs, score_thres, iou_thres, topk, num_labels);
+            yolov8->draw_objects(image, res, objs, CLASS_NAMES, COLORS);
+            auto tc = (double)std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.;
+            printf("cost %2.4lf ms\n", tc);
+            cv::imshow("result", res);
+            if (cv::waitKey(10) == 'q') {
+                break;
+            }
+        }
+    }
+    else {
+        for (auto& path : imagePathList) {
+            objs.clear();
+            image = cv::imread(path);
+            yolov8->copy_from_Mat(image, size);
+            auto start = std::chrono::system_clock::now();
+            yolov8->infer();
+            auto end = std::chrono::system_clock::now();
+            yolov8->postprocess(objs, score_thres, iou_thres, topk, num_labels);
+            yolov8->draw_objects(image, res, objs, CLASS_NAMES, COLORS);
+            auto tc = (double)std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.;
+            printf("cost %2.4lf ms\n", tc);
+            cv::imshow("result", res);
+            cv::waitKey(0);
+        }
+    }
+    cv::destroyAllWindows();
+    delete yolov8;
+    return 0;
 }
diff --git a/csrc/jetson/detect/include/common.hpp b/csrc/jetson/detect/include/common.hpp
index 3b2fbeb..9492bce 100644
--- a/csrc/jetson/detect/include/common.hpp
+++ b/csrc/jetson/detect/include/common.hpp
@@ -4,153 +4,138 @@
 
 #ifndef JETSON_DETECT_COMMON_HPP
 #define JETSON_DETECT_COMMON_HPP
+#include "NvInfer.h"
 #include "opencv2/opencv.hpp"
 #include <sys/stat.h>
 #include <unistd.h>
-#include "NvInfer.h"
 
-#define CHECK(call)                                   \
-do                                                    \
-{                                                     \
-    const cudaError_t error_code = call;              \
-    if (error_code != cudaSuccess)                    \
-    {                                                 \
-        printf("CUDA Error:\n");                      \
-        printf("    File:       %s\n", __FILE__);     \
-        printf("    Line:       %d\n", __LINE__);     \
-        printf("    Error code: %d\n", error_code);   \
-        printf("    Error text: %s\n",                \
-            cudaGetErrorString(error_code));          \
-        exit(1);                                      \
-    }                                                 \
-} while (0)
+#define CHECK(call)                                                                                                    \
+    do {                                                                                                               \
+        const cudaError_t error_code = call;                                                                           \
+        if (error_code != cudaSuccess) {                                                                               \
+            printf("CUDA Error:\n");                                                                                   \
+            printf("    File:       %s\n", __FILE__);                                                                  \
+            printf("    Line:       %d\n", __LINE__);                                                                  \
+            printf("    Error code: %d\n", error_code);                                                                \
+            printf("    Error text: %s\n", cudaGetErrorString(error_code));                                            \
+            exit(1);                                                                                                   \
+        }                                                                                                              \
+    } while (0)
 
-class Logger : public nvinfer1::ILogger
-{
+class Logger: public nvinfer1::ILogger {
 public:
-	nvinfer1::ILogger::Severity reportableSeverity;
+    nvinfer1::ILogger::Severity reportableSeverity;
 
-	explicit Logger(nvinfer1::ILogger::Severity severity = nvinfer1::ILogger::Severity::kINFO) :
-		reportableSeverity(severity)
-	{
-	}
+    explicit Logger(nvinfer1::ILogger::Severity severity = nvinfer1::ILogger::Severity::kINFO):
+        reportableSeverity(severity)
+    {
+    }
 
-	void log(nvinfer1::ILogger::Severity severity, const char* msg) noexcept override
-	{
-		if (severity > reportableSeverity)
-		{
-			return;
-		}
-		switch (severity)
-		{
-		case nvinfer1::ILogger::Severity::kINTERNAL_ERROR:
-			std::cerr << "INTERNAL_ERROR: ";
-			break;
-		case nvinfer1::ILogger::Severity::kERROR:
-			std::cerr << "ERROR: ";
-			break;
-		case nvinfer1::ILogger::Severity::kWARNING:
-			std::cerr << "WARNING: ";
-			break;
-		case nvinfer1::ILogger::Severity::kINFO:
-			std::cerr << "INFO: ";
-			break;
-		default:
-			std::cerr << "VERBOSE: ";
-			break;
-		}
-		std::cerr << msg << std::endl;
-	}
+    void log(nvinfer1::ILogger::Severity severity, const char* msg) noexcept override
+    {
+        if (severity > reportableSeverity) {
+            return;
+        }
+        switch (severity) {
+            case nvinfer1::ILogger::Severity::kINTERNAL_ERROR:
+                std::cerr << "INTERNAL_ERROR: ";
+                break;
+            case nvinfer1::ILogger::Severity::kERROR:
+                std::cerr << "ERROR: ";
+                break;
+            case nvinfer1::ILogger::Severity::kWARNING:
+                std::cerr << "WARNING: ";
+                break;
+            case nvinfer1::ILogger::Severity::kINFO:
+                std::cerr << "INFO: ";
+                break;
+            default:
+                std::cerr << "VERBOSE: ";
+                break;
+        }
+        std::cerr << msg << std::endl;
+    }
 };
 
 inline int get_size_by_dims(const nvinfer1::Dims& dims)
 {
-	int size = 1;
-	for (int i = 0; i < dims.nbDims; i++)
-	{
-		size *= dims.d[i];
-	}
-	return size;
+    int size = 1;
+    for (int i = 0; i < dims.nbDims; i++) {
+        size *= dims.d[i];
+    }
+    return size;
 }
 
 inline int type_to_size(const nvinfer1::DataType& dataType)
 {
-	switch (dataType)
-	{
-	case nvinfer1::DataType::kFLOAT:
-		return 4;
-	case nvinfer1::DataType::kHALF:
-		return 2;
-	case nvinfer1::DataType::kINT32:
-		return 4;
-	case nvinfer1::DataType::kINT8:
-		return 1;
-	case nvinfer1::DataType::kBOOL:
-		return 1;
-	default:
-		return 4;
-	}
+    switch (dataType) {
+        case nvinfer1::DataType::kFLOAT:
+            return 4;
+        case nvinfer1::DataType::kHALF:
+            return 2;
+        case nvinfer1::DataType::kINT32:
+            return 4;
+        case nvinfer1::DataType::kINT8:
+            return 1;
+        case nvinfer1::DataType::kBOOL:
+            return 1;
+        default:
+            return 4;
+    }
 }
 
 inline static float clamp(float val, float min, float max)
 {
-	return val > min ? (val < max ? val : max) : min;
+    return val > min ? (val < max ? val : max) : min;
 }
 
 inline bool IsPathExist(const std::string& path)
 {
-	if (access(path.c_str(), 0) == F_OK)
-	{
-		return true;
-	}
-	return false;
+    if (access(path.c_str(), 0) == F_OK) {
+        return true;
+    }
+    return false;
 }
 
 inline bool IsFile(const std::string& path)
 {
-	if (!IsPathExist(path))
-	{
-		printf("%s:%d %s not exist\n", __FILE__, __LINE__, path.c_str());
-		return false;
-	}
-	struct stat buffer;
-	return (stat(path.c_str(), &buffer) == 0 && S_ISREG(buffer.st_mode));
+    if (!IsPathExist(path)) {
+        printf("%s:%d %s not exist\n", __FILE__, __LINE__, path.c_str());
+        return false;
+    }
+    struct stat buffer;
+    return (stat(path.c_str(), &buffer) == 0 && S_ISREG(buffer.st_mode));
 }
 
 inline bool IsFolder(const std::string& path)
 {
-	if (!IsPathExist(path))
-	{
-		return false;
-	}
-	struct stat buffer;
-	return (stat(path.c_str(), &buffer) == 0 && S_ISDIR(buffer.st_mode));
+    if (!IsPathExist(path)) {
+        return false;
+    }
+    struct stat buffer;
+    return (stat(path.c_str(), &buffer) == 0 && S_ISDIR(buffer.st_mode));
 }
 
-namespace det
-{
-	struct Binding
-	{
-		size_t size = 1;
-		size_t dsize = 1;
-		nvinfer1::Dims dims;
-		std::string name;
-	};
+namespace det {
+struct Binding {
+    size_t         size  = 1;
+    size_t         dsize = 1;
+    nvinfer1::Dims dims;
+    std::string    name;
+};
 
-	struct Object
-	{
-		cv::Rect_<float> rect;
-		int label = 0;
-		float prob = 0.0;
-	};
+struct Object {
+    cv::Rect_<float> rect;
+    int              label = 0;
+    float            prob  = 0.0;
+};
 
-	struct PreParam
-	{
-		float ratio = 1.0f;
-		float dw = 0.0f;
-		float dh = 0.0f;
-		float height = 0;
-		float width = 0;
-	};
-}
-#endif //JETSON_DETECT_COMMON_HPP
+struct PreParam {
+    float ratio  = 1.0f;
+    float dw     = 0.0f;
+    float dh     = 0.0f;
+    float height = 0;
+    float width  = 0;
+};
+}  // namespace det
+#endif  // JETSON_DETECT_COMMON_HPP
diff --git a/csrc/jetson/detect/include/yolov8.hpp b/csrc/jetson/detect/include/yolov8.hpp
index 739e024..627b8e5 100644
--- a/csrc/jetson/detect/include/yolov8.hpp
+++ b/csrc/jetson/detect/include/yolov8.hpp
@@ -3,422 +3,281 @@
 //
 #ifndef JETSON_DETECT_YOLOV8_HPP
 #define JETSON_DETECT_YOLOV8_HPP
-#include "fstream"
-#include "common.hpp"
 #include "NvInferPlugin.h"
+#include "common.hpp"
+#include "fstream"
 using namespace det;
 
-class YOLOv8
-{
+class YOLOv8 {
 public:
-	explicit YOLOv8(const std::string& engine_file_path);
-	~YOLOv8();
-
-	void make_pipe(bool warmup = true);
-	void copy_from_Mat(const cv::Mat& image);
-	void copy_from_Mat(const cv::Mat& image, cv::Size& size);
-	void letterbox(
-		const cv::Mat& image,
-		cv::Mat& out,
-		cv::Size& size
-	);
-	void infer();
-	void postprocess(std::vector<Object>& objs);
-	static void draw_objects(
-		const cv::Mat& image,
-		cv::Mat& res,
-		const std::vector<Object>& objs,
-		const std::vector<std::string>& CLASS_NAMES,
-		const std::vector<std::vector<unsigned int>>& COLORS
-	);
-	int num_bindings;
-	int num_inputs = 0;
-	int num_outputs = 0;
-	std::vector<Binding> input_bindings;
-	std::vector<Binding> output_bindings;
-	std::vector<void*> host_ptrs;
-	std::vector<void*> device_ptrs;
-
-	PreParam pparam;
-private:
-	nvinfer1::ICudaEngine* engine = nullptr;
-	nvinfer1::IRuntime* runtime = nullptr;
-	nvinfer1::IExecutionContext* context = nullptr;
-	cudaStream_t stream = nullptr;
-	Logger gLogger{ nvinfer1::ILogger::Severity::kERROR };
+    explicit YOLOv8(const std::string& engine_file_path);
+    ~YOLOv8();
+
+    void                 make_pipe(bool warmup = true);
+    void                 copy_from_Mat(const cv::Mat& image);
+    void                 copy_from_Mat(const cv::Mat& image, cv::Size& size);
+    void                 letterbox(const cv::Mat& image, cv::Mat& out, cv::Size& size);
+    void                 infer();
+    void                 postprocess(std::vector<Object>& objs);
+    static void          draw_objects(const cv::Mat&                                image,
+                                      cv::Mat&                                      res,
+                                      const std::vector<Object>&                    objs,
+                                      const std::vector<std::string>&               CLASS_NAMES,
+                                      const std::vector<std::vector<unsigned int>>& COLORS);
+    int                  num_bindings;
+    int                  num_inputs  = 0;
+    int                  num_outputs = 0;
+    std::vector<Binding> input_bindings;
+    std::vector<Binding> output_bindings;
+    std::vector<void*>   host_ptrs;
+    std::vector<void*>   device_ptrs;
+
+    PreParam pparam;
 
+private:
+    nvinfer1::ICudaEngine*       engine  = nullptr;
+    nvinfer1::IRuntime*          runtime = nullptr;
+    nvinfer1::IExecutionContext* context = nullptr;
+    cudaStream_t                 stream  = nullptr;
+    Logger                       gLogger{nvinfer1::ILogger::Severity::kERROR};
 };
 
 YOLOv8::YOLOv8(const std::string& engine_file_path)
 {
-	std::ifstream file(engine_file_path, std::ios::binary);
-	assert(file.good());
-	file.seekg(0, std::ios::end);
-	auto size = file.tellg();
-	file.seekg(0, std::ios::beg);
-	char* trtModelStream = new char[size];
-	assert(trtModelStream);
-	file.read(trtModelStream, size);
-	file.close();
-	initLibNvInferPlugins(&this->gLogger, "");
-	this->runtime = nvinfer1::createInferRuntime(this->gLogger);
-	assert(this->runtime != nullptr);
-
-	this->engine = this->runtime->deserializeCudaEngine(trtModelStream, size);
-	assert(this->engine != nullptr);
-	delete[] trtModelStream;
-	this->context = this->engine->createExecutionContext();
-
-	assert(this->context != nullptr);
-	cudaStreamCreate(&this->stream);
-	this->num_bindings = this->engine->getNbBindings();
-
-	for (int i = 0; i < this->num_bindings; ++i)
-	{
-		Binding binding;
-		nvinfer1::Dims dims;
-		nvinfer1::DataType dtype = this->engine->getBindingDataType(i);
-		std::string name = this->engine->getBindingName(i);
-		binding.name = name;
-		binding.dsize = type_to_size(dtype);
-
-		bool IsInput = engine->bindingIsInput(i);
-		if (IsInput)
-		{
-			this->num_inputs += 1;
-			dims = this->engine->getProfileDimensions(
-				i,
-				0,
-				nvinfer1::OptProfileSelector::kMAX);
-			binding.size = get_size_by_dims(dims);
-			binding.dims = dims;
-			this->input_bindings.push_back(binding);
-			// set max opt shape
-			this->context->setBindingDimensions(i, dims);
-
-		}
-		else
-		{
-			dims = this->context->getBindingDimensions(i);
-			binding.size = get_size_by_dims(dims);
-			binding.dims = dims;
-			this->output_bindings.push_back(binding);
-			this->num_outputs += 1;
-		}
-	}
-
+    std::ifstream file(engine_file_path, std::ios::binary);
+    assert(file.good());
+    file.seekg(0, std::ios::end);
+    auto size = file.tellg();
+    file.seekg(0, std::ios::beg);
+    char* trtModelStream = new char[size];
+    assert(trtModelStream);
+    file.read(trtModelStream, size);
+    file.close();
+    initLibNvInferPlugins(&this->gLogger, "");
+    this->runtime = nvinfer1::createInferRuntime(this->gLogger);
+    assert(this->runtime != nullptr);
+
+    this->engine = this->runtime->deserializeCudaEngine(trtModelStream, size);
+    assert(this->engine != nullptr);
+    delete[] trtModelStream;
+    this->context = this->engine->createExecutionContext();
+
+    assert(this->context != nullptr);
+    cudaStreamCreate(&this->stream);
+    this->num_bindings = this->engine->getNbBindings();
+
+    for (int i = 0; i < this->num_bindings; ++i) {
+        Binding            binding;
+        nvinfer1::Dims     dims;
+        nvinfer1::DataType dtype = this->engine->getBindingDataType(i);
+        std::string        name  = this->engine->getBindingName(i);
+        binding.name             = name;
+        binding.dsize            = type_to_size(dtype);
+
+        bool IsInput = engine->bindingIsInput(i);
+        if (IsInput) {
+            this->num_inputs += 1;
+            dims         = this->engine->getProfileDimensions(i, 0, nvinfer1::OptProfileSelector::kMAX);
+            binding.size = get_size_by_dims(dims);
+            binding.dims = dims;
+            this->input_bindings.push_back(binding);
+            // set max opt shape
+            this->context->setBindingDimensions(i, dims);
+        }
+        else {
+            dims         = this->context->getBindingDimensions(i);
+            binding.size = get_size_by_dims(dims);
+            binding.dims = dims;
+            this->output_bindings.push_back(binding);
+            this->num_outputs += 1;
+        }
+    }
 }
 
 YOLOv8::~YOLOv8()
 {
-	this->context->destroy();
-	this->engine->destroy();
-	this->runtime->destroy();
-	cudaStreamDestroy(this->stream);
-	for (auto& ptr : this->device_ptrs)
-	{
-		CHECK(cudaFree(ptr));
-	}
-
-	for (auto& ptr : this->host_ptrs)
-	{
-		CHECK(cudaFreeHost(ptr));
-	}
-
+    this->context->destroy();
+    this->engine->destroy();
+    this->runtime->destroy();
+    cudaStreamDestroy(this->stream);
+    for (auto& ptr : this->device_ptrs) {
+        CHECK(cudaFree(ptr));
+    }
+
+    for (auto& ptr : this->host_ptrs) {
+        CHECK(cudaFreeHost(ptr));
+    }
 }
 void YOLOv8::make_pipe(bool warmup)
 {
 
-	for (auto& bindings : this->input_bindings)
-	{
-		void* d_ptr;
-		CHECK(cudaMalloc(
-			&d_ptr,
-			bindings.size * bindings.dsize)
-		);
-		this->device_ptrs.push_back(d_ptr);
-	}
-
-	for (auto& bindings : this->output_bindings)
-	{
-		void* d_ptr, * h_ptr;
-		size_t size = bindings.size * bindings.dsize;
-		CHECK(cudaMalloc(
-			&d_ptr,
-			size)
-		);
-		CHECK(cudaHostAlloc(
-			&h_ptr,
-			size,
-			0)
-		);
-		this->device_ptrs.push_back(d_ptr);
-		this->host_ptrs.push_back(h_ptr);
-	}
-
-	if (warmup)
-	{
-		for (int i = 0; i < 10; i++)
-		{
-			for (auto& bindings : this->input_bindings)
-			{
-				size_t size = bindings.size * bindings.dsize;
-				void* h_ptr = malloc(size);
-				memset(h_ptr, 0, size);
-				CHECK(cudaMemcpyAsync(
-					this->device_ptrs[0],
-					h_ptr,
-					size,
-					cudaMemcpyHostToDevice,
-					this->stream)
-				);
-				free(h_ptr);
-			}
-			this->infer();
-		}
-		printf("model warmup 10 times\n");
-
-	}
+    for (auto& bindings : this->input_bindings) {
+        void* d_ptr;
+        CHECK(cudaMalloc(&d_ptr, bindings.size * bindings.dsize));
+        this->device_ptrs.push_back(d_ptr);
+    }
+
+    for (auto& bindings : this->output_bindings) {
+        void * d_ptr, *h_ptr;
+        size_t size = bindings.size * bindings.dsize;
+        CHECK(cudaMalloc(&d_ptr, size));
+        CHECK(cudaHostAlloc(&h_ptr, size, 0));
+        this->device_ptrs.push_back(d_ptr);
+        this->host_ptrs.push_back(h_ptr);
+    }
+
+    if (warmup) {
+        for (int i = 0; i < 10; i++) {
+            for (auto& bindings : this->input_bindings) {
+                size_t size  = bindings.size * bindings.dsize;
+                void*  h_ptr = malloc(size);
+                memset(h_ptr, 0, size);
+                CHECK(cudaMemcpyAsync(this->device_ptrs[0], h_ptr, size, cudaMemcpyHostToDevice, this->stream));
+                free(h_ptr);
+            }
+            this->infer();
+        }
+        printf("model warmup 10 times\n");
+    }
 }
 
 void YOLOv8::letterbox(const cv::Mat& image, cv::Mat& out, cv::Size& size)
 {
-	const float inp_h = size.height;
-	const float inp_w = size.width;
-	float height = image.rows;
-	float width = image.cols;
-
-	float r = std::min(inp_h / height, inp_w / width);
-	int padw = std::round(width * r);
-	int padh = std::round(height * r);
-
-	cv::Mat tmp;
-	if ((int)width != padw || (int)height != padh)
-	{
-		cv::resize(
-			image,
-			tmp,
-			cv::Size(padw, padh)
-		);
-	}
-	else
-	{
-		tmp = image.clone();
-	}
-
-	float dw = inp_w - padw;
-	float dh = inp_h - padh;
-
-	dw /= 2.0f;
-	dh /= 2.0f;
-	int top = int(std::round(dh - 0.1f));
-	int bottom = int(std::round(dh + 0.1f));
-	int left = int(std::round(dw - 0.1f));
-	int right = int(std::round(dw + 0.1f));
-
-	cv::copyMakeBorder(
-		tmp,
-		tmp,
-		top,
-		bottom,
-		left,
-		right,
-		cv::BORDER_CONSTANT,
-		{ 114, 114, 114 }
-	);
-
-	cv::dnn::blobFromImage(tmp,
-		out,
-		1 / 255.f,
-		cv::Size(),
-		cv::Scalar(0, 0, 0),
-		true,
-		false,
-		CV_32F
-	);
-	this->pparam.ratio = 1 / r;
-	this->pparam.dw = dw;
-	this->pparam.dh = dh;
-	this->pparam.height = height;
-	this->pparam.width = width;;
+    const float inp_h  = size.height;
+    const float inp_w  = size.width;
+    float       height = image.rows;
+    float       width  = image.cols;
+
+    float r    = std::min(inp_h / height, inp_w / width);
+    int   padw = std::round(width * r);
+    int   padh = std::round(height * r);
+
+    cv::Mat tmp;
+    if ((int)width != padw || (int)height != padh) {
+        cv::resize(image, tmp, cv::Size(padw, padh));
+    }
+    else {
+        tmp = image.clone();
+    }
+
+    float dw = inp_w - padw;
+    float dh = inp_h - padh;
+
+    dw /= 2.0f;
+    dh /= 2.0f;
+    int top    = int(std::round(dh - 0.1f));
+    int bottom = int(std::round(dh + 0.1f));
+    int left   = int(std::round(dw - 0.1f));
+    int right  = int(std::round(dw + 0.1f));
+
+    cv::copyMakeBorder(tmp, tmp, top, bottom, left, right, cv::BORDER_CONSTANT, {114, 114, 114});
+
+    cv::dnn::blobFromImage(tmp, out, 1 / 255.f, cv::Size(), cv::Scalar(0, 0, 0), true, false, CV_32F);
+    this->pparam.ratio  = 1 / r;
+    this->pparam.dw     = dw;
+    this->pparam.dh     = dh;
+    this->pparam.height = height;
+    this->pparam.width  = width;
+    ;
 }
 
 void YOLOv8::copy_from_Mat(const cv::Mat& image)
 {
-	cv::Mat nchw;
-	auto& in_binding = this->input_bindings[0];
-	auto width = in_binding.dims.d[3];
-	auto height = in_binding.dims.d[2];
-	cv::Size size{ width, height };
-	this->letterbox(
-		image,
-		nchw,
-		size
-	);
-
-	this->context->setBindingDimensions(
-		0,
-		nvinfer1::Dims
-			{
-				4,
-				{ 1, 3, height, width }
-			}
-	);
-
-	CHECK(cudaMemcpyAsync(
-		this->device_ptrs[0],
-		nchw.ptr<float>(),
-		nchw.total() * nchw.elemSize(),
-		cudaMemcpyHostToDevice,
-		this->stream)
-	);
+    cv::Mat  nchw;
+    auto&    in_binding = this->input_bindings[0];
+    auto     width      = in_binding.dims.d[3];
+    auto     height     = in_binding.dims.d[2];
+    cv::Size size{width, height};
+    this->letterbox(image, nchw, size);
+
+    this->context->setBindingDimensions(0, nvinfer1::Dims{4, {1, 3, height, width}});
+
+    CHECK(cudaMemcpyAsync(
+        this->device_ptrs[0], nchw.ptr<float>(), nchw.total() * nchw.elemSize(), cudaMemcpyHostToDevice, this->stream));
 }
 
 void YOLOv8::copy_from_Mat(const cv::Mat& image, cv::Size& size)
 {
-	cv::Mat nchw;
-	this->letterbox(
-		image,
-		nchw,
-		size
-	);
-	this->context->setBindingDimensions(
-		0,
-		nvinfer1::Dims
-			{ 4,
-			  { 1, 3, size.height, size.width }
-			}
-	);
-	CHECK(cudaMemcpyAsync(
-		this->device_ptrs[0],
-		nchw.ptr<float>(),
-		nchw.total() * nchw.elemSize(),
-		cudaMemcpyHostToDevice,
-		this->stream)
-	);
+    cv::Mat nchw;
+    this->letterbox(image, nchw, size);
+    this->context->setBindingDimensions(0, nvinfer1::Dims{4, {1, 3, size.height, size.width}});
+    CHECK(cudaMemcpyAsync(
+        this->device_ptrs[0], nchw.ptr<float>(), nchw.total() * nchw.elemSize(), cudaMemcpyHostToDevice, this->stream));
 }
 
 void YOLOv8::infer()
 {
 
-	this->context->enqueueV2(
-		this->device_ptrs.data(),
-		this->stream,
-		nullptr
-	);
-	for (int i = 0; i < this->num_outputs; i++)
-	{
-		size_t osize = this->output_bindings[i].size * this->output_bindings[i].dsize;
-		CHECK(cudaMemcpyAsync(this->host_ptrs[i],
-			this->device_ptrs[i + this->num_inputs],
-			osize,
-			cudaMemcpyDeviceToHost,
-			this->stream)
-		);
-
-	}
-	cudaStreamSynchronize(this->stream);
-
+    this->context->enqueueV2(this->device_ptrs.data(), this->stream, nullptr);
+    for (int i = 0; i < this->num_outputs; i++) {
+        size_t osize = this->output_bindings[i].size * this->output_bindings[i].dsize;
+        CHECK(cudaMemcpyAsync(
+            this->host_ptrs[i], this->device_ptrs[i + this->num_inputs], osize, cudaMemcpyDeviceToHost, this->stream));
+    }
+    cudaStreamSynchronize(this->stream);
 }
 
 void YOLOv8::postprocess(std::vector<Object>& objs)
 {
-	objs.clear();
-	int* num_dets = static_cast<int*>(this->host_ptrs[0]);
-	auto* boxes = static_cast<float*>(this->host_ptrs[1]);
-	auto* scores = static_cast<float*>(this->host_ptrs[2]);
-	int* labels = static_cast<int*>(this->host_ptrs[3]);
-	auto& dw = this->pparam.dw;
-	auto& dh = this->pparam.dh;
-	auto& width = this->pparam.width;
-	auto& height = this->pparam.height;
-	auto& ratio = this->pparam.ratio;
-	for (int i = 0; i < num_dets[0]; i++)
-	{
-		float* ptr = boxes + i * 4;
-
-		float x0 = *ptr++ - dw;
-		float y0 = *ptr++ - dh;
-		float x1 = *ptr++ - dw;
-		float y1 = *ptr - dh;
-
-		x0 = clamp(x0 * ratio, 0.f, width);
-		y0 = clamp(y0 * ratio, 0.f, height);
-		x1 = clamp(x1 * ratio, 0.f, width);
-		y1 = clamp(y1 * ratio, 0.f, height);
-		Object obj;
-		obj.rect.x = x0;
-		obj.rect.y = y0;
-		obj.rect.width = x1 - x0;
-		obj.rect.height = y1 - y0;
-		obj.prob = *(scores + i);
-		obj.label = *(labels + i);
-		objs.push_back(obj);
-	}
+    objs.clear();
+    int*  num_dets = static_cast<int*>(this->host_ptrs[0]);
+    auto* boxes    = static_cast<float*>(this->host_ptrs[1]);
+    auto* scores   = static_cast<float*>(this->host_ptrs[2]);
+    int*  labels   = static_cast<int*>(this->host_ptrs[3]);
+    auto& dw       = this->pparam.dw;
+    auto& dh       = this->pparam.dh;
+    auto& width    = this->pparam.width;
+    auto& height   = this->pparam.height;
+    auto& ratio    = this->pparam.ratio;
+    for (int i = 0; i < num_dets[0]; i++) {
+        float* ptr = boxes + i * 4;
+
+        float x0 = *ptr++ - dw;
+        float y0 = *ptr++ - dh;
+        float x1 = *ptr++ - dw;
+        float y1 = *ptr - dh;
+
+        x0 = clamp(x0 * ratio, 0.f, width);
+        y0 = clamp(y0 * ratio, 0.f, height);
+        x1 = clamp(x1 * ratio, 0.f, width);
+        y1 = clamp(y1 * ratio, 0.f, height);
+        Object obj;
+        obj.rect.x      = x0;
+        obj.rect.y      = y0;
+        obj.rect.width  = x1 - x0;
+        obj.rect.height = y1 - y0;
+        obj.prob        = *(scores + i);
+        obj.label       = *(labels + i);
+        objs.push_back(obj);
+    }
 }
 
-void YOLOv8::draw_objects(
-	const cv::Mat& image,
-	cv::Mat& res,
-	const std::vector<Object>& objs,
-	const std::vector<std::string>& CLASS_NAMES,
-	const std::vector<std::vector<unsigned int>>& COLORS
-)
+void YOLOv8::draw_objects(const cv::Mat&                                image,
+                          cv::Mat&                                      res,
+                          const std::vector<Object>&                    objs,
+                          const std::vector<std::string>&               CLASS_NAMES,
+                          const std::vector<std::vector<unsigned int>>& COLORS)
 {
-	res = image.clone();
-	for (auto& obj : objs)
-	{
-		cv::Scalar color = cv::Scalar(
-			COLORS[obj.label][0],
-			COLORS[obj.label][1],
-			COLORS[obj.label][2]
-		);
-		cv::rectangle(
-			res,
-			obj.rect,
-			color,
-			2
-		);
-
-		char text[256];
-		sprintf(
-			text,
-			"%s %.1f%%",
-			CLASS_NAMES[obj.label].c_str(),
-			obj.prob * 100
-		);
-
-		int baseLine = 0;
-		cv::Size label_size = cv::getTextSize(
-			text,
-			cv::FONT_HERSHEY_SIMPLEX,
-			0.4,
-			1,
-			&baseLine
-		);
-
-		int x = (int)obj.rect.x;
-		int y = (int)obj.rect.y + 1;
-
-		if (y > res.rows)
-			y = res.rows;
-
-		cv::rectangle(
-			res,
-			cv::Rect(x, y, label_size.width, label_size.height + baseLine),
-			{ 0, 0, 255 },
-			-1
-		);
-
-		cv::putText(
-			res,
-			text,
-			cv::Point(x, y + label_size.height),
-			cv::FONT_HERSHEY_SIMPLEX,
-			0.4,
-			{ 255, 255, 255 },
-			1
-		);
-	}
+    res = image.clone();
+    for (auto& obj : objs) {
+        cv::Scalar color = cv::Scalar(COLORS[obj.label][0], COLORS[obj.label][1], COLORS[obj.label][2]);
+        cv::rectangle(res, obj.rect, color, 2);
+
+        char text[256];
+        sprintf(text, "%s %.1f%%", CLASS_NAMES[obj.label].c_str(), obj.prob * 100);
+
+        int      baseLine   = 0;
+        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine);
+
+        int x = (int)obj.rect.x;
+        int y = (int)obj.rect.y + 1;
+
+        if (y > res.rows)
+            y = res.rows;
+
+        cv::rectangle(res, cv::Rect(x, y, label_size.width, label_size.height + baseLine), {0, 0, 255}, -1);
+
+        cv::putText(res, text, cv::Point(x, y + label_size.height), cv::FONT_HERSHEY_SIMPLEX, 0.4, {255, 255, 255}, 1);
+    }
 }
-#endif //JETSON_DETECT_YOLOV8_HPP
+#endif  // JETSON_DETECT_YOLOV8_HPP
diff --git a/csrc/jetson/detect/main.cpp b/csrc/jetson/detect/main.cpp
index c9e5c96..0b20904 100644
--- a/csrc/jetson/detect/main.cpp
+++ b/csrc/jetson/detect/main.cpp
@@ -2,157 +2,116 @@
 // Created by ubuntu on 3/16/23.
 //
 #include "chrono"
-#include "yolov8.hpp"
 #include "opencv2/opencv.hpp"
+#include "yolov8.hpp"
 
 const std::vector<std::string> CLASS_NAMES = {
-	"person", "bicycle", "car", "motorcycle", "airplane", "bus",
-	"train", "truck", "boat", "traffic light", "fire hydrant",
-	"stop sign", "parking meter", "bench", "bird", "cat",
-	"dog", "horse", "sheep", "cow", "elephant",
-	"bear", "zebra", "giraffe", "backpack", "umbrella",
-	"handbag", "tie", "suitcase", "frisbee", "skis",
-	"snowboard", "sports ball", "kite", "baseball bat", "baseball glove",
-	"skateboard", "surfboard", "tennis racket", "bottle", "wine glass",
-	"cup", "fork", "knife", "spoon", "bowl",
-	"banana", "apple", "sandwich", "orange", "broccoli",
-	"carrot", "hot dog", "pizza", "donut", "cake",
-	"chair", "couch", "potted plant", "bed", "dining table",
-	"toilet", "tv", "laptop", "mouse", "remote",
-	"keyboard", "cell phone", "microwave", "oven",
-	"toaster", "sink", "refrigerator", "book", "clock", "vase",
-	"scissors", "teddy bear", "hair drier", "toothbrush" };
+    "person",         "bicycle",    "car",           "motorcycle",    "airplane",     "bus",           "train",
+    "truck",          "boat",       "traffic light", "fire hydrant",  "stop sign",    "parking meter", "bench",
+    "bird",           "cat",        "dog",           "horse",         "sheep",        "cow",           "elephant",
+    "bear",           "zebra",      "giraffe",       "backpack",      "umbrella",     "handbag",       "tie",
+    "suitcase",       "frisbee",    "skis",          "snowboard",     "sports ball",  "kite",          "baseball bat",
+    "baseball glove", "skateboard", "surfboard",     "tennis racket", "bottle",       "wine glass",    "cup",
+    "fork",           "knife",      "spoon",         "bowl",          "banana",       "apple",         "sandwich",
+    "orange",         "broccoli",   "carrot",        "hot dog",       "pizza",        "donut",         "cake",
+    "chair",          "couch",      "potted plant",  "bed",           "dining table", "toilet",        "tv",
+    "laptop",         "mouse",      "remote",        "keyboard",      "cell phone",   "microwave",     "oven",
+    "toaster",        "sink",       "refrigerator",  "book",          "clock",        "vase",          "scissors",
+    "teddy bear",     "hair drier", "toothbrush"};
 
 const std::vector<std::vector<unsigned int>> COLORS = {
-	{ 0, 114, 189 }, { 217, 83, 25 }, { 237, 177, 32 },
-	{ 126, 47, 142 }, { 119, 172, 48 }, { 77, 190, 238 },
-	{ 162, 20, 47 }, { 76, 76, 76 }, { 153, 153, 153 },
-	{ 255, 0, 0 }, { 255, 128, 0 }, { 191, 191, 0 },
-	{ 0, 255, 0 }, { 0, 0, 255 }, { 170, 0, 255 },
-	{ 85, 85, 0 }, { 85, 170, 0 }, { 85, 255, 0 },
-	{ 170, 85, 0 }, { 170, 170, 0 }, { 170, 255, 0 },
-	{ 255, 85, 0 }, { 255, 170, 0 }, { 255, 255, 0 },
-	{ 0, 85, 128 }, { 0, 170, 128 }, { 0, 255, 128 },
-	{ 85, 0, 128 }, { 85, 85, 128 }, { 85, 170, 128 },
-	{ 85, 255, 128 }, { 170, 0, 128 }, { 170, 85, 128 },
-	{ 170, 170, 128 }, { 170, 255, 128 }, { 255, 0, 128 },
-	{ 255, 85, 128 }, { 255, 170, 128 }, { 255, 255, 128 },
-	{ 0, 85, 255 }, { 0, 170, 255 }, { 0, 255, 255 },
-	{ 85, 0, 255 }, { 85, 85, 255 }, { 85, 170, 255 },
-	{ 85, 255, 255 }, { 170, 0, 255 }, { 170, 85, 255 },
-	{ 170, 170, 255 }, { 170, 255, 255 }, { 255, 0, 255 },
-	{ 255, 85, 255 }, { 255, 170, 255 }, { 85, 0, 0 },
-	{ 128, 0, 0 }, { 170, 0, 0 }, { 212, 0, 0 },
-	{ 255, 0, 0 }, { 0, 43, 0 }, { 0, 85, 0 },
-	{ 0, 128, 0 }, { 0, 170, 0 }, { 0, 212, 0 },
-	{ 0, 255, 0 }, { 0, 0, 43 }, { 0, 0, 85 },
-	{ 0, 0, 128 }, { 0, 0, 170 }, { 0, 0, 212 },
-	{ 0, 0, 255 }, { 0, 0, 0 }, { 36, 36, 36 },
-	{ 73, 73, 73 }, { 109, 109, 109 }, { 146, 146, 146 },
-	{ 182, 182, 182 }, { 219, 219, 219 }, { 0, 114, 189 },
-	{ 80, 183, 189 }, { 128, 128, 0 }
-};
+    {0, 114, 189},   {217, 83, 25},   {237, 177, 32},  {126, 47, 142},  {119, 172, 48},  {77, 190, 238},
+    {162, 20, 47},   {76, 76, 76},    {153, 153, 153}, {255, 0, 0},     {255, 128, 0},   {191, 191, 0},
+    {0, 255, 0},     {0, 0, 255},     {170, 0, 255},   {85, 85, 0},     {85, 170, 0},    {85, 255, 0},
+    {170, 85, 0},    {170, 170, 0},   {170, 255, 0},   {255, 85, 0},    {255, 170, 0},   {255, 255, 0},
+    {0, 85, 128},    {0, 170, 128},   {0, 255, 128},   {85, 0, 128},    {85, 85, 128},   {85, 170, 128},
+    {85, 255, 128},  {170, 0, 128},   {170, 85, 128},  {170, 170, 128}, {170, 255, 128}, {255, 0, 128},
+    {255, 85, 128},  {255, 170, 128}, {255, 255, 128}, {0, 85, 255},    {0, 170, 255},   {0, 255, 255},
+    {85, 0, 255},    {85, 85, 255},   {85, 170, 255},  {85, 255, 255},  {170, 0, 255},   {170, 85, 255},
+    {170, 170, 255}, {170, 255, 255}, {255, 0, 255},   {255, 85, 255},  {255, 170, 255}, {85, 0, 0},
+    {128, 0, 0},     {170, 0, 0},     {212, 0, 0},     {255, 0, 0},     {0, 43, 0},      {0, 85, 0},
+    {0, 128, 0},     {0, 170, 0},     {0, 212, 0},     {0, 255, 0},     {0, 0, 43},      {0, 0, 85},
+    {0, 0, 128},     {0, 0, 170},     {0, 0, 212},     {0, 0, 255},     {0, 0, 0},       {36, 36, 36},
+    {73, 73, 73},    {109, 109, 109}, {146, 146, 146}, {182, 182, 182}, {219, 219, 219}, {0, 114, 189},
+    {80, 183, 189},  {128, 128, 0}};
 
 int main(int argc, char** argv)
 {
-	const std::string engine_file_path{ argv[1] };
-	const std::string path{ argv[2] };
+    const std::string engine_file_path{argv[1]};
+    const std::string path{argv[2]};
 
-	std::vector<std::string> imagePathList;
-	bool isVideo{ false };
+    std::vector<std::string> imagePathList;
+    bool                     isVideo{false};
 
-	assert(argc == 3);
+    assert(argc == 3);
 
-	auto yolov8 = new YOLOv8(engine_file_path);
-	yolov8->make_pipe(true);
+    auto yolov8 = new YOLOv8(engine_file_path);
+    yolov8->make_pipe(true);
 
-	if (IsFile(path))
-	{
-		std::string suffix = path.substr(path.find_last_of('.') + 1);
-		if (
-			suffix == "jpg" ||
-				suffix == "jpeg" ||
-				suffix == "png"
-			)
-		{
-			imagePathList.push_back(path);
-		}
-		else if (
-			suffix == "mp4" ||
-				suffix == "avi" ||
-				suffix == "m4v" ||
-				suffix == "mpeg" ||
-				suffix == "mov" ||
-				suffix == "mkv"
-			)
-		{
-			isVideo = true;
-		}
-		else
-		{
-			printf("suffix %s is wrong !!!\n", suffix.c_str());
-			std::abort();
-		}
-	}
-	else if (IsFolder(path))
-	{
-		cv::glob(path + "/*.jpg", imagePathList);
-	}
+    if (IsFile(path)) {
+        std::string suffix = path.substr(path.find_last_of('.') + 1);
+        if (suffix == "jpg" || suffix == "jpeg" || suffix == "png") {
+            imagePathList.push_back(path);
+        }
+        else if (suffix == "mp4" || suffix == "avi" || suffix == "m4v" || suffix == "mpeg" || suffix == "mov"
+                 || suffix == "mkv") {
+            isVideo = true;
+        }
+        else {
+            printf("suffix %s is wrong !!!\n", suffix.c_str());
+            std::abort();
+        }
+    }
+    else if (IsFolder(path)) {
+        cv::glob(path + "/*.jpg", imagePathList);
+    }
 
-	cv::Mat res, image;
-	cv::Size size = cv::Size{ 640, 640 };
-	std::vector<Object> objs;
+    cv::Mat             res, image;
+    cv::Size            size = cv::Size{640, 640};
+    std::vector<Object> objs;
 
-	cv::namedWindow("result", cv::WINDOW_AUTOSIZE);
+    cv::namedWindow("result", cv::WINDOW_AUTOSIZE);
 
-	if (isVideo)
-	{
-		cv::VideoCapture cap(path);
+    if (isVideo) {
+        cv::VideoCapture cap(path);
 
-		if (!cap.isOpened())
-		{
-			printf("can not open %s\n", path.c_str());
-			return -1;
-		}
-		while (cap.read(image))
-		{
-			objs.clear();
-			yolov8->copy_from_Mat(image, size);
-			auto start = std::chrono::system_clock::now();
-			yolov8->infer();
-			auto end = std::chrono::system_clock::now();
-			yolov8->postprocess(objs);
-			yolov8->draw_objects(image, res, objs, CLASS_NAMES, COLORS);
-			auto tc = (double)
-				std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.;
-			printf("cost %2.4lf ms\n", tc);
-			cv::imshow("result", res);
-			if (cv::waitKey(10) == 'q')
-			{
-				break;
-			}
-		}
-	}
-	else
-	{
-		for (auto& path : imagePathList)
-		{
-			objs.clear();
-			image = cv::imread(path);
-			yolov8->copy_from_Mat(image, size);
-			auto start = std::chrono::system_clock::now();
-			yolov8->infer();
-			auto end = std::chrono::system_clock::now();
-			yolov8->postprocess(objs);
-			yolov8->draw_objects(image, res, objs, CLASS_NAMES, COLORS);
-			auto tc = (double)
-				std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.;
-			printf("cost %2.4lf ms\n", tc);
-			cv::imshow("result", res);
-			cv::waitKey(0);
-		}
-	}
-	cv::destroyAllWindows();
-	delete yolov8;
-	return 0;
+        if (!cap.isOpened()) {
+            printf("can not open %s\n", path.c_str());
+            return -1;
+        }
+        while (cap.read(image)) {
+            objs.clear();
+            yolov8->copy_from_Mat(image, size);
+            auto start = std::chrono::system_clock::now();
+            yolov8->infer();
+            auto end = std::chrono::system_clock::now();
+            yolov8->postprocess(objs);
+            yolov8->draw_objects(image, res, objs, CLASS_NAMES, COLORS);
+            auto tc = (double)std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.;
+            printf("cost %2.4lf ms\n", tc);
+            cv::imshow("result", res);
+            if (cv::waitKey(10) == 'q') {
+                break;
+            }
+        }
+    }
+    else {
+        for (auto& path : imagePathList) {
+            objs.clear();
+            image = cv::imread(path);
+            yolov8->copy_from_Mat(image, size);
+            auto start = std::chrono::system_clock::now();
+            yolov8->infer();
+            auto end = std::chrono::system_clock::now();
+            yolov8->postprocess(objs);
+            yolov8->draw_objects(image, res, objs, CLASS_NAMES, COLORS);
+            auto tc = (double)std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.;
+            printf("cost %2.4lf ms\n", tc);
+            cv::imshow("result", res);
+            cv::waitKey(0);
+        }
+    }
+    cv::destroyAllWindows();
+    delete yolov8;
+    return 0;
 }
diff --git a/csrc/jetson/pose/include/common.hpp b/csrc/jetson/pose/include/common.hpp
index e23ac01..82790fd 100644
--- a/csrc/jetson/pose/include/common.hpp
+++ b/csrc/jetson/pose/include/common.hpp
@@ -4,154 +4,139 @@
 
 #ifndef JETSON_POSE_COMMON_HPP
 #define JETSON_POSE_COMMON_HPP
+#include "NvInfer.h"
 #include "opencv2/opencv.hpp"
 #include <sys/stat.h>
 #include <unistd.h>
-#include "NvInfer.h"
 
-#define CHECK(call)                                   \
-do                                                    \
-{                                                     \
-    const cudaError_t error_code = call;              \
-    if (error_code != cudaSuccess)                    \
-    {                                                 \
-        printf("CUDA Error:\n");                      \
-        printf("    File:       %s\n", __FILE__);     \
-        printf("    Line:       %d\n", __LINE__);     \
-        printf("    Error code: %d\n", error_code);   \
-        printf("    Error text: %s\n",                \
-            cudaGetErrorString(error_code));          \
-        exit(1);                                      \
-    }                                                 \
-} while (0)
+#define CHECK(call)                                                                                                    \
+    do {                                                                                                               \
+        const cudaError_t error_code = call;                                                                           \
+        if (error_code != cudaSuccess) {                                                                               \
+            printf("CUDA Error:\n");                                                                                   \
+            printf("    File:       %s\n", __FILE__);                                                                  \
+            printf("    Line:       %d\n", __LINE__);                                                                  \
+            printf("    Error code: %d\n", error_code);                                                                \
+            printf("    Error text: %s\n", cudaGetErrorString(error_code));                                            \
+            exit(1);                                                                                                   \
+        }                                                                                                              \
+    } while (0)
 
-class Logger : public nvinfer1::ILogger
-{
+class Logger: public nvinfer1::ILogger {
 public:
-	nvinfer1::ILogger::Severity reportableSeverity;
+    nvinfer1::ILogger::Severity reportableSeverity;
 
-	explicit Logger(nvinfer1::ILogger::Severity severity = nvinfer1::ILogger::Severity::kINFO) :
-		reportableSeverity(severity)
-	{
-	}
+    explicit Logger(nvinfer1::ILogger::Severity severity = nvinfer1::ILogger::Severity::kINFO):
+        reportableSeverity(severity)
+    {
+    }
 
-	void log(nvinfer1::ILogger::Severity severity, const char* msg) noexcept override
-	{
-		if (severity > reportableSeverity)
-		{
-			return;
-		}
-		switch (severity)
-		{
-		case nvinfer1::ILogger::Severity::kINTERNAL_ERROR:
-			std::cerr << "INTERNAL_ERROR: ";
-			break;
-		case nvinfer1::ILogger::Severity::kERROR:
-			std::cerr << "ERROR: ";
-			break;
-		case nvinfer1::ILogger::Severity::kWARNING:
-			std::cerr << "WARNING: ";
-			break;
-		case nvinfer1::ILogger::Severity::kINFO:
-			std::cerr << "INFO: ";
-			break;
-		default:
-			std::cerr << "VERBOSE: ";
-			break;
-		}
-		std::cerr << msg << std::endl;
-	}
+    void log(nvinfer1::ILogger::Severity severity, const char* msg) noexcept override
+    {
+        if (severity > reportableSeverity) {
+            return;
+        }
+        switch (severity) {
+            case nvinfer1::ILogger::Severity::kINTERNAL_ERROR:
+                std::cerr << "INTERNAL_ERROR: ";
+                break;
+            case nvinfer1::ILogger::Severity::kERROR:
+                std::cerr << "ERROR: ";
+                break;
+            case nvinfer1::ILogger::Severity::kWARNING:
+                std::cerr << "WARNING: ";
+                break;
+            case nvinfer1::ILogger::Severity::kINFO:
+                std::cerr << "INFO: ";
+                break;
+            default:
+                std::cerr << "VERBOSE: ";
+                break;
+        }
+        std::cerr << msg << std::endl;
+    }
 };
 
 inline int get_size_by_dims(const nvinfer1::Dims& dims)
 {
-	int size = 1;
-	for (int i = 0; i < dims.nbDims; i++)
-	{
-		size *= dims.d[i];
-	}
-	return size;
+    int size = 1;
+    for (int i = 0; i < dims.nbDims; i++) {
+        size *= dims.d[i];
+    }
+    return size;
 }
 
 inline int type_to_size(const nvinfer1::DataType& dataType)
 {
-	switch (dataType)
-	{
-	case nvinfer1::DataType::kFLOAT:
-		return 4;
-	case nvinfer1::DataType::kHALF:
-		return 2;
-	case nvinfer1::DataType::kINT32:
-		return 4;
-	case nvinfer1::DataType::kINT8:
-		return 1;
-	case nvinfer1::DataType::kBOOL:
-		return 1;
-	default:
-		return 4;
-	}
+    switch (dataType) {
+        case nvinfer1::DataType::kFLOAT:
+            return 4;
+        case nvinfer1::DataType::kHALF:
+            return 2;
+        case nvinfer1::DataType::kINT32:
+            return 4;
+        case nvinfer1::DataType::kINT8:
+            return 1;
+        case nvinfer1::DataType::kBOOL:
+            return 1;
+        default:
+            return 4;
+    }
 }
 
 inline static float clamp(float val, float min, float max)
 {
-	return val > min ? (val < max ? val : max) : min;
+    return val > min ? (val < max ? val : max) : min;
 }
 
 inline bool IsPathExist(const std::string& path)
 {
-	if (access(path.c_str(), 0) == F_OK)
-	{
-		return true;
-	}
-	return false;
+    if (access(path.c_str(), 0) == F_OK) {
+        return true;
+    }
+    return false;
 }
 
 inline bool IsFile(const std::string& path)
 {
-	if (!IsPathExist(path))
-	{
-		printf("%s:%d %s not exist\n", __FILE__, __LINE__, path.c_str());
-		return false;
-	}
-	struct stat buffer;
-	return (stat(path.c_str(), &buffer) == 0 && S_ISREG(buffer.st_mode));
+    if (!IsPathExist(path)) {
+        printf("%s:%d %s not exist\n", __FILE__, __LINE__, path.c_str());
+        return false;
+    }
+    struct stat buffer;
+    return (stat(path.c_str(), &buffer) == 0 && S_ISREG(buffer.st_mode));
 }
 
 inline bool IsFolder(const std::string& path)
 {
-	if (!IsPathExist(path))
-	{
-		return false;
-	}
-	struct stat buffer;
-	return (stat(path.c_str(), &buffer) == 0 && S_ISDIR(buffer.st_mode));
+    if (!IsPathExist(path)) {
+        return false;
+    }
+    struct stat buffer;
+    return (stat(path.c_str(), &buffer) == 0 && S_ISDIR(buffer.st_mode));
 }
 
-namespace pose
-{
-	struct Binding
-	{
-		size_t size = 1;
-		size_t dsize = 1;
-		nvinfer1::Dims dims;
-		std::string name;
-	};
+namespace pose {
+struct Binding {
+    size_t         size  = 1;
+    size_t         dsize = 1;
+    nvinfer1::Dims dims;
+    std::string    name;
+};
 
-	struct Object
-	{
-		cv::Rect_<float> rect;
-		int label = 0;
-		float prob = 0.0;
-        std::vector<float> kps;
-	};
+struct Object {
+    cv::Rect_<float>   rect;
+    int                label = 0;
+    float              prob  = 0.0;
+    std::vector<float> kps;
+};
 
-	struct PreParam
-	{
-		float ratio = 1.0f;
-		float dw = 0.0f;
-		float dh = 0.0f;
-		float height = 0;
-		float width = 0;
-	};
-}
-#endif //JETSON_POSE_COMMON_HPP
+struct PreParam {
+    float ratio  = 1.0f;
+    float dw     = 0.0f;
+    float dh     = 0.0f;
+    float height = 0;
+    float width  = 0;
+};
+}  // namespace pose
+#endif  // JETSON_POSE_COMMON_HPP
diff --git a/csrc/jetson/pose/include/yolov8-pose.hpp b/csrc/jetson/pose/include/yolov8-pose.hpp
index b53a25f..f3f215d 100644
--- a/csrc/jetson/pose/include/yolov8-pose.hpp
+++ b/csrc/jetson/pose/include/yolov8-pose.hpp
@@ -4,73 +4,63 @@
 #ifndef JETSON_POSE_YOLOV8_POSE_HPP
 #define JETSON_POSE_YOLOV8_POSE_HPP
 
-#include "fstream"
-#include "common.hpp"
 #include "NvInferPlugin.h"
+#include "common.hpp"
+#include "fstream"
 
 using namespace pose;
 
 class YOLOv8_pose {
 public:
-    explicit YOLOv8_pose(const std::string &engine_file_path);
+    explicit YOLOv8_pose(const std::string& engine_file_path);
 
     ~YOLOv8_pose();
 
     void make_pipe(bool warmup = true);
 
-    void copy_from_Mat(const cv::Mat &image);
+    void copy_from_Mat(const cv::Mat& image);
 
-    void copy_from_Mat(const cv::Mat &image, cv::Size &size);
+    void copy_from_Mat(const cv::Mat& image, cv::Size& size);
 
-    void letterbox(
-            const cv::Mat &image,
-            cv::Mat &out,
-            cv::Size &size
-    );
+    void letterbox(const cv::Mat& image, cv::Mat& out, cv::Size& size);
 
     void infer();
 
-    void postprocess(
-            std::vector<Object> &objs,
-            float score_thres = 0.25f,
-            float iou_thres = 0.65f,
-            int topk = 100
-    );
-
-    static void draw_objects(
-            const cv::Mat &image,
-            cv::Mat &res,
-            const std::vector<Object> &objs,
-            const std::vector<std::vector<unsigned int>> &SKELETON,
-            const std::vector<std::vector<unsigned int>> &KPS_COLORS,
-            const std::vector<std::vector<unsigned int>> &LIMB_COLORS
-    );
-
-    int num_bindings;
-    int num_inputs = 0;
-    int num_outputs = 0;
+    void postprocess(std::vector<Object>& objs, float score_thres = 0.25f, float iou_thres = 0.65f, int topk = 100);
+
+    static void draw_objects(const cv::Mat&                                image,
+                             cv::Mat&                                      res,
+                             const std::vector<Object>&                    objs,
+                             const std::vector<std::vector<unsigned int>>& SKELETON,
+                             const std::vector<std::vector<unsigned int>>& KPS_COLORS,
+                             const std::vector<std::vector<unsigned int>>& LIMB_COLORS);
+
+    int                  num_bindings;
+    int                  num_inputs  = 0;
+    int                  num_outputs = 0;
     std::vector<Binding> input_bindings;
     std::vector<Binding> output_bindings;
-    std::vector<void *> host_ptrs;
-    std::vector<void *> device_ptrs;
+    std::vector<void*>   host_ptrs;
+    std::vector<void*>   device_ptrs;
 
     PreParam pparam;
-private:
-    nvinfer1::ICudaEngine *engine = nullptr;
-    nvinfer1::IRuntime *runtime = nullptr;
-    nvinfer1::IExecutionContext *context = nullptr;
-    cudaStream_t stream = nullptr;
-    Logger gLogger{nvinfer1::ILogger::Severity::kERROR};
 
+private:
+    nvinfer1::ICudaEngine*       engine  = nullptr;
+    nvinfer1::IRuntime*          runtime = nullptr;
+    nvinfer1::IExecutionContext* context = nullptr;
+    cudaStream_t                 stream  = nullptr;
+    Logger                       gLogger{nvinfer1::ILogger::Severity::kERROR};
 };
 
-YOLOv8_pose::YOLOv8_pose(const std::string &engine_file_path) {
+YOLOv8_pose::YOLOv8_pose(const std::string& engine_file_path)
+{
     std::ifstream file(engine_file_path, std::ios::binary);
     assert(file.good());
     file.seekg(0, std::ios::end);
     auto size = file.tellg();
     file.seekg(0, std::ios::beg);
-    char *trtModelStream = new char[size];
+    char* trtModelStream = new char[size];
     assert(trtModelStream);
     file.read(trtModelStream, size);
     file.close();
@@ -88,124 +78,97 @@ YOLOv8_pose::YOLOv8_pose(const std::string &engine_file_path) {
     this->num_bindings = this->engine->getNbBindings();
 
     for (int i = 0; i < this->num_bindings; ++i) {
-        Binding binding;
-        nvinfer1::Dims dims;
+        Binding            binding;
+        nvinfer1::Dims     dims;
         nvinfer1::DataType dtype = this->engine->getBindingDataType(i);
-        std::string name = this->engine->getBindingName(i);
-        binding.name = name;
-        binding.dsize = type_to_size(dtype);
+        std::string        name  = this->engine->getBindingName(i);
+        binding.name             = name;
+        binding.dsize            = type_to_size(dtype);
 
         bool IsInput = engine->bindingIsInput(i);
         if (IsInput) {
             this->num_inputs += 1;
-            dims = this->engine->getProfileDimensions(
-                    i,
-                    0,
-                    nvinfer1::OptProfileSelector::kMAX);
+            dims         = this->engine->getProfileDimensions(i, 0, nvinfer1::OptProfileSelector::kMAX);
             binding.size = get_size_by_dims(dims);
             binding.dims = dims;
             this->input_bindings.push_back(binding);
             // set max opt shape
             this->context->setBindingDimensions(i, dims);
-
-        } else {
-            dims = this->context->getBindingDimensions(i);
+        }
+        else {
+            dims         = this->context->getBindingDimensions(i);
             binding.size = get_size_by_dims(dims);
             binding.dims = dims;
             this->output_bindings.push_back(binding);
             this->num_outputs += 1;
         }
     }
-
 }
 
-YOLOv8_pose::~YOLOv8_pose() {
+YOLOv8_pose::~YOLOv8_pose()
+{
     this->context->destroy();
     this->engine->destroy();
     this->runtime->destroy();
     cudaStreamDestroy(this->stream);
-    for (auto &ptr: this->device_ptrs) {
+    for (auto& ptr : this->device_ptrs) {
         CHECK(cudaFree(ptr));
     }
 
-    for (auto &ptr: this->host_ptrs) {
+    for (auto& ptr : this->host_ptrs) {
         CHECK(cudaFreeHost(ptr));
     }
-
 }
 
-void YOLOv8_pose::make_pipe(bool warmup) {
+void YOLOv8_pose::make_pipe(bool warmup)
+{
 
-    for (auto &bindings: this->input_bindings) {
-        void *d_ptr;
-        CHECK(cudaMalloc(
-                &d_ptr,
-                bindings.size * bindings.dsize
-                )
-        );
+    for (auto& bindings : this->input_bindings) {
+        void* d_ptr;
+        CHECK(cudaMalloc(&d_ptr, bindings.size * bindings.dsize));
         this->device_ptrs.push_back(d_ptr);
     }
 
-    for (auto &bindings: this->output_bindings) {
-        void *d_ptr, *h_ptr;
+    for (auto& bindings : this->output_bindings) {
+        void * d_ptr, *h_ptr;
         size_t size = bindings.size * bindings.dsize;
-        CHECK(cudaMalloc(
-                &d_ptr,
-                size)
-        );
-        CHECK(cudaHostAlloc(
-                &h_ptr,
-                size,
-                0)
-        );
+        CHECK(cudaMalloc(&d_ptr, size));
+        CHECK(cudaHostAlloc(&h_ptr, size, 0));
         this->device_ptrs.push_back(d_ptr);
         this->host_ptrs.push_back(h_ptr);
     }
 
     if (warmup) {
         for (int i = 0; i < 10; i++) {
-            for (auto &bindings: this->input_bindings) {
-                size_t size = bindings.size * bindings.dsize;
-                void *h_ptr = malloc(size);
+            for (auto& bindings : this->input_bindings) {
+                size_t size  = bindings.size * bindings.dsize;
+                void*  h_ptr = malloc(size);
                 memset(h_ptr, 0, size);
-                CHECK(cudaMemcpyAsync(
-                        this->device_ptrs[0],
-                        h_ptr,
-                        size,
-                        cudaMemcpyHostToDevice,
-                        this->stream)
-                );
+                CHECK(cudaMemcpyAsync(this->device_ptrs[0], h_ptr, size, cudaMemcpyHostToDevice, this->stream));
                 free(h_ptr);
             }
             this->infer();
         }
         printf("model warmup 10 times\n");
-
     }
 }
 
-void YOLOv8_pose::letterbox(
-        const cv::Mat &image,
-        cv::Mat &out,
-        cv::Size &size
-) {
-    const float inp_h = size.height;
-    const float inp_w = size.width;
-    float height = image.rows;
-    float width = image.cols;
+void YOLOv8_pose::letterbox(const cv::Mat& image, cv::Mat& out, cv::Size& size)
+{
+    const float inp_h  = size.height;
+    const float inp_w  = size.width;
+    float       height = image.rows;
+    float       width  = image.cols;
 
-    float r = std::min(inp_h / height, inp_w / width);
-    int padw = std::round(width * r);
-    int padh = std::round(height * r);
+    float r    = std::min(inp_h / height, inp_w / width);
+    int   padw = std::round(width * r);
+    int   padh = std::round(height * r);
 
     cv::Mat tmp;
-    if ((int) width != padw || (int) height != padh) {
-        cv::resize(
-                image,
-                tmp,
-                cv::Size(padw, padh)
-        );
-    } else {
+    if ((int)width != padw || (int)height != padh) {
+        cv::resize(image, tmp, cv::Size(padw, padh));
+    }
+    else {
         tmp = image.clone();
     }
 
@@ -214,146 +177,83 @@ void YOLOv8_pose::letterbox(
 
     dw /= 2.0f;
     dh /= 2.0f;
-    int top = int(std::round(dh - 0.1f));
+    int top    = int(std::round(dh - 0.1f));
     int bottom = int(std::round(dh + 0.1f));
-    int left = int(std::round(dw - 0.1f));
-    int right = int(std::round(dw + 0.1f));
-
-    cv::copyMakeBorder(
-            tmp,
-            tmp,
-            top,
-            bottom,
-            left,
-            right,
-            cv::BORDER_CONSTANT,
-            {114, 114, 114}
-    );
-
-    cv::dnn::blobFromImage(tmp,
-                           out,
-                           1 / 255.f,
-                           cv::Size(),
-                           cv::Scalar(0, 0, 0),
-                           true,
-                           false,
-                           CV_32F
-    );
-    this->pparam.ratio = 1 / r;
-    this->pparam.dw = dw;
-    this->pparam.dh = dh;
+    int left   = int(std::round(dw - 0.1f));
+    int right  = int(std::round(dw + 0.1f));
+
+    cv::copyMakeBorder(tmp, tmp, top, bottom, left, right, cv::BORDER_CONSTANT, {114, 114, 114});
+
+    cv::dnn::blobFromImage(tmp, out, 1 / 255.f, cv::Size(), cv::Scalar(0, 0, 0), true, false, CV_32F);
+    this->pparam.ratio  = 1 / r;
+    this->pparam.dw     = dw;
+    this->pparam.dh     = dh;
     this->pparam.height = height;
-    this->pparam.width = width;;
+    this->pparam.width  = width;
+    ;
 }
 
-void YOLOv8_pose::copy_from_Mat(const cv::Mat &image) {
-    cv::Mat nchw;
-    auto &in_binding = this->input_bindings[0];
-    auto width = in_binding.dims.d[3];
-    auto height = in_binding.dims.d[2];
+void YOLOv8_pose::copy_from_Mat(const cv::Mat& image)
+{
+    cv::Mat  nchw;
+    auto&    in_binding = this->input_bindings[0];
+    auto     width      = in_binding.dims.d[3];
+    auto     height     = in_binding.dims.d[2];
     cv::Size size{width, height};
-    this->letterbox(
-            image,
-            nchw,
-            size
-    );
-
-    this->context->setBindingDimensions(
-            0,
-            nvinfer1::Dims
-                    {
-                            4,
-                            {1, 3, height, width}
-                    }
-    );
+    this->letterbox(image, nchw, size);
+
+    this->context->setBindingDimensions(0, nvinfer1::Dims{4, {1, 3, height, width}});
 
     CHECK(cudaMemcpyAsync(
-            this->device_ptrs[0],
-            nchw.ptr<float>(),
-            nchw.total() * nchw.elemSize(),
-            cudaMemcpyHostToDevice,
-            this->stream)
-    );
+        this->device_ptrs[0], nchw.ptr<float>(), nchw.total() * nchw.elemSize(), cudaMemcpyHostToDevice, this->stream));
 }
 
-void YOLOv8_pose::copy_from_Mat(const cv::Mat &image, cv::Size &size) {
+void YOLOv8_pose::copy_from_Mat(const cv::Mat& image, cv::Size& size)
+{
     cv::Mat nchw;
-    this->letterbox(
-            image,
-            nchw,
-            size
-    );
-    this->context->setBindingDimensions(
-            0,
-            nvinfer1::Dims
-                    {4,
-                     {1, 3, size.height, size.width}
-                    }
-    );
+    this->letterbox(image, nchw, size);
+    this->context->setBindingDimensions(0, nvinfer1::Dims{4, {1, 3, size.height, size.width}});
     CHECK(cudaMemcpyAsync(
-            this->device_ptrs[0],
-            nchw.ptr<float>(),
-            nchw.total() * nchw.elemSize(),
-            cudaMemcpyHostToDevice,
-            this->stream)
-    );
+        this->device_ptrs[0], nchw.ptr<float>(), nchw.total() * nchw.elemSize(), cudaMemcpyHostToDevice, this->stream));
 }
 
-void YOLOv8_pose::infer() {
+void YOLOv8_pose::infer()
+{
 
-    this->context->enqueueV2(
-            this->device_ptrs.data(),
-            this->stream,
-            nullptr
-    );
+    this->context->enqueueV2(this->device_ptrs.data(), this->stream, nullptr);
     for (int i = 0; i < this->num_outputs; i++) {
         size_t osize = this->output_bindings[i].size * this->output_bindings[i].dsize;
-        CHECK(cudaMemcpyAsync(this->host_ptrs[i],
-                              this->device_ptrs[i + this->num_inputs],
-                              osize,
-                              cudaMemcpyDeviceToHost,
-                              this->stream)
-        );
-
+        CHECK(cudaMemcpyAsync(
+            this->host_ptrs[i], this->device_ptrs[i + this->num_inputs], osize, cudaMemcpyDeviceToHost, this->stream));
     }
     cudaStreamSynchronize(this->stream);
-
 }
 
-void YOLOv8_pose::postprocess(
-        std::vector<Object> &objs,
-        float score_thres,
-        float iou_thres,
-        int topk
-) {
+void YOLOv8_pose::postprocess(std::vector<Object>& objs, float score_thres, float iou_thres, int topk)
+{
     objs.clear();
     auto num_channels = this->output_bindings[0].dims.d[1];
-    auto num_anchors = this->output_bindings[0].dims.d[2];
-
-    auto &dw = this->pparam.dw;
-    auto &dh = this->pparam.dh;
-    auto &width = this->pparam.width;
-    auto &height = this->pparam.height;
-    auto &ratio = this->pparam.ratio;
-
-    std::vector<cv::Rect> bboxes;
-    std::vector<float> scores;
-    std::vector<int> labels;
-    std::vector<int> indices;
+    auto num_anchors  = this->output_bindings[0].dims.d[2];
+
+    auto& dw     = this->pparam.dw;
+    auto& dh     = this->pparam.dh;
+    auto& width  = this->pparam.width;
+    auto& height = this->pparam.height;
+    auto& ratio  = this->pparam.ratio;
+
+    std::vector<cv::Rect>           bboxes;
+    std::vector<float>              scores;
+    std::vector<int>                labels;
+    std::vector<int>                indices;
     std::vector<std::vector<float>> kpss;
 
-    cv::Mat output = cv::Mat(
-            num_channels,
-            num_anchors,
-            CV_32F,
-            static_cast<float *>(this->host_ptrs[0])
-    );
-    output = output.t();
+    cv::Mat output = cv::Mat(num_channels, num_anchors, CV_32F, static_cast<float*>(this->host_ptrs[0]));
+    output         = output.t();
     for (int i = 0; i < num_anchors; i++) {
-        auto row_ptr = output.row(i).ptr<float>();
+        auto row_ptr    = output.row(i).ptr<float>();
         auto bboxes_ptr = row_ptr;
         auto scores_ptr = row_ptr + 4;
-        auto kps_ptr = row_ptr + 5;
+        auto kps_ptr    = row_ptr + 5;
 
         float score = *scores_ptr;
         if (score > score_thres) {
@@ -368,17 +268,17 @@ void YOLOv8_pose::postprocess(
             float y1 = clamp((y + 0.5f * h) * ratio, 0.f, height);
 
             cv::Rect_<float> bbox;
-            bbox.x = x0;
-            bbox.y = y0;
-            bbox.width = x1 - x0;
+            bbox.x      = x0;
+            bbox.y      = y0;
+            bbox.width  = x1 - x0;
             bbox.height = y1 - y0;
             std::vector<float> kps;
             for (int k = 0; k < 17; k++) {
                 float kps_x = (*(kps_ptr + 3 * k) - dw) * ratio;
                 float kps_y = (*(kps_ptr + 3 * k + 1) - dh) * ratio;
                 float kps_s = *(kps_ptr + 3 * k + 2);
-                kps_x = clamp(kps_x, 0.f, width);
-                kps_y = clamp(kps_y, 0.f, height);
+                kps_x       = clamp(kps_x, 0.f, width);
+                kps_y       = clamp(kps_y, 0.f, height);
                 kps.push_back(kps_x);
                 kps.push_back(kps_y);
                 kps.push_back(kps_s);
@@ -392,110 +292,68 @@ void YOLOv8_pose::postprocess(
     }
 
 #ifdef BATCHED_NMS
-    cv::dnn::NMSBoxesBatched(
-            bboxes,
-            scores,
-            labels,
-            score_thres,
-            iou_thres,
-            indices
-    );
+    cv::dnn::NMSBoxesBatched(bboxes, scores, labels, score_thres, iou_thres, indices);
 #else
-    cv::dnn::NMSBoxes(
-            bboxes,
-            scores,
-            score_thres,
-            iou_thres,
-            indices
-    );
+    cv::dnn::NMSBoxes(bboxes, scores, score_thres, iou_thres, indices);
 #endif
 
     int cnt = 0;
-    for (auto &i: indices) {
+    for (auto& i : indices) {
         if (cnt >= topk) {
             break;
         }
         Object obj;
-        obj.rect = bboxes[i];
-        obj.prob = scores[i];
+        obj.rect  = bboxes[i];
+        obj.prob  = scores[i];
         obj.label = labels[i];
-        obj.kps = kpss[i];
+        obj.kps   = kpss[i];
         objs.push_back(obj);
         cnt += 1;
     }
 }
 
-void YOLOv8_pose::draw_objects(
-        const cv::Mat &image,
-        cv::Mat &res,
-        const std::vector<Object> &objs,
-        const std::vector<std::vector<unsigned int>> &SKELETON,
-        const std::vector<std::vector<unsigned int>> &KPS_COLORS,
-        const std::vector<std::vector<unsigned int>> &LIMB_COLORS
-) {
-    res = image.clone();
+void YOLOv8_pose::draw_objects(const cv::Mat&                                image,
+                               cv::Mat&                                      res,
+                               const std::vector<Object>&                    objs,
+                               const std::vector<std::vector<unsigned int>>& SKELETON,
+                               const std::vector<std::vector<unsigned int>>& KPS_COLORS,
+                               const std::vector<std::vector<unsigned int>>& LIMB_COLORS)
+{
+    res                 = image.clone();
     const int num_point = 17;
-    for (auto &obj: objs) {
-        cv::rectangle(
-                res,
-                obj.rect,
-                {0, 0, 255},
-                2
-        );
+    for (auto& obj : objs) {
+        cv::rectangle(res, obj.rect, {0, 0, 255}, 2);
 
         char text[256];
-        sprintf(
-                text,
-                "person %.1f%%",
-                obj.prob * 100
-        );
-
-        int baseLine = 0;
-        cv::Size label_size = cv::getTextSize(
-                text,
-                cv::FONT_HERSHEY_SIMPLEX,
-                0.4,
-                1,
-                &baseLine
-        );
-
-        int x = (int) obj.rect.x;
-        int y = (int) obj.rect.y + 1;
+        sprintf(text, "person %.1f%%", obj.prob * 100);
+
+        int      baseLine   = 0;
+        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine);
+
+        int x = (int)obj.rect.x;
+        int y = (int)obj.rect.y + 1;
 
         if (y > res.rows)
             y = res.rows;
 
-        cv::rectangle(
-                res,
-                cv::Rect(x, y, label_size.width, label_size.height + baseLine),
-                {0, 0, 255},
-                -1
-        );
-
-        cv::putText(
-                res,
-                text,
-                cv::Point(x, y + label_size.height),
-                cv::FONT_HERSHEY_SIMPLEX,
-                0.4,
-                {255, 255, 255},
-                1
-        );
-
-        auto &kps = obj.kps;
+        cv::rectangle(res, cv::Rect(x, y, label_size.width, label_size.height + baseLine), {0, 0, 255}, -1);
+
+        cv::putText(res, text, cv::Point(x, y + label_size.height), cv::FONT_HERSHEY_SIMPLEX, 0.4, {255, 255, 255}, 1);
+
+        auto& kps = obj.kps;
         for (int k = 0; k < num_point + 2; k++) {
             if (k < num_point) {
-                int kps_x = std::round(kps[k * 3]);
-                int kps_y = std::round(kps[k * 3 + 1]);
+                int   kps_x = std::round(kps[k * 3]);
+                int   kps_y = std::round(kps[k * 3 + 1]);
                 float kps_s = kps[k * 3 + 2];
                 if (kps_s > 0.5f) {
                     cv::Scalar kps_color = cv::Scalar(KPS_COLORS[k][0], KPS_COLORS[k][1], KPS_COLORS[k][2]);
                     cv::circle(res, {kps_x, kps_y}, 5, kps_color, -1);
                 }
             }
-            auto &ske = SKELETON[k];
-            int pos1_x = std::round(kps[(ske[0] - 1) * 3]);
-            int pos1_y = std::round(kps[(ske[0] - 1) * 3 + 1]);
+            auto& ske    = SKELETON[k];
+            int   pos1_x = std::round(kps[(ske[0] - 1) * 3]);
+            int   pos1_y = std::round(kps[(ske[0] - 1) * 3 + 1]);
 
             int pos2_x = std::round(kps[(ske[1] - 1) * 3]);
             int pos2_y = std::round(kps[(ske[1] - 1) * 3 + 1]);
@@ -503,7 +361,6 @@ void YOLOv8_pose::draw_objects(
             float pos1_s = kps[(ske[0] - 1) * 3 + 2];
             float pos2_s = kps[(ske[1] - 1) * 3 + 2];
 
-
             if (pos1_s > 0.5f && pos2_s > 0.5f) {
                 cv::Scalar limb_color = cv::Scalar(LIMB_COLORS[k][0], LIMB_COLORS[k][1], LIMB_COLORS[k][2]);
                 cv::line(res, {pos1_x, pos1_y}, {pos2_x, pos2_y}, limb_color, 2);
@@ -512,4 +369,4 @@ void YOLOv8_pose::draw_objects(
     }
 }
 
-#endif //JETSON_POSE_YOLOV8_POSE_HPP
+#endif  // JETSON_POSE_YOLOV8_POSE_HPP
diff --git a/csrc/jetson/pose/main.cpp b/csrc/jetson/pose/main.cpp
index 8a0720f..50d3ac2 100644
--- a/csrc/jetson/pose/main.cpp
+++ b/csrc/jetson/pose/main.cpp
@@ -2,70 +2,69 @@
 // Created by ubuntu on 4/7/23.
 //
 #include "chrono"
-#include "yolov8-pose.hpp"
 #include "opencv2/opencv.hpp"
+#include "yolov8-pose.hpp"
 
-
-const std::vector<std::vector<unsigned int>> KPS_COLORS =
-        {{0,   255, 0},
-         {0,   255, 0},
-         {0,   255, 0},
-         {0,   255, 0},
-         {0,   255, 0},
-         {255, 128, 0},
-         {255, 128, 0},
-         {255, 128, 0},
-         {255, 128, 0},
-         {255, 128, 0},
-         {255, 128, 0},
-         {51,  153, 255},
-         {51,  153, 255},
-         {51,  153, 255},
-         {51,  153, 255},
-         {51,  153, 255},
-         {51,  153, 255}};
+const std::vector<std::vector<unsigned int>> KPS_COLORS = {{0, 255, 0},
+                                                           {0, 255, 0},
+                                                           {0, 255, 0},
+                                                           {0, 255, 0},
+                                                           {0, 255, 0},
+                                                           {255, 128, 0},
+                                                           {255, 128, 0},
+                                                           {255, 128, 0},
+                                                           {255, 128, 0},
+                                                           {255, 128, 0},
+                                                           {255, 128, 0},
+                                                           {51, 153, 255},
+                                                           {51, 153, 255},
+                                                           {51, 153, 255},
+                                                           {51, 153, 255},
+                                                           {51, 153, 255},
+                                                           {51, 153, 255}};
 
 const std::vector<std::vector<unsigned int>> SKELETON = {{16, 14},
                                                          {14, 12},
                                                          {17, 15},
                                                          {15, 13},
                                                          {12, 13},
-                                                         {6,  12},
-                                                         {7,  13},
-                                                         {6,  7},
-                                                         {6,  8},
-                                                         {7,  9},
-                                                         {8,  10},
-                                                         {9,  11},
-                                                         {2,  3},
-                                                         {1,  2},
-                                                         {1,  3},
-                                                         {2,  4},
-                                                         {3,  5},
-                                                         {4,  6},
-                                                         {5,  7}};
+                                                         {6, 12},
+                                                         {7, 13},
+                                                         {6, 7},
+                                                         {6, 8},
+                                                         {7, 9},
+                                                         {8, 10},
+                                                         {9, 11},
+                                                         {2, 3},
+                                                         {1, 2},
+                                                         {1, 3},
+                                                         {2, 4},
+                                                         {3, 5},
+                                                         {4, 6},
+                                                         {5, 7}};
 
-const std::vector<std::vector<unsigned int>> LIMB_COLORS = {{51,  153, 255},
-                                                            {51,  153, 255},
-                                                            {51,  153, 255},
-                                                            {51,  153, 255},
-                                                            {255, 51,  255},
-                                                            {255, 51,  255},
-                                                            {255, 51,  255},
+const std::vector<std::vector<unsigned int>> LIMB_COLORS = {{51, 153, 255},
+                                                            {51, 153, 255},
+                                                            {51, 153, 255},
+                                                            {51, 153, 255},
+                                                            {255, 51, 255},
+                                                            {255, 51, 255},
+                                                            {255, 51, 255},
                                                             {255, 128, 0},
                                                             {255, 128, 0},
                                                             {255, 128, 0},
                                                             {255, 128, 0},
                                                             {255, 128, 0},
-                                                            {0,   255, 0},
-                                                            {0,   255, 0},
-                                                            {0,   255, 0},
-                                                            {0,   255, 0},
-                                                            {0,   255, 0},
-                                                            {0,   255, 0},
-                                                            {0,   255, 0}};
+                                                            {0, 255, 0},
+                                                            {0, 255, 0},
+                                                            {0, 255, 0},
+                                                            {0, 255, 0},
+                                                            {0, 255, 0},
+                                                            {0, 255, 0},
+                                                            {0, 255, 0}};
 
-int main(int argc, char **argv) {
+int main(int argc, char** argv)
+{
     // cuda:0
     cudaSetDevice(0);
 
@@ -73,7 +72,7 @@ int main(int argc, char **argv) {
     const std::string path{argv[2]};
 
     std::vector<std::string> imagePathList;
-    bool isVideo{false};
+    bool                     isVideo{false};
 
     assert(argc == 3);
 
@@ -82,34 +81,27 @@ int main(int argc, char **argv) {
 
     if (IsFile(path)) {
         std::string suffix = path.substr(path.find_last_of('.') + 1);
-        if (
-                suffix == "jpg" ||
-                suffix == "jpeg" ||
-                suffix == "png"
-                ) {
+        if (suffix == "jpg" || suffix == "jpeg" || suffix == "png") {
             imagePathList.push_back(path);
-        } else if (
-                suffix == "mp4" ||
-                suffix == "avi" ||
-                suffix == "m4v" ||
-                suffix == "mpeg" ||
-                suffix == "mov" ||
-                suffix == "mkv"
-                ) {
+        }
+        else if (suffix == "mp4" || suffix == "avi" || suffix == "m4v" || suffix == "mpeg" || suffix == "mov"
+                 || suffix == "mkv") {
             isVideo = true;
-        } else {
+        }
+        else {
             printf("suffix %s is wrong !!!\n", suffix.c_str());
             std::abort();
         }
-    } else if (IsFolder(path)) {
+    }
+    else if (IsFolder(path)) {
         cv::glob(path + "/*.jpg", imagePathList);
     }
 
-    cv::Mat res, image;
-    cv::Size size = cv::Size{640, 640};
-    int topk = 100;
-    float score_thres = 0.25f;
-    float iou_thres = 0.65f;
+    cv::Mat  res, image;
+    cv::Size size        = cv::Size{640, 640};
+    int      topk        = 100;
+    float    score_thres = 0.25f;
+    float    iou_thres   = 0.65f;
 
     std::vector<Object> objs;
 
@@ -130,16 +122,16 @@ int main(int argc, char **argv) {
             auto end = std::chrono::system_clock::now();
             yolov8_pose->postprocess(objs, score_thres, iou_thres, topk);
             yolov8_pose->draw_objects(image, res, objs, SKELETON, KPS_COLORS, LIMB_COLORS);
-            auto tc = (double)
-                              std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.;
+            auto tc = (double)std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.;
             printf("cost %2.4lf ms\n", tc);
             cv::imshow("result", res);
             if (cv::waitKey(10) == 'q') {
                 break;
             }
         }
-    } else {
-        for (auto &path: imagePathList) {
+    }
+    else {
+        for (auto& path : imagePathList) {
             objs.clear();
             image = cv::imread(path);
             yolov8_pose->copy_from_Mat(image, size);
@@ -148,8 +140,7 @@ int main(int argc, char **argv) {
             auto end = std::chrono::system_clock::now();
             yolov8_pose->postprocess(objs, score_thres, iou_thres, topk);
             yolov8_pose->draw_objects(image, res, objs, SKELETON, KPS_COLORS, LIMB_COLORS);
-            auto tc = (double)
-                              std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.;
+            auto tc = (double)std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.;
             printf("cost %2.4lf ms\n", tc);
             cv::imshow("result", res);
             cv::waitKey(0);
diff --git a/csrc/jetson/segment/include/common.hpp b/csrc/jetson/segment/include/common.hpp
index 3db19e1..203228d 100644
--- a/csrc/jetson/segment/include/common.hpp
+++ b/csrc/jetson/segment/include/common.hpp
@@ -4,154 +4,139 @@
 
 #ifndef JETSON_SEGMENT_COMMON_HPP
 #define JETSON_SEGMENT_COMMON_HPP
+#include "NvInfer.h"
 #include "opencv2/opencv.hpp"
 #include <sys/stat.h>
 #include <unistd.h>
-#include "NvInfer.h"
 
-#define CHECK(call)                                   \
-do                                                    \
-{                                                     \
-    const cudaError_t error_code = call;              \
-    if (error_code != cudaSuccess)                    \
-    {                                                 \
-        printf("CUDA Error:\n");                      \
-        printf("    File:       %s\n", __FILE__);     \
-        printf("    Line:       %d\n", __LINE__);     \
-        printf("    Error code: %d\n", error_code);   \
-        printf("    Error text: %s\n",                \
-            cudaGetErrorString(error_code));          \
-        exit(1);                                      \
-    }                                                 \
-} while (0)
+#define CHECK(call)                                                                                                    \
+    do {                                                                                                               \
+        const cudaError_t error_code = call;                                                                           \
+        if (error_code != cudaSuccess) {                                                                               \
+            printf("CUDA Error:\n");                                                                                   \
+            printf("    File:       %s\n", __FILE__);                                                                  \
+            printf("    Line:       %d\n", __LINE__);                                                                  \
+            printf("    Error code: %d\n", error_code);                                                                \
+            printf("    Error text: %s\n", cudaGetErrorString(error_code));                                            \
+            exit(1);                                                                                                   \
+        }                                                                                                              \
+    } while (0)
 
-class Logger : public nvinfer1::ILogger
-{
+class Logger: public nvinfer1::ILogger {
 public:
-	nvinfer1::ILogger::Severity reportableSeverity;
+    nvinfer1::ILogger::Severity reportableSeverity;
 
-	explicit Logger(nvinfer1::ILogger::Severity severity = nvinfer1::ILogger::Severity::kINFO) :
-		reportableSeverity(severity)
-	{
-	}
+    explicit Logger(nvinfer1::ILogger::Severity severity = nvinfer1::ILogger::Severity::kINFO):
+        reportableSeverity(severity)
+    {
+    }
 
-	void log(nvinfer1::ILogger::Severity severity, const char* msg) noexcept override
-	{
-		if (severity > reportableSeverity)
-		{
-			return;
-		}
-		switch (severity)
-		{
-		case nvinfer1::ILogger::Severity::kINTERNAL_ERROR:
-			std::cerr << "INTERNAL_ERROR: ";
-			break;
-		case nvinfer1::ILogger::Severity::kERROR:
-			std::cerr << "ERROR: ";
-			break;
-		case nvinfer1::ILogger::Severity::kWARNING:
-			std::cerr << "WARNING: ";
-			break;
-		case nvinfer1::ILogger::Severity::kINFO:
-			std::cerr << "INFO: ";
-			break;
-		default:
-			std::cerr << "VERBOSE: ";
-			break;
-		}
-		std::cerr << msg << std::endl;
-	}
+    void log(nvinfer1::ILogger::Severity severity, const char* msg) noexcept override
+    {
+        if (severity > reportableSeverity) {
+            return;
+        }
+        switch (severity) {
+            case nvinfer1::ILogger::Severity::kINTERNAL_ERROR:
+                std::cerr << "INTERNAL_ERROR: ";
+                break;
+            case nvinfer1::ILogger::Severity::kERROR:
+                std::cerr << "ERROR: ";
+                break;
+            case nvinfer1::ILogger::Severity::kWARNING:
+                std::cerr << "WARNING: ";
+                break;
+            case nvinfer1::ILogger::Severity::kINFO:
+                std::cerr << "INFO: ";
+                break;
+            default:
+                std::cerr << "VERBOSE: ";
+                break;
+        }
+        std::cerr << msg << std::endl;
+    }
 };
 
 inline int get_size_by_dims(const nvinfer1::Dims& dims)
 {
-	int size = 1;
-	for (int i = 0; i < dims.nbDims; i++)
-	{
-		size *= dims.d[i];
-	}
-	return size;
+    int size = 1;
+    for (int i = 0; i < dims.nbDims; i++) {
+        size *= dims.d[i];
+    }
+    return size;
 }
 
 inline int type_to_size(const nvinfer1::DataType& dataType)
 {
-	switch (dataType)
-	{
-	case nvinfer1::DataType::kFLOAT:
-		return 4;
-	case nvinfer1::DataType::kHALF:
-		return 2;
-	case nvinfer1::DataType::kINT32:
-		return 4;
-	case nvinfer1::DataType::kINT8:
-		return 1;
-	case nvinfer1::DataType::kBOOL:
-		return 1;
-	default:
-		return 4;
-	}
+    switch (dataType) {
+        case nvinfer1::DataType::kFLOAT:
+            return 4;
+        case nvinfer1::DataType::kHALF:
+            return 2;
+        case nvinfer1::DataType::kINT32:
+            return 4;
+        case nvinfer1::DataType::kINT8:
+            return 1;
+        case nvinfer1::DataType::kBOOL:
+            return 1;
+        default:
+            return 4;
+    }
 }
 
 inline static float clamp(float val, float min, float max)
 {
-	return val > min ? (val < max ? val : max) : min;
+    return val > min ? (val < max ? val : max) : min;
 }
 
 inline bool IsPathExist(const std::string& path)
 {
-	if (access(path.c_str(), 0) == F_OK)
-	{
-		return true;
-	}
-	return false;
+    if (access(path.c_str(), 0) == F_OK) {
+        return true;
+    }
+    return false;
 }
 
 inline bool IsFile(const std::string& path)
 {
-	if (!IsPathExist(path))
-	{
-		printf("%s:%d %s not exist\n", __FILE__, __LINE__, path.c_str());
-		return false;
-	}
-	struct stat buffer;
-	return (stat(path.c_str(), &buffer) == 0 && S_ISREG(buffer.st_mode));
+    if (!IsPathExist(path)) {
+        printf("%s:%d %s not exist\n", __FILE__, __LINE__, path.c_str());
+        return false;
+    }
+    struct stat buffer;
+    return (stat(path.c_str(), &buffer) == 0 && S_ISREG(buffer.st_mode));
 }
 
 inline bool IsFolder(const std::string& path)
 {
-	if (!IsPathExist(path))
-	{
-		return false;
-	}
-	struct stat buffer;
-	return (stat(path.c_str(), &buffer) == 0 && S_ISDIR(buffer.st_mode));
+    if (!IsPathExist(path)) {
+        return false;
+    }
+    struct stat buffer;
+    return (stat(path.c_str(), &buffer) == 0 && S_ISDIR(buffer.st_mode));
 }
 
-namespace seg
-{
-	struct Binding
-	{
-		size_t size = 1;
-		size_t dsize = 1;
-		nvinfer1::Dims dims;
-		std::string name;
-	};
+namespace seg {
+struct Binding {
+    size_t         size  = 1;
+    size_t         dsize = 1;
+    nvinfer1::Dims dims;
+    std::string    name;
+};
 
-	struct Object
-	{
-		cv::Rect_<float> rect;
-		int label = 0;
-		float prob = 0.0;
-		cv::Mat boxMask;
-	};
+struct Object {
+    cv::Rect_<float> rect;
+    int              label = 0;
+    float            prob  = 0.0;
+    cv::Mat          boxMask;
+};
 
-	struct PreParam
-	{
-		float ratio = 1.0f;
-		float dw = 0.0f;
-		float dh = 0.0f;
-		float height = 0;
-		float width = 0;
-	};
-}
-#endif //JETSON_SEGMENT_COMMON_HPP
+struct PreParam {
+    float ratio  = 1.0f;
+    float dw     = 0.0f;
+    float dh     = 0.0f;
+    float height = 0;
+    float width  = 0;
+};
+}  // namespace seg
+#endif  // JETSON_SEGMENT_COMMON_HPP
diff --git a/csrc/jetson/segment/include/yolov8-seg.hpp b/csrc/jetson/segment/include/yolov8-seg.hpp
index 259c389..662db8a 100644
--- a/csrc/jetson/segment/include/yolov8-seg.hpp
+++ b/csrc/jetson/segment/include/yolov8-seg.hpp
@@ -3,546 +3,355 @@
 //
 #ifndef JETSON_SEGMENT_YOLOV8_SEG_HPP
 #define JETSON_SEGMENT_YOLOV8_SEG_HPP
-#include <fstream>
-#include "common.hpp"
 #include "NvInferPlugin.h"
+#include "common.hpp"
+#include <fstream>
 
 using namespace seg;
 
-class YOLOv8_seg
-{
+class YOLOv8_seg {
 public:
-	explicit YOLOv8_seg(const std::string& engine_file_path);
-	~YOLOv8_seg();
-
-	void make_pipe(bool warmup = true);
-	void copy_from_Mat(const cv::Mat& image);
-	void copy_from_Mat(const cv::Mat& image, cv::Size& size);
-	void letterbox(
-		const cv::Mat& image,
-		cv::Mat& out,
-		cv::Size& size
-	);
-	void infer();
-	void postprocess(
-		std::vector<Object>& objs,
-		float score_thres = 0.25f,
-		float iou_thres = 0.65f,
-		int topk = 100,
-		int seg_channels = 32,
-		int seg_h = 160,
-		int seg_w = 160
-	);
-	static void draw_objects(
-		const cv::Mat& image,
-		cv::Mat& res,
-		const std::vector<Object>& objs,
-		const std::vector<std::string>& CLASS_NAMES,
-		const std::vector<std::vector<unsigned int>>& COLORS,
-		const std::vector<std::vector<unsigned int>>& MASK_COLORS
-	);
-	int num_bindings;
-	int num_inputs = 0;
-	int num_outputs = 0;
-	std::vector<Binding> input_bindings;
-	std::vector<Binding> output_bindings;
-	std::vector<void*> host_ptrs;
-	std::vector<void*> device_ptrs;
-
-	PreParam pparam;
-private:
-	nvinfer1::ICudaEngine* engine = nullptr;
-	nvinfer1::IRuntime* runtime = nullptr;
-	nvinfer1::IExecutionContext* context = nullptr;
-	cudaStream_t stream = nullptr;
-	Logger gLogger{ nvinfer1::ILogger::Severity::kERROR };
+    explicit YOLOv8_seg(const std::string& engine_file_path);
+    ~YOLOv8_seg();
+
+    void                 make_pipe(bool warmup = true);
+    void                 copy_from_Mat(const cv::Mat& image);
+    void                 copy_from_Mat(const cv::Mat& image, cv::Size& size);
+    void                 letterbox(const cv::Mat& image, cv::Mat& out, cv::Size& size);
+    void                 infer();
+    void                 postprocess(std::vector<Object>& objs,
+                                     float                score_thres  = 0.25f,
+                                     float                iou_thres    = 0.65f,
+                                     int                  topk         = 100,
+                                     int                  seg_channels = 32,
+                                     int                  seg_h        = 160,
+                                     int                  seg_w        = 160);
+    static void          draw_objects(const cv::Mat&                                image,
+                                      cv::Mat&                                      res,
+                                      const std::vector<Object>&                    objs,
+                                      const std::vector<std::string>&               CLASS_NAMES,
+                                      const std::vector<std::vector<unsigned int>>& COLORS,
+                                      const std::vector<std::vector<unsigned int>>& MASK_COLORS);
+    int                  num_bindings;
+    int                  num_inputs  = 0;
+    int                  num_outputs = 0;
+    std::vector<Binding> input_bindings;
+    std::vector<Binding> output_bindings;
+    std::vector<void*>   host_ptrs;
+    std::vector<void*>   device_ptrs;
+
+    PreParam pparam;
 
+private:
+    nvinfer1::ICudaEngine*       engine  = nullptr;
+    nvinfer1::IRuntime*          runtime = nullptr;
+    nvinfer1::IExecutionContext* context = nullptr;
+    cudaStream_t                 stream  = nullptr;
+    Logger                       gLogger{nvinfer1::ILogger::Severity::kERROR};
 };
 
 YOLOv8_seg::YOLOv8_seg(const std::string& engine_file_path)
 {
-	std::ifstream file(engine_file_path, std::ios::binary);
-	assert(file.good());
-	file.seekg(0, std::ios::end);
-	auto size = file.tellg();
-	file.seekg(0, std::ios::beg);
-	char* trtModelStream = new char[size];
-	assert(trtModelStream);
-	file.read(trtModelStream, size);
-	file.close();
-	initLibNvInferPlugins(&this->gLogger, "");
-	this->runtime = nvinfer1::createInferRuntime(this->gLogger);
-	assert(this->runtime != nullptr);
-
-	this->engine = this->runtime->deserializeCudaEngine(trtModelStream, size);
-	assert(this->engine != nullptr);
-	delete[] trtModelStream;
-	this->context = this->engine->createExecutionContext();
-
-	assert(this->context != nullptr);
-	cudaStreamCreate(&this->stream);
-	this->num_bindings = this->engine->getNbBindings();
-
-	for (int i = 0; i < this->num_bindings; ++i)
-	{
-		Binding binding;
-		nvinfer1::Dims dims;
-		nvinfer1::DataType dtype = this->engine->getBindingDataType(i);
-		std::string name = this->engine->getBindingName(i);
-		binding.name = name;
-		binding.dsize = type_to_size(dtype);
-
-		bool IsInput = engine->bindingIsInput(i);
-		if (IsInput)
-		{
-			this->num_inputs += 1;
-			dims = this->engine->getProfileDimensions(
-				i,
-				0,
-				nvinfer1::OptProfileSelector::kMAX);
-			binding.size = get_size_by_dims(dims);
-			binding.dims = dims;
-			this->input_bindings.push_back(binding);
-			// set max opt shape
-			this->context->setBindingDimensions(i, dims);
-
-		}
-		else
-		{
-			dims = this->context->getBindingDimensions(i);
-			binding.size = get_size_by_dims(dims);
-			binding.dims = dims;
-			this->output_bindings.push_back(binding);
-			this->num_outputs += 1;
-		}
-	}
-
+    std::ifstream file(engine_file_path, std::ios::binary);
+    assert(file.good());
+    file.seekg(0, std::ios::end);
+    auto size = file.tellg();
+    file.seekg(0, std::ios::beg);
+    char* trtModelStream = new char[size];
+    assert(trtModelStream);
+    file.read(trtModelStream, size);
+    file.close();
+    initLibNvInferPlugins(&this->gLogger, "");
+    this->runtime = nvinfer1::createInferRuntime(this->gLogger);
+    assert(this->runtime != nullptr);
+
+    this->engine = this->runtime->deserializeCudaEngine(trtModelStream, size);
+    assert(this->engine != nullptr);
+    delete[] trtModelStream;
+    this->context = this->engine->createExecutionContext();
+
+    assert(this->context != nullptr);
+    cudaStreamCreate(&this->stream);
+    this->num_bindings = this->engine->getNbBindings();
+
+    for (int i = 0; i < this->num_bindings; ++i) {
+        Binding            binding;
+        nvinfer1::Dims     dims;
+        nvinfer1::DataType dtype = this->engine->getBindingDataType(i);
+        std::string        name  = this->engine->getBindingName(i);
+        binding.name             = name;
+        binding.dsize            = type_to_size(dtype);
+
+        bool IsInput = engine->bindingIsInput(i);
+        if (IsInput) {
+            this->num_inputs += 1;
+            dims         = this->engine->getProfileDimensions(i, 0, nvinfer1::OptProfileSelector::kMAX);
+            binding.size = get_size_by_dims(dims);
+            binding.dims = dims;
+            this->input_bindings.push_back(binding);
+            // set max opt shape
+            this->context->setBindingDimensions(i, dims);
+        }
+        else {
+            dims         = this->context->getBindingDimensions(i);
+            binding.size = get_size_by_dims(dims);
+            binding.dims = dims;
+            this->output_bindings.push_back(binding);
+            this->num_outputs += 1;
+        }
+    }
 }
 
 YOLOv8_seg::~YOLOv8_seg()
 {
-	this->context->destroy();
-	this->engine->destroy();
-	this->runtime->destroy();
-	cudaStreamDestroy(this->stream);
-	for (auto& ptr : this->device_ptrs)
-	{
-		CHECK(cudaFree(ptr));
-	}
-
-	for (auto& ptr : this->host_ptrs)
-	{
-		CHECK(cudaFreeHost(ptr));
-	}
+    this->context->destroy();
+    this->engine->destroy();
+    this->runtime->destroy();
+    cudaStreamDestroy(this->stream);
+    for (auto& ptr : this->device_ptrs) {
+        CHECK(cudaFree(ptr));
+    }
+
+    for (auto& ptr : this->host_ptrs) {
+        CHECK(cudaFreeHost(ptr));
+    }
 }
 
 void YOLOv8_seg::make_pipe(bool warmup)
 {
 
-	for (auto& bindings : this->input_bindings)
-	{
-		void* d_ptr;
-		CHECK(cudaMalloc(
-			&d_ptr,
-			bindings.size * bindings.dsize)
-		);
-		this->device_ptrs.push_back(d_ptr);
-	}
-
-	for (auto& bindings : this->output_bindings)
-	{
-		void* d_ptr, * h_ptr;
-		size_t size = bindings.size * bindings.dsize;
-		CHECK(cudaMalloc(
-			&d_ptr,
-			size)
-		);
-		CHECK(cudaHostAlloc(
-			&h_ptr,
-			size,
-			0)
-		);
-		this->device_ptrs.push_back(d_ptr);
-		this->host_ptrs.push_back(h_ptr);
-	}
-
-	if (warmup)
-	{
-		for (int i = 0; i < 10; i++)
-		{
-			for (auto& bindings : this->input_bindings)
-			{
-				size_t size = bindings.size * bindings.dsize;
-				void* h_ptr = malloc(size);
-				memset(h_ptr, 0, size);
-				CHECK(cudaMemcpyAsync(
-					this->device_ptrs[0],
-					h_ptr,
-					size,
-					cudaMemcpyHostToDevice,
-					this->stream)
-				);
-				free(h_ptr);
-			}
-			this->infer();
-		}
-		printf("model warmup 10 times\n");
-
-	}
+    for (auto& bindings : this->input_bindings) {
+        void* d_ptr;
+        CHECK(cudaMalloc(&d_ptr, bindings.size * bindings.dsize));
+        this->device_ptrs.push_back(d_ptr);
+    }
+
+    for (auto& bindings : this->output_bindings) {
+        void * d_ptr, *h_ptr;
+        size_t size = bindings.size * bindings.dsize;
+        CHECK(cudaMalloc(&d_ptr, size));
+        CHECK(cudaHostAlloc(&h_ptr, size, 0));
+        this->device_ptrs.push_back(d_ptr);
+        this->host_ptrs.push_back(h_ptr);
+    }
+
+    if (warmup) {
+        for (int i = 0; i < 10; i++) {
+            for (auto& bindings : this->input_bindings) {
+                size_t size  = bindings.size * bindings.dsize;
+                void*  h_ptr = malloc(size);
+                memset(h_ptr, 0, size);
+                CHECK(cudaMemcpyAsync(this->device_ptrs[0], h_ptr, size, cudaMemcpyHostToDevice, this->stream));
+                free(h_ptr);
+            }
+            this->infer();
+        }
+        printf("model warmup 10 times\n");
+    }
 }
 
-void YOLOv8_seg::letterbox(
-	const cv::Mat& image,
-	cv::Mat& out,
-	cv::Size& size
-)
+void YOLOv8_seg::letterbox(const cv::Mat& image, cv::Mat& out, cv::Size& size)
 {
-	const float inp_h = size.height;
-	const float inp_w = size.width;
-	float height = image.rows;
-	float width = image.cols;
-
-	float r = std::min(inp_h / height, inp_w / width);
-	int padw = std::round(width * r);
-	int padh = std::round(height * r);
-
-	cv::Mat tmp;
-	if ((int)width != padw || (int)height != padh)
-	{
-		cv::resize(
-			image,
-			tmp,
-			cv::Size(padw, padh)
-		);
-	}
-	else
-	{
-		tmp = image.clone();
-	}
-
-	float dw = inp_w - padw;
-	float dh = inp_h - padh;
-
-	dw /= 2.0f;
-	dh /= 2.0f;
-	int top = int(std::round(dh - 0.1f));
-	int bottom = int(std::round(dh + 0.1f));
-	int left = int(std::round(dw - 0.1f));
-	int right = int(std::round(dw + 0.1f));
-
-	cv::copyMakeBorder(
-		tmp,
-		tmp,
-		top,
-		bottom,
-		left,
-		right,
-		cv::BORDER_CONSTANT,
-		{ 114, 114, 114 }
-	);
-
-	cv::dnn::blobFromImage(tmp,
-		out,
-		1 / 255.f,
-		cv::Size(),
-		cv::Scalar(0, 0, 0),
-		true,
-		false,
-		CV_32F
-	);
-	this->pparam.ratio = 1 / r;
-	this->pparam.dw = dw;
-	this->pparam.dh = dh;
-	this->pparam.height = height;
-	this->pparam.width = width;;
+    const float inp_h  = size.height;
+    const float inp_w  = size.width;
+    float       height = image.rows;
+    float       width  = image.cols;
+
+    float r    = std::min(inp_h / height, inp_w / width);
+    int   padw = std::round(width * r);
+    int   padh = std::round(height * r);
+
+    cv::Mat tmp;
+    if ((int)width != padw || (int)height != padh) {
+        cv::resize(image, tmp, cv::Size(padw, padh));
+    }
+    else {
+        tmp = image.clone();
+    }
+
+    float dw = inp_w - padw;
+    float dh = inp_h - padh;
+
+    dw /= 2.0f;
+    dh /= 2.0f;
+    int top    = int(std::round(dh - 0.1f));
+    int bottom = int(std::round(dh + 0.1f));
+    int left   = int(std::round(dw - 0.1f));
+    int right  = int(std::round(dw + 0.1f));
+
+    cv::copyMakeBorder(tmp, tmp, top, bottom, left, right, cv::BORDER_CONSTANT, {114, 114, 114});
+
+    cv::dnn::blobFromImage(tmp, out, 1 / 255.f, cv::Size(), cv::Scalar(0, 0, 0), true, false, CV_32F);
+    this->pparam.ratio  = 1 / r;
+    this->pparam.dw     = dw;
+    this->pparam.dh     = dh;
+    this->pparam.height = height;
+    this->pparam.width  = width;
+    ;
 }
 
 void YOLOv8_seg::copy_from_Mat(const cv::Mat& image)
 {
-	cv::Mat nchw;
-	auto& in_binding = this->input_bindings[0];
-	auto width = in_binding.dims.d[3];
-	auto height = in_binding.dims.d[2];
-	cv::Size size{ width, height };
-	this->letterbox(
-		image,
-		nchw,
-		size
-	);
-
-	this->context->setBindingDimensions(
-		0,
-		nvinfer1::Dims
-			{
-				4,
-				{ 1, 3, height, width }
-			}
-	);
-
-	CHECK(cudaMemcpyAsync(
-		this->device_ptrs[0],
-		nchw.ptr<float>(),
-		nchw.total() * nchw.elemSize(),
-		cudaMemcpyHostToDevice,
-		this->stream)
-	);
+    cv::Mat  nchw;
+    auto&    in_binding = this->input_bindings[0];
+    auto     width      = in_binding.dims.d[3];
+    auto     height     = in_binding.dims.d[2];
+    cv::Size size{width, height};
+    this->letterbox(image, nchw, size);
+
+    this->context->setBindingDimensions(0, nvinfer1::Dims{4, {1, 3, height, width}});
+
+    CHECK(cudaMemcpyAsync(
+        this->device_ptrs[0], nchw.ptr<float>(), nchw.total() * nchw.elemSize(), cudaMemcpyHostToDevice, this->stream));
 }
 
 void YOLOv8_seg::copy_from_Mat(const cv::Mat& image, cv::Size& size)
 {
-	cv::Mat nchw;
-	this->letterbox(
-		image,
-		nchw,
-		size
-	);
-	this->context->setBindingDimensions(
-		0,
-		nvinfer1::Dims
-			{ 4,
-			  { 1, 3, size.height, size.width }
-			}
-	);
-	CHECK(cudaMemcpyAsync(
-		this->device_ptrs[0],
-		nchw.ptr<float>(),
-		nchw.total() * nchw.elemSize(),
-		cudaMemcpyHostToDevice,
-		this->stream)
-	);
+    cv::Mat nchw;
+    this->letterbox(image, nchw, size);
+    this->context->setBindingDimensions(0, nvinfer1::Dims{4, {1, 3, size.height, size.width}});
+    CHECK(cudaMemcpyAsync(
+        this->device_ptrs[0], nchw.ptr<float>(), nchw.total() * nchw.elemSize(), cudaMemcpyHostToDevice, this->stream));
 }
 
 void YOLOv8_seg::infer()
 {
 
-	this->context->enqueueV2(
-		this->device_ptrs.data(),
-		this->stream,
-		nullptr
-	);
-	for (int i = 0; i < this->num_outputs; i++)
-	{
-		size_t osize = this->output_bindings[i].size * this->output_bindings[i].dsize;
-		CHECK(cudaMemcpyAsync(this->host_ptrs[i],
-			this->device_ptrs[i + this->num_inputs],
-			osize,
-			cudaMemcpyDeviceToHost,
-			this->stream)
-		);
-
-	}
-	cudaStreamSynchronize(this->stream);
-
+    this->context->enqueueV2(this->device_ptrs.data(), this->stream, nullptr);
+    for (int i = 0; i < this->num_outputs; i++) {
+        size_t osize = this->output_bindings[i].size * this->output_bindings[i].dsize;
+        CHECK(cudaMemcpyAsync(
+            this->host_ptrs[i], this->device_ptrs[i + this->num_inputs], osize, cudaMemcpyDeviceToHost, this->stream));
+    }
+    cudaStreamSynchronize(this->stream);
 }
 
-void YOLOv8_seg::postprocess(std::vector<Object>& objs,
-	float score_thres,
-	float iou_thres,
-	int topk,
-	int seg_channels,
-	int seg_h,
-	int seg_w
-)
+void YOLOv8_seg::postprocess(
+    std::vector<Object>& objs, float score_thres, float iou_thres, int topk, int seg_channels, int seg_h, int seg_w)
 {
-	objs.clear();
-	auto input_h = this->input_bindings[0].dims.d[2];
-	auto input_w = this->input_bindings[0].dims.d[3];
-	auto num_anchors = this->output_bindings[0].dims.d[1];
-	auto num_channels = this->output_bindings[0].dims.d[2];
-
-	auto& dw = this->pparam.dw;
-	auto& dh = this->pparam.dh;
-	auto& width = this->pparam.width;
-	auto& height = this->pparam.height;
-	auto& ratio = this->pparam.ratio;
-
-	auto* output = static_cast<float*>(this->host_ptrs[0]);
-	cv::Mat protos = cv::Mat(seg_channels, seg_h * seg_w, CV_32F,
-		static_cast<float*>(this->host_ptrs[1]));
-
-	std::vector<int> labels;
-	std::vector<float> scores;
-	std::vector<cv::Rect> bboxes;
-	std::vector<cv::Mat> mask_confs;
-	std::vector<int> indices;
-
-	for (int i = 0; i < num_anchors; i++)
-	{
-		float* ptr = output + i * num_channels;
-		float score = *(ptr + 4);
-		if (score > score_thres)
-		{
-			float x0 = *ptr++ - dw;
-			float y0 = *ptr++ - dh;
-			float x1 = *ptr++ - dw;
-			float y1 = *ptr++ - dh;
-
-			x0 = clamp(x0 * ratio, 0.f, width);
-			y0 = clamp(y0 * ratio, 0.f, height);
-			x1 = clamp(x1 * ratio, 0.f, width);
-			y1 = clamp(y1 * ratio, 0.f, height);
-
-			int label = *(++ptr);
-			cv::Mat mask_conf = cv::Mat(1, seg_channels, CV_32F, ++ptr);
-			mask_confs.push_back(mask_conf);
-			labels.push_back(label);
-			scores.push_back(score);
-			bboxes.push_back(cv::Rect_<float>(x0, y0, x1 - x0, y1 - y0));
-
-		}
-	}
+    objs.clear();
+    auto input_h      = this->input_bindings[0].dims.d[2];
+    auto input_w      = this->input_bindings[0].dims.d[3];
+    auto num_anchors  = this->output_bindings[0].dims.d[1];
+    auto num_channels = this->output_bindings[0].dims.d[2];
+
+    auto& dw     = this->pparam.dw;
+    auto& dh     = this->pparam.dh;
+    auto& width  = this->pparam.width;
+    auto& height = this->pparam.height;
+    auto& ratio  = this->pparam.ratio;
+
+    auto*   output = static_cast<float*>(this->host_ptrs[0]);
+    cv::Mat protos = cv::Mat(seg_channels, seg_h * seg_w, CV_32F, static_cast<float*>(this->host_ptrs[1]));
+
+    std::vector<int>      labels;
+    std::vector<float>    scores;
+    std::vector<cv::Rect> bboxes;
+    std::vector<cv::Mat>  mask_confs;
+    std::vector<int>      indices;
+
+    for (int i = 0; i < num_anchors; i++) {
+        float* ptr   = output + i * num_channels;
+        float  score = *(ptr + 4);
+        if (score > score_thres) {
+            float x0 = *ptr++ - dw;
+            float y0 = *ptr++ - dh;
+            float x1 = *ptr++ - dw;
+            float y1 = *ptr++ - dh;
+
+            x0 = clamp(x0 * ratio, 0.f, width);
+            y0 = clamp(y0 * ratio, 0.f, height);
+            x1 = clamp(x1 * ratio, 0.f, width);
+            y1 = clamp(y1 * ratio, 0.f, height);
+
+            int     label     = *(++ptr);
+            cv::Mat mask_conf = cv::Mat(1, seg_channels, CV_32F, ++ptr);
+            mask_confs.push_back(mask_conf);
+            labels.push_back(label);
+            scores.push_back(score);
+            bboxes.push_back(cv::Rect_<float>(x0, y0, x1 - x0, y1 - y0));
+        }
+    }
 
 #if defined(BATCHED_NMS)
-	cv::dnn::NMSBoxesBatched(
-		bboxes,
-		scores,
-		labels,
-		score_thres,
-		iou_thres,
-		indices
-	);
+    cv::dnn::NMSBoxesBatched(bboxes, scores, labels, score_thres, iou_thres, indices);
 #else
-	cv::dnn::NMSBoxes(
-		bboxes,
-		scores,
-		score_thres,
-		iou_thres,
-		indices
-	);
+    cv::dnn::NMSBoxes(bboxes, scores, score_thres, iou_thres, indices);
 #endif
 
-	cv::Mat masks;
-	int cnt = 0;
-	for (auto& i : indices)
-	{
-		if (cnt >= topk)
-		{
-			break;
-		}
-		cv::Rect tmp = bboxes[i];
-		Object obj;
-		obj.label = labels[i];
-		obj.rect = tmp;
-		obj.prob = scores[i];
-		masks.push_back(mask_confs[i]);
-		objs.push_back(obj);
-		cnt += 1;
-	}
-	if(masks.empty())
-	{
-		//masks is empty
-	}
-	else
-	{
-		cv::Mat matmulRes = (masks * protos).t();
-		cv::Mat maskMat = matmulRes.reshape(indices.size(), { seg_w, seg_h });
-
-		std::vector<cv::Mat> maskChannels;
-		cv::split(maskMat, maskChannels);
-		int scale_dw = dw / input_w * seg_w;
-		int scale_dh = dh / input_h * seg_h;
-
-		cv::Rect roi(
-			scale_dw,
-			scale_dh,
-			seg_w - 2 * scale_dw,
-			seg_h - 2 * scale_dh);
-
-		for (int i = 0; i < indices.size(); i++)
-		{
-			cv::Mat dest, mask;
-			cv::exp(-maskChannels[i], dest);
-			dest = 1.0 / (1.0 + dest);
-			dest = dest(roi);
-			cv::resize(
-				dest,
-				mask,
-				cv::Size((int)width, (int)height),
-				cv::INTER_LINEAR
-			);
-			objs[i].boxMask = mask(objs[i].rect) > 0.5f;
-		}
-	}
+    cv::Mat masks;
+    int     cnt = 0;
+    for (auto& i : indices) {
+        if (cnt >= topk) {
+            break;
+        }
+        cv::Rect tmp = bboxes[i];
+        Object   obj;
+        obj.label = labels[i];
+        obj.rect  = tmp;
+        obj.prob  = scores[i];
+        masks.push_back(mask_confs[i]);
+        objs.push_back(obj);
+        cnt += 1;
+    }
+    if (masks.empty()) {
+        // masks is empty
+    }
+    else {
+        cv::Mat matmulRes = (masks * protos).t();
+        cv::Mat maskMat   = matmulRes.reshape(indices.size(), {seg_w, seg_h});
+
+        std::vector<cv::Mat> maskChannels;
+        cv::split(maskMat, maskChannels);
+        int scale_dw = dw / input_w * seg_w;
+        int scale_dh = dh / input_h * seg_h;
+
+        cv::Rect roi(scale_dw, scale_dh, seg_w - 2 * scale_dw, seg_h - 2 * scale_dh);
+
+        for (int i = 0; i < indices.size(); i++) {
+            cv::Mat dest, mask;
+            cv::exp(-maskChannels[i], dest);
+            dest = 1.0 / (1.0 + dest);
+            dest = dest(roi);
+            cv::resize(dest, mask, cv::Size((int)width, (int)height), cv::INTER_LINEAR);
+            objs[i].boxMask = mask(objs[i].rect) > 0.5f;
+        }
+    }
 }
 
-void YOLOv8_seg::draw_objects(const cv::Mat& image,
-	cv::Mat& res,
-	const std::vector<Object>& objs,
-	const std::vector<std::string>& CLASS_NAMES,
-	const std::vector<std::vector<unsigned int>>& COLORS,
-	const std::vector<std::vector<unsigned int>>& MASK_COLORS
-)
+void YOLOv8_seg::draw_objects(const cv::Mat&                                image,
+                              cv::Mat&                                      res,
+                              const std::vector<Object>&                    objs,
+                              const std::vector<std::string>&               CLASS_NAMES,
+                              const std::vector<std::vector<unsigned int>>& COLORS,
+                              const std::vector<std::vector<unsigned int>>& MASK_COLORS)
 {
-	res = image.clone();
-	cv::Mat mask = image.clone();
-	for (auto& obj : objs)
-	{
-		int idx = obj.label;
-		cv::Scalar color = cv::Scalar(
-			COLORS[idx][0],
-			COLORS[idx][1],
-			COLORS[idx][2]
-		);
-		cv::Scalar mask_color = cv::Scalar(
-			MASK_COLORS[idx % 20][0],
-			MASK_COLORS[idx % 20][1],
-			MASK_COLORS[idx % 20][2]
-		);
-		cv::rectangle(
-			res,
-			obj.rect,
-			color,
-			2
-		);
-
-		char text[256];
-		sprintf(
-			text,
-			"%s %.1f%%",
-			CLASS_NAMES[idx].c_str(),
-			obj.prob * 100
-		);
-		mask(obj.rect).setTo(mask_color, obj.boxMask);
-
-		int baseLine = 0;
-		cv::Size label_size = cv::getTextSize(
-			text,
-			cv::FONT_HERSHEY_SIMPLEX,
-			0.4,
-			1,
-			&baseLine
-		);
-
-		int x = (int)obj.rect.x;
-		int y = (int)obj.rect.y + 1;
-
-		if (y > res.rows)
-			y = res.rows;
-
-		cv::rectangle(
-			res,
-			cv::Rect(x, y, label_size.width, label_size.height + baseLine),
-			{ 0, 0, 255 },
-			-1
-		);
-
-		cv::putText(
-			res,
-			text,
-			cv::Point(x, y + label_size.height),
-			cv::FONT_HERSHEY_SIMPLEX,
-			0.4,
-			{ 255, 255, 255 },
-			1
-		);
-	}
-	cv::addWeighted(
-		res,
-		0.5,
-		mask,
-		0.8,
-		1,
-		res
-	);
+    res          = image.clone();
+    cv::Mat mask = image.clone();
+    for (auto& obj : objs) {
+        int        idx   = obj.label;
+        cv::Scalar color = cv::Scalar(COLORS[idx][0], COLORS[idx][1], COLORS[idx][2]);
+        cv::Scalar mask_color =
+            cv::Scalar(MASK_COLORS[idx % 20][0], MASK_COLORS[idx % 20][1], MASK_COLORS[idx % 20][2]);
+        cv::rectangle(res, obj.rect, color, 2);
+
+        char text[256];
+        sprintf(text, "%s %.1f%%", CLASS_NAMES[idx].c_str(), obj.prob * 100);
+        mask(obj.rect).setTo(mask_color, obj.boxMask);
+
+        int      baseLine   = 0;
+        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine);
+
+        int x = (int)obj.rect.x;
+        int y = (int)obj.rect.y + 1;
+
+        if (y > res.rows)
+            y = res.rows;
+
+        cv::rectangle(res, cv::Rect(x, y, label_size.width, label_size.height + baseLine), {0, 0, 255}, -1);
+
+        cv::putText(res, text, cv::Point(x, y + label_size.height), cv::FONT_HERSHEY_SIMPLEX, 0.4, {255, 255, 255}, 1);
+    }
+    cv::addWeighted(res, 0.5, mask, 0.8, 1, res);
 }
-#endif //JETSON_SEGMENT_YOLOV8_SEG_HPP
+#endif  // JETSON_SEGMENT_YOLOV8_SEG_HPP
diff --git a/csrc/jetson/segment/main.cpp b/csrc/jetson/segment/main.cpp
index 2fadb9f..b41c737 100644
--- a/csrc/jetson/segment/main.cpp
+++ b/csrc/jetson/segment/main.cpp
@@ -2,177 +2,131 @@
 // Created by ubuntu on 3/16/23.
 //
 #include "chrono"
-#include "yolov8-seg.hpp"
 #include "opencv2/opencv.hpp"
+#include "yolov8-seg.hpp"
 
 const std::vector<std::string> CLASS_NAMES = {
-	"person", "bicycle", "car", "motorcycle", "airplane", "bus",
-	"train", "truck", "boat", "traffic light", "fire hydrant",
-	"stop sign", "parking meter", "bench", "bird", "cat",
-	"dog", "horse", "sheep", "cow", "elephant",
-	"bear", "zebra", "giraffe", "backpack", "umbrella",
-	"handbag", "tie", "suitcase", "frisbee", "skis",
-	"snowboard", "sports ball", "kite", "baseball bat", "baseball glove",
-	"skateboard", "surfboard", "tennis racket", "bottle", "wine glass",
-	"cup", "fork", "knife", "spoon", "bowl",
-	"banana", "apple", "sandwich", "orange", "broccoli",
-	"carrot", "hot dog", "pizza", "donut", "cake",
-	"chair", "couch", "potted plant", "bed", "dining table",
-	"toilet", "tv", "laptop", "mouse", "remote",
-	"keyboard", "cell phone", "microwave", "oven",
-	"toaster", "sink", "refrigerator", "book", "clock", "vase",
-	"scissors", "teddy bear", "hair drier", "toothbrush" };
+    "person",         "bicycle",    "car",           "motorcycle",    "airplane",     "bus",           "train",
+    "truck",          "boat",       "traffic light", "fire hydrant",  "stop sign",    "parking meter", "bench",
+    "bird",           "cat",        "dog",           "horse",         "sheep",        "cow",           "elephant",
+    "bear",           "zebra",      "giraffe",       "backpack",      "umbrella",     "handbag",       "tie",
+    "suitcase",       "frisbee",    "skis",          "snowboard",     "sports ball",  "kite",          "baseball bat",
+    "baseball glove", "skateboard", "surfboard",     "tennis racket", "bottle",       "wine glass",    "cup",
+    "fork",           "knife",      "spoon",         "bowl",          "banana",       "apple",         "sandwich",
+    "orange",         "broccoli",   "carrot",        "hot dog",       "pizza",        "donut",         "cake",
+    "chair",          "couch",      "potted plant",  "bed",           "dining table", "toilet",        "tv",
+    "laptop",         "mouse",      "remote",        "keyboard",      "cell phone",   "microwave",     "oven",
+    "toaster",        "sink",       "refrigerator",  "book",          "clock",        "vase",          "scissors",
+    "teddy bear",     "hair drier", "toothbrush"};
 
 const std::vector<std::vector<unsigned int>> COLORS = {
-	{ 0, 114, 189 }, { 217, 83, 25 }, { 237, 177, 32 },
-	{ 126, 47, 142 }, { 119, 172, 48 }, { 77, 190, 238 },
-	{ 162, 20, 47 }, { 76, 76, 76 }, { 153, 153, 153 },
-	{ 255, 0, 0 }, { 255, 128, 0 }, { 191, 191, 0 },
-	{ 0, 255, 0 }, { 0, 0, 255 }, { 170, 0, 255 },
-	{ 85, 85, 0 }, { 85, 170, 0 }, { 85, 255, 0 },
-	{ 170, 85, 0 }, { 170, 170, 0 }, { 170, 255, 0 },
-	{ 255, 85, 0 }, { 255, 170, 0 }, { 255, 255, 0 },
-	{ 0, 85, 128 }, { 0, 170, 128 }, { 0, 255, 128 },
-	{ 85, 0, 128 }, { 85, 85, 128 }, { 85, 170, 128 },
-	{ 85, 255, 128 }, { 170, 0, 128 }, { 170, 85, 128 },
-	{ 170, 170, 128 }, { 170, 255, 128 }, { 255, 0, 128 },
-	{ 255, 85, 128 }, { 255, 170, 128 }, { 255, 255, 128 },
-	{ 0, 85, 255 }, { 0, 170, 255 }, { 0, 255, 255 },
-	{ 85, 0, 255 }, { 85, 85, 255 }, { 85, 170, 255 },
-	{ 85, 255, 255 }, { 170, 0, 255 }, { 170, 85, 255 },
-	{ 170, 170, 255 }, { 170, 255, 255 }, { 255, 0, 255 },
-	{ 255, 85, 255 }, { 255, 170, 255 }, { 85, 0, 0 },
-	{ 128, 0, 0 }, { 170, 0, 0 }, { 212, 0, 0 },
-	{ 255, 0, 0 }, { 0, 43, 0 }, { 0, 85, 0 },
-	{ 0, 128, 0 }, { 0, 170, 0 }, { 0, 212, 0 },
-	{ 0, 255, 0 }, { 0, 0, 43 }, { 0, 0, 85 },
-	{ 0, 0, 128 }, { 0, 0, 170 }, { 0, 0, 212 },
-	{ 0, 0, 255 }, { 0, 0, 0 }, { 36, 36, 36 },
-	{ 73, 73, 73 }, { 109, 109, 109 }, { 146, 146, 146 },
-	{ 182, 182, 182 }, { 219, 219, 219 }, { 0, 114, 189 },
-	{ 80, 183, 189 }, { 128, 128, 0 }
-};
+    {0, 114, 189},   {217, 83, 25},   {237, 177, 32},  {126, 47, 142},  {119, 172, 48},  {77, 190, 238},
+    {162, 20, 47},   {76, 76, 76},    {153, 153, 153}, {255, 0, 0},     {255, 128, 0},   {191, 191, 0},
+    {0, 255, 0},     {0, 0, 255},     {170, 0, 255},   {85, 85, 0},     {85, 170, 0},    {85, 255, 0},
+    {170, 85, 0},    {170, 170, 0},   {170, 255, 0},   {255, 85, 0},    {255, 170, 0},   {255, 255, 0},
+    {0, 85, 128},    {0, 170, 128},   {0, 255, 128},   {85, 0, 128},    {85, 85, 128},   {85, 170, 128},
+    {85, 255, 128},  {170, 0, 128},   {170, 85, 128},  {170, 170, 128}, {170, 255, 128}, {255, 0, 128},
+    {255, 85, 128},  {255, 170, 128}, {255, 255, 128}, {0, 85, 255},    {0, 170, 255},   {0, 255, 255},
+    {85, 0, 255},    {85, 85, 255},   {85, 170, 255},  {85, 255, 255},  {170, 0, 255},   {170, 85, 255},
+    {170, 170, 255}, {170, 255, 255}, {255, 0, 255},   {255, 85, 255},  {255, 170, 255}, {85, 0, 0},
+    {128, 0, 0},     {170, 0, 0},     {212, 0, 0},     {255, 0, 0},     {0, 43, 0},      {0, 85, 0},
+    {0, 128, 0},     {0, 170, 0},     {0, 212, 0},     {0, 255, 0},     {0, 0, 43},      {0, 0, 85},
+    {0, 0, 128},     {0, 0, 170},     {0, 0, 212},     {0, 0, 255},     {0, 0, 0},       {36, 36, 36},
+    {73, 73, 73},    {109, 109, 109}, {146, 146, 146}, {182, 182, 182}, {219, 219, 219}, {0, 114, 189},
+    {80, 183, 189},  {128, 128, 0}};
 
 const std::vector<std::vector<unsigned int>> MASK_COLORS = {
-	{ 255, 56, 56 }, { 255, 157, 151 }, { 255, 112, 31 },
-	{ 255, 178, 29 }, { 207, 210, 49 }, { 72, 249, 10 },
-	{ 146, 204, 23 }, { 61, 219, 134 }, { 26, 147, 52 },
-	{ 0, 212, 187 }, { 44, 153, 168 }, { 0, 194, 255 },
-	{ 52, 69, 147 }, { 100, 115, 255 }, { 0, 24, 236 },
-	{ 132, 56, 255 }, { 82, 0, 133 }, { 203, 56, 255 },
-	{ 255, 149, 200 }, { 255, 55, 199 }
-};
+    {255, 56, 56},  {255, 157, 151}, {255, 112, 31}, {255, 178, 29}, {207, 210, 49},  {72, 249, 10}, {146, 204, 23},
+    {61, 219, 134}, {26, 147, 52},   {0, 212, 187},  {44, 153, 168}, {0, 194, 255},   {52, 69, 147}, {100, 115, 255},
+    {0, 24, 236},   {132, 56, 255},  {82, 0, 133},   {203, 56, 255}, {255, 149, 200}, {255, 55, 199}};
 
 int main(int argc, char** argv)
 {
-	// cuda:0
-	cudaSetDevice(0);
+    // cuda:0
+    cudaSetDevice(0);
 
-	const std::string engine_file_path{ argv[1] };
-	const std::string path{ argv[2] };
+    const std::string engine_file_path{argv[1]};
+    const std::string path{argv[2]};
 
-	std::vector<std::string> imagePathList;
-	bool isVideo{ false };
+    std::vector<std::string> imagePathList;
+    bool                     isVideo{false};
 
-	assert(argc == 3);
+    assert(argc == 3);
 
-	auto yolov8 = new YOLOv8_seg(engine_file_path);
-	yolov8->make_pipe(true);
+    auto yolov8 = new YOLOv8_seg(engine_file_path);
+    yolov8->make_pipe(true);
 
-	if (IsFile(path))
-	{
-		std::string suffix = path.substr(path.find_last_of('.') + 1);
-		if (
-			suffix == "jpg" ||
-				suffix == "jpeg" ||
-				suffix == "png"
-			)
-		{
-			imagePathList.push_back(path);
-		}
-		else if (
-			suffix == "mp4" ||
-				suffix == "avi" ||
-				suffix == "m4v" ||
-				suffix == "mpeg" ||
-				suffix == "mov" ||
-				suffix == "mkv"
-			)
-		{
-			isVideo = true;
-		}
-		else
-		{
-			printf("suffix %s is wrong !!!\n", suffix.c_str());
-			std::abort();
-		}
-	}
-	else if (IsFolder(path))
-	{
-		cv::glob(path + "/*.jpg", imagePathList);
-	}
+    if (IsFile(path)) {
+        std::string suffix = path.substr(path.find_last_of('.') + 1);
+        if (suffix == "jpg" || suffix == "jpeg" || suffix == "png") {
+            imagePathList.push_back(path);
+        }
+        else if (suffix == "mp4" || suffix == "avi" || suffix == "m4v" || suffix == "mpeg" || suffix == "mov"
+                 || suffix == "mkv") {
+            isVideo = true;
+        }
+        else {
+            printf("suffix %s is wrong !!!\n", suffix.c_str());
+            std::abort();
+        }
+    }
+    else if (IsFolder(path)) {
+        cv::glob(path + "/*.jpg", imagePathList);
+    }
 
-	cv::Mat res, image;
-	cv::Size size = cv::Size{ 640, 640 };
-	int topk = 100;
-	int seg_h = 160;
-	int seg_w = 160;
-	int seg_channels = 32;
-	float score_thres = 0.25f;
-	float iou_thres = 0.65f;
+    cv::Mat  res, image;
+    cv::Size size         = cv::Size{640, 640};
+    int      topk         = 100;
+    int      seg_h        = 160;
+    int      seg_w        = 160;
+    int      seg_channels = 32;
+    float    score_thres  = 0.25f;
+    float    iou_thres    = 0.65f;
 
-	std::vector<Object> objs;
+    std::vector<Object> objs;
 
-	cv::namedWindow("result", cv::WINDOW_AUTOSIZE);
+    cv::namedWindow("result", cv::WINDOW_AUTOSIZE);
 
-	if (isVideo)
-	{
-		cv::VideoCapture cap(path);
+    if (isVideo) {
+        cv::VideoCapture cap(path);
 
-		if (!cap.isOpened())
-		{
-			printf("can not open %s\n", path.c_str());
-			return -1;
-		}
-		while (cap.read(image))
-		{
-			objs.clear();
-			yolov8->copy_from_Mat(image, size);
-			auto start = std::chrono::system_clock::now();
-			yolov8->infer();
-			auto end = std::chrono::system_clock::now();
-			yolov8->postprocess(objs, score_thres, iou_thres, topk, seg_channels, seg_h, seg_w);
-			yolov8->draw_objects(image, res, objs, CLASS_NAMES, COLORS, MASK_COLORS);
-			auto tc = (double)
-				std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.;
-			printf("cost %2.4lf ms\n", tc);
-			cv::imshow("result", res);
-			if (cv::waitKey(10) == 'q')
-			{
-				break;
-			}
-		}
-	}
-	else
-	{
-		for (auto& path : imagePathList)
-		{
-			objs.clear();
-			image = cv::imread(path);
-			yolov8->copy_from_Mat(image, size);
-			auto start = std::chrono::system_clock::now();
-			yolov8->infer();
-			auto end = std::chrono::system_clock::now();
-			yolov8->postprocess(objs, score_thres, iou_thres, topk, seg_channels, seg_h, seg_w);
-			yolov8->draw_objects(image, res, objs, CLASS_NAMES, COLORS, MASK_COLORS);
-			auto tc = (double)
-				std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.;
-			printf("cost %2.4lf ms\n", tc);
-			cv::imshow("result", res);
-			cv::waitKey(0);
-		}
-	}
-	cv::destroyAllWindows();
-	delete yolov8;
-	return 0;
+        if (!cap.isOpened()) {
+            printf("can not open %s\n", path.c_str());
+            return -1;
+        }
+        while (cap.read(image)) {
+            objs.clear();
+            yolov8->copy_from_Mat(image, size);
+            auto start = std::chrono::system_clock::now();
+            yolov8->infer();
+            auto end = std::chrono::system_clock::now();
+            yolov8->postprocess(objs, score_thres, iou_thres, topk, seg_channels, seg_h, seg_w);
+            yolov8->draw_objects(image, res, objs, CLASS_NAMES, COLORS, MASK_COLORS);
+            auto tc = (double)std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.;
+            printf("cost %2.4lf ms\n", tc);
+            cv::imshow("result", res);
+            if (cv::waitKey(10) == 'q') {
+                break;
+            }
+        }
+    }
+    else {
+        for (auto& path : imagePathList) {
+            objs.clear();
+            image = cv::imread(path);
+            yolov8->copy_from_Mat(image, size);
+            auto start = std::chrono::system_clock::now();
+            yolov8->infer();
+            auto end = std::chrono::system_clock::now();
+            yolov8->postprocess(objs, score_thres, iou_thres, topk, seg_channels, seg_h, seg_w);
+            yolov8->draw_objects(image, res, objs, CLASS_NAMES, COLORS, MASK_COLORS);
+            auto tc = (double)std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.;
+            printf("cost %2.4lf ms\n", tc);
+            cv::imshow("result", res);
+            cv::waitKey(0);
+        }
+    }
+    cv::destroyAllWindows();
+    delete yolov8;
+    return 0;
 }
diff --git a/csrc/pose/normal/include/common.hpp b/csrc/pose/normal/include/common.hpp
index c74fa80..cd1e14b 100644
--- a/csrc/pose/normal/include/common.hpp
+++ b/csrc/pose/normal/include/common.hpp
@@ -4,154 +4,139 @@
 
 #ifndef POSE_NORMAL_COMMON_HPP
 #define POSE_NORMAL_COMMON_HPP
+#include "NvInfer.h"
 #include "opencv2/opencv.hpp"
 #include <sys/stat.h>
 #include <unistd.h>
-#include "NvInfer.h"
 
-#define CHECK(call)                                   \
-do                                                    \
-{                                                     \
-    const cudaError_t error_code = call;              \
-    if (error_code != cudaSuccess)                    \
-    {                                                 \
-        printf("CUDA Error:\n");                      \
-        printf("    File:       %s\n", __FILE__);     \
-        printf("    Line:       %d\n", __LINE__);     \
-        printf("    Error code: %d\n", error_code);   \
-        printf("    Error text: %s\n",                \
-            cudaGetErrorString(error_code));          \
-        exit(1);                                      \
-    }                                                 \
-} while (0)
+#define CHECK(call)                                                                                                    \
+    do {                                                                                                               \
+        const cudaError_t error_code = call;                                                                           \
+        if (error_code != cudaSuccess) {                                                                               \
+            printf("CUDA Error:\n");                                                                                   \
+            printf("    File:       %s\n", __FILE__);                                                                  \
+            printf("    Line:       %d\n", __LINE__);                                                                  \
+            printf("    Error code: %d\n", error_code);                                                                \
+            printf("    Error text: %s\n", cudaGetErrorString(error_code));                                            \
+            exit(1);                                                                                                   \
+        }                                                                                                              \
+    } while (0)
 
-class Logger : public nvinfer1::ILogger
-{
+class Logger: public nvinfer1::ILogger {
 public:
-	nvinfer1::ILogger::Severity reportableSeverity;
+    nvinfer1::ILogger::Severity reportableSeverity;
 
-	explicit Logger(nvinfer1::ILogger::Severity severity = nvinfer1::ILogger::Severity::kINFO) :
-		reportableSeverity(severity)
-	{
-	}
+    explicit Logger(nvinfer1::ILogger::Severity severity = nvinfer1::ILogger::Severity::kINFO):
+        reportableSeverity(severity)
+    {
+    }
 
-	void log(nvinfer1::ILogger::Severity severity, const char* msg) noexcept override
-	{
-		if (severity > reportableSeverity)
-		{
-			return;
-		}
-		switch (severity)
-		{
-		case nvinfer1::ILogger::Severity::kINTERNAL_ERROR:
-			std::cerr << "INTERNAL_ERROR: ";
-			break;
-		case nvinfer1::ILogger::Severity::kERROR:
-			std::cerr << "ERROR: ";
-			break;
-		case nvinfer1::ILogger::Severity::kWARNING:
-			std::cerr << "WARNING: ";
-			break;
-		case nvinfer1::ILogger::Severity::kINFO:
-			std::cerr << "INFO: ";
-			break;
-		default:
-			std::cerr << "VERBOSE: ";
-			break;
-		}
-		std::cerr << msg << std::endl;
-	}
+    void log(nvinfer1::ILogger::Severity severity, const char* msg) noexcept override
+    {
+        if (severity > reportableSeverity) {
+            return;
+        }
+        switch (severity) {
+            case nvinfer1::ILogger::Severity::kINTERNAL_ERROR:
+                std::cerr << "INTERNAL_ERROR: ";
+                break;
+            case nvinfer1::ILogger::Severity::kERROR:
+                std::cerr << "ERROR: ";
+                break;
+            case nvinfer1::ILogger::Severity::kWARNING:
+                std::cerr << "WARNING: ";
+                break;
+            case nvinfer1::ILogger::Severity::kINFO:
+                std::cerr << "INFO: ";
+                break;
+            default:
+                std::cerr << "VERBOSE: ";
+                break;
+        }
+        std::cerr << msg << std::endl;
+    }
 };
 
 inline int get_size_by_dims(const nvinfer1::Dims& dims)
 {
-	int size = 1;
-	for (int i = 0; i < dims.nbDims; i++)
-	{
-		size *= dims.d[i];
-	}
-	return size;
+    int size = 1;
+    for (int i = 0; i < dims.nbDims; i++) {
+        size *= dims.d[i];
+    }
+    return size;
 }
 
 inline int type_to_size(const nvinfer1::DataType& dataType)
 {
-	switch (dataType)
-	{
-	case nvinfer1::DataType::kFLOAT:
-		return 4;
-	case nvinfer1::DataType::kHALF:
-		return 2;
-	case nvinfer1::DataType::kINT32:
-		return 4;
-	case nvinfer1::DataType::kINT8:
-		return 1;
-	case nvinfer1::DataType::kBOOL:
-		return 1;
-	default:
-		return 4;
-	}
+    switch (dataType) {
+        case nvinfer1::DataType::kFLOAT:
+            return 4;
+        case nvinfer1::DataType::kHALF:
+            return 2;
+        case nvinfer1::DataType::kINT32:
+            return 4;
+        case nvinfer1::DataType::kINT8:
+            return 1;
+        case nvinfer1::DataType::kBOOL:
+            return 1;
+        default:
+            return 4;
+    }
 }
 
 inline static float clamp(float val, float min, float max)
 {
-	return val > min ? (val < max ? val : max) : min;
+    return val > min ? (val < max ? val : max) : min;
 }
 
 inline bool IsPathExist(const std::string& path)
 {
-	if (access(path.c_str(), 0) == F_OK)
-	{
-		return true;
-	}
-	return false;
+    if (access(path.c_str(), 0) == F_OK) {
+        return true;
+    }
+    return false;
 }
 
 inline bool IsFile(const std::string& path)
 {
-	if (!IsPathExist(path))
-	{
-		printf("%s:%d %s not exist\n", __FILE__, __LINE__, path.c_str());
-		return false;
-	}
-	struct stat buffer;
-	return (stat(path.c_str(), &buffer) == 0 && S_ISREG(buffer.st_mode));
+    if (!IsPathExist(path)) {
+        printf("%s:%d %s not exist\n", __FILE__, __LINE__, path.c_str());
+        return false;
+    }
+    struct stat buffer;
+    return (stat(path.c_str(), &buffer) == 0 && S_ISREG(buffer.st_mode));
 }
 
 inline bool IsFolder(const std::string& path)
 {
-	if (!IsPathExist(path))
-	{
-		return false;
-	}
-	struct stat buffer;
-	return (stat(path.c_str(), &buffer) == 0 && S_ISDIR(buffer.st_mode));
+    if (!IsPathExist(path)) {
+        return false;
+    }
+    struct stat buffer;
+    return (stat(path.c_str(), &buffer) == 0 && S_ISDIR(buffer.st_mode));
 }
 
-namespace pose
-{
-	struct Binding
-	{
-		size_t size = 1;
-		size_t dsize = 1;
-		nvinfer1::Dims dims;
-		std::string name;
-	};
+namespace pose {
+struct Binding {
+    size_t         size  = 1;
+    size_t         dsize = 1;
+    nvinfer1::Dims dims;
+    std::string    name;
+};
 
-	struct Object
-	{
-		cv::Rect_<float> rect;
-		int label = 0;
-		float prob = 0.0;
-        std::vector<float> kps;
-	};
+struct Object {
+    cv::Rect_<float>   rect;
+    int                label = 0;
+    float              prob  = 0.0;
+    std::vector<float> kps;
+};
 
-	struct PreParam
-	{
-		float ratio = 1.0f;
-		float dw = 0.0f;
-		float dh = 0.0f;
-		float height = 0;
-		float width = 0;
-	};
-}
-#endif //POSE_NORMAL_COMMON_HPP
+struct PreParam {
+    float ratio  = 1.0f;
+    float dw     = 0.0f;
+    float dh     = 0.0f;
+    float height = 0;
+    float width  = 0;
+};
+}  // namespace pose
+#endif  // POSE_NORMAL_COMMON_HPP
diff --git a/csrc/pose/normal/include/yolov8-pose.hpp b/csrc/pose/normal/include/yolov8-pose.hpp
index 1280df4..7d88524 100644
--- a/csrc/pose/normal/include/yolov8-pose.hpp
+++ b/csrc/pose/normal/include/yolov8-pose.hpp
@@ -4,73 +4,63 @@
 #ifndef POSE_NORMAL_YOLOv8_pose_HPP
 #define POSE_NORMAL_YOLOv8_pose_HPP
 
-#include "fstream"
-#include "common.hpp"
 #include "NvInferPlugin.h"
+#include "common.hpp"
+#include "fstream"
 
 using namespace pose;
 
 class YOLOv8_pose {
 public:
-    explicit YOLOv8_pose(const std::string &engine_file_path);
+    explicit YOLOv8_pose(const std::string& engine_file_path);
 
     ~YOLOv8_pose();
 
     void make_pipe(bool warmup = true);
 
-    void copy_from_Mat(const cv::Mat &image);
+    void copy_from_Mat(const cv::Mat& image);
 
-    void copy_from_Mat(const cv::Mat &image, cv::Size &size);
+    void copy_from_Mat(const cv::Mat& image, cv::Size& size);
 
-    void letterbox(
-            const cv::Mat &image,
-            cv::Mat &out,
-            cv::Size &size
-    );
+    void letterbox(const cv::Mat& image, cv::Mat& out, cv::Size& size);
 
     void infer();
 
-    void postprocess(
-            std::vector<Object> &objs,
-            float score_thres = 0.25f,
-            float iou_thres = 0.65f,
-            int topk = 100
-    );
-
-    static void draw_objects(
-            const cv::Mat &image,
-            cv::Mat &res,
-            const std::vector<Object> &objs,
-            const std::vector<std::vector<unsigned int>> &SKELETON,
-            const std::vector<std::vector<unsigned int>> &KPS_COLORS,
-            const std::vector<std::vector<unsigned int>> &LIMB_COLORS
-    );
-
-    int num_bindings;
-    int num_inputs = 0;
-    int num_outputs = 0;
+    void postprocess(std::vector<Object>& objs, float score_thres = 0.25f, float iou_thres = 0.65f, int topk = 100);
+
+    static void draw_objects(const cv::Mat&                                image,
+                             cv::Mat&                                      res,
+                             const std::vector<Object>&                    objs,
+                             const std::vector<std::vector<unsigned int>>& SKELETON,
+                             const std::vector<std::vector<unsigned int>>& KPS_COLORS,
+                             const std::vector<std::vector<unsigned int>>& LIMB_COLORS);
+
+    int                  num_bindings;
+    int                  num_inputs  = 0;
+    int                  num_outputs = 0;
     std::vector<Binding> input_bindings;
     std::vector<Binding> output_bindings;
-    std::vector<void *> host_ptrs;
-    std::vector<void *> device_ptrs;
+    std::vector<void*>   host_ptrs;
+    std::vector<void*>   device_ptrs;
 
     PreParam pparam;
-private:
-    nvinfer1::ICudaEngine *engine = nullptr;
-    nvinfer1::IRuntime *runtime = nullptr;
-    nvinfer1::IExecutionContext *context = nullptr;
-    cudaStream_t stream = nullptr;
-    Logger gLogger{nvinfer1::ILogger::Severity::kERROR};
 
+private:
+    nvinfer1::ICudaEngine*       engine  = nullptr;
+    nvinfer1::IRuntime*          runtime = nullptr;
+    nvinfer1::IExecutionContext* context = nullptr;
+    cudaStream_t                 stream  = nullptr;
+    Logger                       gLogger{nvinfer1::ILogger::Severity::kERROR};
 };
 
-YOLOv8_pose::YOLOv8_pose(const std::string &engine_file_path) {
+YOLOv8_pose::YOLOv8_pose(const std::string& engine_file_path)
+{
     std::ifstream file(engine_file_path, std::ios::binary);
     assert(file.good());
     file.seekg(0, std::ios::end);
     auto size = file.tellg();
     file.seekg(0, std::ios::beg);
-    char *trtModelStream = new char[size];
+    char* trtModelStream = new char[size];
     assert(trtModelStream);
     file.read(trtModelStream, size);
     file.close();
@@ -88,125 +78,97 @@ YOLOv8_pose::YOLOv8_pose(const std::string &engine_file_path) {
     this->num_bindings = this->engine->getNbBindings();
 
     for (int i = 0; i < this->num_bindings; ++i) {
-        Binding binding;
-        nvinfer1::Dims dims;
+        Binding            binding;
+        nvinfer1::Dims     dims;
         nvinfer1::DataType dtype = this->engine->getBindingDataType(i);
-        std::string name = this->engine->getBindingName(i);
-        binding.name = name;
-        binding.dsize = type_to_size(dtype);
+        std::string        name  = this->engine->getBindingName(i);
+        binding.name             = name;
+        binding.dsize            = type_to_size(dtype);
 
         bool IsInput = engine->bindingIsInput(i);
         if (IsInput) {
             this->num_inputs += 1;
-            dims = this->engine->getProfileDimensions(
-                    i,
-                    0,
-                    nvinfer1::OptProfileSelector::kMAX);
+            dims         = this->engine->getProfileDimensions(i, 0, nvinfer1::OptProfileSelector::kMAX);
             binding.size = get_size_by_dims(dims);
             binding.dims = dims;
             this->input_bindings.push_back(binding);
             // set max opt shape
             this->context->setBindingDimensions(i, dims);
-
-        } else {
-            dims = this->context->getBindingDimensions(i);
+        }
+        else {
+            dims         = this->context->getBindingDimensions(i);
             binding.size = get_size_by_dims(dims);
             binding.dims = dims;
             this->output_bindings.push_back(binding);
             this->num_outputs += 1;
         }
     }
-
 }
 
-YOLOv8_pose::~YOLOv8_pose() {
+YOLOv8_pose::~YOLOv8_pose()
+{
     this->context->destroy();
     this->engine->destroy();
     this->runtime->destroy();
     cudaStreamDestroy(this->stream);
-    for (auto &ptr: this->device_ptrs) {
+    for (auto& ptr : this->device_ptrs) {
         CHECK(cudaFree(ptr));
     }
 
-    for (auto &ptr: this->host_ptrs) {
+    for (auto& ptr : this->host_ptrs) {
         CHECK(cudaFreeHost(ptr));
     }
-
 }
 
-void YOLOv8_pose::make_pipe(bool warmup) {
+void YOLOv8_pose::make_pipe(bool warmup)
+{
 
-    for (auto &bindings: this->input_bindings) {
-        void *d_ptr;
-        CHECK(cudaMallocAsync(
-                &d_ptr,
-                bindings.size * bindings.dsize,
-                this->stream)
-        );
+    for (auto& bindings : this->input_bindings) {
+        void* d_ptr;
+        CHECK(cudaMallocAsync(&d_ptr, bindings.size * bindings.dsize, this->stream));
         this->device_ptrs.push_back(d_ptr);
     }
 
-    for (auto &bindings: this->output_bindings) {
-        void *d_ptr, *h_ptr;
+    for (auto& bindings : this->output_bindings) {
+        void * d_ptr, *h_ptr;
         size_t size = bindings.size * bindings.dsize;
-        CHECK(cudaMallocAsync(
-                &d_ptr,
-                size,
-                this->stream)
-        );
-        CHECK(cudaHostAlloc(
-                &h_ptr,
-                size,
-                0)
-        );
+        CHECK(cudaMallocAsync(&d_ptr, size, this->stream));
+        CHECK(cudaHostAlloc(&h_ptr, size, 0));
         this->device_ptrs.push_back(d_ptr);
         this->host_ptrs.push_back(h_ptr);
     }
 
     if (warmup) {
         for (int i = 0; i < 10; i++) {
-            for (auto &bindings: this->input_bindings) {
-                size_t size = bindings.size * bindings.dsize;
-                void *h_ptr = malloc(size);
+            for (auto& bindings : this->input_bindings) {
+                size_t size  = bindings.size * bindings.dsize;
+                void*  h_ptr = malloc(size);
                 memset(h_ptr, 0, size);
-                CHECK(cudaMemcpyAsync(
-                        this->device_ptrs[0],
-                        h_ptr,
-                        size,
-                        cudaMemcpyHostToDevice,
-                        this->stream)
-                );
+                CHECK(cudaMemcpyAsync(this->device_ptrs[0], h_ptr, size, cudaMemcpyHostToDevice, this->stream));
                 free(h_ptr);
             }
             this->infer();
         }
         printf("model warmup 10 times\n");
-
     }
 }
 
-void YOLOv8_pose::letterbox(
-        const cv::Mat &image,
-        cv::Mat &out,
-        cv::Size &size
-) {
-    const float inp_h = size.height;
-    const float inp_w = size.width;
-    float height = image.rows;
-    float width = image.cols;
+void YOLOv8_pose::letterbox(const cv::Mat& image, cv::Mat& out, cv::Size& size)
+{
+    const float inp_h  = size.height;
+    const float inp_w  = size.width;
+    float       height = image.rows;
+    float       width  = image.cols;
 
-    float r = std::min(inp_h / height, inp_w / width);
-    int padw = std::round(width * r);
-    int padh = std::round(height * r);
+    float r    = std::min(inp_h / height, inp_w / width);
+    int   padw = std::round(width * r);
+    int   padh = std::round(height * r);
 
     cv::Mat tmp;
-    if ((int) width != padw || (int) height != padh) {
-        cv::resize(
-                image,
-                tmp,
-                cv::Size(padw, padh)
-        );
-    } else {
+    if ((int)width != padw || (int)height != padh) {
+        cv::resize(image, tmp, cv::Size(padw, padh));
+    }
+    else {
         tmp = image.clone();
     }
 
@@ -215,146 +177,83 @@ void YOLOv8_pose::letterbox(
 
     dw /= 2.0f;
     dh /= 2.0f;
-    int top = int(std::round(dh - 0.1f));
+    int top    = int(std::round(dh - 0.1f));
     int bottom = int(std::round(dh + 0.1f));
-    int left = int(std::round(dw - 0.1f));
-    int right = int(std::round(dw + 0.1f));
-
-    cv::copyMakeBorder(
-            tmp,
-            tmp,
-            top,
-            bottom,
-            left,
-            right,
-            cv::BORDER_CONSTANT,
-            {114, 114, 114}
-    );
-
-    cv::dnn::blobFromImage(tmp,
-                           out,
-                           1 / 255.f,
-                           cv::Size(),
-                           cv::Scalar(0, 0, 0),
-                           true,
-                           false,
-                           CV_32F
-    );
-    this->pparam.ratio = 1 / r;
-    this->pparam.dw = dw;
-    this->pparam.dh = dh;
+    int left   = int(std::round(dw - 0.1f));
+    int right  = int(std::round(dw + 0.1f));
+
+    cv::copyMakeBorder(tmp, tmp, top, bottom, left, right, cv::BORDER_CONSTANT, {114, 114, 114});
+
+    cv::dnn::blobFromImage(tmp, out, 1 / 255.f, cv::Size(), cv::Scalar(0, 0, 0), true, false, CV_32F);
+    this->pparam.ratio  = 1 / r;
+    this->pparam.dw     = dw;
+    this->pparam.dh     = dh;
     this->pparam.height = height;
-    this->pparam.width = width;;
+    this->pparam.width  = width;
+    ;
 }
 
-void YOLOv8_pose::copy_from_Mat(const cv::Mat &image) {
-    cv::Mat nchw;
-    auto &in_binding = this->input_bindings[0];
-    auto width = in_binding.dims.d[3];
-    auto height = in_binding.dims.d[2];
+void YOLOv8_pose::copy_from_Mat(const cv::Mat& image)
+{
+    cv::Mat  nchw;
+    auto&    in_binding = this->input_bindings[0];
+    auto     width      = in_binding.dims.d[3];
+    auto     height     = in_binding.dims.d[2];
     cv::Size size{width, height};
-    this->letterbox(
-            image,
-            nchw,
-            size
-    );
-
-    this->context->setBindingDimensions(
-            0,
-            nvinfer1::Dims
-                    {
-                            4,
-                            {1, 3, height, width}
-                    }
-    );
+    this->letterbox(image, nchw, size);
+
+    this->context->setBindingDimensions(0, nvinfer1::Dims{4, {1, 3, height, width}});
 
     CHECK(cudaMemcpyAsync(
-            this->device_ptrs[0],
-            nchw.ptr<float>(),
-            nchw.total() * nchw.elemSize(),
-            cudaMemcpyHostToDevice,
-            this->stream)
-    );
+        this->device_ptrs[0], nchw.ptr<float>(), nchw.total() * nchw.elemSize(), cudaMemcpyHostToDevice, this->stream));
 }
 
-void YOLOv8_pose::copy_from_Mat(const cv::Mat &image, cv::Size &size) {
+void YOLOv8_pose::copy_from_Mat(const cv::Mat& image, cv::Size& size)
+{
     cv::Mat nchw;
-    this->letterbox(
-            image,
-            nchw,
-            size
-    );
-    this->context->setBindingDimensions(
-            0,
-            nvinfer1::Dims
-                    {4,
-                     {1, 3, size.height, size.width}
-                    }
-    );
+    this->letterbox(image, nchw, size);
+    this->context->setBindingDimensions(0, nvinfer1::Dims{4, {1, 3, size.height, size.width}});
     CHECK(cudaMemcpyAsync(
-            this->device_ptrs[0],
-            nchw.ptr<float>(),
-            nchw.total() * nchw.elemSize(),
-            cudaMemcpyHostToDevice,
-            this->stream)
-    );
+        this->device_ptrs[0], nchw.ptr<float>(), nchw.total() * nchw.elemSize(), cudaMemcpyHostToDevice, this->stream));
 }
 
-void YOLOv8_pose::infer() {
+void YOLOv8_pose::infer()
+{
 
-    this->context->enqueueV2(
-            this->device_ptrs.data(),
-            this->stream,
-            nullptr
-    );
+    this->context->enqueueV2(this->device_ptrs.data(), this->stream, nullptr);
     for (int i = 0; i < this->num_outputs; i++) {
         size_t osize = this->output_bindings[i].size * this->output_bindings[i].dsize;
-        CHECK(cudaMemcpyAsync(this->host_ptrs[i],
-                              this->device_ptrs[i + this->num_inputs],
-                              osize,
-                              cudaMemcpyDeviceToHost,
-                              this->stream)
-        );
-
+        CHECK(cudaMemcpyAsync(
+            this->host_ptrs[i], this->device_ptrs[i + this->num_inputs], osize, cudaMemcpyDeviceToHost, this->stream));
     }
     cudaStreamSynchronize(this->stream);
-
 }
 
-void YOLOv8_pose::postprocess(
-        std::vector<Object> &objs,
-        float score_thres,
-        float iou_thres,
-        int topk
-) {
+void YOLOv8_pose::postprocess(std::vector<Object>& objs, float score_thres, float iou_thres, int topk)
+{
     objs.clear();
     auto num_channels = this->output_bindings[0].dims.d[1];
-    auto num_anchors = this->output_bindings[0].dims.d[2];
-
-    auto &dw = this->pparam.dw;
-    auto &dh = this->pparam.dh;
-    auto &width = this->pparam.width;
-    auto &height = this->pparam.height;
-    auto &ratio = this->pparam.ratio;
-
-    std::vector<cv::Rect> bboxes;
-    std::vector<float> scores;
-    std::vector<int> labels;
-    std::vector<int> indices;
+    auto num_anchors  = this->output_bindings[0].dims.d[2];
+
+    auto& dw     = this->pparam.dw;
+    auto& dh     = this->pparam.dh;
+    auto& width  = this->pparam.width;
+    auto& height = this->pparam.height;
+    auto& ratio  = this->pparam.ratio;
+
+    std::vector<cv::Rect>           bboxes;
+    std::vector<float>              scores;
+    std::vector<int>                labels;
+    std::vector<int>                indices;
     std::vector<std::vector<float>> kpss;
 
-    cv::Mat output = cv::Mat(
-            num_channels,
-            num_anchors,
-            CV_32F,
-            static_cast<float *>(this->host_ptrs[0])
-    );
-    output = output.t();
+    cv::Mat output = cv::Mat(num_channels, num_anchors, CV_32F, static_cast<float*>(this->host_ptrs[0]));
+    output         = output.t();
     for (int i = 0; i < num_anchors; i++) {
-        auto row_ptr = output.row(i).ptr<float>();
+        auto row_ptr    = output.row(i).ptr<float>();
         auto bboxes_ptr = row_ptr;
         auto scores_ptr = row_ptr + 4;
-        auto kps_ptr = row_ptr + 5;
+        auto kps_ptr    = row_ptr + 5;
 
         float score = *scores_ptr;
         if (score > score_thres) {
@@ -369,17 +268,17 @@ void YOLOv8_pose::postprocess(
             float y1 = clamp((y + 0.5f * h) * ratio, 0.f, height);
 
             cv::Rect_<float> bbox;
-            bbox.x = x0;
-            bbox.y = y0;
-            bbox.width = x1 - x0;
+            bbox.x      = x0;
+            bbox.y      = y0;
+            bbox.width  = x1 - x0;
             bbox.height = y1 - y0;
             std::vector<float> kps;
             for (int k = 0; k < 17; k++) {
                 float kps_x = (*(kps_ptr + 3 * k) - dw) * ratio;
                 float kps_y = (*(kps_ptr + 3 * k + 1) - dh) * ratio;
                 float kps_s = *(kps_ptr + 3 * k + 2);
-                kps_x = clamp(kps_x, 0.f, width);
-                kps_y = clamp(kps_y, 0.f, height);
+                kps_x       = clamp(kps_x, 0.f, width);
+                kps_y       = clamp(kps_y, 0.f, height);
                 kps.push_back(kps_x);
                 kps.push_back(kps_y);
                 kps.push_back(kps_s);
@@ -393,110 +292,68 @@ void YOLOv8_pose::postprocess(
     }
 
 #ifdef BATCHED_NMS
-    cv::dnn::NMSBoxesBatched(
-            bboxes,
-            scores,
-            labels,
-            score_thres,
-            iou_thres,
-            indices
-    );
+    cv::dnn::NMSBoxesBatched(bboxes, scores, labels, score_thres, iou_thres, indices);
 #else
-    cv::dnn::NMSBoxes(
-        bboxes,
-        scores,
-        score_thres,
-        iou_thres,
-        indices
-    );
+    cv::dnn::NMSBoxes(bboxes, scores, score_thres, iou_thres, indices);
 #endif
 
     int cnt = 0;
-    for (auto &i: indices) {
+    for (auto& i : indices) {
         if (cnt >= topk) {
             break;
         }
         Object obj;
-        obj.rect = bboxes[i];
-        obj.prob = scores[i];
+        obj.rect  = bboxes[i];
+        obj.prob  = scores[i];
         obj.label = labels[i];
-        obj.kps = kpss[i];
+        obj.kps   = kpss[i];
         objs.push_back(obj);
         cnt += 1;
     }
 }
 
-void YOLOv8_pose::draw_objects(
-        const cv::Mat &image,
-        cv::Mat &res,
-        const std::vector<Object> &objs,
-        const std::vector<std::vector<unsigned int>> &SKELETON,
-        const std::vector<std::vector<unsigned int>> &KPS_COLORS,
-        const std::vector<std::vector<unsigned int>> &LIMB_COLORS
-) {
-    res = image.clone();
+void YOLOv8_pose::draw_objects(const cv::Mat&                                image,
+                               cv::Mat&                                      res,
+                               const std::vector<Object>&                    objs,
+                               const std::vector<std::vector<unsigned int>>& SKELETON,
+                               const std::vector<std::vector<unsigned int>>& KPS_COLORS,
+                               const std::vector<std::vector<unsigned int>>& LIMB_COLORS)
+{
+    res                 = image.clone();
     const int num_point = 17;
-    for (auto &obj: objs) {
-        cv::rectangle(
-                res,
-                obj.rect,
-                {0, 0, 255},
-                2
-        );
+    for (auto& obj : objs) {
+        cv::rectangle(res, obj.rect, {0, 0, 255}, 2);
 
         char text[256];
-        sprintf(
-                text,
-                "person %.1f%%",
-                obj.prob * 100
-        );
-
-        int baseLine = 0;
-        cv::Size label_size = cv::getTextSize(
-                text,
-                cv::FONT_HERSHEY_SIMPLEX,
-                0.4,
-                1,
-                &baseLine
-        );
-
-        int x = (int) obj.rect.x;
-        int y = (int) obj.rect.y + 1;
+        sprintf(text, "person %.1f%%", obj.prob * 100);
+
+        int      baseLine   = 0;
+        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine);
+
+        int x = (int)obj.rect.x;
+        int y = (int)obj.rect.y + 1;
 
         if (y > res.rows)
             y = res.rows;
 
-        cv::rectangle(
-                res,
-                cv::Rect(x, y, label_size.width, label_size.height + baseLine),
-                {0, 0, 255},
-                -1
-        );
-
-        cv::putText(
-                res,
-                text,
-                cv::Point(x, y + label_size.height),
-                cv::FONT_HERSHEY_SIMPLEX,
-                0.4,
-                {255, 255, 255},
-                1
-        );
-
-        auto &kps = obj.kps;
+        cv::rectangle(res, cv::Rect(x, y, label_size.width, label_size.height + baseLine), {0, 0, 255}, -1);
+
+        cv::putText(res, text, cv::Point(x, y + label_size.height), cv::FONT_HERSHEY_SIMPLEX, 0.4, {255, 255, 255}, 1);
+
+        auto& kps = obj.kps;
         for (int k = 0; k < num_point + 2; k++) {
             if (k < num_point) {
-                int kps_x = std::round(kps[k * 3]);
-                int kps_y = std::round(kps[k * 3 + 1]);
+                int   kps_x = std::round(kps[k * 3]);
+                int   kps_y = std::round(kps[k * 3 + 1]);
                 float kps_s = kps[k * 3 + 2];
                 if (kps_s > 0.5f) {
                     cv::Scalar kps_color = cv::Scalar(KPS_COLORS[k][0], KPS_COLORS[k][1], KPS_COLORS[k][2]);
                     cv::circle(res, {kps_x, kps_y}, 5, kps_color, -1);
                 }
             }
-            auto &ske = SKELETON[k];
-            int pos1_x = std::round(kps[(ske[0] - 1) * 3]);
-            int pos1_y = std::round(kps[(ske[0] - 1) * 3 + 1]);
+            auto& ske    = SKELETON[k];
+            int   pos1_x = std::round(kps[(ske[0] - 1) * 3]);
+            int   pos1_y = std::round(kps[(ske[0] - 1) * 3 + 1]);
 
             int pos2_x = std::round(kps[(ske[1] - 1) * 3]);
             int pos2_y = std::round(kps[(ske[1] - 1) * 3 + 1]);
@@ -504,7 +361,6 @@ void YOLOv8_pose::draw_objects(
             float pos1_s = kps[(ske[0] - 1) * 3 + 2];
             float pos2_s = kps[(ske[1] - 1) * 3 + 2];
 
-
             if (pos1_s > 0.5f && pos2_s > 0.5f) {
                 cv::Scalar limb_color = cv::Scalar(LIMB_COLORS[k][0], LIMB_COLORS[k][1], LIMB_COLORS[k][2]);
                 cv::line(res, {pos1_x, pos1_y}, {pos2_x, pos2_y}, limb_color, 2);
@@ -513,4 +369,4 @@ void YOLOv8_pose::draw_objects(
     }
 }
 
-#endif //POSE_NORMAL_YOLOv8_pose_HPP
+#endif  // POSE_NORMAL_YOLOv8_pose_HPP
diff --git a/csrc/pose/normal/main.cpp b/csrc/pose/normal/main.cpp
index 8a0720f..50d3ac2 100644
--- a/csrc/pose/normal/main.cpp
+++ b/csrc/pose/normal/main.cpp
@@ -2,70 +2,69 @@
 // Created by ubuntu on 4/7/23.
 //
 #include "chrono"
-#include "yolov8-pose.hpp"
 #include "opencv2/opencv.hpp"
+#include "yolov8-pose.hpp"
 
-
-const std::vector<std::vector<unsigned int>> KPS_COLORS =
-        {{0,   255, 0},
-         {0,   255, 0},
-         {0,   255, 0},
-         {0,   255, 0},
-         {0,   255, 0},
-         {255, 128, 0},
-         {255, 128, 0},
-         {255, 128, 0},
-         {255, 128, 0},
-         {255, 128, 0},
-         {255, 128, 0},
-         {51,  153, 255},
-         {51,  153, 255},
-         {51,  153, 255},
-         {51,  153, 255},
-         {51,  153, 255},
-         {51,  153, 255}};
+const std::vector<std::vector<unsigned int>> KPS_COLORS = {{0, 255, 0},
+                                                           {0, 255, 0},
+                                                           {0, 255, 0},
+                                                           {0, 255, 0},
+                                                           {0, 255, 0},
+                                                           {255, 128, 0},
+                                                           {255, 128, 0},
+                                                           {255, 128, 0},
+                                                           {255, 128, 0},
+                                                           {255, 128, 0},
+                                                           {255, 128, 0},
+                                                           {51, 153, 255},
+                                                           {51, 153, 255},
+                                                           {51, 153, 255},
+                                                           {51, 153, 255},
+                                                           {51, 153, 255},
+                                                           {51, 153, 255}};
 
 const std::vector<std::vector<unsigned int>> SKELETON = {{16, 14},
                                                          {14, 12},
                                                          {17, 15},
                                                          {15, 13},
                                                          {12, 13},
-                                                         {6,  12},
-                                                         {7,  13},
-                                                         {6,  7},
-                                                         {6,  8},
-                                                         {7,  9},
-                                                         {8,  10},
-                                                         {9,  11},
-                                                         {2,  3},
-                                                         {1,  2},
-                                                         {1,  3},
-                                                         {2,  4},
-                                                         {3,  5},
-                                                         {4,  6},
-                                                         {5,  7}};
+                                                         {6, 12},
+                                                         {7, 13},
+                                                         {6, 7},
+                                                         {6, 8},
+                                                         {7, 9},
+                                                         {8, 10},
+                                                         {9, 11},
+                                                         {2, 3},
+                                                         {1, 2},
+                                                         {1, 3},
+                                                         {2, 4},
+                                                         {3, 5},
+                                                         {4, 6},
+                                                         {5, 7}};
 
-const std::vector<std::vector<unsigned int>> LIMB_COLORS = {{51,  153, 255},
-                                                            {51,  153, 255},
-                                                            {51,  153, 255},
-                                                            {51,  153, 255},
-                                                            {255, 51,  255},
-                                                            {255, 51,  255},
-                                                            {255, 51,  255},
+const std::vector<std::vector<unsigned int>> LIMB_COLORS = {{51, 153, 255},
+                                                            {51, 153, 255},
+                                                            {51, 153, 255},
+                                                            {51, 153, 255},
+                                                            {255, 51, 255},
+                                                            {255, 51, 255},
+                                                            {255, 51, 255},
                                                             {255, 128, 0},
                                                             {255, 128, 0},
                                                             {255, 128, 0},
                                                             {255, 128, 0},
                                                             {255, 128, 0},
-                                                            {0,   255, 0},
-                                                            {0,   255, 0},
-                                                            {0,   255, 0},
-                                                            {0,   255, 0},
-                                                            {0,   255, 0},
-                                                            {0,   255, 0},
-                                                            {0,   255, 0}};
+                                                            {0, 255, 0},
+                                                            {0, 255, 0},
+                                                            {0, 255, 0},
+                                                            {0, 255, 0},
+                                                            {0, 255, 0},
+                                                            {0, 255, 0},
+                                                            {0, 255, 0}};
 
-int main(int argc, char **argv) {
+int main(int argc, char** argv)
+{
     // cuda:0
     cudaSetDevice(0);
 
@@ -73,7 +72,7 @@ int main(int argc, char **argv) {
     const std::string path{argv[2]};
 
     std::vector<std::string> imagePathList;
-    bool isVideo{false};
+    bool                     isVideo{false};
 
     assert(argc == 3);
 
@@ -82,34 +81,27 @@ int main(int argc, char **argv) {
 
     if (IsFile(path)) {
         std::string suffix = path.substr(path.find_last_of('.') + 1);
-        if (
-                suffix == "jpg" ||
-                suffix == "jpeg" ||
-                suffix == "png"
-                ) {
+        if (suffix == "jpg" || suffix == "jpeg" || suffix == "png") {
             imagePathList.push_back(path);
-        } else if (
-                suffix == "mp4" ||
-                suffix == "avi" ||
-                suffix == "m4v" ||
-                suffix == "mpeg" ||
-                suffix == "mov" ||
-                suffix == "mkv"
-                ) {
+        }
+        else if (suffix == "mp4" || suffix == "avi" || suffix == "m4v" || suffix == "mpeg" || suffix == "mov"
+                 || suffix == "mkv") {
             isVideo = true;
-        } else {
+        }
+        else {
             printf("suffix %s is wrong !!!\n", suffix.c_str());
             std::abort();
         }
-    } else if (IsFolder(path)) {
+    }
+    else if (IsFolder(path)) {
         cv::glob(path + "/*.jpg", imagePathList);
     }
 
-    cv::Mat res, image;
-    cv::Size size = cv::Size{640, 640};
-    int topk = 100;
-    float score_thres = 0.25f;
-    float iou_thres = 0.65f;
+    cv::Mat  res, image;
+    cv::Size size        = cv::Size{640, 640};
+    int      topk        = 100;
+    float    score_thres = 0.25f;
+    float    iou_thres   = 0.65f;
 
     std::vector<Object> objs;
 
@@ -130,16 +122,16 @@ int main(int argc, char **argv) {
             auto end = std::chrono::system_clock::now();
             yolov8_pose->postprocess(objs, score_thres, iou_thres, topk);
             yolov8_pose->draw_objects(image, res, objs, SKELETON, KPS_COLORS, LIMB_COLORS);
-            auto tc = (double)
-                              std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.;
+            auto tc = (double)std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.;
             printf("cost %2.4lf ms\n", tc);
             cv::imshow("result", res);
             if (cv::waitKey(10) == 'q') {
                 break;
             }
         }
-    } else {
-        for (auto &path: imagePathList) {
+    }
+    else {
+        for (auto& path : imagePathList) {
             objs.clear();
             image = cv::imread(path);
             yolov8_pose->copy_from_Mat(image, size);
@@ -148,8 +140,7 @@ int main(int argc, char **argv) {
             auto end = std::chrono::system_clock::now();
             yolov8_pose->postprocess(objs, score_thres, iou_thres, topk);
             yolov8_pose->draw_objects(image, res, objs, SKELETON, KPS_COLORS, LIMB_COLORS);
-            auto tc = (double)
-                              std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.;
+            auto tc = (double)std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.;
             printf("cost %2.4lf ms\n", tc);
             cv::imshow("result", res);
             cv::waitKey(0);
diff --git a/csrc/segment/normal/include/common.hpp b/csrc/segment/normal/include/common.hpp
index f9a40c0..cca435a 100644
--- a/csrc/segment/normal/include/common.hpp
+++ b/csrc/segment/normal/include/common.hpp
@@ -4,154 +4,139 @@
 
 #ifndef SEGMENT_NORMAL_COMMON_HPP
 #define SEGMENT_NORMAL_COMMON_HPP
+#include "NvInfer.h"
 #include "opencv2/opencv.hpp"
 #include <sys/stat.h>
 #include <unistd.h>
-#include "NvInfer.h"
 
-#define CHECK(call)                                   \
-do                                                    \
-{                                                     \
-    const cudaError_t error_code = call;              \
-    if (error_code != cudaSuccess)                    \
-    {                                                 \
-        printf("CUDA Error:\n");                      \
-        printf("    File:       %s\n", __FILE__);     \
-        printf("    Line:       %d\n", __LINE__);     \
-        printf("    Error code: %d\n", error_code);   \
-        printf("    Error text: %s\n",                \
-            cudaGetErrorString(error_code));          \
-        exit(1);                                      \
-    }                                                 \
-} while (0)
+#define CHECK(call)                                                                                                    \
+    do {                                                                                                               \
+        const cudaError_t error_code = call;                                                                           \
+        if (error_code != cudaSuccess) {                                                                               \
+            printf("CUDA Error:\n");                                                                                   \
+            printf("    File:       %s\n", __FILE__);                                                                  \
+            printf("    Line:       %d\n", __LINE__);                                                                  \
+            printf("    Error code: %d\n", error_code);                                                                \
+            printf("    Error text: %s\n", cudaGetErrorString(error_code));                                            \
+            exit(1);                                                                                                   \
+        }                                                                                                              \
+    } while (0)
 
-class Logger : public nvinfer1::ILogger
-{
+class Logger: public nvinfer1::ILogger {
 public:
-	nvinfer1::ILogger::Severity reportableSeverity;
+    nvinfer1::ILogger::Severity reportableSeverity;
 
-	explicit Logger(nvinfer1::ILogger::Severity severity = nvinfer1::ILogger::Severity::kINFO) :
-		reportableSeverity(severity)
-	{
-	}
+    explicit Logger(nvinfer1::ILogger::Severity severity = nvinfer1::ILogger::Severity::kINFO):
+        reportableSeverity(severity)
+    {
+    }
 
-	void log(nvinfer1::ILogger::Severity severity, const char* msg) noexcept override
-	{
-		if (severity > reportableSeverity)
-		{
-			return;
-		}
-		switch (severity)
-		{
-		case nvinfer1::ILogger::Severity::kINTERNAL_ERROR:
-			std::cerr << "INTERNAL_ERROR: ";
-			break;
-		case nvinfer1::ILogger::Severity::kERROR:
-			std::cerr << "ERROR: ";
-			break;
-		case nvinfer1::ILogger::Severity::kWARNING:
-			std::cerr << "WARNING: ";
-			break;
-		case nvinfer1::ILogger::Severity::kINFO:
-			std::cerr << "INFO: ";
-			break;
-		default:
-			std::cerr << "VERBOSE: ";
-			break;
-		}
-		std::cerr << msg << std::endl;
-	}
+    void log(nvinfer1::ILogger::Severity severity, const char* msg) noexcept override
+    {
+        if (severity > reportableSeverity) {
+            return;
+        }
+        switch (severity) {
+            case nvinfer1::ILogger::Severity::kINTERNAL_ERROR:
+                std::cerr << "INTERNAL_ERROR: ";
+                break;
+            case nvinfer1::ILogger::Severity::kERROR:
+                std::cerr << "ERROR: ";
+                break;
+            case nvinfer1::ILogger::Severity::kWARNING:
+                std::cerr << "WARNING: ";
+                break;
+            case nvinfer1::ILogger::Severity::kINFO:
+                std::cerr << "INFO: ";
+                break;
+            default:
+                std::cerr << "VERBOSE: ";
+                break;
+        }
+        std::cerr << msg << std::endl;
+    }
 };
 
 inline int get_size_by_dims(const nvinfer1::Dims& dims)
 {
-	int size = 1;
-	for (int i = 0; i < dims.nbDims; i++)
-	{
-		size *= dims.d[i];
-	}
-	return size;
+    int size = 1;
+    for (int i = 0; i < dims.nbDims; i++) {
+        size *= dims.d[i];
+    }
+    return size;
 }
 
 inline int type_to_size(const nvinfer1::DataType& dataType)
 {
-	switch (dataType)
-	{
-	case nvinfer1::DataType::kFLOAT:
-		return 4;
-	case nvinfer1::DataType::kHALF:
-		return 2;
-	case nvinfer1::DataType::kINT32:
-		return 4;
-	case nvinfer1::DataType::kINT8:
-		return 1;
-	case nvinfer1::DataType::kBOOL:
-		return 1;
-	default:
-		return 4;
-	}
+    switch (dataType) {
+        case nvinfer1::DataType::kFLOAT:
+            return 4;
+        case nvinfer1::DataType::kHALF:
+            return 2;
+        case nvinfer1::DataType::kINT32:
+            return 4;
+        case nvinfer1::DataType::kINT8:
+            return 1;
+        case nvinfer1::DataType::kBOOL:
+            return 1;
+        default:
+            return 4;
+    }
 }
 
 inline static float clamp(float val, float min, float max)
 {
-	return val > min ? (val < max ? val : max) : min;
+    return val > min ? (val < max ? val : max) : min;
 }
 
 inline bool IsPathExist(const std::string& path)
 {
-	if (access(path.c_str(), 0) == F_OK)
-	{
-		return true;
-	}
-	return false;
+    if (access(path.c_str(), 0) == F_OK) {
+        return true;
+    }
+    return false;
 }
 
 inline bool IsFile(const std::string& path)
 {
-	if (!IsPathExist(path))
-	{
-		printf("%s:%d %s not exist\n", __FILE__, __LINE__, path.c_str());
-		return false;
-	}
-	struct stat buffer;
-	return (stat(path.c_str(), &buffer) == 0 && S_ISREG(buffer.st_mode));
+    if (!IsPathExist(path)) {
+        printf("%s:%d %s not exist\n", __FILE__, __LINE__, path.c_str());
+        return false;
+    }
+    struct stat buffer;
+    return (stat(path.c_str(), &buffer) == 0 && S_ISREG(buffer.st_mode));
 }
 
 inline bool IsFolder(const std::string& path)
 {
-	if (!IsPathExist(path))
-	{
-		return false;
-	}
-	struct stat buffer;
-	return (stat(path.c_str(), &buffer) == 0 && S_ISDIR(buffer.st_mode));
+    if (!IsPathExist(path)) {
+        return false;
+    }
+    struct stat buffer;
+    return (stat(path.c_str(), &buffer) == 0 && S_ISDIR(buffer.st_mode));
 }
 
-namespace seg
-{
-	struct Binding
-	{
-		size_t size = 1;
-		size_t dsize = 1;
-		nvinfer1::Dims dims;
-		std::string name;
-	};
+namespace seg {
+struct Binding {
+    size_t         size  = 1;
+    size_t         dsize = 1;
+    nvinfer1::Dims dims;
+    std::string    name;
+};
 
-	struct Object
-	{
-		cv::Rect_<float> rect;
-		int label = 0;
-		float prob = 0.0;
-		cv::Mat boxMask;
-	};
+struct Object {
+    cv::Rect_<float> rect;
+    int              label = 0;
+    float            prob  = 0.0;
+    cv::Mat          boxMask;
+};
 
-	struct PreParam
-	{
-		float ratio = 1.0f;
-		float dw = 0.0f;
-		float dh = 0.0f;
-		float height = 0;
-		float width = 0;
-	};
-}
-#endif //SEGMENT_NORMAL_COMMON_HPP
+struct PreParam {
+    float ratio  = 1.0f;
+    float dw     = 0.0f;
+    float dh     = 0.0f;
+    float height = 0;
+    float width  = 0;
+};
+}  // namespace seg
+#endif  // SEGMENT_NORMAL_COMMON_HPP
diff --git a/csrc/segment/normal/include/yolov8-seg.hpp b/csrc/segment/normal/include/yolov8-seg.hpp
index 80691f8..e6d8cad 100644
--- a/csrc/segment/normal/include/yolov8-seg.hpp
+++ b/csrc/segment/normal/include/yolov8-seg.hpp
@@ -3,578 +3,382 @@
 //
 #ifndef SEGMENT_NORMAL_YOLOV8_SEG_HPP
 #define SEGMENT_NORMAL_YOLOV8_SEG_HPP
-#include <fstream>
-#include "common.hpp"
 #include "NvInferPlugin.h"
+#include "common.hpp"
+#include <fstream>
 
 using namespace seg;
 
-class YOLOv8_seg
-{
+class YOLOv8_seg {
 public:
-	explicit YOLOv8_seg(const std::string& engine_file_path);
-	~YOLOv8_seg();
-
-	void make_pipe(bool warmup = true);
-	void copy_from_Mat(const cv::Mat& image);
-	void copy_from_Mat(const cv::Mat& image, cv::Size& size);
-	void letterbox(
-		const cv::Mat& image,
-		cv::Mat& out,
-		cv::Size& size
-	);
-	void infer();
-	void postprocess(
-		std::vector<Object>& objs,
-		float score_thres = 0.25f,
-		float iou_thres = 0.65f,
-		int topk = 100,
-		int seg_channels = 32,
-		int seg_h = 160,
-		int seg_w = 160
-	);
-	static void draw_objects(
-		const cv::Mat& image,
-		cv::Mat& res,
-		const std::vector<Object>& objs,
-		const std::vector<std::string>& CLASS_NAMES,
-		const std::vector<std::vector<unsigned int>>& COLORS,
-		const std::vector<std::vector<unsigned int>>& MASK_COLORS
-	);
-	int num_bindings;
-	int num_inputs = 0;
-	int num_outputs = 0;
-	std::vector<Binding> input_bindings;
-	std::vector<Binding> output_bindings;
-	std::vector<void*> host_ptrs;
-	std::vector<void*> device_ptrs;
-
-	PreParam pparam;
-private:
-	nvinfer1::ICudaEngine* engine = nullptr;
-	nvinfer1::IRuntime* runtime = nullptr;
-	nvinfer1::IExecutionContext* context = nullptr;
-	cudaStream_t stream = nullptr;
-	Logger gLogger{ nvinfer1::ILogger::Severity::kERROR };
+    explicit YOLOv8_seg(const std::string& engine_file_path);
+    ~YOLOv8_seg();
+
+    void                 make_pipe(bool warmup = true);
+    void                 copy_from_Mat(const cv::Mat& image);
+    void                 copy_from_Mat(const cv::Mat& image, cv::Size& size);
+    void                 letterbox(const cv::Mat& image, cv::Mat& out, cv::Size& size);
+    void                 infer();
+    void                 postprocess(std::vector<Object>& objs,
+                                     float                score_thres  = 0.25f,
+                                     float                iou_thres    = 0.65f,
+                                     int                  topk         = 100,
+                                     int                  seg_channels = 32,
+                                     int                  seg_h        = 160,
+                                     int                  seg_w        = 160);
+    static void          draw_objects(const cv::Mat&                                image,
+                                      cv::Mat&                                      res,
+                                      const std::vector<Object>&                    objs,
+                                      const std::vector<std::string>&               CLASS_NAMES,
+                                      const std::vector<std::vector<unsigned int>>& COLORS,
+                                      const std::vector<std::vector<unsigned int>>& MASK_COLORS);
+    int                  num_bindings;
+    int                  num_inputs  = 0;
+    int                  num_outputs = 0;
+    std::vector<Binding> input_bindings;
+    std::vector<Binding> output_bindings;
+    std::vector<void*>   host_ptrs;
+    std::vector<void*>   device_ptrs;
+
+    PreParam pparam;
 
+private:
+    nvinfer1::ICudaEngine*       engine  = nullptr;
+    nvinfer1::IRuntime*          runtime = nullptr;
+    nvinfer1::IExecutionContext* context = nullptr;
+    cudaStream_t                 stream  = nullptr;
+    Logger                       gLogger{nvinfer1::ILogger::Severity::kERROR};
 };
 
 YOLOv8_seg::YOLOv8_seg(const std::string& engine_file_path)
 {
-	std::ifstream file(engine_file_path, std::ios::binary);
-	assert(file.good());
-	file.seekg(0, std::ios::end);
-	auto size = file.tellg();
-	file.seekg(0, std::ios::beg);
-	char* trtModelStream = new char[size];
-	assert(trtModelStream);
-	file.read(trtModelStream, size);
-	file.close();
-	initLibNvInferPlugins(&this->gLogger, "");
-	this->runtime = nvinfer1::createInferRuntime(this->gLogger);
-	assert(this->runtime != nullptr);
-
-	this->engine = this->runtime->deserializeCudaEngine(trtModelStream, size);
-	assert(this->engine != nullptr);
-	delete[] trtModelStream;
-	this->context = this->engine->createExecutionContext();
-
-	assert(this->context != nullptr);
-	cudaStreamCreate(&this->stream);
-	this->num_bindings = this->engine->getNbBindings();
-
-	for (int i = 0; i < this->num_bindings; ++i)
-	{
-		Binding binding;
-		nvinfer1::Dims dims;
-		nvinfer1::DataType dtype = this->engine->getBindingDataType(i);
-		std::string name = this->engine->getBindingName(i);
-		binding.name = name;
-		binding.dsize = type_to_size(dtype);
-
-		bool IsInput = engine->bindingIsInput(i);
-		if (IsInput)
-		{
-			this->num_inputs += 1;
-			dims = this->engine->getProfileDimensions(
-				i,
-				0,
-				nvinfer1::OptProfileSelector::kMAX);
-			binding.size = get_size_by_dims(dims);
-			binding.dims = dims;
-			this->input_bindings.push_back(binding);
-			// set max opt shape
-			this->context->setBindingDimensions(i, dims);
-
-		}
-		else
-		{
-			dims = this->context->getBindingDimensions(i);
-			binding.size = get_size_by_dims(dims);
-			binding.dims = dims;
-			this->output_bindings.push_back(binding);
-			this->num_outputs += 1;
-		}
-	}
-
+    std::ifstream file(engine_file_path, std::ios::binary);
+    assert(file.good());
+    file.seekg(0, std::ios::end);
+    auto size = file.tellg();
+    file.seekg(0, std::ios::beg);
+    char* trtModelStream = new char[size];
+    assert(trtModelStream);
+    file.read(trtModelStream, size);
+    file.close();
+    initLibNvInferPlugins(&this->gLogger, "");
+    this->runtime = nvinfer1::createInferRuntime(this->gLogger);
+    assert(this->runtime != nullptr);
+
+    this->engine = this->runtime->deserializeCudaEngine(trtModelStream, size);
+    assert(this->engine != nullptr);
+    delete[] trtModelStream;
+    this->context = this->engine->createExecutionContext();
+
+    assert(this->context != nullptr);
+    cudaStreamCreate(&this->stream);
+    this->num_bindings = this->engine->getNbBindings();
+
+    for (int i = 0; i < this->num_bindings; ++i) {
+        Binding            binding;
+        nvinfer1::Dims     dims;
+        nvinfer1::DataType dtype = this->engine->getBindingDataType(i);
+        std::string        name  = this->engine->getBindingName(i);
+        binding.name             = name;
+        binding.dsize            = type_to_size(dtype);
+
+        bool IsInput = engine->bindingIsInput(i);
+        if (IsInput) {
+            this->num_inputs += 1;
+            dims         = this->engine->getProfileDimensions(i, 0, nvinfer1::OptProfileSelector::kMAX);
+            binding.size = get_size_by_dims(dims);
+            binding.dims = dims;
+            this->input_bindings.push_back(binding);
+            // set max opt shape
+            this->context->setBindingDimensions(i, dims);
+        }
+        else {
+            dims         = this->context->getBindingDimensions(i);
+            binding.size = get_size_by_dims(dims);
+            binding.dims = dims;
+            this->output_bindings.push_back(binding);
+            this->num_outputs += 1;
+        }
+    }
 }
 
 YOLOv8_seg::~YOLOv8_seg()
 {
-	this->context->destroy();
-	this->engine->destroy();
-	this->runtime->destroy();
-	cudaStreamDestroy(this->stream);
-	for (auto& ptr : this->device_ptrs)
-	{
-		CHECK(cudaFree(ptr));
-	}
-
-	for (auto& ptr : this->host_ptrs)
-	{
-		CHECK(cudaFreeHost(ptr));
-	}
+    this->context->destroy();
+    this->engine->destroy();
+    this->runtime->destroy();
+    cudaStreamDestroy(this->stream);
+    for (auto& ptr : this->device_ptrs) {
+        CHECK(cudaFree(ptr));
+    }
+
+    for (auto& ptr : this->host_ptrs) {
+        CHECK(cudaFreeHost(ptr));
+    }
 }
 
 void YOLOv8_seg::make_pipe(bool warmup)
 {
 
-	for (auto& bindings : this->input_bindings)
-	{
-		void* d_ptr;
-		CHECK(cudaMallocAsync(
-			&d_ptr,
-			bindings.size * bindings.dsize,
-			this->stream)
-		);
-		this->device_ptrs.push_back(d_ptr);
-	}
-
-	for (auto& bindings : this->output_bindings)
-	{
-		void* d_ptr, * h_ptr;
-		size_t size = bindings.size * bindings.dsize;
-		CHECK(cudaMallocAsync(
-			&d_ptr,
-			size,
-			this->stream)
-		);
-		CHECK(cudaHostAlloc(
-			&h_ptr,
-			size,
-			0)
-		);
-		this->device_ptrs.push_back(d_ptr);
-		this->host_ptrs.push_back(h_ptr);
-	}
-
-	if (warmup)
-	{
-		for (int i = 0; i < 10; i++)
-		{
-			for (auto& bindings : this->input_bindings)
-			{
-				size_t size = bindings.size * bindings.dsize;
-				void* h_ptr = malloc(size);
-				memset(h_ptr, 0, size);
-				CHECK(cudaMemcpyAsync(
-					this->device_ptrs[0],
-					h_ptr,
-					size,
-					cudaMemcpyHostToDevice,
-					this->stream)
-				);
-				free(h_ptr);
-			}
-			this->infer();
-		}
-		printf("model warmup 10 times\n");
-
-	}
+    for (auto& bindings : this->input_bindings) {
+        void* d_ptr;
+        CHECK(cudaMallocAsync(&d_ptr, bindings.size * bindings.dsize, this->stream));
+        this->device_ptrs.push_back(d_ptr);
+    }
+
+    for (auto& bindings : this->output_bindings) {
+        void * d_ptr, *h_ptr;
+        size_t size = bindings.size * bindings.dsize;
+        CHECK(cudaMallocAsync(&d_ptr, size, this->stream));
+        CHECK(cudaHostAlloc(&h_ptr, size, 0));
+        this->device_ptrs.push_back(d_ptr);
+        this->host_ptrs.push_back(h_ptr);
+    }
+
+    if (warmup) {
+        for (int i = 0; i < 10; i++) {
+            for (auto& bindings : this->input_bindings) {
+                size_t size  = bindings.size * bindings.dsize;
+                void*  h_ptr = malloc(size);
+                memset(h_ptr, 0, size);
+                CHECK(cudaMemcpyAsync(this->device_ptrs[0], h_ptr, size, cudaMemcpyHostToDevice, this->stream));
+                free(h_ptr);
+            }
+            this->infer();
+        }
+        printf("model warmup 10 times\n");
+    }
 }
 
-void YOLOv8_seg::letterbox(
-	const cv::Mat& image,
-	cv::Mat& out,
-	cv::Size& size
-)
+void YOLOv8_seg::letterbox(const cv::Mat& image, cv::Mat& out, cv::Size& size)
 {
-	const float inp_h = size.height;
-	const float inp_w = size.width;
-	float height = image.rows;
-	float width = image.cols;
-
-	float r = std::min(inp_h / height, inp_w / width);
-	int padw = std::round(width * r);
-	int padh = std::round(height * r);
-
-	cv::Mat tmp;
-	if ((int)width != padw || (int)height != padh)
-	{
-		cv::resize(
-			image,
-			tmp,
-			cv::Size(padw, padh)
-		);
-	}
-	else
-	{
-		tmp = image.clone();
-	}
-
-	float dw = inp_w - padw;
-	float dh = inp_h - padh;
-
-	dw /= 2.0f;
-	dh /= 2.0f;
-	int top = int(std::round(dh - 0.1f));
-	int bottom = int(std::round(dh + 0.1f));
-	int left = int(std::round(dw - 0.1f));
-	int right = int(std::round(dw + 0.1f));
-
-	cv::copyMakeBorder(
-		tmp,
-		tmp,
-		top,
-		bottom,
-		left,
-		right,
-		cv::BORDER_CONSTANT,
-		{ 114, 114, 114 }
-	);
-
-	cv::dnn::blobFromImage(tmp,
-		out,
-		1 / 255.f,
-		cv::Size(),
-		cv::Scalar(0, 0, 0),
-		true,
-		false,
-		CV_32F
-	);
-	this->pparam.ratio = 1 / r;
-	this->pparam.dw = dw;
-	this->pparam.dh = dh;
-	this->pparam.height = height;
-	this->pparam.width = width;;
+    const float inp_h  = size.height;
+    const float inp_w  = size.width;
+    float       height = image.rows;
+    float       width  = image.cols;
+
+    float r    = std::min(inp_h / height, inp_w / width);
+    int   padw = std::round(width * r);
+    int   padh = std::round(height * r);
+
+    cv::Mat tmp;
+    if ((int)width != padw || (int)height != padh) {
+        cv::resize(image, tmp, cv::Size(padw, padh));
+    }
+    else {
+        tmp = image.clone();
+    }
+
+    float dw = inp_w - padw;
+    float dh = inp_h - padh;
+
+    dw /= 2.0f;
+    dh /= 2.0f;
+    int top    = int(std::round(dh - 0.1f));
+    int bottom = int(std::round(dh + 0.1f));
+    int left   = int(std::round(dw - 0.1f));
+    int right  = int(std::round(dw + 0.1f));
+
+    cv::copyMakeBorder(tmp, tmp, top, bottom, left, right, cv::BORDER_CONSTANT, {114, 114, 114});
+
+    cv::dnn::blobFromImage(tmp, out, 1 / 255.f, cv::Size(), cv::Scalar(0, 0, 0), true, false, CV_32F);
+    this->pparam.ratio  = 1 / r;
+    this->pparam.dw     = dw;
+    this->pparam.dh     = dh;
+    this->pparam.height = height;
+    this->pparam.width  = width;
+    ;
 }
 
 void YOLOv8_seg::copy_from_Mat(const cv::Mat& image)
 {
-	cv::Mat nchw;
-	auto& in_binding = this->input_bindings[0];
-	auto width = in_binding.dims.d[3];
-	auto height = in_binding.dims.d[2];
-	cv::Size size{ width, height };
-	this->letterbox(
-		image,
-		nchw,
-		size
-	);
-
-	this->context->setBindingDimensions(
-		0,
-		nvinfer1::Dims
-			{
-				4,
-				{ 1, 3, height, width }
-			}
-	);
-
-	CHECK(cudaMemcpyAsync(
-		this->device_ptrs[0],
-		nchw.ptr<float>(),
-		nchw.total() * nchw.elemSize(),
-		cudaMemcpyHostToDevice,
-		this->stream)
-	);
+    cv::Mat  nchw;
+    auto&    in_binding = this->input_bindings[0];
+    auto     width      = in_binding.dims.d[3];
+    auto     height     = in_binding.dims.d[2];
+    cv::Size size{width, height};
+    this->letterbox(image, nchw, size);
+
+    this->context->setBindingDimensions(0, nvinfer1::Dims{4, {1, 3, height, width}});
+
+    CHECK(cudaMemcpyAsync(
+        this->device_ptrs[0], nchw.ptr<float>(), nchw.total() * nchw.elemSize(), cudaMemcpyHostToDevice, this->stream));
 }
 
 void YOLOv8_seg::copy_from_Mat(const cv::Mat& image, cv::Size& size)
 {
-	cv::Mat nchw;
-	this->letterbox(
-		image,
-		nchw,
-		size
-	);
-	this->context->setBindingDimensions(
-		0,
-		nvinfer1::Dims
-			{ 4,
-			  { 1, 3, size.height, size.width }
-			}
-	);
-	CHECK(cudaMemcpyAsync(
-		this->device_ptrs[0],
-		nchw.ptr<float>(),
-		nchw.total() * nchw.elemSize(),
-		cudaMemcpyHostToDevice,
-		this->stream)
-	);
+    cv::Mat nchw;
+    this->letterbox(image, nchw, size);
+    this->context->setBindingDimensions(0, nvinfer1::Dims{4, {1, 3, size.height, size.width}});
+    CHECK(cudaMemcpyAsync(
+        this->device_ptrs[0], nchw.ptr<float>(), nchw.total() * nchw.elemSize(), cudaMemcpyHostToDevice, this->stream));
 }
 
 void YOLOv8_seg::infer()
 {
 
-	this->context->enqueueV2(
-		this->device_ptrs.data(),
-		this->stream,
-		nullptr
-	);
-	for (int i = 0; i < this->num_outputs; i++)
-	{
-		size_t osize = this->output_bindings[i].size * this->output_bindings[i].dsize;
-		CHECK(cudaMemcpyAsync(this->host_ptrs[i],
-			this->device_ptrs[i + this->num_inputs],
-			osize,
-			cudaMemcpyDeviceToHost,
-			this->stream)
-		);
-
-	}
-	cudaStreamSynchronize(this->stream);
-
+    this->context->enqueueV2(this->device_ptrs.data(), this->stream, nullptr);
+    for (int i = 0; i < this->num_outputs; i++) {
+        size_t osize = this->output_bindings[i].size * this->output_bindings[i].dsize;
+        CHECK(cudaMemcpyAsync(
+            this->host_ptrs[i], this->device_ptrs[i + this->num_inputs], osize, cudaMemcpyDeviceToHost, this->stream));
+    }
+    cudaStreamSynchronize(this->stream);
 }
 
-void YOLOv8_seg::postprocess(std::vector<Object>& objs,
-	float score_thres,
-	float iou_thres,
-	int topk,
-	int seg_channels,
-	int seg_h,
-	int seg_w
-)
+void YOLOv8_seg::postprocess(
+    std::vector<Object>& objs, float score_thres, float iou_thres, int topk, int seg_channels, int seg_h, int seg_w)
 {
-	objs.clear();
-	auto input_h = this->input_bindings[0].dims.d[2];
-	auto input_w = this->input_bindings[0].dims.d[3];
-	int num_channels, num_anchors, num_classes;
-	bool flag = false;
-	int bid;
-	int bcnt = -1;
-	for (auto& o : this->output_bindings)
-	{
-		bcnt += 1;
-		if (o.dims.nbDims == 3)
-		{
-			num_channels = o.dims.d[1];
-			num_anchors = o.dims.d[2];
-			flag = true;
-			bid = bcnt;
-		}
-	}
-	assert(flag);
-	num_classes = num_channels - seg_channels - 4;
-
-	auto& dw = this->pparam.dw;
-	auto& dh = this->pparam.dh;
-	auto& width = this->pparam.width;
-	auto& height = this->pparam.height;
-	auto& ratio = this->pparam.ratio;
-
-	cv::Mat output = cv::Mat(num_channels, num_anchors, CV_32F,
-		static_cast<float*>(this->host_ptrs[bid]));
-	output = output.t();
-
-	cv::Mat protos = cv::Mat(seg_channels, seg_h * seg_w, CV_32F,
-		static_cast<float*>(this->host_ptrs[1 - bid]));
-
-	std::vector<int> labels;
-	std::vector<float> scores;
-	std::vector<cv::Rect> bboxes;
-	std::vector<cv::Mat> mask_confs;
-	std::vector<int> indices;
-
-	for (int i = 0; i < num_anchors; i++)
-	{
-		auto row_ptr = output.row(i).ptr<float>();
-		auto bboxes_ptr = row_ptr;
-		auto scores_ptr = row_ptr + 4;
-		auto mask_confs_ptr = row_ptr + 4 + num_classes;
-		auto max_s_ptr = std::max_element(scores_ptr, scores_ptr + num_classes);
-		float score = *max_s_ptr;
-		if (score > score_thres)
-		{
-			float x = *bboxes_ptr++ - dw;
-			float y = *bboxes_ptr++ - dh;
-			float w = *bboxes_ptr++;
-			float h = *bboxes_ptr;
-
-			float x0 = clamp((x - 0.5f * w) * ratio, 0.f, width);
-			float y0 = clamp((y - 0.5f * h) * ratio, 0.f, height);
-			float x1 = clamp((x + 0.5f * w) * ratio, 0.f, width);
-			float y1 = clamp((y + 0.5f * h) * ratio, 0.f, height);
-
-			int label = max_s_ptr - scores_ptr;
-			cv::Rect_<float> bbox;
-			bbox.x = x0;
-			bbox.y = y0;
-			bbox.width = x1 - x0;
-			bbox.height = y1 - y0;
-
-			cv::Mat mask_conf = cv::Mat(1, seg_channels, CV_32F, mask_confs_ptr);
-
-			bboxes.push_back(bbox);
-			labels.push_back(label);
-			scores.push_back(score);
-			mask_confs.push_back(mask_conf);
-		}
-	}
+    objs.clear();
+    auto input_h = this->input_bindings[0].dims.d[2];
+    auto input_w = this->input_bindings[0].dims.d[3];
+    int  num_channels, num_anchors, num_classes;
+    bool flag = false;
+    int  bid;
+    int  bcnt = -1;
+    for (auto& o : this->output_bindings) {
+        bcnt += 1;
+        if (o.dims.nbDims == 3) {
+            num_channels = o.dims.d[1];
+            num_anchors  = o.dims.d[2];
+            flag         = true;
+            bid          = bcnt;
+        }
+    }
+    assert(flag);
+    num_classes = num_channels - seg_channels - 4;
+
+    auto& dw     = this->pparam.dw;
+    auto& dh     = this->pparam.dh;
+    auto& width  = this->pparam.width;
+    auto& height = this->pparam.height;
+    auto& ratio  = this->pparam.ratio;
+
+    cv::Mat output = cv::Mat(num_channels, num_anchors, CV_32F, static_cast<float*>(this->host_ptrs[bid]));
+    output         = output.t();
+
+    cv::Mat protos = cv::Mat(seg_channels, seg_h * seg_w, CV_32F, static_cast<float*>(this->host_ptrs[1 - bid]));
+
+    std::vector<int>      labels;
+    std::vector<float>    scores;
+    std::vector<cv::Rect> bboxes;
+    std::vector<cv::Mat>  mask_confs;
+    std::vector<int>      indices;
+
+    for (int i = 0; i < num_anchors; i++) {
+        auto  row_ptr        = output.row(i).ptr<float>();
+        auto  bboxes_ptr     = row_ptr;
+        auto  scores_ptr     = row_ptr + 4;
+        auto  mask_confs_ptr = row_ptr + 4 + num_classes;
+        auto  max_s_ptr      = std::max_element(scores_ptr, scores_ptr + num_classes);
+        float score          = *max_s_ptr;
+        if (score > score_thres) {
+            float x = *bboxes_ptr++ - dw;
+            float y = *bboxes_ptr++ - dh;
+            float w = *bboxes_ptr++;
+            float h = *bboxes_ptr;
+
+            float x0 = clamp((x - 0.5f * w) * ratio, 0.f, width);
+            float y0 = clamp((y - 0.5f * h) * ratio, 0.f, height);
+            float x1 = clamp((x + 0.5f * w) * ratio, 0.f, width);
+            float y1 = clamp((y + 0.5f * h) * ratio, 0.f, height);
+
+            int              label = max_s_ptr - scores_ptr;
+            cv::Rect_<float> bbox;
+            bbox.x      = x0;
+            bbox.y      = y0;
+            bbox.width  = x1 - x0;
+            bbox.height = y1 - y0;
+
+            cv::Mat mask_conf = cv::Mat(1, seg_channels, CV_32F, mask_confs_ptr);
+
+            bboxes.push_back(bbox);
+            labels.push_back(label);
+            scores.push_back(score);
+            mask_confs.push_back(mask_conf);
+        }
+    }
 
 #if defined(BATCHED_NMS)
-	cv::dnn::NMSBoxesBatched(
-		bboxes,
-		scores,
-		labels,
-		score_thres,
-		iou_thres,
-		indices
-	);
+    cv::dnn::NMSBoxesBatched(bboxes, scores, labels, score_thres, iou_thres, indices);
 #else
-	cv::dnn::NMSBoxes(
-		bboxes,
-		scores,
-		score_thres,
-		iou_thres,
-		indices
-	);
+    cv::dnn::NMSBoxes(bboxes, scores, score_thres, iou_thres, indices);
 #endif
 
-	cv::Mat masks;
-	int cnt = 0;
-	for (auto& i : indices)
-	{
-		if (cnt >= topk)
-		{
-			break;
-		}
-		cv::Rect tmp = bboxes[i];
-		Object obj;
-		obj.label = labels[i];
-		obj.rect = tmp;
-		obj.prob = scores[i];
-		masks.push_back(mask_confs[i]);
-		objs.push_back(obj);
-		cnt += 1;
-	}
-
-	if(masks.empty())
-	{
-		//masks is empty
-	}
-	else
-	{
-		cv::Mat matmulRes = (masks * protos).t();
-		cv::Mat maskMat = matmulRes.reshape(indices.size(), { seg_w, seg_h });
-
-		std::vector<cv::Mat> maskChannels;
-		cv::split(maskMat, maskChannels);
-		int scale_dw = dw / input_w * seg_w;
-		int scale_dh = dh / input_h * seg_h;
-
-		cv::Rect roi(
-			scale_dw,
-			scale_dh,
-			seg_w - 2 * scale_dw,
-			seg_h - 2 * scale_dh);
-
-		for (int i = 0; i < indices.size(); i++)
-		{
-			cv::Mat dest, mask;
-			cv::exp(-maskChannels[i], dest);
-			dest = 1.0 / (1.0 + dest);
-			dest = dest(roi);
-			cv::resize(
-				dest,
-				mask,
-				cv::Size((int)width, (int)height),
-				cv::INTER_LINEAR
-			);
-			objs[i].boxMask = mask(objs[i].rect) > 0.5f;
-		}
-	}
-
+    cv::Mat masks;
+    int     cnt = 0;
+    for (auto& i : indices) {
+        if (cnt >= topk) {
+            break;
+        }
+        cv::Rect tmp = bboxes[i];
+        Object   obj;
+        obj.label = labels[i];
+        obj.rect  = tmp;
+        obj.prob  = scores[i];
+        masks.push_back(mask_confs[i]);
+        objs.push_back(obj);
+        cnt += 1;
+    }
+
+    if (masks.empty()) {
+        // masks is empty
+    }
+    else {
+        cv::Mat matmulRes = (masks * protos).t();
+        cv::Mat maskMat   = matmulRes.reshape(indices.size(), {seg_w, seg_h});
+
+        std::vector<cv::Mat> maskChannels;
+        cv::split(maskMat, maskChannels);
+        int scale_dw = dw / input_w * seg_w;
+        int scale_dh = dh / input_h * seg_h;
+
+        cv::Rect roi(scale_dw, scale_dh, seg_w - 2 * scale_dw, seg_h - 2 * scale_dh);
+
+        for (int i = 0; i < indices.size(); i++) {
+            cv::Mat dest, mask;
+            cv::exp(-maskChannels[i], dest);
+            dest = 1.0 / (1.0 + dest);
+            dest = dest(roi);
+            cv::resize(dest, mask, cv::Size((int)width, (int)height), cv::INTER_LINEAR);
+            objs[i].boxMask = mask(objs[i].rect) > 0.5f;
+        }
+    }
 }
 
-void YOLOv8_seg::draw_objects(const cv::Mat& image,
-	cv::Mat& res,
-	const std::vector<Object>& objs,
-	const std::vector<std::string>& CLASS_NAMES,
-	const std::vector<std::vector<unsigned int>>& COLORS,
-	const std::vector<std::vector<unsigned int>>& MASK_COLORS
-)
+void YOLOv8_seg::draw_objects(const cv::Mat&                                image,
+                              cv::Mat&                                      res,
+                              const std::vector<Object>&                    objs,
+                              const std::vector<std::string>&               CLASS_NAMES,
+                              const std::vector<std::vector<unsigned int>>& COLORS,
+                              const std::vector<std::vector<unsigned int>>& MASK_COLORS)
 {
-	res = image.clone();
-	cv::Mat mask = image.clone();
-	for (auto& obj : objs)
-	{
-		int idx = obj.label;
-		cv::Scalar color = cv::Scalar(
-			COLORS[idx][0],
-			COLORS[idx][1],
-			COLORS[idx][2]
-		);
-		cv::Scalar mask_color = cv::Scalar(
-			MASK_COLORS[idx % 20][0],
-			MASK_COLORS[idx % 20][1],
-			MASK_COLORS[idx % 20][2]
-		);
-		cv::rectangle(
-			res,
-			obj.rect,
-			color,
-			2
-		);
-
-		char text[256];
-		sprintf(
-			text,
-			"%s %.1f%%",
-			CLASS_NAMES[idx].c_str(),
-			obj.prob * 100
-		);
-		mask(obj.rect).setTo(mask_color, obj.boxMask);
-
-		int baseLine = 0;
-		cv::Size label_size = cv::getTextSize(
-			text,
-			cv::FONT_HERSHEY_SIMPLEX,
-			0.4,
-			1,
-			&baseLine
-		);
-
-		int x = (int)obj.rect.x;
-		int y = (int)obj.rect.y + 1;
-
-		if (y > res.rows)
-			y = res.rows;
-
-		cv::rectangle(
-			res,
-			cv::Rect(x, y, label_size.width, label_size.height + baseLine),
-			{ 0, 0, 255 },
-			-1
-		);
-
-		cv::putText(
-			res,
-			text,
-			cv::Point(x, y + label_size.height),
-			cv::FONT_HERSHEY_SIMPLEX,
-			0.4,
-			{ 255, 255, 255 },
-			1
-		);
-	}
-	cv::addWeighted(
-		res,
-		0.5,
-		mask,
-		0.8,
-		1,
-		res
-	);
+    res          = image.clone();
+    cv::Mat mask = image.clone();
+    for (auto& obj : objs) {
+        int        idx   = obj.label;
+        cv::Scalar color = cv::Scalar(COLORS[idx][0], COLORS[idx][1], COLORS[idx][2]);
+        cv::Scalar mask_color =
+            cv::Scalar(MASK_COLORS[idx % 20][0], MASK_COLORS[idx % 20][1], MASK_COLORS[idx % 20][2]);
+        cv::rectangle(res, obj.rect, color, 2);
+
+        char text[256];
+        sprintf(text, "%s %.1f%%", CLASS_NAMES[idx].c_str(), obj.prob * 100);
+        mask(obj.rect).setTo(mask_color, obj.boxMask);
+
+        int      baseLine   = 0;
+        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine);
+
+        int x = (int)obj.rect.x;
+        int y = (int)obj.rect.y + 1;
+
+        if (y > res.rows)
+            y = res.rows;
+
+        cv::rectangle(res, cv::Rect(x, y, label_size.width, label_size.height + baseLine), {0, 0, 255}, -1);
+
+        cv::putText(res, text, cv::Point(x, y + label_size.height), cv::FONT_HERSHEY_SIMPLEX, 0.4, {255, 255, 255}, 1);
+    }
+    cv::addWeighted(res, 0.5, mask, 0.8, 1, res);
 }
-#endif //SEGMENT_NORMAL_YOLOV8_SEG_HPP
+#endif  // SEGMENT_NORMAL_YOLOV8_SEG_HPP
diff --git a/csrc/segment/normal/main.cpp b/csrc/segment/normal/main.cpp
index bc67e57..e6e8f8b 100644
--- a/csrc/segment/normal/main.cpp
+++ b/csrc/segment/normal/main.cpp
@@ -2,177 +2,131 @@
 // Created by ubuntu on 2/8/23.
 //
 #include "chrono"
-#include "yolov8-seg.hpp"
 #include "opencv2/opencv.hpp"
+#include "yolov8-seg.hpp"
 
 const std::vector<std::string> CLASS_NAMES = {
-	"person", "bicycle", "car", "motorcycle", "airplane", "bus",
-	"train", "truck", "boat", "traffic light", "fire hydrant",
-	"stop sign", "parking meter", "bench", "bird", "cat",
-	"dog", "horse", "sheep", "cow", "elephant",
-	"bear", "zebra", "giraffe", "backpack", "umbrella",
-	"handbag", "tie", "suitcase", "frisbee", "skis",
-	"snowboard", "sports ball", "kite", "baseball bat", "baseball glove",
-	"skateboard", "surfboard", "tennis racket", "bottle", "wine glass",
-	"cup", "fork", "knife", "spoon", "bowl",
-	"banana", "apple", "sandwich", "orange", "broccoli",
-	"carrot", "hot dog", "pizza", "donut", "cake",
-	"chair", "couch", "potted plant", "bed", "dining table",
-	"toilet", "tv", "laptop", "mouse", "remote",
-	"keyboard", "cell phone", "microwave", "oven",
-	"toaster", "sink", "refrigerator", "book", "clock", "vase",
-	"scissors", "teddy bear", "hair drier", "toothbrush" };
+    "person",         "bicycle",    "car",           "motorcycle",    "airplane",     "bus",           "train",
+    "truck",          "boat",       "traffic light", "fire hydrant",  "stop sign",    "parking meter", "bench",
+    "bird",           "cat",        "dog",           "horse",         "sheep",        "cow",           "elephant",
+    "bear",           "zebra",      "giraffe",       "backpack",      "umbrella",     "handbag",       "tie",
+    "suitcase",       "frisbee",    "skis",          "snowboard",     "sports ball",  "kite",          "baseball bat",
+    "baseball glove", "skateboard", "surfboard",     "tennis racket", "bottle",       "wine glass",    "cup",
+    "fork",           "knife",      "spoon",         "bowl",          "banana",       "apple",         "sandwich",
+    "orange",         "broccoli",   "carrot",        "hot dog",       "pizza",        "donut",         "cake",
+    "chair",          "couch",      "potted plant",  "bed",           "dining table", "toilet",        "tv",
+    "laptop",         "mouse",      "remote",        "keyboard",      "cell phone",   "microwave",     "oven",
+    "toaster",        "sink",       "refrigerator",  "book",          "clock",        "vase",          "scissors",
+    "teddy bear",     "hair drier", "toothbrush"};
 
 const std::vector<std::vector<unsigned int>> COLORS = {
-	{ 0, 114, 189 }, { 217, 83, 25 }, { 237, 177, 32 },
-	{ 126, 47, 142 }, { 119, 172, 48 }, { 77, 190, 238 },
-	{ 162, 20, 47 }, { 76, 76, 76 }, { 153, 153, 153 },
-	{ 255, 0, 0 }, { 255, 128, 0 }, { 191, 191, 0 },
-	{ 0, 255, 0 }, { 0, 0, 255 }, { 170, 0, 255 },
-	{ 85, 85, 0 }, { 85, 170, 0 }, { 85, 255, 0 },
-	{ 170, 85, 0 }, { 170, 170, 0 }, { 170, 255, 0 },
-	{ 255, 85, 0 }, { 255, 170, 0 }, { 255, 255, 0 },
-	{ 0, 85, 128 }, { 0, 170, 128 }, { 0, 255, 128 },
-	{ 85, 0, 128 }, { 85, 85, 128 }, { 85, 170, 128 },
-	{ 85, 255, 128 }, { 170, 0, 128 }, { 170, 85, 128 },
-	{ 170, 170, 128 }, { 170, 255, 128 }, { 255, 0, 128 },
-	{ 255, 85, 128 }, { 255, 170, 128 }, { 255, 255, 128 },
-	{ 0, 85, 255 }, { 0, 170, 255 }, { 0, 255, 255 },
-	{ 85, 0, 255 }, { 85, 85, 255 }, { 85, 170, 255 },
-	{ 85, 255, 255 }, { 170, 0, 255 }, { 170, 85, 255 },
-	{ 170, 170, 255 }, { 170, 255, 255 }, { 255, 0, 255 },
-	{ 255, 85, 255 }, { 255, 170, 255 }, { 85, 0, 0 },
-	{ 128, 0, 0 }, { 170, 0, 0 }, { 212, 0, 0 },
-	{ 255, 0, 0 }, { 0, 43, 0 }, { 0, 85, 0 },
-	{ 0, 128, 0 }, { 0, 170, 0 }, { 0, 212, 0 },
-	{ 0, 255, 0 }, { 0, 0, 43 }, { 0, 0, 85 },
-	{ 0, 0, 128 }, { 0, 0, 170 }, { 0, 0, 212 },
-	{ 0, 0, 255 }, { 0, 0, 0 }, { 36, 36, 36 },
-	{ 73, 73, 73 }, { 109, 109, 109 }, { 146, 146, 146 },
-	{ 182, 182, 182 }, { 219, 219, 219 }, { 0, 114, 189 },
-	{ 80, 183, 189 }, { 128, 128, 0 }
-};
+    {0, 114, 189},   {217, 83, 25},   {237, 177, 32},  {126, 47, 142},  {119, 172, 48},  {77, 190, 238},
+    {162, 20, 47},   {76, 76, 76},    {153, 153, 153}, {255, 0, 0},     {255, 128, 0},   {191, 191, 0},
+    {0, 255, 0},     {0, 0, 255},     {170, 0, 255},   {85, 85, 0},     {85, 170, 0},    {85, 255, 0},
+    {170, 85, 0},    {170, 170, 0},   {170, 255, 0},   {255, 85, 0},    {255, 170, 0},   {255, 255, 0},
+    {0, 85, 128},    {0, 170, 128},   {0, 255, 128},   {85, 0, 128},    {85, 85, 128},   {85, 170, 128},
+    {85, 255, 128},  {170, 0, 128},   {170, 85, 128},  {170, 170, 128}, {170, 255, 128}, {255, 0, 128},
+    {255, 85, 128},  {255, 170, 128}, {255, 255, 128}, {0, 85, 255},    {0, 170, 255},   {0, 255, 255},
+    {85, 0, 255},    {85, 85, 255},   {85, 170, 255},  {85, 255, 255},  {170, 0, 255},   {170, 85, 255},
+    {170, 170, 255}, {170, 255, 255}, {255, 0, 255},   {255, 85, 255},  {255, 170, 255}, {85, 0, 0},
+    {128, 0, 0},     {170, 0, 0},     {212, 0, 0},     {255, 0, 0},     {0, 43, 0},      {0, 85, 0},
+    {0, 128, 0},     {0, 170, 0},     {0, 212, 0},     {0, 255, 0},     {0, 0, 43},      {0, 0, 85},
+    {0, 0, 128},     {0, 0, 170},     {0, 0, 212},     {0, 0, 255},     {0, 0, 0},       {36, 36, 36},
+    {73, 73, 73},    {109, 109, 109}, {146, 146, 146}, {182, 182, 182}, {219, 219, 219}, {0, 114, 189},
+    {80, 183, 189},  {128, 128, 0}};
 
 const std::vector<std::vector<unsigned int>> MASK_COLORS = {
-	{ 255, 56, 56 }, { 255, 157, 151 }, { 255, 112, 31 },
-	{ 255, 178, 29 }, { 207, 210, 49 }, { 72, 249, 10 },
-	{ 146, 204, 23 }, { 61, 219, 134 }, { 26, 147, 52 },
-	{ 0, 212, 187 }, { 44, 153, 168 }, { 0, 194, 255 },
-	{ 52, 69, 147 }, { 100, 115, 255 }, { 0, 24, 236 },
-	{ 132, 56, 255 }, { 82, 0, 133 }, { 203, 56, 255 },
-	{ 255, 149, 200 }, { 255, 55, 199 }
-};
+    {255, 56, 56},  {255, 157, 151}, {255, 112, 31}, {255, 178, 29}, {207, 210, 49},  {72, 249, 10}, {146, 204, 23},
+    {61, 219, 134}, {26, 147, 52},   {0, 212, 187},  {44, 153, 168}, {0, 194, 255},   {52, 69, 147}, {100, 115, 255},
+    {0, 24, 236},   {132, 56, 255},  {82, 0, 133},   {203, 56, 255}, {255, 149, 200}, {255, 55, 199}};
 
 int main(int argc, char** argv)
 {
-	// cuda:0
-	cudaSetDevice(0);
+    // cuda:0
+    cudaSetDevice(0);
 
-	const std::string engine_file_path{ argv[1] };
-	const std::string path{ argv[2] };
+    const std::string engine_file_path{argv[1]};
+    const std::string path{argv[2]};
 
-	std::vector<std::string> imagePathList;
-	bool isVideo{ false };
+    std::vector<std::string> imagePathList;
+    bool                     isVideo{false};
 
-	assert(argc == 3);
+    assert(argc == 3);
 
-	auto yolov8 = new YOLOv8_seg(engine_file_path);
-	yolov8->make_pipe(true);
+    auto yolov8 = new YOLOv8_seg(engine_file_path);
+    yolov8->make_pipe(true);
 
-	if (IsFile(path))
-	{
-		std::string suffix = path.substr(path.find_last_of('.') + 1);
-		if (
-			suffix == "jpg" ||
-				suffix == "jpeg" ||
-				suffix == "png"
-			)
-		{
-			imagePathList.push_back(path);
-		}
-		else if (
-			suffix == "mp4" ||
-				suffix == "avi" ||
-				suffix == "m4v" ||
-				suffix == "mpeg" ||
-				suffix == "mov" ||
-				suffix == "mkv"
-			)
-		{
-			isVideo = true;
-		}
-		else
-		{
-			printf("suffix %s is wrong !!!\n", suffix.c_str());
-			std::abort();
-		}
-	}
-	else if (IsFolder(path))
-	{
-		cv::glob(path + "/*.jpg", imagePathList);
-	}
+    if (IsFile(path)) {
+        std::string suffix = path.substr(path.find_last_of('.') + 1);
+        if (suffix == "jpg" || suffix == "jpeg" || suffix == "png") {
+            imagePathList.push_back(path);
+        }
+        else if (suffix == "mp4" || suffix == "avi" || suffix == "m4v" || suffix == "mpeg" || suffix == "mov"
+                 || suffix == "mkv") {
+            isVideo = true;
+        }
+        else {
+            printf("suffix %s is wrong !!!\n", suffix.c_str());
+            std::abort();
+        }
+    }
+    else if (IsFolder(path)) {
+        cv::glob(path + "/*.jpg", imagePathList);
+    }
 
-	cv::Mat res, image;
-	cv::Size size = cv::Size{ 640, 640 };
-	int topk = 100;
-	int seg_h = 160;
-	int seg_w = 160;
-	int seg_channels = 32;
-	float score_thres = 0.25f;
-	float iou_thres = 0.65f;
+    cv::Mat  res, image;
+    cv::Size size         = cv::Size{640, 640};
+    int      topk         = 100;
+    int      seg_h        = 160;
+    int      seg_w        = 160;
+    int      seg_channels = 32;
+    float    score_thres  = 0.25f;
+    float    iou_thres    = 0.65f;
 
-	std::vector<Object> objs;
+    std::vector<Object> objs;
 
-	cv::namedWindow("result", cv::WINDOW_AUTOSIZE);
+    cv::namedWindow("result", cv::WINDOW_AUTOSIZE);
 
-	if (isVideo)
-	{
-		cv::VideoCapture cap(path);
+    if (isVideo) {
+        cv::VideoCapture cap(path);
 
-		if (!cap.isOpened())
-		{
-			printf("can not open %s\n", path.c_str());
-			return -1;
-		}
-		while (cap.read(image))
-		{
-			objs.clear();
-			yolov8->copy_from_Mat(image, size);
-			auto start = std::chrono::system_clock::now();
-			yolov8->infer();
-			auto end = std::chrono::system_clock::now();
-			yolov8->postprocess(objs, score_thres, iou_thres, topk, seg_channels, seg_h, seg_w);
-			yolov8->draw_objects(image, res, objs, CLASS_NAMES, COLORS, MASK_COLORS);
-			auto tc = (double)
-				std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.;
-			printf("cost %2.4lf ms\n", tc);
-			cv::imshow("result", res);
-			if (cv::waitKey(10) == 'q')
-			{
-				break;
-			}
-		}
-	}
-	else
-	{
-		for (auto& path : imagePathList)
-		{
-			objs.clear();
-			image = cv::imread(path);
-			yolov8->copy_from_Mat(image, size);
-			auto start = std::chrono::system_clock::now();
-			yolov8->infer();
-			auto end = std::chrono::system_clock::now();
-			yolov8->postprocess(objs, score_thres, iou_thres, topk, seg_channels, seg_h, seg_w);
-			yolov8->draw_objects(image, res, objs, CLASS_NAMES, COLORS, MASK_COLORS);
-			auto tc = (double)
-				std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.;
-			printf("cost %2.4lf ms\n", tc);
-			cv::imshow("result", res);
-			cv::waitKey(0);
-		}
-	}
-	cv::destroyAllWindows();
-	delete yolov8;
-	return 0;
+        if (!cap.isOpened()) {
+            printf("can not open %s\n", path.c_str());
+            return -1;
+        }
+        while (cap.read(image)) {
+            objs.clear();
+            yolov8->copy_from_Mat(image, size);
+            auto start = std::chrono::system_clock::now();
+            yolov8->infer();
+            auto end = std::chrono::system_clock::now();
+            yolov8->postprocess(objs, score_thres, iou_thres, topk, seg_channels, seg_h, seg_w);
+            yolov8->draw_objects(image, res, objs, CLASS_NAMES, COLORS, MASK_COLORS);
+            auto tc = (double)std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.;
+            printf("cost %2.4lf ms\n", tc);
+            cv::imshow("result", res);
+            if (cv::waitKey(10) == 'q') {
+                break;
+            }
+        }
+    }
+    else {
+        for (auto& path : imagePathList) {
+            objs.clear();
+            image = cv::imread(path);
+            yolov8->copy_from_Mat(image, size);
+            auto start = std::chrono::system_clock::now();
+            yolov8->infer();
+            auto end = std::chrono::system_clock::now();
+            yolov8->postprocess(objs, score_thres, iou_thres, topk, seg_channels, seg_h, seg_w);
+            yolov8->draw_objects(image, res, objs, CLASS_NAMES, COLORS, MASK_COLORS);
+            auto tc = (double)std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.;
+            printf("cost %2.4lf ms\n", tc);
+            cv::imshow("result", res);
+            cv::waitKey(0);
+        }
+    }
+    cv::destroyAllWindows();
+    delete yolov8;
+    return 0;
 }
diff --git a/csrc/segment/simple/include/common.hpp b/csrc/segment/simple/include/common.hpp
index 6dc26d9..8f34344 100644
--- a/csrc/segment/simple/include/common.hpp
+++ b/csrc/segment/simple/include/common.hpp
@@ -4,154 +4,139 @@
 
 #ifndef SEGMENT_SIMPLE_COMMON_HPP
 #define SEGMENT_SIMPLE_COMMON_HPP
+#include "NvInfer.h"
 #include "opencv2/opencv.hpp"
 #include <sys/stat.h>
 #include <unistd.h>
-#include "NvInfer.h"
 
-#define CHECK(call)                                   \
-do                                                    \
-{                                                     \
-    const cudaError_t error_code = call;              \
-    if (error_code != cudaSuccess)                    \
-    {                                                 \
-        printf("CUDA Error:\n");                      \
-        printf("    File:       %s\n", __FILE__);     \
-        printf("    Line:       %d\n", __LINE__);     \
-        printf("    Error code: %d\n", error_code);   \
-        printf("    Error text: %s\n",                \
-            cudaGetErrorString(error_code));          \
-        exit(1);                                      \
-    }                                                 \
-} while (0)
+#define CHECK(call)                                                                                                    \
+    do {                                                                                                               \
+        const cudaError_t error_code = call;                                                                           \
+        if (error_code != cudaSuccess) {                                                                               \
+            printf("CUDA Error:\n");                                                                                   \
+            printf("    File:       %s\n", __FILE__);                                                                  \
+            printf("    Line:       %d\n", __LINE__);                                                                  \
+            printf("    Error code: %d\n", error_code);                                                                \
+            printf("    Error text: %s\n", cudaGetErrorString(error_code));                                            \
+            exit(1);                                                                                                   \
+        }                                                                                                              \
+    } while (0)
 
-class Logger : public nvinfer1::ILogger
-{
+class Logger: public nvinfer1::ILogger {
 public:
-	nvinfer1::ILogger::Severity reportableSeverity;
+    nvinfer1::ILogger::Severity reportableSeverity;
 
-	explicit Logger(nvinfer1::ILogger::Severity severity = nvinfer1::ILogger::Severity::kINFO) :
-		reportableSeverity(severity)
-	{
-	}
+    explicit Logger(nvinfer1::ILogger::Severity severity = nvinfer1::ILogger::Severity::kINFO):
+        reportableSeverity(severity)
+    {
+    }
 
-	void log(nvinfer1::ILogger::Severity severity, const char* msg) noexcept override
-	{
-		if (severity > reportableSeverity)
-		{
-			return;
-		}
-		switch (severity)
-		{
-		case nvinfer1::ILogger::Severity::kINTERNAL_ERROR:
-			std::cerr << "INTERNAL_ERROR: ";
-			break;
-		case nvinfer1::ILogger::Severity::kERROR:
-			std::cerr << "ERROR: ";
-			break;
-		case nvinfer1::ILogger::Severity::kWARNING:
-			std::cerr << "WARNING: ";
-			break;
-		case nvinfer1::ILogger::Severity::kINFO:
-			std::cerr << "INFO: ";
-			break;
-		default:
-			std::cerr << "VERBOSE: ";
-			break;
-		}
-		std::cerr << msg << std::endl;
-	}
+    void log(nvinfer1::ILogger::Severity severity, const char* msg) noexcept override
+    {
+        if (severity > reportableSeverity) {
+            return;
+        }
+        switch (severity) {
+            case nvinfer1::ILogger::Severity::kINTERNAL_ERROR:
+                std::cerr << "INTERNAL_ERROR: ";
+                break;
+            case nvinfer1::ILogger::Severity::kERROR:
+                std::cerr << "ERROR: ";
+                break;
+            case nvinfer1::ILogger::Severity::kWARNING:
+                std::cerr << "WARNING: ";
+                break;
+            case nvinfer1::ILogger::Severity::kINFO:
+                std::cerr << "INFO: ";
+                break;
+            default:
+                std::cerr << "VERBOSE: ";
+                break;
+        }
+        std::cerr << msg << std::endl;
+    }
 };
 
 inline int get_size_by_dims(const nvinfer1::Dims& dims)
 {
-	int size = 1;
-	for (int i = 0; i < dims.nbDims; i++)
-	{
-		size *= dims.d[i];
-	}
-	return size;
+    int size = 1;
+    for (int i = 0; i < dims.nbDims; i++) {
+        size *= dims.d[i];
+    }
+    return size;
 }
 
 inline int type_to_size(const nvinfer1::DataType& dataType)
 {
-	switch (dataType)
-	{
-	case nvinfer1::DataType::kFLOAT:
-		return 4;
-	case nvinfer1::DataType::kHALF:
-		return 2;
-	case nvinfer1::DataType::kINT32:
-		return 4;
-	case nvinfer1::DataType::kINT8:
-		return 1;
-	case nvinfer1::DataType::kBOOL:
-		return 1;
-	default:
-		return 4;
-	}
+    switch (dataType) {
+        case nvinfer1::DataType::kFLOAT:
+            return 4;
+        case nvinfer1::DataType::kHALF:
+            return 2;
+        case nvinfer1::DataType::kINT32:
+            return 4;
+        case nvinfer1::DataType::kINT8:
+            return 1;
+        case nvinfer1::DataType::kBOOL:
+            return 1;
+        default:
+            return 4;
+    }
 }
 
 inline static float clamp(float val, float min, float max)
 {
-	return val > min ? (val < max ? val : max) : min;
+    return val > min ? (val < max ? val : max) : min;
 }
 
 inline bool IsPathExist(const std::string& path)
 {
-	if (access(path.c_str(), 0) == F_OK)
-	{
-		return true;
-	}
-	return false;
+    if (access(path.c_str(), 0) == F_OK) {
+        return true;
+    }
+    return false;
 }
 
 inline bool IsFile(const std::string& path)
 {
-	if (!IsPathExist(path))
-	{
-		printf("%s:%d %s not exist\n", __FILE__, __LINE__, path.c_str());
-		return false;
-	}
-	struct stat buffer;
-	return (stat(path.c_str(), &buffer) == 0 && S_ISREG(buffer.st_mode));
+    if (!IsPathExist(path)) {
+        printf("%s:%d %s not exist\n", __FILE__, __LINE__, path.c_str());
+        return false;
+    }
+    struct stat buffer;
+    return (stat(path.c_str(), &buffer) == 0 && S_ISREG(buffer.st_mode));
 }
 
 inline bool IsFolder(const std::string& path)
 {
-	if (!IsPathExist(path))
-	{
-		return false;
-	}
-	struct stat buffer;
-	return (stat(path.c_str(), &buffer) == 0 && S_ISDIR(buffer.st_mode));
+    if (!IsPathExist(path)) {
+        return false;
+    }
+    struct stat buffer;
+    return (stat(path.c_str(), &buffer) == 0 && S_ISDIR(buffer.st_mode));
 }
 
-namespace seg
-{
-	struct Binding
-	{
-		size_t size = 1;
-		size_t dsize = 1;
-		nvinfer1::Dims dims;
-		std::string name;
-	};
+namespace seg {
+struct Binding {
+    size_t         size  = 1;
+    size_t         dsize = 1;
+    nvinfer1::Dims dims;
+    std::string    name;
+};
 
-	struct Object
-	{
-		cv::Rect_<float> rect;
-		int label = 0;
-		float prob = 0.0;
-		cv::Mat boxMask;
-	};
+struct Object {
+    cv::Rect_<float> rect;
+    int              label = 0;
+    float            prob  = 0.0;
+    cv::Mat          boxMask;
+};
 
-	struct PreParam
-	{
-		float ratio = 1.0f;
-		float dw = 0.0f;
-		float dh = 0.0f;
-		float height = 0;
-		float width = 0;
-	};
-}
-#endif //SEGMENT_SIMPLE_COMMON_HPP
+struct PreParam {
+    float ratio  = 1.0f;
+    float dw     = 0.0f;
+    float dh     = 0.0f;
+    float height = 0;
+    float width  = 0;
+};
+}  // namespace seg
+#endif  // SEGMENT_SIMPLE_COMMON_HPP
diff --git a/csrc/segment/simple/include/yolov8-seg.hpp b/csrc/segment/simple/include/yolov8-seg.hpp
index b8640c9..d753088 100644
--- a/csrc/segment/simple/include/yolov8-seg.hpp
+++ b/csrc/segment/simple/include/yolov8-seg.hpp
@@ -3,549 +3,356 @@
 //
 #ifndef SEGMENT_SIMPLE_YOLOV8_SEG_HPP
 #define SEGMENT_SIMPLE_YOLOV8_SEG_HPP
-#include <fstream>
-#include "common.hpp"
 #include "NvInferPlugin.h"
+#include "common.hpp"
+#include <fstream>
 
 using namespace seg;
 
-class YOLOv8_seg
-{
+class YOLOv8_seg {
 public:
-	explicit YOLOv8_seg(const std::string& engine_file_path);
-	~YOLOv8_seg();
-
-	void make_pipe(bool warmup = true);
-	void copy_from_Mat(const cv::Mat& image);
-	void copy_from_Mat(const cv::Mat& image, cv::Size& size);
-	void letterbox(
-		const cv::Mat& image,
-		cv::Mat& out,
-		cv::Size& size
-	);
-	void infer();
-	void postprocess(
-		std::vector<Object>& objs,
-		float score_thres = 0.25f,
-		float iou_thres = 0.65f,
-		int topk = 100,
-		int seg_channels = 32,
-		int seg_h = 160,
-		int seg_w = 160
-	);
-	static void draw_objects(
-		const cv::Mat& image,
-		cv::Mat& res,
-		const std::vector<Object>& objs,
-		const std::vector<std::string>& CLASS_NAMES,
-		const std::vector<std::vector<unsigned int>>& COLORS,
-		const std::vector<std::vector<unsigned int>>& MASK_COLORS
-	);
-	int num_bindings;
-	int num_inputs = 0;
-	int num_outputs = 0;
-	std::vector<Binding> input_bindings;
-	std::vector<Binding> output_bindings;
-	std::vector<void*> host_ptrs;
-	std::vector<void*> device_ptrs;
-
-	PreParam pparam;
-private:
-	nvinfer1::ICudaEngine* engine = nullptr;
-	nvinfer1::IRuntime* runtime = nullptr;
-	nvinfer1::IExecutionContext* context = nullptr;
-	cudaStream_t stream = nullptr;
-	Logger gLogger{ nvinfer1::ILogger::Severity::kERROR };
+    explicit YOLOv8_seg(const std::string& engine_file_path);
+    ~YOLOv8_seg();
+
+    void                 make_pipe(bool warmup = true);
+    void                 copy_from_Mat(const cv::Mat& image);
+    void                 copy_from_Mat(const cv::Mat& image, cv::Size& size);
+    void                 letterbox(const cv::Mat& image, cv::Mat& out, cv::Size& size);
+    void                 infer();
+    void                 postprocess(std::vector<Object>& objs,
+                                     float                score_thres  = 0.25f,
+                                     float                iou_thres    = 0.65f,
+                                     int                  topk         = 100,
+                                     int                  seg_channels = 32,
+                                     int                  seg_h        = 160,
+                                     int                  seg_w        = 160);
+    static void          draw_objects(const cv::Mat&                                image,
+                                      cv::Mat&                                      res,
+                                      const std::vector<Object>&                    objs,
+                                      const std::vector<std::string>&               CLASS_NAMES,
+                                      const std::vector<std::vector<unsigned int>>& COLORS,
+                                      const std::vector<std::vector<unsigned int>>& MASK_COLORS);
+    int                  num_bindings;
+    int                  num_inputs  = 0;
+    int                  num_outputs = 0;
+    std::vector<Binding> input_bindings;
+    std::vector<Binding> output_bindings;
+    std::vector<void*>   host_ptrs;
+    std::vector<void*>   device_ptrs;
+
+    PreParam pparam;
 
+private:
+    nvinfer1::ICudaEngine*       engine  = nullptr;
+    nvinfer1::IRuntime*          runtime = nullptr;
+    nvinfer1::IExecutionContext* context = nullptr;
+    cudaStream_t                 stream  = nullptr;
+    Logger                       gLogger{nvinfer1::ILogger::Severity::kERROR};
 };
 
 YOLOv8_seg::YOLOv8_seg(const std::string& engine_file_path)
 {
-	std::ifstream file(engine_file_path, std::ios::binary);
-	assert(file.good());
-	file.seekg(0, std::ios::end);
-	auto size = file.tellg();
-	file.seekg(0, std::ios::beg);
-	char* trtModelStream = new char[size];
-	assert(trtModelStream);
-	file.read(trtModelStream, size);
-	file.close();
-	initLibNvInferPlugins(&this->gLogger, "");
-	this->runtime = nvinfer1::createInferRuntime(this->gLogger);
-	assert(this->runtime != nullptr);
-
-	this->engine = this->runtime->deserializeCudaEngine(trtModelStream, size);
-	assert(this->engine != nullptr);
-	delete[] trtModelStream;
-	this->context = this->engine->createExecutionContext();
-
-	assert(this->context != nullptr);
-	cudaStreamCreate(&this->stream);
-	this->num_bindings = this->engine->getNbBindings();
-
-	for (int i = 0; i < this->num_bindings; ++i)
-	{
-		Binding binding;
-		nvinfer1::Dims dims;
-		nvinfer1::DataType dtype = this->engine->getBindingDataType(i);
-		std::string name = this->engine->getBindingName(i);
-		binding.name = name;
-		binding.dsize = type_to_size(dtype);
-
-		bool IsInput = engine->bindingIsInput(i);
-		if (IsInput)
-		{
-			this->num_inputs += 1;
-			dims = this->engine->getProfileDimensions(
-				i,
-				0,
-				nvinfer1::OptProfileSelector::kMAX);
-			binding.size = get_size_by_dims(dims);
-			binding.dims = dims;
-			this->input_bindings.push_back(binding);
-			// set max opt shape
-			this->context->setBindingDimensions(i, dims);
-
-		}
-		else
-		{
-			dims = this->context->getBindingDimensions(i);
-			binding.size = get_size_by_dims(dims);
-			binding.dims = dims;
-			this->output_bindings.push_back(binding);
-			this->num_outputs += 1;
-		}
-	}
-
+    std::ifstream file(engine_file_path, std::ios::binary);
+    assert(file.good());
+    file.seekg(0, std::ios::end);
+    auto size = file.tellg();
+    file.seekg(0, std::ios::beg);
+    char* trtModelStream = new char[size];
+    assert(trtModelStream);
+    file.read(trtModelStream, size);
+    file.close();
+    initLibNvInferPlugins(&this->gLogger, "");
+    this->runtime = nvinfer1::createInferRuntime(this->gLogger);
+    assert(this->runtime != nullptr);
+
+    this->engine = this->runtime->deserializeCudaEngine(trtModelStream, size);
+    assert(this->engine != nullptr);
+    delete[] trtModelStream;
+    this->context = this->engine->createExecutionContext();
+
+    assert(this->context != nullptr);
+    cudaStreamCreate(&this->stream);
+    this->num_bindings = this->engine->getNbBindings();
+
+    for (int i = 0; i < this->num_bindings; ++i) {
+        Binding            binding;
+        nvinfer1::Dims     dims;
+        nvinfer1::DataType dtype = this->engine->getBindingDataType(i);
+        std::string        name  = this->engine->getBindingName(i);
+        binding.name             = name;
+        binding.dsize            = type_to_size(dtype);
+
+        bool IsInput = engine->bindingIsInput(i);
+        if (IsInput) {
+            this->num_inputs += 1;
+            dims         = this->engine->getProfileDimensions(i, 0, nvinfer1::OptProfileSelector::kMAX);
+            binding.size = get_size_by_dims(dims);
+            binding.dims = dims;
+            this->input_bindings.push_back(binding);
+            // set max opt shape
+            this->context->setBindingDimensions(i, dims);
+        }
+        else {
+            dims         = this->context->getBindingDimensions(i);
+            binding.size = get_size_by_dims(dims);
+            binding.dims = dims;
+            this->output_bindings.push_back(binding);
+            this->num_outputs += 1;
+        }
+    }
 }
 
 YOLOv8_seg::~YOLOv8_seg()
 {
-	this->context->destroy();
-	this->engine->destroy();
-	this->runtime->destroy();
-	cudaStreamDestroy(this->stream);
-	for (auto& ptr : this->device_ptrs)
-	{
-		CHECK(cudaFree(ptr));
-	}
-
-	for (auto& ptr : this->host_ptrs)
-	{
-		CHECK(cudaFreeHost(ptr));
-	}
+    this->context->destroy();
+    this->engine->destroy();
+    this->runtime->destroy();
+    cudaStreamDestroy(this->stream);
+    for (auto& ptr : this->device_ptrs) {
+        CHECK(cudaFree(ptr));
+    }
+
+    for (auto& ptr : this->host_ptrs) {
+        CHECK(cudaFreeHost(ptr));
+    }
 }
 
 void YOLOv8_seg::make_pipe(bool warmup)
 {
 
-	for (auto& bindings : this->input_bindings)
-	{
-		void* d_ptr;
-		CHECK(cudaMallocAsync(
-			&d_ptr,
-			bindings.size * bindings.dsize,
-			this->stream)
-		);
-		this->device_ptrs.push_back(d_ptr);
-	}
-
-	for (auto& bindings : this->output_bindings)
-	{
-		void* d_ptr, * h_ptr;
-		size_t size = bindings.size * bindings.dsize;
-		CHECK(cudaMallocAsync(
-			&d_ptr,
-			size,
-			this->stream)
-		);
-		CHECK(cudaHostAlloc(
-			&h_ptr,
-			size,
-			0)
-		);
-		this->device_ptrs.push_back(d_ptr);
-		this->host_ptrs.push_back(h_ptr);
-	}
-
-	if (warmup)
-	{
-		for (int i = 0; i < 10; i++)
-		{
-			for (auto& bindings : this->input_bindings)
-			{
-				size_t size = bindings.size * bindings.dsize;
-				void* h_ptr = malloc(size);
-				memset(h_ptr, 0, size);
-				CHECK(cudaMemcpyAsync(
-					this->device_ptrs[0],
-					h_ptr,
-					size,
-					cudaMemcpyHostToDevice,
-					this->stream)
-				);
-				free(h_ptr);
-			}
-			this->infer();
-		}
-		printf("model warmup 10 times\n");
-
-	}
+    for (auto& bindings : this->input_bindings) {
+        void* d_ptr;
+        CHECK(cudaMallocAsync(&d_ptr, bindings.size * bindings.dsize, this->stream));
+        this->device_ptrs.push_back(d_ptr);
+    }
+
+    for (auto& bindings : this->output_bindings) {
+        void * d_ptr, *h_ptr;
+        size_t size = bindings.size * bindings.dsize;
+        CHECK(cudaMallocAsync(&d_ptr, size, this->stream));
+        CHECK(cudaHostAlloc(&h_ptr, size, 0));
+        this->device_ptrs.push_back(d_ptr);
+        this->host_ptrs.push_back(h_ptr);
+    }
+
+    if (warmup) {
+        for (int i = 0; i < 10; i++) {
+            for (auto& bindings : this->input_bindings) {
+                size_t size  = bindings.size * bindings.dsize;
+                void*  h_ptr = malloc(size);
+                memset(h_ptr, 0, size);
+                CHECK(cudaMemcpyAsync(this->device_ptrs[0], h_ptr, size, cudaMemcpyHostToDevice, this->stream));
+                free(h_ptr);
+            }
+            this->infer();
+        }
+        printf("model warmup 10 times\n");
+    }
 }
 
-void YOLOv8_seg::letterbox(
-	const cv::Mat& image,
-	cv::Mat& out,
-	cv::Size& size
-)
+void YOLOv8_seg::letterbox(const cv::Mat& image, cv::Mat& out, cv::Size& size)
 {
-	const float inp_h = size.height;
-	const float inp_w = size.width;
-	float height = image.rows;
-	float width = image.cols;
-
-	float r = std::min(inp_h / height, inp_w / width);
-	int padw = std::round(width * r);
-	int padh = std::round(height * r);
-
-	cv::Mat tmp;
-	if ((int)width != padw || (int)height != padh)
-	{
-		cv::resize(
-			image,
-			tmp,
-			cv::Size(padw, padh)
-		);
-	}
-	else
-	{
-		tmp = image.clone();
-	}
-
-	float dw = inp_w - padw;
-	float dh = inp_h - padh;
-
-	dw /= 2.0f;
-	dh /= 2.0f;
-	int top = int(std::round(dh - 0.1f));
-	int bottom = int(std::round(dh + 0.1f));
-	int left = int(std::round(dw - 0.1f));
-	int right = int(std::round(dw + 0.1f));
-
-	cv::copyMakeBorder(
-		tmp,
-		tmp,
-		top,
-		bottom,
-		left,
-		right,
-		cv::BORDER_CONSTANT,
-		{ 114, 114, 114 }
-	);
-
-	cv::dnn::blobFromImage(tmp,
-		out,
-		1 / 255.f,
-		cv::Size(),
-		cv::Scalar(0, 0, 0),
-		true,
-		false,
-		CV_32F
-	);
-	this->pparam.ratio = 1 / r;
-	this->pparam.dw = dw;
-	this->pparam.dh = dh;
-	this->pparam.height = height;
-	this->pparam.width = width;;
+    const float inp_h  = size.height;
+    const float inp_w  = size.width;
+    float       height = image.rows;
+    float       width  = image.cols;
+
+    float r    = std::min(inp_h / height, inp_w / width);
+    int   padw = std::round(width * r);
+    int   padh = std::round(height * r);
+
+    cv::Mat tmp;
+    if ((int)width != padw || (int)height != padh) {
+        cv::resize(image, tmp, cv::Size(padw, padh));
+    }
+    else {
+        tmp = image.clone();
+    }
+
+    float dw = inp_w - padw;
+    float dh = inp_h - padh;
+
+    dw /= 2.0f;
+    dh /= 2.0f;
+    int top    = int(std::round(dh - 0.1f));
+    int bottom = int(std::round(dh + 0.1f));
+    int left   = int(std::round(dw - 0.1f));
+    int right  = int(std::round(dw + 0.1f));
+
+    cv::copyMakeBorder(tmp, tmp, top, bottom, left, right, cv::BORDER_CONSTANT, {114, 114, 114});
+
+    cv::dnn::blobFromImage(tmp, out, 1 / 255.f, cv::Size(), cv::Scalar(0, 0, 0), true, false, CV_32F);
+    this->pparam.ratio  = 1 / r;
+    this->pparam.dw     = dw;
+    this->pparam.dh     = dh;
+    this->pparam.height = height;
+    this->pparam.width  = width;
+    ;
 }
 
 void YOLOv8_seg::copy_from_Mat(const cv::Mat& image)
 {
-	cv::Mat nchw;
-	auto& in_binding = this->input_bindings[0];
-	auto width = in_binding.dims.d[3];
-	auto height = in_binding.dims.d[2];
-	cv::Size size{ width, height };
-	this->letterbox(
-		image,
-		nchw,
-		size
-	);
-
-	this->context->setBindingDimensions(
-		0,
-		nvinfer1::Dims
-			{
-				4,
-				{ 1, 3, height, width }
-			}
-	);
-
-	CHECK(cudaMemcpyAsync(
-		this->device_ptrs[0],
-		nchw.ptr<float>(),
-		nchw.total() * nchw.elemSize(),
-		cudaMemcpyHostToDevice,
-		this->stream)
-	);
+    cv::Mat  nchw;
+    auto&    in_binding = this->input_bindings[0];
+    auto     width      = in_binding.dims.d[3];
+    auto     height     = in_binding.dims.d[2];
+    cv::Size size{width, height};
+    this->letterbox(image, nchw, size);
+
+    this->context->setBindingDimensions(0, nvinfer1::Dims{4, {1, 3, height, width}});
+
+    CHECK(cudaMemcpyAsync(
+        this->device_ptrs[0], nchw.ptr<float>(), nchw.total() * nchw.elemSize(), cudaMemcpyHostToDevice, this->stream));
 }
 
 void YOLOv8_seg::copy_from_Mat(const cv::Mat& image, cv::Size& size)
 {
-	cv::Mat nchw;
-	this->letterbox(
-		image,
-		nchw,
-		size
-	);
-	this->context->setBindingDimensions(
-		0,
-		nvinfer1::Dims
-			{ 4,
-			  { 1, 3, size.height, size.width }
-			}
-	);
-	CHECK(cudaMemcpyAsync(
-		this->device_ptrs[0],
-		nchw.ptr<float>(),
-		nchw.total() * nchw.elemSize(),
-		cudaMemcpyHostToDevice,
-		this->stream)
-	);
+    cv::Mat nchw;
+    this->letterbox(image, nchw, size);
+    this->context->setBindingDimensions(0, nvinfer1::Dims{4, {1, 3, size.height, size.width}});
+    CHECK(cudaMemcpyAsync(
+        this->device_ptrs[0], nchw.ptr<float>(), nchw.total() * nchw.elemSize(), cudaMemcpyHostToDevice, this->stream));
 }
 
 void YOLOv8_seg::infer()
 {
 
-	this->context->enqueueV2(
-		this->device_ptrs.data(),
-		this->stream,
-		nullptr
-	);
-	for (int i = 0; i < this->num_outputs; i++)
-	{
-		size_t osize = this->output_bindings[i].size * this->output_bindings[i].dsize;
-		CHECK(cudaMemcpyAsync(this->host_ptrs[i],
-			this->device_ptrs[i + this->num_inputs],
-			osize,
-			cudaMemcpyDeviceToHost,
-			this->stream)
-		);
-
-	}
-	cudaStreamSynchronize(this->stream);
-
+    this->context->enqueueV2(this->device_ptrs.data(), this->stream, nullptr);
+    for (int i = 0; i < this->num_outputs; i++) {
+        size_t osize = this->output_bindings[i].size * this->output_bindings[i].dsize;
+        CHECK(cudaMemcpyAsync(
+            this->host_ptrs[i], this->device_ptrs[i + this->num_inputs], osize, cudaMemcpyDeviceToHost, this->stream));
+    }
+    cudaStreamSynchronize(this->stream);
 }
 
-void YOLOv8_seg::postprocess(std::vector<Object>& objs,
-	float score_thres,
-	float iou_thres,
-	int topk,
-	int seg_channels,
-	int seg_h,
-	int seg_w
-)
+void YOLOv8_seg::postprocess(
+    std::vector<Object>& objs, float score_thres, float iou_thres, int topk, int seg_channels, int seg_h, int seg_w)
 {
-	objs.clear();
-	auto input_h = this->input_bindings[0].dims.d[2];
-	auto input_w = this->input_bindings[0].dims.d[3];
-	auto num_anchors = this->output_bindings[0].dims.d[1];
-	auto num_channels = this->output_bindings[0].dims.d[2];
-
-	auto& dw = this->pparam.dw;
-	auto& dh = this->pparam.dh;
-	auto& width = this->pparam.width;
-	auto& height = this->pparam.height;
-	auto& ratio = this->pparam.ratio;
-
-	auto* output = static_cast<float*>(this->host_ptrs[0]);
-	cv::Mat protos = cv::Mat(seg_channels, seg_h * seg_w, CV_32F,
-		static_cast<float*>(this->host_ptrs[1]));
-
-	std::vector<int> labels;
-	std::vector<float> scores;
-	std::vector<cv::Rect> bboxes;
-	std::vector<cv::Mat> mask_confs;
-	std::vector<int> indices;
-
-	for (int i = 0; i < num_anchors; i++)
-	{
-		float* ptr = output + i * num_channels;
-		float score = *(ptr + 4);
-		if (score > score_thres)
-		{
-			float x0 = *ptr++ - dw;
-			float y0 = *ptr++ - dh;
-			float x1 = *ptr++ - dw;
-			float y1 = *ptr++ - dh;
-
-			x0 = clamp(x0 * ratio, 0.f, width);
-			y0 = clamp(y0 * ratio, 0.f, height);
-			x1 = clamp(x1 * ratio, 0.f, width);
-			y1 = clamp(y1 * ratio, 0.f, height);
-
-			int label = *(++ptr);
-			cv::Mat mask_conf = cv::Mat(1, seg_channels, CV_32F, ++ptr);
-			mask_confs.push_back(mask_conf);
-			labels.push_back(label);
-			scores.push_back(score);
-			bboxes.push_back(cv::Rect_<float>(x0, y0, x1 - x0, y1 - y0));
-
-		}
-	}
+    objs.clear();
+    auto input_h      = this->input_bindings[0].dims.d[2];
+    auto input_w      = this->input_bindings[0].dims.d[3];
+    auto num_anchors  = this->output_bindings[0].dims.d[1];
+    auto num_channels = this->output_bindings[0].dims.d[2];
+
+    auto& dw     = this->pparam.dw;
+    auto& dh     = this->pparam.dh;
+    auto& width  = this->pparam.width;
+    auto& height = this->pparam.height;
+    auto& ratio  = this->pparam.ratio;
+
+    auto*   output = static_cast<float*>(this->host_ptrs[0]);
+    cv::Mat protos = cv::Mat(seg_channels, seg_h * seg_w, CV_32F, static_cast<float*>(this->host_ptrs[1]));
+
+    std::vector<int>      labels;
+    std::vector<float>    scores;
+    std::vector<cv::Rect> bboxes;
+    std::vector<cv::Mat>  mask_confs;
+    std::vector<int>      indices;
+
+    for (int i = 0; i < num_anchors; i++) {
+        float* ptr   = output + i * num_channels;
+        float  score = *(ptr + 4);
+        if (score > score_thres) {
+            float x0 = *ptr++ - dw;
+            float y0 = *ptr++ - dh;
+            float x1 = *ptr++ - dw;
+            float y1 = *ptr++ - dh;
+
+            x0 = clamp(x0 * ratio, 0.f, width);
+            y0 = clamp(y0 * ratio, 0.f, height);
+            x1 = clamp(x1 * ratio, 0.f, width);
+            y1 = clamp(y1 * ratio, 0.f, height);
+
+            int     label     = *(++ptr);
+            cv::Mat mask_conf = cv::Mat(1, seg_channels, CV_32F, ++ptr);
+            mask_confs.push_back(mask_conf);
+            labels.push_back(label);
+            scores.push_back(score);
+            bboxes.push_back(cv::Rect_<float>(x0, y0, x1 - x0, y1 - y0));
+        }
+    }
 
 #if defined(BATCHED_NMS)
-	cv::dnn::NMSBoxesBatched(
-		bboxes,
-		scores,
-		labels,
-		score_thres,
-		iou_thres,
-		indices
-	);
+    cv::dnn::NMSBoxesBatched(bboxes, scores, labels, score_thres, iou_thres, indices);
 #else
-	cv::dnn::NMSBoxes(
-		bboxes,
-		scores,
-		score_thres,
-		iou_thres,
-		indices
-	);
+    cv::dnn::NMSBoxes(bboxes, scores, score_thres, iou_thres, indices);
 #endif
 
-	cv::Mat masks;
-	int cnt = 0;
-	for (auto& i : indices)
-	{
-		if (cnt >= topk)
-		{
-			break;
-		}
-		cv::Rect tmp = bboxes[i];
-		Object obj;
-		obj.label = labels[i];
-		obj.rect = tmp;
-		obj.prob = scores[i];
-		masks.push_back(mask_confs[i]);
-		objs.push_back(obj);
-		cnt += 1;
-	}
-
-	if(masks.empty())
-	{
-		//masks is empty
-	}
-	else
-	{
-		cv::Mat matmulRes = (masks * protos).t();
-		cv::Mat maskMat = matmulRes.reshape(indices.size(), { seg_w, seg_h });
-
-		std::vector<cv::Mat> maskChannels;
-		cv::split(maskMat, maskChannels);
-		int scale_dw = dw / input_w * seg_w;
-		int scale_dh = dh / input_h * seg_h;
-
-		cv::Rect roi(
-			scale_dw,
-			scale_dh,
-			seg_w - 2 * scale_dw,
-			seg_h - 2 * scale_dh);
-
-		for (int i = 0; i < indices.size(); i++)
-		{
-			cv::Mat dest, mask;
-			cv::exp(-maskChannels[i], dest);
-			dest = 1.0 / (1.0 + dest);
-			dest = dest(roi);
-			cv::resize(
-				dest,
-				mask,
-				cv::Size((int)width, (int)height),
-				cv::INTER_LINEAR
-			);
-			objs[i].boxMask = mask(objs[i].rect) > 0.5f;
-		}
-	}
+    cv::Mat masks;
+    int     cnt = 0;
+    for (auto& i : indices) {
+        if (cnt >= topk) {
+            break;
+        }
+        cv::Rect tmp = bboxes[i];
+        Object   obj;
+        obj.label = labels[i];
+        obj.rect  = tmp;
+        obj.prob  = scores[i];
+        masks.push_back(mask_confs[i]);
+        objs.push_back(obj);
+        cnt += 1;
+    }
+
+    if (masks.empty()) {
+        // masks is empty
+    }
+    else {
+        cv::Mat matmulRes = (masks * protos).t();
+        cv::Mat maskMat   = matmulRes.reshape(indices.size(), {seg_w, seg_h});
+
+        std::vector<cv::Mat> maskChannels;
+        cv::split(maskMat, maskChannels);
+        int scale_dw = dw / input_w * seg_w;
+        int scale_dh = dh / input_h * seg_h;
+
+        cv::Rect roi(scale_dw, scale_dh, seg_w - 2 * scale_dw, seg_h - 2 * scale_dh);
+
+        for (int i = 0; i < indices.size(); i++) {
+            cv::Mat dest, mask;
+            cv::exp(-maskChannels[i], dest);
+            dest = 1.0 / (1.0 + dest);
+            dest = dest(roi);
+            cv::resize(dest, mask, cv::Size((int)width, (int)height), cv::INTER_LINEAR);
+            objs[i].boxMask = mask(objs[i].rect) > 0.5f;
+        }
+    }
 }
 
-void YOLOv8_seg::draw_objects(const cv::Mat& image,
-	cv::Mat& res,
-	const std::vector<Object>& objs,
-	const std::vector<std::string>& CLASS_NAMES,
-	const std::vector<std::vector<unsigned int>>& COLORS,
-	const std::vector<std::vector<unsigned int>>& MASK_COLORS
-)
+void YOLOv8_seg::draw_objects(const cv::Mat&                                image,
+                              cv::Mat&                                      res,
+                              const std::vector<Object>&                    objs,
+                              const std::vector<std::string>&               CLASS_NAMES,
+                              const std::vector<std::vector<unsigned int>>& COLORS,
+                              const std::vector<std::vector<unsigned int>>& MASK_COLORS)
 {
-	res = image.clone();
-	cv::Mat mask = image.clone();
-	for (auto& obj : objs)
-	{
-		int idx = obj.label;
-		cv::Scalar color = cv::Scalar(
-			COLORS[idx][0],
-			COLORS[idx][1],
-			COLORS[idx][2]
-		);
-		cv::Scalar mask_color = cv::Scalar(
-			MASK_COLORS[idx % 20][0],
-			MASK_COLORS[idx % 20][1],
-			MASK_COLORS[idx % 20][2]
-		);
-		cv::rectangle(
-			res,
-			obj.rect,
-			color,
-			2
-		);
-
-		char text[256];
-		sprintf(
-			text,
-			"%s %.1f%%",
-			CLASS_NAMES[idx].c_str(),
-			obj.prob * 100
-		);
-		mask(obj.rect).setTo(mask_color, obj.boxMask);
-
-		int baseLine = 0;
-		cv::Size label_size = cv::getTextSize(
-			text,
-			cv::FONT_HERSHEY_SIMPLEX,
-			0.4,
-			1,
-			&baseLine
-		);
-
-		int x = (int)obj.rect.x;
-		int y = (int)obj.rect.y + 1;
-
-		if (y > res.rows)
-			y = res.rows;
-
-		cv::rectangle(
-			res,
-			cv::Rect(x, y, label_size.width, label_size.height + baseLine),
-			{ 0, 0, 255 },
-			-1
-		);
-
-		cv::putText(
-			res,
-			text,
-			cv::Point(x, y + label_size.height),
-			cv::FONT_HERSHEY_SIMPLEX,
-			0.4,
-			{ 255, 255, 255 },
-			1
-		);
-	}
-	cv::addWeighted(
-		res,
-		0.5,
-		mask,
-		0.8,
-		1,
-		res
-	);
+    res          = image.clone();
+    cv::Mat mask = image.clone();
+    for (auto& obj : objs) {
+        int        idx   = obj.label;
+        cv::Scalar color = cv::Scalar(COLORS[idx][0], COLORS[idx][1], COLORS[idx][2]);
+        cv::Scalar mask_color =
+            cv::Scalar(MASK_COLORS[idx % 20][0], MASK_COLORS[idx % 20][1], MASK_COLORS[idx % 20][2]);
+        cv::rectangle(res, obj.rect, color, 2);
+
+        char text[256];
+        sprintf(text, "%s %.1f%%", CLASS_NAMES[idx].c_str(), obj.prob * 100);
+        mask(obj.rect).setTo(mask_color, obj.boxMask);
+
+        int      baseLine   = 0;
+        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine);
+
+        int x = (int)obj.rect.x;
+        int y = (int)obj.rect.y + 1;
+
+        if (y > res.rows)
+            y = res.rows;
+
+        cv::rectangle(res, cv::Rect(x, y, label_size.width, label_size.height + baseLine), {0, 0, 255}, -1);
+
+        cv::putText(res, text, cv::Point(x, y + label_size.height), cv::FONT_HERSHEY_SIMPLEX, 0.4, {255, 255, 255}, 1);
+    }
+    cv::addWeighted(res, 0.5, mask, 0.8, 1, res);
 }
-#endif //SEGMENT_SIMPLE_YOLOV8_SEG_HPP
+#endif  // SEGMENT_SIMPLE_YOLOV8_SEG_HPP
diff --git a/csrc/segment/simple/main.cpp b/csrc/segment/simple/main.cpp
index 2185dd1..32ce066 100644
--- a/csrc/segment/simple/main.cpp
+++ b/csrc/segment/simple/main.cpp
@@ -2,177 +2,131 @@
 // Created by ubuntu on 1/20/23.
 //
 #include "chrono"
-#include "yolov8-seg.hpp"
 #include "opencv2/opencv.hpp"
+#include "yolov8-seg.hpp"
 
 const std::vector<std::string> CLASS_NAMES = {
-	"person", "bicycle", "car", "motorcycle", "airplane", "bus",
-	"train", "truck", "boat", "traffic light", "fire hydrant",
-	"stop sign", "parking meter", "bench", "bird", "cat",
-	"dog", "horse", "sheep", "cow", "elephant",
-	"bear", "zebra", "giraffe", "backpack", "umbrella",
-	"handbag", "tie", "suitcase", "frisbee", "skis",
-	"snowboard", "sports ball", "kite", "baseball bat", "baseball glove",
-	"skateboard", "surfboard", "tennis racket", "bottle", "wine glass",
-	"cup", "fork", "knife", "spoon", "bowl",
-	"banana", "apple", "sandwich", "orange", "broccoli",
-	"carrot", "hot dog", "pizza", "donut", "cake",
-	"chair", "couch", "potted plant", "bed", "dining table",
-	"toilet", "tv", "laptop", "mouse", "remote",
-	"keyboard", "cell phone", "microwave", "oven",
-	"toaster", "sink", "refrigerator", "book", "clock", "vase",
-	"scissors", "teddy bear", "hair drier", "toothbrush" };
+    "person",         "bicycle",    "car",           "motorcycle",    "airplane",     "bus",           "train",
+    "truck",          "boat",       "traffic light", "fire hydrant",  "stop sign",    "parking meter", "bench",
+    "bird",           "cat",        "dog",           "horse",         "sheep",        "cow",           "elephant",
+    "bear",           "zebra",      "giraffe",       "backpack",      "umbrella",     "handbag",       "tie",
+    "suitcase",       "frisbee",    "skis",          "snowboard",     "sports ball",  "kite",          "baseball bat",
+    "baseball glove", "skateboard", "surfboard",     "tennis racket", "bottle",       "wine glass",    "cup",
+    "fork",           "knife",      "spoon",         "bowl",          "banana",       "apple",         "sandwich",
+    "orange",         "broccoli",   "carrot",        "hot dog",       "pizza",        "donut",         "cake",
+    "chair",          "couch",      "potted plant",  "bed",           "dining table", "toilet",        "tv",
+    "laptop",         "mouse",      "remote",        "keyboard",      "cell phone",   "microwave",     "oven",
+    "toaster",        "sink",       "refrigerator",  "book",          "clock",        "vase",          "scissors",
+    "teddy bear",     "hair drier", "toothbrush"};
 
 const std::vector<std::vector<unsigned int>> COLORS = {
-	{ 0, 114, 189 }, { 217, 83, 25 }, { 237, 177, 32 },
-	{ 126, 47, 142 }, { 119, 172, 48 }, { 77, 190, 238 },
-	{ 162, 20, 47 }, { 76, 76, 76 }, { 153, 153, 153 },
-	{ 255, 0, 0 }, { 255, 128, 0 }, { 191, 191, 0 },
-	{ 0, 255, 0 }, { 0, 0, 255 }, { 170, 0, 255 },
-	{ 85, 85, 0 }, { 85, 170, 0 }, { 85, 255, 0 },
-	{ 170, 85, 0 }, { 170, 170, 0 }, { 170, 255, 0 },
-	{ 255, 85, 0 }, { 255, 170, 0 }, { 255, 255, 0 },
-	{ 0, 85, 128 }, { 0, 170, 128 }, { 0, 255, 128 },
-	{ 85, 0, 128 }, { 85, 85, 128 }, { 85, 170, 128 },
-	{ 85, 255, 128 }, { 170, 0, 128 }, { 170, 85, 128 },
-	{ 170, 170, 128 }, { 170, 255, 128 }, { 255, 0, 128 },
-	{ 255, 85, 128 }, { 255, 170, 128 }, { 255, 255, 128 },
-	{ 0, 85, 255 }, { 0, 170, 255 }, { 0, 255, 255 },
-	{ 85, 0, 255 }, { 85, 85, 255 }, { 85, 170, 255 },
-	{ 85, 255, 255 }, { 170, 0, 255 }, { 170, 85, 255 },
-	{ 170, 170, 255 }, { 170, 255, 255 }, { 255, 0, 255 },
-	{ 255, 85, 255 }, { 255, 170, 255 }, { 85, 0, 0 },
-	{ 128, 0, 0 }, { 170, 0, 0 }, { 212, 0, 0 },
-	{ 255, 0, 0 }, { 0, 43, 0 }, { 0, 85, 0 },
-	{ 0, 128, 0 }, { 0, 170, 0 }, { 0, 212, 0 },
-	{ 0, 255, 0 }, { 0, 0, 43 }, { 0, 0, 85 },
-	{ 0, 0, 128 }, { 0, 0, 170 }, { 0, 0, 212 },
-	{ 0, 0, 255 }, { 0, 0, 0 }, { 36, 36, 36 },
-	{ 73, 73, 73 }, { 109, 109, 109 }, { 146, 146, 146 },
-	{ 182, 182, 182 }, { 219, 219, 219 }, { 0, 114, 189 },
-	{ 80, 183, 189 }, { 128, 128, 0 }
-};
+    {0, 114, 189},   {217, 83, 25},   {237, 177, 32},  {126, 47, 142},  {119, 172, 48},  {77, 190, 238},
+    {162, 20, 47},   {76, 76, 76},    {153, 153, 153}, {255, 0, 0},     {255, 128, 0},   {191, 191, 0},
+    {0, 255, 0},     {0, 0, 255},     {170, 0, 255},   {85, 85, 0},     {85, 170, 0},    {85, 255, 0},
+    {170, 85, 0},    {170, 170, 0},   {170, 255, 0},   {255, 85, 0},    {255, 170, 0},   {255, 255, 0},
+    {0, 85, 128},    {0, 170, 128},   {0, 255, 128},   {85, 0, 128},    {85, 85, 128},   {85, 170, 128},
+    {85, 255, 128},  {170, 0, 128},   {170, 85, 128},  {170, 170, 128}, {170, 255, 128}, {255, 0, 128},
+    {255, 85, 128},  {255, 170, 128}, {255, 255, 128}, {0, 85, 255},    {0, 170, 255},   {0, 255, 255},
+    {85, 0, 255},    {85, 85, 255},   {85, 170, 255},  {85, 255, 255},  {170, 0, 255},   {170, 85, 255},
+    {170, 170, 255}, {170, 255, 255}, {255, 0, 255},   {255, 85, 255},  {255, 170, 255}, {85, 0, 0},
+    {128, 0, 0},     {170, 0, 0},     {212, 0, 0},     {255, 0, 0},     {0, 43, 0},      {0, 85, 0},
+    {0, 128, 0},     {0, 170, 0},     {0, 212, 0},     {0, 255, 0},     {0, 0, 43},      {0, 0, 85},
+    {0, 0, 128},     {0, 0, 170},     {0, 0, 212},     {0, 0, 255},     {0, 0, 0},       {36, 36, 36},
+    {73, 73, 73},    {109, 109, 109}, {146, 146, 146}, {182, 182, 182}, {219, 219, 219}, {0, 114, 189},
+    {80, 183, 189},  {128, 128, 0}};
 
 const std::vector<std::vector<unsigned int>> MASK_COLORS = {
-	{ 255, 56, 56 }, { 255, 157, 151 }, { 255, 112, 31 },
-	{ 255, 178, 29 }, { 207, 210, 49 }, { 72, 249, 10 },
-	{ 146, 204, 23 }, { 61, 219, 134 }, { 26, 147, 52 },
-	{ 0, 212, 187 }, { 44, 153, 168 }, { 0, 194, 255 },
-	{ 52, 69, 147 }, { 100, 115, 255 }, { 0, 24, 236 },
-	{ 132, 56, 255 }, { 82, 0, 133 }, { 203, 56, 255 },
-	{ 255, 149, 200 }, { 255, 55, 199 }
-};
+    {255, 56, 56},  {255, 157, 151}, {255, 112, 31}, {255, 178, 29}, {207, 210, 49},  {72, 249, 10}, {146, 204, 23},
+    {61, 219, 134}, {26, 147, 52},   {0, 212, 187},  {44, 153, 168}, {0, 194, 255},   {52, 69, 147}, {100, 115, 255},
+    {0, 24, 236},   {132, 56, 255},  {82, 0, 133},   {203, 56, 255}, {255, 149, 200}, {255, 55, 199}};
 
 int main(int argc, char** argv)
 {
-	// cuda:0
-	cudaSetDevice(0);
+    // cuda:0
+    cudaSetDevice(0);
 
-	const std::string engine_file_path{ argv[1] };
-	const std::string path{ argv[2] };
+    const std::string engine_file_path{argv[1]};
+    const std::string path{argv[2]};
 
-	std::vector<std::string> imagePathList;
-	bool isVideo{ false };
+    std::vector<std::string> imagePathList;
+    bool                     isVideo{false};
 
-	assert(argc == 3);
+    assert(argc == 3);
 
-	auto yolov8 = new YOLOv8_seg(engine_file_path);
-	yolov8->make_pipe(true);
+    auto yolov8 = new YOLOv8_seg(engine_file_path);
+    yolov8->make_pipe(true);
 
-	if (IsFile(path))
-	{
-		std::string suffix = path.substr(path.find_last_of('.') + 1);
-		if (
-			suffix == "jpg" ||
-				suffix == "jpeg" ||
-				suffix == "png"
-			)
-		{
-			imagePathList.push_back(path);
-		}
-		else if (
-			suffix == "mp4" ||
-				suffix == "avi" ||
-				suffix == "m4v" ||
-				suffix == "mpeg" ||
-				suffix == "mov" ||
-				suffix == "mkv"
-			)
-		{
-			isVideo = true;
-		}
-		else
-		{
-			printf("suffix %s is wrong !!!\n", suffix.c_str());
-			std::abort();
-		}
-	}
-	else if (IsFolder(path))
-	{
-		cv::glob(path + "/*.jpg", imagePathList);
-	}
+    if (IsFile(path)) {
+        std::string suffix = path.substr(path.find_last_of('.') + 1);
+        if (suffix == "jpg" || suffix == "jpeg" || suffix == "png") {
+            imagePathList.push_back(path);
+        }
+        else if (suffix == "mp4" || suffix == "avi" || suffix == "m4v" || suffix == "mpeg" || suffix == "mov"
+                 || suffix == "mkv") {
+            isVideo = true;
+        }
+        else {
+            printf("suffix %s is wrong !!!\n", suffix.c_str());
+            std::abort();
+        }
+    }
+    else if (IsFolder(path)) {
+        cv::glob(path + "/*.jpg", imagePathList);
+    }
 
-	cv::Mat res, image;
-	cv::Size size = cv::Size{ 640, 640 };
-	int topk = 100;
-	int seg_h = 160;
-	int seg_w = 160;
-	int seg_channels = 32;
-	float score_thres = 0.25f;
-	float iou_thres = 0.65f;
+    cv::Mat  res, image;
+    cv::Size size         = cv::Size{640, 640};
+    int      topk         = 100;
+    int      seg_h        = 160;
+    int      seg_w        = 160;
+    int      seg_channels = 32;
+    float    score_thres  = 0.25f;
+    float    iou_thres    = 0.65f;
 
-	std::vector<Object> objs;
+    std::vector<Object> objs;
 
-	cv::namedWindow("result", cv::WINDOW_AUTOSIZE);
+    cv::namedWindow("result", cv::WINDOW_AUTOSIZE);
 
-	if (isVideo)
-	{
-		cv::VideoCapture cap(path);
+    if (isVideo) {
+        cv::VideoCapture cap(path);
 
-		if (!cap.isOpened())
-		{
-			printf("can not open %s\n", path.c_str());
-			return -1;
-		}
-		while (cap.read(image))
-		{
-			objs.clear();
-			yolov8->copy_from_Mat(image, size);
-			auto start = std::chrono::system_clock::now();
-			yolov8->infer();
-			auto end = std::chrono::system_clock::now();
-			yolov8->postprocess(objs, score_thres, iou_thres, topk, seg_channels, seg_h, seg_w);
-			yolov8->draw_objects(image, res, objs, CLASS_NAMES, COLORS, MASK_COLORS);
-			auto tc = (double)
-				std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.;
-			printf("cost %2.4lf ms\n", tc);
-			cv::imshow("result", res);
-			if (cv::waitKey(10) == 'q')
-			{
-				break;
-			}
-		}
-	}
-	else
-	{
-		for (auto& path : imagePathList)
-		{
-			objs.clear();
-			image = cv::imread(path);
-			yolov8->copy_from_Mat(image, size);
-			auto start = std::chrono::system_clock::now();
-			yolov8->infer();
-			auto end = std::chrono::system_clock::now();
-			yolov8->postprocess(objs, score_thres, iou_thres, topk, seg_channels, seg_h, seg_w);
-			yolov8->draw_objects(image, res, objs, CLASS_NAMES, COLORS, MASK_COLORS);
-			auto tc = (double)
-				std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.;
-			printf("cost %2.4lf ms\n", tc);
-			cv::imshow("result", res);
-			cv::waitKey(0);
-		}
-	}
-	cv::destroyAllWindows();
-	delete yolov8;
-	return 0;
+        if (!cap.isOpened()) {
+            printf("can not open %s\n", path.c_str());
+            return -1;
+        }
+        while (cap.read(image)) {
+            objs.clear();
+            yolov8->copy_from_Mat(image, size);
+            auto start = std::chrono::system_clock::now();
+            yolov8->infer();
+            auto end = std::chrono::system_clock::now();
+            yolov8->postprocess(objs, score_thres, iou_thres, topk, seg_channels, seg_h, seg_w);
+            yolov8->draw_objects(image, res, objs, CLASS_NAMES, COLORS, MASK_COLORS);
+            auto tc = (double)std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.;
+            printf("cost %2.4lf ms\n", tc);
+            cv::imshow("result", res);
+            if (cv::waitKey(10) == 'q') {
+                break;
+            }
+        }
+    }
+    else {
+        for (auto& path : imagePathList) {
+            objs.clear();
+            image = cv::imread(path);
+            yolov8->copy_from_Mat(image, size);
+            auto start = std::chrono::system_clock::now();
+            yolov8->infer();
+            auto end = std::chrono::system_clock::now();
+            yolov8->postprocess(objs, score_thres, iou_thres, topk, seg_channels, seg_h, seg_w);
+            yolov8->draw_objects(image, res, objs, CLASS_NAMES, COLORS, MASK_COLORS);
+            auto tc = (double)std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.;
+            printf("cost %2.4lf ms\n", tc);
+            cv::imshow("result", res);
+            cv::waitKey(0);
+        }
+    }
+    cv::destroyAllWindows();
+    delete yolov8;
+    return 0;
 }