Support the original YOLOv8 pose model.

pull/126/head
triple-Mu 1 year ago
parent 90c3aeba1d
commit 5b09fff3cd
Changed files (number of changed lines):
  1. config.py (15)
  2. infer-det-without-torch.py (4)
  3. infer-det.py (4)
  4. infer-pose-without-torch.py (116)
  5. infer-pose.py (112)
  6. infer-seg-without-torch.py (4)
  7. infer-seg.py (7)
  8. models/torch_utils.py (38)
  9. models/utils.py (36)

config.py
@@ -36,5 +36,20 @@ MASK_COLORS = np.array([(255, 56, 56), (255, 157, 151), (255, 112, 31),
                         (255, 149, 200), (255, 55, 199)],
                        dtype=np.float32) / 255.
 
+KPS_COLORS = [[0, 255, 0], [0, 255, 0], [0, 255, 0], [0, 255, 0], [0, 255, 0],
+              [255, 128, 0], [255, 128, 0], [255, 128, 0], [255, 128, 0],
+              [255, 128, 0], [255, 128, 0], [51, 153, 255], [51, 153, 255],
+              [51, 153, 255], [51, 153, 255], [51, 153, 255], [51, 153, 255]]
+
+SKELETON = [[16, 14], [14, 12], [17, 15], [15, 13], [12, 13], [6, 12], [7, 13],
+            [6, 7], [6, 8], [7, 9], [8, 10], [9, 11], [2, 3], [1, 2], [1, 3],
+            [2, 4], [3, 5], [4, 6], [5, 7]]
+
+LIMB_COLORS = [[51, 153, 255], [51, 153, 255], [51, 153, 255], [51, 153, 255],
+               [255, 51, 255], [255, 51, 255], [255, 51, 255], [255, 128, 0],
+               [255, 128, 0], [255, 128, 0], [255, 128, 0], [255, 128, 0],
+               [0, 255, 0], [0, 255, 0], [0, 255, 0], [0, 255, 0], [0, 255, 0],
+               [0, 255, 0], [0, 255, 0]]
+
 # alpha for segment masks
 ALPHA = 0.5
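The new constants follow the COCO 17-keypoint layout: KPS_COLORS holds one colour per keypoint, SKELETON lists the 19 limbs as pairs of 1-based keypoint indices, and LIMB_COLORS gives one colour per limb. A minimal consistency check, added here only as an illustration and not part of the commit:

    from config import KPS_COLORS, LIMB_COLORS, SKELETON

    assert len(KPS_COLORS) == 17                      # one colour per COCO keypoint
    assert len(SKELETON) == len(LIMB_COLORS) == 19    # one colour per limb
    assert all(1 <= i <= 17 for pair in SKELETON for i in pair)  # 1-based indices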

infer-det-without-torch.py
@@ -38,6 +38,10 @@ def main(args: argparse.Namespace) -> None:
         data = Engine(tensor)
 
         bboxes, scores, labels = det_postprocess(data)
+        if bboxes.size == 0:
+            # if no bounding box
+            print(f'{image}: no object!')
+            continue
         bboxes -= dwdh
         bboxes /= ratio

infer-det.py
@@ -37,6 +37,10 @@ def main(args: argparse.Namespace) -> None:
         data = Engine(tensor)
 
         bboxes, scores, labels = det_postprocess(data)
+        if bboxes.numel() == 0:
+            # if no bounding box
+            print(f'{image}: no object!')
+            continue
         bboxes -= dwdh
         bboxes /= ratio
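Both hunks add the same early exit; the test differs only because det_postprocess returns numpy arrays in the torch-free script and tensors in the torch one, and it now yields empty containers instead of None (see the models/ changes below). A tiny illustration of the two emptiness checks, not taken from the commit:

    import numpy as np
    import torch

    assert np.empty((0, 4), dtype=np.float32).size == 0   # numpy path (infer-det-without-torch.py)
    assert torch.zeros((0, 4)).numel() == 0                # torch path (infer-det.py)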

infer-pose-without-torch.py (new file)
@@ -0,0 +1,116 @@
import argparse
from pathlib import Path

import cv2
import numpy as np

from config import COLORS, KPS_COLORS, LIMB_COLORS, SKELETON
from models.utils import blob, letterbox, path_to_list, pose_postprocess


def main(args: argparse.Namespace) -> None:
    if args.method == 'cudart':
        from models.cudart_api import TRTEngine
    elif args.method == 'pycuda':
        from models.pycuda_api import TRTEngine
    else:
        raise NotImplementedError

    Engine = TRTEngine(args.engine)
    H, W = Engine.inp_info[0].shape[-2:]

    images = path_to_list(args.imgs)
    save_path = Path(args.out_dir)

    if not args.show and not save_path.exists():
        save_path.mkdir(parents=True, exist_ok=True)

    for image in images:
        save_image = save_path / image.name
        bgr = cv2.imread(str(image))
        draw = bgr.copy()
        bgr, ratio, dwdh = letterbox(bgr, (W, H))
        dw, dh = int(dwdh[0]), int(dwdh[1])
        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
        tensor = blob(rgb, return_seg=False)
        dwdh = np.array(dwdh * 2, dtype=np.float32)
        tensor = np.ascontiguousarray(tensor)
        # inference
        data = Engine(tensor)

        bboxes, scores, kpts = pose_postprocess(data, args.conf_thres,
                                                args.iou_thres)
        if bboxes.size == 0:
            # if no bounding box
            print(f'{image}: no object!')
            continue
        bboxes -= dwdh
        bboxes /= ratio

        for (bbox, score, kpt) in zip(bboxes, scores, kpts):
            bbox = bbox.round().astype(np.int32).tolist()
            color = COLORS['person']
            cv2.rectangle(draw, bbox[:2], bbox[2:], color, 2)
            cv2.putText(draw,
                        f'person:{score:.3f}', (bbox[0], bbox[1] - 2),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        0.75, [225, 255, 255],
                        thickness=2)
            # draw the 17 keypoints, then the 19 skeleton limbs
            # (SKELETON uses 1-based keypoint indices)
            for i in range(19):
                if i < 17:
                    px, py, ps = kpt[i]
                    if ps > 0.5:
                        kcolor = KPS_COLORS[i]
                        px = round(float(px - dw) / ratio)
                        py = round(float(py - dh) / ratio)
                        cv2.circle(draw, (px, py), 5, kcolor, -1)
                xi, yi = SKELETON[i]
                pos1_s = kpt[xi - 1][2]
                pos2_s = kpt[yi - 1][2]
                if pos1_s > 0.5 and pos2_s > 0.5:
                    limb_color = LIMB_COLORS[i]
                    pos1_x = round(float(kpt[xi - 1][0] - dw) / ratio)
                    pos1_y = round(float(kpt[xi - 1][1] - dh) / ratio)
                    pos2_x = round(float(kpt[yi - 1][0] - dw) / ratio)
                    pos2_y = round(float(kpt[yi - 1][1] - dh) / ratio)
                    cv2.line(draw, (pos1_x, pos1_y), (pos2_x, pos2_y),
                             limb_color, 2)
        if args.show:
            cv2.imshow('result', draw)
            cv2.waitKey(0)
        else:
            cv2.imwrite(str(save_image), draw)


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--engine', type=str, help='Engine file')
    parser.add_argument('--imgs', type=str, help='Images file')
    parser.add_argument('--show',
                        action='store_true',
                        help='Show the detection results')
    parser.add_argument('--out-dir',
                        type=str,
                        default='./output',
                        help='Path to output file')
    parser.add_argument('--conf-thres',
                        type=float,
                        default=0.25,
                        help='Confidence threshold')
    parser.add_argument('--iou-thres',
                        type=float,
                        default=0.65,
                        help='IoU threshold')
    parser.add_argument('--method',
                        type=str,
                        default='cudart',
                        help='CUDART pipeline')
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args = parse_args()
    main(args)
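For reference, a minimal way to drive this script without going through the CLI, e.g. from a quick test block appended to the file (the dash in the file name means it cannot be imported as a module). The engine path and image folder below are hypothetical placeholders:

    import argparse

    # Equivalent to:
    #   python3 infer-pose-without-torch.py --engine yolov8s-pose.engine --imgs data
    args = argparse.Namespace(engine='yolov8s-pose.engine',  # hypothetical engine path
                              imgs='data',                    # hypothetical image folder
                              show=False,
                              out_dir='./output',
                              conf_thres=0.25,
                              iou_thres=0.65,
                              method='cudart')
    main(args)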

infer-pose.py (new file)
@@ -0,0 +1,112 @@
from models import TRTModule  # isort:skip
import argparse
from pathlib import Path

import cv2
import torch

from config import COLORS, KPS_COLORS, LIMB_COLORS, SKELETON
from models.torch_utils import pose_postprocess
from models.utils import blob, letterbox, path_to_list


def main(args: argparse.Namespace) -> None:
    device = torch.device(args.device)
    Engine = TRTModule(args.engine, device)
    H, W = Engine.inp_info[0].shape[-2:]

    images = path_to_list(args.imgs)
    save_path = Path(args.out_dir)

    if not args.show and not save_path.exists():
        save_path.mkdir(parents=True, exist_ok=True)

    for image in images:
        save_image = save_path / image.name
        bgr = cv2.imread(str(image))
        draw = bgr.copy()
        bgr, ratio, dwdh = letterbox(bgr, (W, H))
        dw, dh = int(dwdh[0]), int(dwdh[1])
        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
        tensor = blob(rgb, return_seg=False)
        dwdh = torch.asarray(dwdh * 2, dtype=torch.float32, device=device)
        tensor = torch.asarray(tensor, device=device)
        # inference
        data = Engine(tensor)

        bboxes, scores, kpts = pose_postprocess(data, args.conf_thres,
                                                args.iou_thres)
        if bboxes.numel() == 0:
            # if no bounding box
            print(f'{image}: no object!')
            continue
        bboxes -= dwdh
        bboxes /= ratio

        for (bbox, score, kpt) in zip(bboxes, scores, kpts):
            bbox = bbox.round().int().tolist()
            color = COLORS['person']
            cv2.rectangle(draw, bbox[:2], bbox[2:], color, 2)
            cv2.putText(draw,
                        f'person:{score:.3f}', (bbox[0], bbox[1] - 2),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        0.75, [225, 255, 255],
                        thickness=2)
            # draw the 17 keypoints, then the 19 skeleton limbs
            # (SKELETON uses 1-based keypoint indices)
            for i in range(19):
                if i < 17:
                    px, py, ps = kpt[i]
                    if ps > 0.5:
                        kcolor = KPS_COLORS[i]
                        px = round(float(px - dw) / ratio)
                        py = round(float(py - dh) / ratio)
                        cv2.circle(draw, (px, py), 5, kcolor, -1)
                xi, yi = SKELETON[i]
                pos1_s = kpt[xi - 1][2]
                pos2_s = kpt[yi - 1][2]
                if pos1_s > 0.5 and pos2_s > 0.5:
                    limb_color = LIMB_COLORS[i]
                    pos1_x = round(float(kpt[xi - 1][0] - dw) / ratio)
                    pos1_y = round(float(kpt[xi - 1][1] - dh) / ratio)
                    pos2_x = round(float(kpt[yi - 1][0] - dw) / ratio)
                    pos2_y = round(float(kpt[yi - 1][1] - dh) / ratio)
                    cv2.line(draw, (pos1_x, pos1_y), (pos2_x, pos2_y),
                             limb_color, 2)
        if args.show:
            cv2.imshow('result', draw)
            cv2.waitKey(0)
        else:
            cv2.imwrite(str(save_image), draw)


def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser()
    parser.add_argument('--engine', type=str, help='Engine file')
    parser.add_argument('--imgs', type=str, help='Images file')
    parser.add_argument('--show',
                        action='store_true',
                        help='Show the detection results')
    parser.add_argument('--out-dir',
                        type=str,
                        default='./output',
                        help='Path to output file')
    parser.add_argument('--conf-thres',
                        type=float,
                        default=0.25,
                        help='Confidence threshold')
    parser.add_argument('--iou-thres',
                        type=float,
                        default=0.65,
                        help='IoU threshold')
    parser.add_argument('--device',
                        type=str,
                        default='cuda:0',
                        help='TensorRT infer device')
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args = parse_args()
    main(args)
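Both pose scripts undo the letterbox transform in two places: boxes are shifted by dwdh (the (dw, dh) pair is tiled to (dw, dh, dw, dh) so it lines up with xyxy boxes) and divided by ratio, while each keypoint is mapped back inline with (p - dw) / ratio and (p - dh) / ratio. A worked sketch with made-up numbers:

    # Suppose letterbox scaled the image by ratio = 0.5 and padded dw = 10 px on
    # the left and dh = 20 px on the top. A keypoint detected at (330, 180) in the
    # letterboxed image maps back to the original image as:
    ratio, dw, dh = 0.5, 10, 20
    px, py = 330, 180
    orig_x = round(float(px - dw) / ratio)   # 640
    orig_y = round(float(py - dh) / ratio)   # 320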

infer-seg-without-torch.py
@@ -41,6 +41,10 @@ def main(args: argparse.Namespace) -> None:
         seg_img = seg_img[dh:H - dh, dw:W - dw, [2, 1, 0]]
         bboxes, scores, labels, masks = seg_postprocess(
             data, bgr.shape[:2], args.conf_thres, args.iou_thres)
+        if bboxes.size == 0:
+            # if no bounding box
+            print(f'{image}: no object!')
+            continue
         masks = masks[:, dh:H - dh, dw:W - dw, :]
         mask_colors = MASK_COLORS[labels % len(MASK_COLORS)]
         mask_colors = mask_colors.reshape(-1, 1, 1, 3) * ALPHA

infer-seg.py
@@ -42,10 +42,9 @@ def main(args: argparse.Namespace) -> None:
                                 device=device)
         bboxes, scores, labels, masks = seg_postprocess(
             data, bgr.shape[:2], args.conf_thres, args.iou_thres)
-        if bboxes is None:
-            # if no bounding box or others save original image
-            if not args.show:
-                cv2.imwrite(str(save_image), draw)
+        if bboxes.numel() == 0:
+            # if no bounding box
+            print(f'{image}: no object!')
             continue
         masks = masks[:, dh:H - dh, dw:W - dw, :]
         indices = (labels % len(MASK_COLORS)).long()

models/torch_utils.py
@@ -3,7 +3,7 @@ from typing import List, Tuple, Union
 import torch
 import torch.nn.functional as F
 from torch import Tensor
-from torchvision.ops import batched_nms
+from torchvision.ops import batched_nms, nms
 
 
 def seg_postprocess(
@@ -14,12 +14,13 @@ def seg_postprocess(
         -> Tuple[Tensor, Tensor, Tensor, Tensor]:
     assert len(data) == 2
     h, w = shape[0] // 4, shape[1] // 4  # 4x downsampling
-    outputs, proto = (i[0] for i in data)
+    outputs, proto = data[0][0], data[1][0]
     bboxes, scores, labels, maskconf = outputs.split([4, 1, 1, 32], 1)
     scores, labels = scores.squeeze(), labels.squeeze()
     idx = scores > conf_thres
-    if idx.sum() == 0:  # no bounding boxes or seg were created
-        return None, None, None, None
+    if not idx.any():  # no bounding boxes or seg were created
+        return bboxes.new_zeros((0, 4)), scores.new_zeros(
+            (0, )), labels.new_zeros((0, )), bboxes.new_zeros((0, 0, 0, 0))
     bboxes, scores, labels, maskconf = \
         bboxes[idx], scores[idx], labels[idx], maskconf[idx]
     idx = batched_nms(bboxes, scores, labels, iou_thres)
@@ -35,10 +36,37 @@ def seg_postprocess(
     return bboxes, scores, labels, masks
 
 
+def pose_postprocess(
+        data: Union[Tuple, Tensor],
+        conf_thres: float = 0.25,
+        iou_thres: float = 0.65) \
+        -> Tuple[Tensor, Tensor, Tensor]:
+    if isinstance(data, tuple):
+        assert len(data) == 1
+        data = data[0]
+    outputs = torch.transpose(data[0], 0, 1).contiguous()
+    bboxes, scores, kpts = outputs.split([4, 1, 51], 1)
+    scores, kpts = scores.squeeze(), kpts.squeeze()
+    idx = scores > conf_thres
+    if not idx.any():  # no bounding boxes or seg were created
+        return bboxes.new_zeros((0, 4)), scores.new_zeros(
+            (0, )), bboxes.new_zeros((0, 0, 0))
+    bboxes, scores, kpts = bboxes[idx], scores[idx], kpts[idx]
+    xycenter, wh = bboxes.chunk(2, -1)
+    bboxes = torch.cat([xycenter - 0.5 * wh, xycenter + 0.5 * wh], -1)
+    idx = nms(bboxes, scores, iou_thres)
+    bboxes, scores, kpts = bboxes[idx], scores[idx], kpts[idx]
+    return bboxes, scores, kpts.reshape(idx.shape[0], -1, 3)
+
+
 def det_postprocess(data: Tuple[Tensor, Tensor, Tensor, Tensor]):
     assert len(data) == 4
-    num_dets, bboxes, scores, labels = (i[0] for i in data)
+    num_dets, bboxes, scores, labels = data[0][0], data[1][0], data[2][
+        0], data[3][0]
     nums = num_dets.item()
+    if nums == 0:
+        return bboxes.new_zeros((0, 4)), scores.new_zeros(
+            (0, )), labels.new_zeros((0, ))
     bboxes = bboxes[:nums]
     scores = scores[:nums]
     labels = labels[:nums]
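The new pose_postprocess consumes the raw single-tensor head of a YOLOv8 pose engine: shape (1, 56, num_anchors), where 56 = 4 box values (cx, cy, w, h) + 1 person score + 17 x 3 keypoint values, and num_anchors is 8400 for a 640 x 640 input. Those shapes are an assumption about the exported engine, not stated in this diff. A minimal sketch with one fabricated confident detection:

    import torch

    from models.torch_utils import pose_postprocess

    out = torch.zeros(1, 56, 8400)                           # fake engine output (shape assumed)
    out[0, :4, 0] = torch.tensor([320., 320., 100., 200.])   # cx, cy, w, h
    out[0, 4, 0] = 0.9                                       # person score
    out[0, 5:, 0] = 0.6                                      # 17 x (x, y, conf)
    bboxes, scores, kpts = pose_postprocess((out, ), 0.25, 0.65)
    print(bboxes.shape, scores.shape, kpts.shape)            # -> (1, 4), (1,), (1, 17, 3)
    print(bboxes[0].tolist())                                # -> [270.0, 220.0, 370.0, 420.0]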

models/utils.py
@@ -45,6 +45,7 @@ def letterbox(im: ndarray,
 
 
 def blob(im: ndarray, return_seg: bool = False) -> Union[ndarray, Tuple]:
+    seg = None
     if return_seg:
         seg = im.astype(np.float32) / 255
     im = im.transpose([2, 0, 1])
@@ -88,6 +89,9 @@ def det_postprocess(data: Tuple[ndarray, ndarray, ndarray, ndarray]):
     assert len(data) == 4
     num_dets, bboxes, scores, labels = (i[0] for i in data)
     nums = num_dets.item()
+    if nums == 0:
+        return np.empty((0, 4), dtype=np.float32), np.empty(
+            (0, ), dtype=np.float32), np.empty((0, ), dtype=np.int32)
     bboxes = bboxes[:nums]
     scores = scores[:nums]
     labels = labels[:nums]
@@ -106,6 +110,12 @@ def seg_postprocess(
     bboxes, scores, labels, maskconf = np.split(outputs, [4, 5, 6], 1)
     scores, labels = scores.squeeze(), labels.squeeze()
     idx = scores > conf_thres
+    if not idx.any():  # no bounding boxes or seg were created
+        return np.empty((0, 4), dtype=np.float32), \
+            np.empty((0,), dtype=np.float32), \
+            np.empty((0,), dtype=np.int32), \
+            np.empty((0, 0, 0, 0), dtype=np.int32)
     bboxes, scores, labels, maskconf = \
         bboxes[idx], scores[idx], labels[idx], maskconf[idx]
     cvbboxes = np.concatenate([bboxes[:, :2], bboxes[:, 2:] - bboxes[:, :2]],
@@ -128,3 +138,29 @@ def seg_postprocess(
     masks = masks.transpose(2, 0, 1)
     masks = np.ascontiguousarray((masks > 0.5)[..., None], dtype=np.float32)
     return bboxes, scores, labels, masks
+
+
+def pose_postprocess(
+        data: Union[Tuple, ndarray],
+        conf_thres: float = 0.25,
+        iou_thres: float = 0.65) \
+        -> Tuple[ndarray, ndarray, ndarray]:
+    if isinstance(data, tuple):
+        assert len(data) == 1
+        data = data[0]
+    outputs = np.transpose(data[0], (1, 0))
+    bboxes, scores, kpts = np.split(outputs, [4, 5], 1)
+    scores, kpts = scores.squeeze(), kpts.squeeze()
+    idx = scores > conf_thres
+    if not idx.any():  # no bounding boxes or seg were created
+        return np.empty((0, 4), dtype=np.float32), np.empty(
+            (0, ), dtype=np.float32), np.empty((0, 0, 0), dtype=np.float32)
+    bboxes, scores, kpts = bboxes[idx], scores[idx], kpts[idx]
+    xycenter, wh = np.split(bboxes, [
+        2,
+    ], -1)
+    cvbboxes = np.concatenate([xycenter - 0.5 * wh, wh], -1)
+    idx = cv2.dnn.NMSBoxes(cvbboxes, scores, conf_thres, iou_thres)
+    cvbboxes, scores, kpts = cvbboxes[idx], scores[idx], kpts[idx]
+    cvbboxes[:, 2:] += cvbboxes[:, :2]
+    return cvbboxes, scores, kpts.reshape(idx.shape[0], -1, 3)
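Unlike the torch path, the numpy variant runs NMS through cv2.dnn.NMSBoxes, which expects top-left (x, y, w, h) boxes, so the centre-format prediction is first shifted for NMS and only afterwards converted to the (x1, y1, x2, y2) form the drawing code uses. A short sketch of the three box formats involved:

    import numpy as np

    cxcywh = np.array([[320., 320., 100., 200.]], dtype=np.float32)  # centre format from the head
    xy, wh = np.split(cxcywh, [2], -1)
    xywh = np.concatenate([xy - 0.5 * wh, wh], -1)   # [[270., 220., 100., 200.]] fed to NMSBoxes
    xyxy = xywh.copy()
    xyxy[:, 2:] += xyxy[:, :2]                       # [[270., 220., 370., 420.]] used for drawing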
