diff --git a/demo/video_gpuaccel_demo.py b/demo/video_gpuaccel_demo.py
new file mode 100644
index 000000000..e19eb98e8
--- /dev/null
+++ b/demo/video_gpuaccel_demo.py
@@ -0,0 +1,118 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+"""MMDetection video demo with GPU-accelerated decoding via ffmpegcv."""
+import argparse
+
+import cv2
+import mmcv
+import numpy as np
+import torch
+from torchvision.transforms import functional as F
+
+from mmdet.apis import init_detector
+from mmdet.datasets.pipelines import Compose
+
+try:
+    import ffmpegcv
+except ImportError:
+    raise ImportError(
+        'Please install ffmpegcv with:\n\n    pip install ffmpegcv')
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description='MMDetection video demo with GPU acceleration')
+    parser.add_argument('video', help='Video file')
+    parser.add_argument('config', help='Config file')
+    parser.add_argument('checkpoint', help='Checkpoint file')
+    parser.add_argument(
+        '--device', default='cuda:0', help='Device used for inference')
+    parser.add_argument(
+        '--score-thr', type=float, default=0.3, help='Bbox score threshold')
+    parser.add_argument('--out', type=str, help='Output video file')
+    parser.add_argument('--show', action='store_true', help='Show video')
+    parser.add_argument(
+        '--nvdecode', action='store_true', help='Use NVIDIA decoder')
+    parser.add_argument(
+        '--wait-time',
+        type=float,
+        default=1,
+        help='The interval of show (s), 0 is block')
+    args = parser.parse_args()
+    return args
+
+
+def prefetch_img_metas(cfg, ori_wh):
+    """Pre-compute the test-pipeline ``img_metas`` for a fixed (w, h)."""
+    w, h = ori_wh
+    # Swap the file-loading step so the pipeline accepts raw ndarray frames.
+    cfg.data.test.pipeline[0].type = 'LoadImageFromWebcam'
+    test_pipeline = Compose(cfg.data.test.pipeline)
+    data = {'img': np.zeros((h, w, 3), dtype=np.uint8)}
+    data = test_pipeline(data)
+    img_metas = data['img_metas'][0].data
+    return img_metas
+
+
+def process_img(frame_resize, img_metas, device):
+    """Normalize a pre-resized RGB frame and pack it as detector input."""
+    assert frame_resize.shape == img_metas['pad_shape']
+    frame_cuda = torch.from_numpy(frame_resize).to(device).float()
+    frame_cuda = frame_cuda.permute(2, 0, 1)  # HWC to CHW
+    mean = torch.from_numpy(img_metas['img_norm_cfg']['mean']).to(device)
+    std = torch.from_numpy(img_metas['img_norm_cfg']['std']).to(device)
+    frame_cuda = F.normalize(frame_cuda, mean=mean, std=std, inplace=True)
+    frame_cuda = frame_cuda[None, :, :, :]  # NCHW
+    data = {'img': [frame_cuda], 'img_metas': [[img_metas]]}
+    return data
+
+
+def main():
+    args = parse_args()
+    assert args.out or args.show, \
+        ('Please specify at least one operation (save/show the '
+         'video) with the argument "--out" or "--show"')
+
+    model = init_detector(args.config, args.checkpoint, device=args.device)
+
+    if args.nvdecode:
+        VideoCapture = ffmpegcv.VideoCaptureNV
+    else:
+        VideoCapture = ffmpegcv.VideoCapture
+    video_origin = VideoCapture(args.video)
+    img_metas = prefetch_img_metas(model.cfg,
+                                   (video_origin.width, video_origin.height))
+    # pad_shape is (h, w, 3); ffmpegcv's resize wants (w, h).
+    resize_wh = img_metas['pad_shape'][1::-1]
+    video_resize = VideoCapture(
+        args.video,
+        resize=resize_wh,
+        resize_keepratio=True,
+        resize_keepratioalign='topleft',
+        pix_fmt='rgb24')
+    video_writer = None
+    if args.out:
+        video_writer = ffmpegcv.VideoWriter(args.out, fps=video_origin.fps)
+
+    with torch.no_grad():
+        for frame_resize, frame_origin in zip(
+                mmcv.track_iter_progress(video_resize), video_origin):
+            data = process_img(frame_resize, img_metas, args.device)
+            result = model(return_loss=False, rescale=True, **data)[0]
+            frame_mask = model.show_result(
+                frame_origin, result, score_thr=args.score_thr)
+            if args.show:
+                cv2.namedWindow('video', 0)
+                mmcv.imshow(frame_mask, 'video', args.wait_time)
+            if args.out:
+                video_writer.write(frame_mask)
+
+    if video_writer:
+        video_writer.release()
+    video_origin.release()
+    video_resize.release()
+
+    cv2.destroyAllWindows()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/docs/en/1_exist_data_model.md b/docs/en/1_exist_data_model.md
index 136a197d6..297f1ee83 100644
--- a/docs/en/1_exist_data_model.md
+++ b/docs/en/1_exist_data_model.md
@@ -162,6 +162,32 @@ python demo/video_demo.py demo/demo.mp4 \
     --out result.mp4
 ```
 
+#### Video demo with GPU acceleration
+
+This script performs inference on a video with GPU acceleration.
+
+```shell
+python demo/video_gpuaccel_demo.py \
+    ${VIDEO_FILE} \
+    ${CONFIG_FILE} \
+    ${CHECKPOINT_FILE} \
+    [--device ${GPU_ID}] \
+    [--score-thr ${SCORE_THR}] \
+    [--nvdecode] \
+    [--out ${OUT_FILE}] \
+    [--show] \
+    [--wait-time ${WAIT_TIME}]
+```
+
+Examples:
+
+```shell
+python demo/video_gpuaccel_demo.py demo/demo.mp4 \
+    configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py \
+    checkpoints/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth \
+    --nvdecode --out result.mp4
+```
+
 ## Test existing models on standard datasets
 
 To evaluate a model's accuracy, one usually tests the model on some standard datasets.
diff --git a/docs/zh_cn/1_exist_data_model.md b/docs/zh_cn/1_exist_data_model.md
index 68b3d70ac..42f947130 100644
--- a/docs/zh_cn/1_exist_data_model.md
+++ b/docs/zh_cn/1_exist_data_model.md
@@ -160,6 +160,32 @@ asyncio.run(main())
     --out result.mp4
 ```
 
+#### 视频样例,显卡加速版本
+
+这是在视频样例上进行推理的脚本,使用显卡加速。
+
+```shell
+python demo/video_gpuaccel_demo.py \
+    ${VIDEO_FILE} \
+    ${CONFIG_FILE} \
+    ${CHECKPOINT_FILE} \
+    [--device ${GPU_ID}] \
+    [--score-thr ${SCORE_THR}] \
+    [--nvdecode] \
+    [--out ${OUT_FILE}] \
+    [--show] \
+    [--wait-time ${WAIT_TIME}]
+```
+
+运行样例:
+
+```shell
+python demo/video_gpuaccel_demo.py demo/demo.mp4 \
+    configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py \
+    checkpoints/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth \
+    --nvdecode --out result.mp4
+```
+
 ## 在标准数据集上测试现有模型
 
 为了测试一个模型的精度,我们通常会在标准数据集上对其进行测试。MMDetection 支持多个公共数据集,包括 [COCO](https://cocodataset.org/) ,