diff --git a/.gitignore b/.gitignore index ca159c8..821dfc1 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ .vscode/ +__pycache__/ *.pyc *.DS_Store *.swp @@ -9,4 +10,7 @@ tmp.* logs/ weights/ dump/ -src/loftr/utils/superglue.py \ No newline at end of file +demo/*.mp4 +demo/demo_images/ +src/loftr/utils/superglue.py +demo/utils.py \ No newline at end of file diff --git a/README.md b/README.md index 8a00d4a..5ab307b 100644 --- a/README.md +++ b/README.md @@ -15,12 +15,12 @@ In the meanwhile, discussions about the paper are welcomed in the [discussion pa - [x] Inference code and pretrained models (DS and OT) (2021-4-7) - [x] Code for reproducing the test-set results (2021-4-7) -- [ ] Webcam demo to reproduce the result shown in the GIF above (expected 2021-4-13) +- [x] Webcam demo to reproduce the result shown in the GIF above (2021-4-13) - [ ] Training code and training data preparation (expected 2021-6-10) ## Installation ```shell -# For full pytorch-lightning trainer features +# For full pytorch-lightning trainer features (recommended) conda env create -f environment.yaml conda activate loftr @@ -33,7 +33,8 @@ We provide the [download link](https://drive.google.com/drive/folders/1DOcOPZb3- - the megadepth-1500-testset (~600MB). - 4 pretrained models of indoor-ds, indoor-ot, outdoor-ds and outdoor-ot (each ~45MB). -By now, the LoFTR-DS model is ready to go! +By now, the environment is all set and the LoFTR-DS model is ready to go! +If you want to run LoFTR-OT, some extra steps are needed:
[Requirements for LoFTR-OT] @@ -71,7 +72,55 @@ By now, the LoFTR-DS model is ready to go!
-An example is in the `notebooks/demo_single_pair.ipynb`. +An example is given in `notebooks/demo_single_pair.ipynb`. + +### Online demo +Run the online demo with a webcam to reproduce the result shown in the GIF above. +```bash +cd demo +./run_demo.sh +``` +
+ [run_demo.sh] + + ```bash + #!/bin/bash + set -e + # set -x + + if [ ! -f utils.py ]; then + echo "Downloading utils.py from the SuperGlue repo." + echo "We cannot provide this file directly due to its strict licence." + wget https://raw.githubusercontent.com/magicleap/SuperGluePretrainedNetwork/master/models/utils.py + fi + + # Use webcam 0 as input source. + input=0 + # or use a pre-recorded video given the path. + # input=/home/sunjiaming/Downloads/scannet_test/$scene_name.mp4 + + # Toggle indoor/outdoor model here. + model_ckpt=../weights/indoor_ds.ckpt + # model_ckpt=../weights/outdoor_ds.ckpt + + # Optionally assign the GPU ID. + # export CUDA_VISIBLE_DEVICES=0 + + echo "Running LoFTR demo.." + eval "$(conda shell.zsh hook)" + conda activate loftr + python demo_loftr.py --weight $model_ckpt --input $input + # To save the input video and output match visualizations. + # python demo_loftr.py --weight $model_ckpt --input $input --save_video --save_input + + # Running on remote GPU servers with no GUI. + # Save images first. + # python demo_loftr.py --weight $model_ckpt --input $input --no_display --output_dir="./demo_images/" + # Then convert them to a video. + # ffmpeg -framerate 15 -pattern_type glob -i '*.png' -c:v libx264 -r 30 -pix_fmt yuv420p out.mp4 + + ``` +
### Reproduce the testing results with pytorch-lightning @@ -84,14 +133,12 @@ bash ./scripts/reproduce_test/indoor_ds.sh python test.py configs/data/scannet_test_1500.py configs/loftr/loftr_ds.py --ckpt_path weights/indoor_ds.ckpt --profiler_name inference --gpus=1 --accelerator="ddp" ``` -For visualizing the dump results, please refer to `notebooks/visualize_dump_results.ipynb`. +For visualizing the results, please refer to `notebooks/visualize_dump_results.ipynb`.
- - ## Citation If you find this code useful for your research, please use the following BibTeX entry. @@ -100,16 +147,11 @@ If you find this code useful for your research, please use the following BibTeX @article{sun2021loftr, title={{LoFTR}: Detector-Free Local Feature Matching with Transformers}, author={Sun, Jiaming and Shen, Zehong and Wang, Yuang and Bao, Hujun and Zhou, Xiaowei}, - journal={CVPR}, + journal={{CVPR}}, year={2021} } ``` - - - ## Copyright This work is affiliated with ZJU-SenseTime Joint Lab of 3D Vision, and its intellectual property belongs to SenseTime Group Ltd. diff --git a/demo/demo_loftr.py b/demo/demo_loftr.py new file mode 100644 index 0000000..60974d9 --- /dev/null +++ b/demo/demo_loftr.py @@ -0,0 +1,240 @@ +front_matter = """ +------------------------------------------------------------------------ +Online demo for [LoFTR](https://zju3dv.github.io/loftr/). + +This demo is heavily inspired by [SuperGlue](https://github.com/magicleap/SuperGluePretrainedNetwork/). +We thank the authors for their excellent work. 
+------------------------------------------------------------------------ +""" + +import os +import argparse +from pathlib import Path +import cv2 +import torch +import numpy as np +import matplotlib.cm as cm + +os.sys.path.append("../") # Add the project directory +from src.loftr import LoFTR, default_cfg +from src.config.default import get_cfg_defaults +try: + from demo.utils import (AverageTimer, VideoStreamer, + make_matching_plot_fast, make_matching_plot, frame2tensor) +except: + raise ImportError("This demo requires utils.py from SuperGlue, please use run_demo.sh to start this script.") + + +torch.set_grad_enabled(False) + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description='LoFTR online demo', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument('--weight', type=str, help="Path to the checkpoint.") + parser.add_argument( + '--input', type=str, default='0', + help='ID of a USB webcam, URL of an IP camera, ' + 'or path to an image directory or movie file') + parser.add_argument( + '--output_dir', type=str, default=None, + help='Directory where to write output frames (If None, no output)') + parser.add_argument( + '--image_glob', type=str, nargs='+', default=['*.png', '*.jpg', '*.jpeg'], + help='Glob if a directory of images is specified') + parser.add_argument( + '--skip', type=int, default=1, + help='Images to skip if input is a movie or directory') + parser.add_argument( + '--max_length', type=int, default=1000000, + help='Maximum length if input is a movie or directory') + parser.add_argument( + '--resize', type=int, nargs='+', default=[640, 480], + help='Resize the input image before running inference. If two numbers, ' + 'resize to the exact dimensions, if one number, resize the max ' + 'dimension, if -1, do not resize') + parser.add_argument( + '--no_display', action='store_true', + help='Do not display images to screen. 
Useful if running remotely') + parser.add_argument( + '--save_video', action='store_true', + help='Save output (with match visualizations) to a video.') + parser.add_argument( + '--save_input', action='store_true', + help='Save the input images to a video (for gathering repeatable input source).') + parser.add_argument( + '--skip_frames', type=int, default=1, + help="Skip frames from webcam input.") + parser.add_argument( + '--top_k', type=int, default=2000, help="The max vis_range (please refer to the code).") + parser.add_argument( + '--bottom_k', type=int, default=0, help="The min vis_range (please refer to the code).") + + opt = parser.parse_args() + print(front_matter) + parser.print_help() + + if len(opt.resize) == 2 and opt.resize[1] == -1: + opt.resize = opt.resize[0:1] + if len(opt.resize) == 2: + print('Will resize to {}x{} (WxH)'.format( + opt.resize[0], opt.resize[1])) + elif len(opt.resize) == 1 and opt.resize[0] > 0: + print('Will resize max dimension to {}'.format(opt.resize[0])) + elif len(opt.resize) == 1: + print('Will not resize images') + else: + raise ValueError('Cannot specify more than two integers for --resize') + + if torch.cuda.is_available(): + device = 'cuda' + else: + raise RuntimeError("GPU is required to run this demo.") + + # Initialize LoFTR + matcher = LoFTR(config=default_cfg) + matcher.load_state_dict(torch.load(opt.weight)['state_dict']) + matcher = matcher.eval().to(device=device) + + # Configure I/O + if opt.save_video: + print('Writing video to loftr-matches.mp4...') + writer = cv2.VideoWriter('loftr-matches.mp4', cv2.VideoWriter_fourcc(*'mp4v'), 15, (640*2 + 10, 480)) + if opt.save_input: + print('Writing video to demo-input.mp4...') + input_writer = cv2.VideoWriter('demo-input.mp4', cv2.VideoWriter_fourcc(*'mp4v'), 15, (640, 480)) + + vs = VideoStreamer(opt.input, opt.resize, opt.skip, + opt.image_glob, opt.max_length) + frame, ret = vs.next_frame() + assert ret, 'Error when reading the first frame (try different --input?)' 
+ + frame_id = 0 + last_image_id = 0 + frame_tensor = frame2tensor(frame, device) + last_data = {'image0': frame_tensor} + last_frame = frame + + if opt.output_dir is not None: + print('==> Will write outputs to {}'.format(opt.output_dir)) + Path(opt.output_dir).mkdir(exist_ok=True) + + # Create a window to display the demo. + if not opt.no_display: + window_name = 'LoFTR Matches' + cv2.namedWindow(window_name, cv2.WINDOW_NORMAL) + cv2.resizeWindow(window_name, (640*2, 480)) + else: + print('Skipping visualization, will not show a GUI.') + + # Print the keyboard help menu. + print('==> Keyboard control:\n' + '\tn: select the current frame as the reference image (left)\n' + '\td/f: move the range of the matches (ranked by confidence) to visualize\n' + '\tc/v: increase/decrease the length of the visualization range (i.e., total number of matches) to show\n' + '\tq: quit') + + timer = AverageTimer() + vis_range = [opt.bottom_k, opt.top_k] + + while True: + frame_id += 1 + frame, ret = vs.next_frame() + if frame_id % opt.skip_frames != 0: + # print("Skipping frame.") + continue + if opt.save_input: + inp = np.stack([frame]*3, -1) + inp_rgb = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB) + input_writer.write(inp_rgb) + if not ret: + print('Finished demo_loftr.py') + break + timer.update('data') + stem0, stem1 = last_image_id, vs.i - 1 + + frame_tensor = frame2tensor(frame, device) + last_data = {**last_data, 'image1': frame_tensor} + matcher(last_data) + + total_n_matches = len(last_data['mkpts0_f']) + mkpts0 = last_data['mkpts0_f'].cpu().numpy()[vis_range[0]:vis_range[1]] + mkpts1 = last_data['mkpts1_f'].cpu().numpy()[vis_range[0]:vis_range[1]] + mconf = last_data['mconf'].cpu().numpy()[vis_range[0]:vis_range[1]] + + # Normalize confidence. + if len(mconf) > 0: + conf_vis_min = 0. 
+ conf_min = mconf.min() + conf_max = mconf.max() + mconf = (mconf - conf_vis_min) / (conf_max - conf_vis_min + 1e-5) + + timer.update('forward') + alpha = 0 + color = cm.jet(mconf, alpha=alpha) + + text = [ + f'LoFTR', + '# Matches (showing/total): {}/{}'.format(len(mkpts0), total_n_matches), + ] + small_text = [ + f'Showing matches from {vis_range[0]}:{vis_range[1]}', + f'Confidence Range: {conf_min:.2f}:{conf_max:.2f}', + 'Image Pair: {:06}:{:06}'.format(stem0, stem1), + ] + out = make_matching_plot_fast( + last_frame, frame, mkpts0, mkpts1, mkpts0, mkpts1, color, text, + path=None, show_keypoints=False, small_text=small_text) + + # Save high quality png, optionally with dynamic alpha support (unreleased yet). + # save_path = 'demo_vid/{:06}'.format(frame_id) + # make_matching_plot( + # last_frame, frame, mkpts0, mkpts1, mkpts0, mkpts1, color, text, + # path=save_path, show_keypoints=opt.show_keypoints, small_text=small_text) + + if not opt.no_display: + if opt.save_video: + writer.write(out) + cv2.imshow('LoFTR Matches', out) + key = chr(cv2.waitKey(1) & 0xFF) + if key == 'q': + if opt.save_video: + writer.release() + if opt.save_input: + input_writer.release() + vs.cleanup() + print('Exiting...') + break + elif key == 'n': + last_data['image0'] = frame_tensor + last_frame = frame + last_image_id = (vs.i - 1) + frame_id_left = frame_id + elif key in ['d', 'f']: + if key == 'd': + if vis_range[0] >= 0: + vis_range[0] -= 200 + vis_range[1] -= 200 + if key =='f': + vis_range[0] += 200 + vis_range[1] += 200 + print(f'\nChanged the vis_range to {vis_range[0]}:{vis_range[1]}') + elif key in ['c', 'v']: + if key == 'c': + vis_range[1] -= 50 + if key =='v': + vis_range[1] += 50 + print(f'\nChanged the vis_range[1] to {vis_range[1]}') + elif opt.output_dir is not None: + stem = 'matches_{:06}_{:06}'.format(stem0, stem1) + out_file = str(Path(opt.output_dir, stem + '.png')) + print('\nWriting image to {}'.format(out_file)) + cv2.imwrite(out_file, out) + else: + raise 
ValueError("output_dir is required when no display is given.") + timer.update('viz') + timer.print() + + + cv2.destroyAllWindows() + vs.cleanup() diff --git a/demo/run_demo.sh b/demo/run_demo.sh new file mode 100755 index 0000000..c262eef --- /dev/null +++ b/demo/run_demo.sh @@ -0,0 +1,34 @@ +#!/bin/bash +set -e +# set -x + +if [ ! -f utils.py ]; then + echo "Downloading utils.py from the SuperGlue repo." + echo "We cannot provide this file directly due to its strict licence." + wget https://raw.githubusercontent.com/magicleap/SuperGluePretrainedNetwork/master/models/utils.py +fi + +# Use webcam 0 as input source. +input=0 +# or use a pre-recorded video given the path. +# input=/home/sunjiaming/Downloads/scannet_test/$scene_name.mp4 + +# Toggle indoor/outdoor model here. +model_ckpt=../weights/indoor_ds.ckpt +# model_ckpt=../weights/outdoor_ds.ckpt + +# Optionally assign the GPU ID. +# export CUDA_VISIBLE_DEVICES=0 + +echo "Running LoFTR demo.." +eval "$(conda shell.zsh hook)" +conda activate loftr +python demo_loftr.py --weight $model_ckpt --input $input +# To save the input video and output match visualizations. +# python demo_loftr.py --weight $model_ckpt --input $input --save_video --save_input + +# Running on remote GPU servers with no GUI. +# Save images first. +# python demo_loftr.py --weight $model_ckpt --input $input --no_display --output_dir="./demo_images/" +# Then convert them to a video. +# ffmpeg -framerate 15 -pattern_type glob -i '*.png' -c:v libx264 -r 30 -pix_fmt yuv420p out.mp4