parent
348ad897d4
commit
4feac496c1
54 changed files with 2076 additions and 122 deletions
@ -0,0 +1,3 @@ |
|||||||
|
[submodule "third_party/SuperGluePretrainedNetwork"] |
||||||
|
path = third_party/SuperGluePretrainedNetwork |
||||||
|
url = git@github.com:magicleap/SuperGluePretrainedNetwork.git |
@ -0,0 +1,3 @@ |
|||||||
|
* |
||||||
|
*/ |
||||||
|
!.gitignore |
@ -0,0 +1,22 @@ |
|||||||
|
from configs.data.base import cfg |
||||||
|
|
||||||
|
|
||||||
|
TRAIN_BASE_PATH = "data/megadepth/index" |
||||||
|
cfg.DATASET.TRAINVAL_DATA_SOURCE = "MegaDepth" |
||||||
|
cfg.DATASET.TRAIN_DATA_ROOT = "data/megadepth/train" |
||||||
|
cfg.DATASET.TRAIN_NPZ_ROOT = f"{TRAIN_BASE_PATH}/scene_info_0.1_0.7" |
||||||
|
cfg.DATASET.TRAIN_LIST_PATH = f"{TRAIN_BASE_PATH}/trainvaltest_list/train_list.txt" |
||||||
|
cfg.DATASET.MIN_OVERLAP_SCORE_TRAIN = 0.0 |
||||||
|
|
||||||
|
TEST_BASE_PATH = "data/megadepth/index" |
||||||
|
cfg.DATASET.TEST_DATA_SOURCE = "MegaDepth" |
||||||
|
cfg.DATASET.VAL_DATA_ROOT = cfg.DATASET.TEST_DATA_ROOT = "data/megadepth/test" |
||||||
|
cfg.DATASET.VAL_NPZ_ROOT = cfg.DATASET.TEST_NPZ_ROOT = f"{TEST_BASE_PATH}/scene_info_val_1500" |
||||||
|
cfg.DATASET.VAL_LIST_PATH = cfg.DATASET.TEST_LIST_PATH = f"{TEST_BASE_PATH}/trainvaltest_list/val_list.txt" |
||||||
|
cfg.DATASET.MIN_OVERLAP_SCORE_TEST = 0.0 # for both test and val |
||||||
|
|
||||||
|
# 368 scenes in total for MegaDepth |
||||||
|
# (with difficulty balanced (further split each scene to 3 sub-scenes)) |
||||||
|
cfg.TRAINER.N_SAMPLES_PER_SUBSET = 100 |
||||||
|
|
||||||
|
cfg.DATASET.MGDPT_IMG_RESIZE = 640 # for training on 11GB mem GPUs |
@ -0,0 +1,22 @@ |
|||||||
|
from configs.data.base import cfg |
||||||
|
|
||||||
|
|
||||||
|
TRAIN_BASE_PATH = "data/megadepth/index" |
||||||
|
cfg.DATASET.TRAINVAL_DATA_SOURCE = "MegaDepth" |
||||||
|
cfg.DATASET.TRAIN_DATA_ROOT = "data/megadepth/train" |
||||||
|
cfg.DATASET.TRAIN_NPZ_ROOT = f"{TRAIN_BASE_PATH}/scene_info_0.1_0.7" |
||||||
|
cfg.DATASET.TRAIN_LIST_PATH = f"{TRAIN_BASE_PATH}/trainvaltest_list/train_list.txt" |
||||||
|
cfg.DATASET.MIN_OVERLAP_SCORE_TRAIN = 0.0 |
||||||
|
|
||||||
|
TEST_BASE_PATH = "data/megadepth/index" |
||||||
|
cfg.DATASET.TEST_DATA_SOURCE = "MegaDepth" |
||||||
|
cfg.DATASET.VAL_DATA_ROOT = cfg.DATASET.TEST_DATA_ROOT = "data/megadepth/test" |
||||||
|
cfg.DATASET.VAL_NPZ_ROOT = cfg.DATASET.TEST_NPZ_ROOT = f"{TEST_BASE_PATH}/scene_info_val_1500" |
||||||
|
cfg.DATASET.VAL_LIST_PATH = cfg.DATASET.TEST_LIST_PATH = f"{TEST_BASE_PATH}/trainvaltest_list/val_list.txt" |
||||||
|
cfg.DATASET.MIN_OVERLAP_SCORE_TEST = 0.0 # for both test and val |
||||||
|
|
||||||
|
# 368 scenes in total for MegaDepth |
||||||
|
# (with difficulty balanced (further split each scene to 3 sub-scenes)) |
||||||
|
cfg.TRAINER.N_SAMPLES_PER_SUBSET = 100 |
||||||
|
|
||||||
|
cfg.DATASET.MGDPT_IMG_RESIZE = 840 # for training on 32GB mem GPUs |
@ -0,0 +1,17 @@ |
|||||||
|
from configs.data.base import cfg |
||||||
|
|
||||||
|
|
||||||
|
TRAIN_BASE_PATH = "data/scannet/index" |
||||||
|
cfg.DATASET.TRAINVAL_DATA_SOURCE = "ScanNet" |
||||||
|
cfg.DATASET.TRAIN_DATA_ROOT = "data/scannet/train" |
||||||
|
cfg.DATASET.TRAIN_NPZ_ROOT = f"{TRAIN_BASE_PATH}/scene_data/train" |
||||||
|
cfg.DATASET.TRAIN_LIST_PATH = f"{TRAIN_BASE_PATH}/scene_data/train_list/scannet_all.txt" |
||||||
|
cfg.DATASET.TRAIN_INTRINSIC_PATH = f"{TRAIN_BASE_PATH}/intrinsics.npz" |
||||||
|
|
||||||
|
TEST_BASE_PATH = "assets/scannet_test_1500" |
||||||
|
cfg.DATASET.TEST_DATA_SOURCE = "ScanNet" |
||||||
|
cfg.DATASET.VAL_DATA_ROOT = cfg.DATASET.TEST_DATA_ROOT = "data/scannet/test" |
||||||
|
cfg.DATASET.VAL_NPZ_ROOT = cfg.DATASET.TEST_NPZ_ROOT = TEST_BASE_PATH |
||||||
|
cfg.DATASET.VAL_LIST_PATH = cfg.DATASET.TEST_LIST_PATH = f"{TEST_BASE_PATH}/scannet_test.txt" |
||||||
|
cfg.DATASET.VAL_INTRINSIC_PATH = cfg.DATASET.TEST_INTRINSIC_PATH = f"{TEST_BASE_PATH}/intrinsics.npz" |
||||||
|
cfg.DATASET.MIN_OVERLAP_SCORE_TEST = 0.0 # for both test and val |
@ -0,0 +1,3 @@ |
|||||||
|
* |
||||||
|
*/ |
||||||
|
!.gitignore |
@ -1,3 +1,5 @@ |
|||||||
from src.config.default import _CN as cfg |
from src.config.default import _CN as cfg |
||||||
|
|
||||||
cfg.LOFTR.MATCH_COARSE.MATCH_TYPE = 'dual_softmax' |
cfg.LOFTR.MATCH_COARSE.MATCH_TYPE = 'dual_softmax' |
||||||
|
|
||||||
|
cfg.TRAINER.MSLR_MILESTONES = [3, 6, 9, 12, 17, 20, 23, 26, 29] |
@ -0,0 +1,7 @@ |
|||||||
|
from src.config.default import _CN as cfg |
||||||
|
|
||||||
|
cfg.LOFTR.MATCH_COARSE.MATCH_TYPE = 'dual_softmax' |
||||||
|
|
||||||
|
cfg.LOFTR.MATCH_COARSE.SPARSE_SPVS = False |
||||||
|
|
||||||
|
cfg.TRAINER.MSLR_MILESTONES = [3, 6, 9, 12, 17, 20, 23, 26, 29] |
@ -1,3 +1,5 @@ |
|||||||
from src.config.default import _CN as cfg |
from src.config.default import _CN as cfg |
||||||
|
|
||||||
cfg.LOFTR.MATCH_COARSE.MATCH_TYPE = 'sinkhorn' |
cfg.LOFTR.MATCH_COARSE.MATCH_TYPE = 'sinkhorn' |
||||||
|
|
||||||
|
cfg.TRAINER.MSLR_MILESTONES = [3, 6, 9, 12, 17, 20, 23, 26, 29] |
@ -0,0 +1,7 @@ |
|||||||
|
from src.config.default import _CN as cfg |
||||||
|
|
||||||
|
cfg.LOFTR.MATCH_COARSE.MATCH_TYPE = 'sinkhorn' |
||||||
|
|
||||||
|
cfg.LOFTR.MATCH_COARSE.SPARSE_SPVS = False |
||||||
|
|
||||||
|
cfg.TRAINER.MSLR_MILESTONES = [3, 6, 9, 12, 17, 20, 23, 26, 29] |
@ -0,0 +1,3 @@ |
|||||||
|
* |
||||||
|
*/ |
||||||
|
!.gitignore |
@ -0,0 +1,15 @@ |
|||||||
|
from src.config.default import _CN as cfg |
||||||
|
|
||||||
|
cfg.LOFTR.MATCH_COARSE.MATCH_TYPE = 'dual_softmax' |
||||||
|
|
||||||
|
cfg.TRAINER.CANONICAL_LR = 8e-3 |
||||||
|
cfg.TRAINER.WARMUP_STEP = 1875 # 3 epochs |
||||||
|
cfg.TRAINER.WARMUP_RATIO = 0.1 |
||||||
|
cfg.TRAINER.MSLR_MILESTONES = [8, 12, 16, 20, 24] |
||||||
|
|
||||||
|
# pose estimation |
||||||
|
cfg.TRAINER.RANSAC_PIXEL_THR = 0.5 |
||||||
|
|
||||||
|
cfg.TRAINER.OPTIMIZER = "adamw" |
||||||
|
cfg.TRAINER.ADAMW_DECAY = 0.1 |
||||||
|
cfg.LOFTR.MATCH_COARSE.TRAIN_COARSE_PERCENT = 0.3 |
@ -0,0 +1,16 @@ |
|||||||
|
from src.config.default import _CN as cfg |
||||||
|
|
||||||
|
cfg.LOFTR.MATCH_COARSE.MATCH_TYPE = 'dual_softmax' |
||||||
|
cfg.LOFTR.MATCH_COARSE.SPARSE_SPVS = False |
||||||
|
|
||||||
|
cfg.TRAINER.CANONICAL_LR = 8e-3 |
||||||
|
cfg.TRAINER.WARMUP_STEP = 1875 # 3 epochs |
||||||
|
cfg.TRAINER.WARMUP_RATIO = 0.1 |
||||||
|
cfg.TRAINER.MSLR_MILESTONES = [8, 12, 16, 20, 24] |
||||||
|
|
||||||
|
# pose estimation |
||||||
|
cfg.TRAINER.RANSAC_PIXEL_THR = 0.5 |
||||||
|
|
||||||
|
cfg.TRAINER.OPTIMIZER = "adamw" |
||||||
|
cfg.TRAINER.ADAMW_DECAY = 0.1 |
||||||
|
cfg.LOFTR.MATCH_COARSE.TRAIN_COARSE_PERCENT = 0.3 |
@ -0,0 +1,15 @@ |
|||||||
|
from src.config.default import _CN as cfg |
||||||
|
|
||||||
|
cfg.LOFTR.MATCH_COARSE.MATCH_TYPE = 'sinkhorn' |
||||||
|
|
||||||
|
cfg.TRAINER.CANONICAL_LR = 8e-3 |
||||||
|
cfg.TRAINER.WARMUP_STEP = 1875 # 3 epochs |
||||||
|
cfg.TRAINER.WARMUP_RATIO = 0.1 |
||||||
|
cfg.TRAINER.MSLR_MILESTONES = [8, 12, 16, 20, 24] |
||||||
|
|
||||||
|
# pose estimation |
||||||
|
cfg.TRAINER.RANSAC_PIXEL_THR = 0.5 |
||||||
|
|
||||||
|
cfg.TRAINER.OPTIMIZER = "adamw" |
||||||
|
cfg.TRAINER.ADAMW_DECAY = 0.1 |
||||||
|
cfg.LOFTR.MATCH_COARSE.TRAIN_COARSE_PERCENT = 0.3 |
@ -0,0 +1,16 @@ |
|||||||
|
from src.config.default import _CN as cfg |
||||||
|
|
||||||
|
cfg.LOFTR.MATCH_COARSE.MATCH_TYPE = 'sinkhorn' |
||||||
|
cfg.LOFTR.MATCH_COARSE.SPARSE_SPVS = False |
||||||
|
|
||||||
|
cfg.TRAINER.CANONICAL_LR = 8e-3 |
||||||
|
cfg.TRAINER.WARMUP_STEP = 1875 # 3 epochs |
||||||
|
cfg.TRAINER.WARMUP_RATIO = 0.1 |
||||||
|
cfg.TRAINER.MSLR_MILESTONES = [8, 12, 16, 20, 24] |
||||||
|
|
||||||
|
# pose estimation |
||||||
|
cfg.TRAINER.RANSAC_PIXEL_THR = 0.5 |
||||||
|
|
||||||
|
cfg.TRAINER.OPTIMIZER = "adamw" |
||||||
|
cfg.TRAINER.ADAMW_DECAY = 0.1 |
||||||
|
cfg.LOFTR.MATCH_COARSE.TRAIN_COARSE_PERCENT = 0.3 |
@ -0,0 +1,4 @@ |
|||||||
|
# Ignore everything in this directory |
||||||
|
* |
||||||
|
# Except this file |
||||||
|
!.gitignore |
@ -0,0 +1,4 @@ |
|||||||
|
# Ignore everything in this directory |
||||||
|
* |
||||||
|
# Except this file |
||||||
|
!.gitignore |
@ -0,0 +1,4 @@ |
|||||||
|
# Ignore everything in this directory |
||||||
|
* |
||||||
|
# Except this file |
||||||
|
!.gitignore |
@ -0,0 +1,3 @@ |
|||||||
|
* |
||||||
|
*/ |
||||||
|
!.gitignore |
Binary file not shown.
@ -0,0 +1 @@ |
|||||||
|
/mnt/lustre/share/3dv/dataset/scannet/scannet_1500_testset |
@ -0,0 +1 @@ |
|||||||
|
/mnt/lustre/share/3dv/dataset/scannet/out/output |
@ -0,0 +1,73 @@ |
|||||||
|
|
||||||
|
# Training LoFTR |
||||||
|
|
||||||
|
## Dataset setup |
||||||
|
Generally, two parts of data are needed for training LoFTR, the original dataset, i.e., ScanNet and MegaDepth, and the offline generated dataset indices. The dataset indices store scenes, image pairs, and other metadata within each dataset used for training/validation/testing. For the MegaDepth dataset, the relative poses between images used for training are directly cached in the indexing files. However, the relative poses of ScanNet image pairs are not stored due to the enormous resulting file size. |
||||||
|
|
||||||
|
**Download the dataset indices** |
||||||
|
|
||||||
|
You can download the required dataset indices from the [following link](https://drive.google.com/drive/folders/1DOcOPZb3-5cWxLqn256AhwUVjBPifhuf). |
||||||
|
After downloading, unzip the required files. |
||||||
|
```shell |
||||||
|
unzip downloaded-file.zip |
||||||
|
|
||||||
|
# extract dataset indices |
||||||
|
tar xf train-data/megadepth_indices.tar |
||||||
|
tar xf train-data/scannet_indices.tar |
||||||
|
|
||||||
|
# extract testing data (optional) |
||||||
|
tar xf testdata/megadepth_test_1500.tar |
||||||
|
tar xf testdata/scannet_test_1500.tar |
||||||
|
``` |
||||||
|
|
||||||
|
**Build the dataset symlinks** |
||||||
|
|
||||||
|
We symlink the datasets into the `data/` directory under the main LoFTR project directory. |
||||||
|
|
||||||
|
> NOTE: For the ScanNet dataset, we use the [python exported data](https://github.com/ScanNet/ScanNet/tree/master/SensReader/python), |
||||||
|
instead of the [c++ exported one](https://github.com/ScanNet/ScanNet/tree/master/SensReader/c%2B%2B). |
||||||
|
|
||||||
|
```shell |
||||||
|
# scannet |
||||||
|
# -- # train and test dataset |
||||||
|
ln -s /path/to/scannet_train/* /path/to/LoFTR/data/scannet/train |
||||||
|
ln -s /path/to/scannet_test/* /path/to/LoFTR/data/scannet/test |
||||||
|
# -- # dataset indices |
||||||
|
ln -s /path/to/scannet_indices/* /path/to/LoFTR/data/scannet/index |
||||||
|
|
||||||
|
# megadepth |
||||||
|
# -- # train and test dataset (train and test share the same dataset) |
||||||
|
ln -s /path/to/megadepth/Undistorted_SfM/* /path/to/LoFTR/data/megadepth/train |
||||||
|
ln -s /path/to/megadepth/Undistorted_SfM/* /path/to/LoFTR/data/megadepth/test |
||||||
|
# -- # dataset indices |
||||||
|
ln -s /path/to/megadepth_indices/* /path/to/LoFTR/data/megadepth/index |
||||||
|
``` |
||||||
|
|
||||||
|
|
||||||
|
## Training |
||||||
|
We provide training scripts of ScanNet and MegaDepth. The results in the LoFTR paper can be reproduced with 32/64 GPUs with at least 11GB of RAM for ScanNet, and 8/16 GPUs with at least 24GB of RAM for MegaDepth. For a different setup (e.g., training with 4 gpus on ScanNet), we scale the learning rate and its warm-up linearly, but the final evaluation results might vary due to the different batch size & learning rate used. Thus the reproduction of results in our paper is not guaranteed. |
||||||
|
|
||||||
|
Training scripts of the optimal-transport matcher end with "_ot" and ones of the dual-softmax matcher end with "_ds". |
||||||
|
|
||||||
|
The released training scripts use smaller setups compared to the ones used for training the released models. You could manually scale the setup (e.g., using 32 gpus instead of 4) to reproduce our results. |
||||||
|
|
||||||
|
|
||||||
|
### Training on ScanNet |
||||||
|
``` shell |
||||||
|
scripts/reproduce_train/indoor_ds.sh |
||||||
|
``` |
||||||
|
> NOTE: It uses 4 gpus only. Reproduction of paper results is not guaranteed under this setup. |
||||||
|
|
||||||
|
|
||||||
|
### Training on MegaDepth |
||||||
|
``` shell |
||||||
|
scripts/reproduce_train/outdoor_ds.sh |
||||||
|
``` |
||||||
|
> NOTE: It uses 4 gpus only, with smaller image sizes of 640x640. Reproduction of paper results is not guaranteed under this setup. |
||||||
|
|
||||||
|
|
||||||
|
## Updated Training Strategy |
||||||
|
In the released training code, we use a slightly modified version of the coarse-level training supervision compared to the one described in our paper. |
||||||
|
For example, as described in our paper, we only supervise the ground-truth positive matches when training the dual-softmax model. However, the entire confidence matrix produced by the dual-softmax matcher is supervised by default in the released code, regardless of the use of softmax operators. This implementation is counter-intuitive and unusual but leads to better evaluation results on estimating relative camera poses. The same phenomenon applies to the optimal-transport matcher version as well. Note that we don't supervise the dustbin rows and columns under the dense supervision setup. |
||||||
|
|
||||||
|
> NOTE: To use the sparse supervision described in our paper, set `_CN.LOFTR.MATCH_COARSE.SPARSE_SPVS = True`. |
File diff suppressed because one or more lines are too long
@ -0,0 +1,3 @@ |
|||||||
|
* |
||||||
|
*/ |
||||||
|
!.gitignore |
@ -0,0 +1,33 @@ |
|||||||
|
#!/bin/bash -l |
||||||
|
|
||||||
|
SCRIPTPATH=$(dirname $(readlink -f "$0")) |
||||||
|
PROJECT_DIR="${SCRIPTPATH}/../../" |
||||||
|
|
||||||
|
# conda activate loftr |
||||||
|
export PYTHONPATH=$PROJECT_DIR:$PYTHONPATH |
||||||
|
cd $PROJECT_DIR |
||||||
|
|
||||||
|
data_cfg_path="configs/data/scannet_trainval.py" |
||||||
|
main_cfg_path="configs/loftr/indoor/loftr_ds_dense.py" |
||||||
|
|
||||||
|
n_nodes=1 |
||||||
|
n_gpus_per_node=4 |
||||||
|
torch_num_workers=4 |
||||||
|
batch_size=1 |
||||||
|
pin_memory=true |
||||||
|
exp_name="indoor-ds-bs=$(($n_gpus_per_node * $n_nodes * $batch_size))" |
||||||
|
|
||||||
|
python -u ./train.py \ |
||||||
|
${data_cfg_path} \ |
||||||
|
${main_cfg_path} \ |
||||||
|
--exp_name=${exp_name} \ |
||||||
|
--gpus=${n_gpus_per_node} --num_nodes=${n_nodes} --accelerator="ddp" \ |
||||||
|
--batch_size=${batch_size} --num_workers=${torch_num_workers} --pin_memory=${pin_memory} \ |
||||||
|
--check_val_every_n_epoch=1 \ |
||||||
|
--log_every_n_steps=100 \ |
||||||
|
--flush_logs_every_n_steps=100 \ |
||||||
|
--limit_val_batches=1. \ |
||||||
|
--num_sanity_val_steps=10 \ |
||||||
|
--benchmark=True \ |
||||||
|
--max_epochs=30 \ |
||||||
|
--parallel_load_data |
@ -0,0 +1,33 @@ |
|||||||
|
#!/bin/bash -l |
||||||
|
|
||||||
|
SCRIPTPATH=$(dirname $(readlink -f "$0")) |
||||||
|
PROJECT_DIR="${SCRIPTPATH}/../../" |
||||||
|
|
||||||
|
# conda activate loftr |
||||||
|
export PYTHONPATH=$PROJECT_DIR:$PYTHONPATH |
||||||
|
cd $PROJECT_DIR |
||||||
|
|
||||||
|
data_cfg_path="configs/data/scannet_trainval.py" |
||||||
|
main_cfg_path="configs/loftr/indoor/loftr_ot_dense.py" |
||||||
|
|
||||||
|
n_nodes=1 |
||||||
|
n_gpus_per_node=4 |
||||||
|
torch_num_workers=4 |
||||||
|
batch_size=1 |
||||||
|
pin_memory=true |
||||||
|
exp_name="indoor-ot-bs=$(($n_gpus_per_node * $n_nodes * $batch_size))" |
||||||
|
|
||||||
|
python -u ./train.py \ |
||||||
|
${data_cfg_path} \ |
||||||
|
${main_cfg_path} \ |
||||||
|
--exp_name=${exp_name} \ |
||||||
|
--gpus=${n_gpus_per_node} --num_nodes=${n_nodes} --accelerator="ddp" \ |
||||||
|
--batch_size=${batch_size} --num_workers=${torch_num_workers} --pin_memory=${pin_memory} \ |
||||||
|
--check_val_every_n_epoch=1 \ |
||||||
|
--log_every_n_steps=100 \ |
||||||
|
--flush_logs_every_n_steps=100 \ |
||||||
|
--limit_val_batches=1. \ |
||||||
|
--num_sanity_val_steps=10 \ |
||||||
|
--benchmark=True \ |
||||||
|
--max_epochs=30 \ |
||||||
|
--parallel_load_data |
@ -0,0 +1,35 @@ |
|||||||
|
#!/bin/bash -l |
||||||
|
|
||||||
|
SCRIPTPATH=$(dirname $(readlink -f "$0")) |
||||||
|
PROJECT_DIR="${SCRIPTPATH}/../../" |
||||||
|
|
||||||
|
# conda activate loftr |
||||||
|
export PYTHONPATH=$PROJECT_DIR:$PYTHONPATH |
||||||
|
cd $PROJECT_DIR |
||||||
|
|
||||||
|
TRAIN_IMG_SIZE=640 |
||||||
|
# to reproduce the results in our paper, please use: |
||||||
|
# TRAIN_IMG_SIZE=840 |
||||||
|
data_cfg_path="configs/data/megadepth_trainval_${TRAIN_IMG_SIZE}.py" |
||||||
|
main_cfg_path="configs/loftr/outdoor/loftr_ds_dense.py" |
||||||
|
|
||||||
|
n_nodes=1 |
||||||
|
n_gpus_per_node=4 |
||||||
|
torch_num_workers=4 |
||||||
|
batch_size=1 |
||||||
|
pin_memory=true |
||||||
|
exp_name="outdoor-ds-${TRAIN_IMG_SIZE}-bs=$(($n_gpus_per_node * $n_nodes * $batch_size))" |
||||||
|
|
||||||
|
python -u ./train.py \ |
||||||
|
${data_cfg_path} \ |
||||||
|
${main_cfg_path} \ |
||||||
|
--exp_name=${exp_name} \ |
||||||
|
--gpus=${n_gpus_per_node} --num_nodes=${n_nodes} --accelerator="ddp" \ |
||||||
|
--batch_size=${batch_size} --num_workers=${torch_num_workers} --pin_memory=${pin_memory} \ |
||||||
|
--check_val_every_n_epoch=1 \ |
||||||
|
--log_every_n_steps=1 \ |
||||||
|
--flush_logs_every_n_steps=1 \ |
||||||
|
--limit_val_batches=1. \ |
||||||
|
--num_sanity_val_steps=10 \ |
||||||
|
--benchmark=True \ |
||||||
|
--max_epochs=30 |
@ -0,0 +1,35 @@ |
|||||||
|
#!/bin/bash -l |
||||||
|
|
||||||
|
SCRIPTPATH=$(dirname $(readlink -f "$0")) |
||||||
|
PROJECT_DIR="${SCRIPTPATH}/../../" |
||||||
|
|
||||||
|
# conda activate loftr |
||||||
|
export PYTHONPATH=$PROJECT_DIR:$PYTHONPATH |
||||||
|
cd $PROJECT_DIR |
||||||
|
|
||||||
|
TRAIN_IMG_SIZE=640 |
||||||
|
# to reproduce the results in our paper, please use: |
||||||
|
# TRAIN_IMG_SIZE=840 |
||||||
|
data_cfg_path="configs/data/megadepth_trainval_${TRAIN_IMG_SIZE}.py" |
||||||
|
main_cfg_path="configs/loftr/outdoor/loftr_ot_dense.py" |
||||||
|
|
||||||
|
n_nodes=1 |
||||||
|
n_gpus_per_node=4 |
||||||
|
torch_num_workers=4 |
||||||
|
batch_size=1 |
||||||
|
pin_memory=true |
||||||
|
exp_name="outdoor-ot-${TRAIN_IMG_SIZE}-bs=$(($n_gpus_per_node * $n_nodes * $batch_size))" |
||||||
|
|
||||||
|
python -u ./train.py \ |
||||||
|
${data_cfg_path} \ |
||||||
|
${main_cfg_path} \ |
||||||
|
--exp_name=${exp_name} \ |
||||||
|
--gpus=${n_gpus_per_node} --num_nodes=${n_nodes} --accelerator="ddp" \ |
||||||
|
--batch_size=${batch_size} --num_workers=${torch_num_workers} --pin_memory=${pin_memory} \ |
||||||
|
--check_val_every_n_epoch=1 \ |
||||||
|
--log_every_n_steps=1 \ |
||||||
|
--flush_logs_every_n_steps=1 \ |
||||||
|
--limit_val_batches=1. \ |
||||||
|
--num_sanity_val_steps=10 \ |
||||||
|
--benchmark=True \ |
||||||
|
--max_epochs=30 |
@ -0,0 +1,77 @@ |
|||||||
|
import torch |
||||||
|
from torch.utils.data import Sampler, ConcatDataset |
||||||
|
|
||||||
|
|
||||||
|
class RandomConcatSampler(Sampler):
    """Random sampler for ConcatDataset drawing a fixed number of samples per subset.

    On every pass, `n_samples_per_subset` indices are drawn from each subset of
    the ConcatDataset. If `subset_replacement` is ``True``, sampling within each
    subset is done with replacement. Otherwise a random permutation of the subset
    is used; when the subset holds fewer than `n_samples_per_subset` items, the
    missing indices are padded by sampling with replacement.

    The same generator object is reused by every call to `__iter__`, so its state
    advances from one pass to the next as long as the sampler instance is kept.
    Sampling without replacement *across* epochs is not supported.

    Args:
        data_source (ConcatDataset): the concatenated dataset to sample from.
        n_samples_per_subset (int): number of indices drawn from each subset.
        subset_replacement (bool): sample with replacement within each subset.
        shuffle (bool): shuffle the randomly sampled indices across all sub-datasets.
        repeat (int): repeatedly use the sampled indices multiple times for training (>= 1).
            [arXiv:1902.05509, arXiv:1901.09335]
        seed (int, optional): if given, `torch.manual_seed(seed)` is called, which
            reseeds torch's global default generator, and that generator is used
            for sampling. If ``None``, a private generator with a
            non-deterministic seed is used and the global RNG is left untouched.

    Raises:
        TypeError: if `data_source` is not a ConcatDataset.
        ValueError: if `repeat` is smaller than 1.

    NOTE: Don't re-initialize the sampler between epochs (will lead to repeated samples)
    NOTE: This sampler behaves differently from DistributedSampler.
          It assumes the dataset is split across ranks instead of replicated.
    TODO: Add a `set_epoch()` method to fulfill sampling without replacement across epochs.
          ref: https://github.com/PyTorchLightning/pytorch-lightning/blob/e9846dd758cfb1500eb9dba2d86f6912eb487587/pytorch_lightning/trainer/training_loop.py#L373
    """

    def __init__(self,
                 data_source: ConcatDataset,
                 n_samples_per_subset: int,
                 subset_replacement: bool = True,
                 shuffle: bool = True,
                 repeat: int = 1,
                 seed: int = None):
        if not isinstance(data_source, ConcatDataset):
            raise TypeError("data_source should be torch.utils.data.ConcatDataset")
        if repeat < 1:
            raise ValueError(f"repeat should be >= 1, got {repeat}")

        self.data_source = data_source
        self.n_subset = len(self.data_source.datasets)
        self.n_samples_per_subset = n_samples_per_subset
        self.n_samples = self.n_subset * self.n_samples_per_subset * repeat
        self.subset_replacement = subset_replacement
        self.repeat = repeat
        self.shuffle = shuffle
        if seed is None:
            # torch.manual_seed(None) raises TypeError, so the default argument
            # gets its own generator seeded from a non-deterministic source.
            self.generator = torch.Generator()
            self.generator.seed()
        else:
            # Unchanged behaviour: reseeds and returns torch's global default generator.
            self.generator = torch.manual_seed(seed)

    def __len__(self):
        """Return the total number of indices yielded per pass."""
        return self.n_samples

    def __iter__(self):
        """Yield `n_samples` dataset indices (as Python ints) for one pass."""
        cumulative_sizes = self.data_source.cumulative_sizes
        indices = []
        # sample from each sub-dataset; [low, high) is its index range in the ConcatDataset
        for d_idx in range(self.n_subset):
            low = 0 if d_idx == 0 else cumulative_sizes[d_idx - 1]
            high = cumulative_sizes[d_idx]
            if self.subset_replacement:
                rand_tensor = torch.randint(low, high, (self.n_samples_per_subset, ),
                                            generator=self.generator, dtype=torch.int64)
            else:  # sample without replacement
                len_subset = len(self.data_source.datasets[d_idx])
                rand_tensor = torch.randperm(len_subset, generator=self.generator) + low
                if len_subset >= self.n_samples_per_subset:
                    rand_tensor = rand_tensor[:self.n_samples_per_subset]
                else:  # padding with replacement
                    rand_tensor_replacement = torch.randint(
                        low, high, (self.n_samples_per_subset - len_subset, ),
                        generator=self.generator, dtype=torch.int64)
                    rand_tensor = torch.cat([rand_tensor, rand_tensor_replacement])
            indices.append(rand_tensor)
        indices = torch.cat(indices)

        if self.shuffle:  # shuffle the sampled dataset (from multiple subsets)
            indices = indices[torch.randperm(len(indices), generator=self.generator)]

        # repeat the sampled indices (can be used for RepeatAugmentation or pure RepeatSampling)
        if self.repeat > 1:
            repeat_indices = [indices.clone() for _ in range(self.repeat - 1)]
            if self.shuffle:
                # every extra copy gets its own permutation
                repeat_indices = [
                    chunk[torch.randperm(len(chunk), generator=self.generator)]
                    for chunk in repeat_indices
                ]
            indices = torch.cat([indices, *repeat_indices], 0)

        assert indices.shape[0] == self.n_samples
        return iter(indices.tolist())
@ -0,0 +1,54 @@ |
|||||||
|
import torch |
||||||
|
|
||||||
|
|
||||||
|
@torch.no_grad()
def warp_kpts(kpts0, depth0, depth1, T_0to1, K0, K1):
    """Warp keypoints from image 0 into image 1 using depth, intrinsics and pose.

    Each keypoint is unprojected with the depth sampled at its rounded pixel
    location, moved with the rigid transform and projected with K1. A keypoint
    is marked valid only if its sampled depth is non-zero, its projection lies
    strictly inside image 1 and the depth sampled in image 1 agrees with the
    computed depth (relative error < 0.2, hard-coded).

    Args:
        kpts0 (torch.Tensor): [N, L, 2] - <x, y>,
        depth0 (torch.Tensor): [N, H, W],
        depth1 (torch.Tensor): [N, H, W],
        T_0to1 (torch.Tensor): [N, 3, 4],
        K0 (torch.Tensor): [N, 3, 3],
        K1 (torch.Tensor): [N, 3, 3],
    Returns:
        valid_mask (torch.Tensor): [N, L]
        warped_keypoints0 (torch.Tensor): [N, L, 2] - <x, y> of kpts0 projected into image 1
    """

    def _sample_depth(depth, pixels):
        # Per-batch lookup depth[b, y, x] for integer pixel coordinates -> (N, L)
        return torch.stack(
            [depth[b, pix[:, 1], pix[:, 0]] for b, pix in enumerate(pixels)], dim=0
        )

    # Depth at the (rounded) source keypoints; zero depth means "no measurement"
    depth_at_kpts0 = _sample_depth(depth0, kpts0.round().long())
    has_depth = depth_at_kpts0 != 0

    # Unproject: homogeneous pixel coordinates scaled by depth, then K0^-1
    ones = torch.ones_like(kpts0[:, :, [0]])
    pixels_h = torch.cat([kpts0, ones], dim=-1) * depth_at_kpts0[..., None]  # (N, L, 3)
    points_cam0 = K0.inverse() @ pixels_h.transpose(2, 1)  # (N, 3, L)

    # Rigid transform into the frame of camera 1
    rotation, translation = T_0to1[:, :3, :3], T_0to1[:, :3, [3]]
    points_cam1 = rotation @ points_cam0 + translation  # (N, 3, L)
    depth_computed = points_cam1[:, 2, :]

    # Project with K1; +1e-4 avoids a division by zero depth
    projected_h = (K1 @ points_cam1).transpose(2, 1)  # (N, L, 3)
    w_kpts0 = projected_h[:, :, :2] / (projected_h[:, :, [2]] + 1e-4)  # (N, L, 2)

    # Covisibility: the projection must fall strictly inside image 1
    h, w = depth1.shape[1:3]
    x, y = w_kpts0[:, :, 0], w_kpts0[:, :, 1]
    in_view = (x > 0) & (x < w - 1) & (y > 0) & (y < h - 1)

    # Points outside the image are redirected to pixel (0, 0) so the lookup is safe
    pixels1 = w_kpts0.long()
    pixels1[~in_view, :] = 0
    depth_at_warped = _sample_depth(depth1, pixels1)  # (N, L)

    relative_error = ((depth_at_warped - depth_computed) / depth_at_warped).abs()
    depth_consistent = relative_error < 0.2

    valid_mask = has_depth & in_view & depth_consistent
    return valid_mask, w_kpts0
@ -0,0 +1,151 @@ |
|||||||
|
from math import log |
||||||
|
from loguru import logger |
||||||
|
|
||||||
|
import torch |
||||||
|
from einops import repeat |
||||||
|
from kornia.utils import create_meshgrid |
||||||
|
|
||||||
|
from .geometry import warp_kpts |
||||||
|
|
||||||
|
############## ↓ Coarse-Level supervision ↓ ############## |
||||||
|
|
||||||
|
|
||||||
|
@torch.no_grad()
def mask_pts_at_padded_regions(grid_pt, mask):
    """Zero out, in place, the grid points whose mask entry is false.

    For the MegaDepth dataset, zero-padding exists in images. `mask` ([n, h, w])
    is flattened to one flag per grid point and applied to both coordinates of
    `grid_pt` ([n, h*w, 2]). The modified `grid_pt` is returned.
    """
    n, h, w = mask.shape
    # one flag per point, shared by both coordinates: [n, h*w, 2]
    keep = mask.reshape(n, h * w, 1).expand(n, h * w, 2)
    grid_pt[~keep.bool()] = 0
    return grid_pt
||||||
|
|
||||||
|
|
||||||
|
@torch.no_grad()
def spvs_coarse(data, config):
    """Compute coarse-level ground-truth matches and store them in `data`.

    Coarse grid points of both images are warped into the other image with
    `warp_kpts`; a pair (i, j) is a ground-truth match when i warps to j and
    j warps back to i (mutual nearest neighbour on the coarse grid).

    Update:
        data (dict): {
            "conf_matrix_gt": [N, hw0, hw1],
            'spv_b_ids': [M]
            'spv_i_ids': [M]
            'spv_j_ids': [M]
            'spv_w_pt0_i': [N, hw0, 2], in original image resolution
            'spv_pt1_i': [N, hw1, 2], in original image resolution
        }

    NOTE:
        - for scannet dataset, there're 3 kinds of resolution {i, c, f}
        - for megadepth dataset, there're 4 kinds of resolution {i, i_resize, c, f}
    """
    # 1. misc
    device = data['image0'].device
    N, _, H0, W0 = data['image0'].shape
    _, _, H1, W1 = data['image1'].shape
    scale = config['LOFTR']['RESOLUTION'][0]
    # per-image resize factors are optional; each one is guarded by its own key
    scale0 = scale * data['scale0'][:, None] if 'scale0' in data else scale
    scale1 = scale * data['scale1'][:, None] if 'scale1' in data else scale
    h0, w0, h1, w1 = map(lambda x: x // scale, [H0, W0, H1, W1])

    # 2. warp grids
    # create kpts in meshgrid and resize them to image resolution
    grid_pt0_c = create_meshgrid(h0, w0, False, device).reshape(1, h0*w0, 2).repeat(N, 1, 1)  # [N, hw, 2]
    grid_pt0_i = scale0 * grid_pt0_c
    grid_pt1_c = create_meshgrid(h1, w1, False, device).reshape(1, h1*w1, 2).repeat(N, 1, 1)
    grid_pt1_i = scale1 * grid_pt1_c

    # mask padded region to (0, 0), so no need to manually mask conf_matrix_gt
    if 'mask0' in data:
        grid_pt0_i = mask_pts_at_padded_regions(grid_pt0_i, data['mask0'])
        grid_pt1_i = mask_pts_at_padded_regions(grid_pt1_i, data['mask1'])

    # warp kpts bi-directionally and resize them to coarse-level resolution
    # (no depth consistency check, since it leads to worse results experimentally)
    # (unhandled edge case: points with 0-depth will be warped to the left-up corner)
    _, w_pt0_i = warp_kpts(grid_pt0_i, data['depth0'], data['depth1'], data['T_0to1'], data['K0'], data['K1'])
    _, w_pt1_i = warp_kpts(grid_pt1_i, data['depth1'], data['depth0'], data['T_1to0'], data['K1'], data['K0'])
    w_pt0_c = w_pt0_i / scale1
    w_pt1_c = w_pt1_i / scale0

    # 3. check if mutual nearest neighbor
    w_pt0_c_round = w_pt0_c[:, :, :].round().long()
    nearest_index1 = w_pt0_c_round[..., 0] + w_pt0_c_round[..., 1] * w1
    w_pt1_c_round = w_pt1_c[:, :, :].round().long()
    nearest_index0 = w_pt1_c_round[..., 0] + w_pt1_c_round[..., 1] * w0

    # corner case: out of boundary
    def out_bound_mask(pt, w, h):
        return (pt[..., 0] < 0) | (pt[..., 0] >= w) | (pt[..., 1] < 0) | (pt[..., 1] >= h)

    # out-of-bound points are redirected to index 0, which is ignored below
    nearest_index1[out_bound_mask(w_pt0_c_round, w1, h1)] = 0
    nearest_index0[out_bound_mask(w_pt1_c_round, w0, h0)] = 0

    # loop_back[b, i] = nearest_index0[b, nearest_index1[b, i]]
    loop_back = torch.gather(nearest_index0, 1, nearest_index1)
    correct_0to1 = loop_back == torch.arange(h0*w0, device=device)[None].repeat(N, 1)
    correct_0to1[:, 0] = False  # ignore the top-left corner

    # 4. construct a gt conf_matrix
    conf_matrix_gt = torch.zeros(N, h0*w0, h1*w1, device=device)
    b_ids, i_ids = torch.where(correct_0to1 != 0)
    j_ids = nearest_index1[b_ids, i_ids]

    conf_matrix_gt[b_ids, i_ids, j_ids] = 1
    data.update({'conf_matrix_gt': conf_matrix_gt})

    # 5. save coarse matches(gt) for training fine level
    if len(b_ids) == 0:
        logger.warning(f"No groundtruth coarse match found for: {data['pair_names']}")
        # this won't affect fine-level loss calculation
        b_ids = torch.tensor([0], device=device)
        i_ids = torch.tensor([0], device=device)
        j_ids = torch.tensor([0], device=device)

    data.update({
        'spv_b_ids': b_ids,
        'spv_i_ids': i_ids,
        'spv_j_ids': j_ids
    })

    # 6. save intermediate results (for fast fine-level computation)
    data.update({
        'spv_w_pt0_i': w_pt0_i,
        'spv_pt1_i': grid_pt1_i
    })
||||||
|
|
||||||
|
|
||||||
|
def compute_supervision_coarse(data, config):
    """Dispatch coarse-level supervision for a batch drawn from a single dataset.

    All entries of `data['dataset_name']` must be identical. ScanNet and
    MegaDepth (case-insensitive) are handled by `spvs_coarse`; any other name
    raises ValueError.
    """
    dataset_names = data['dataset_name']
    assert len(set(dataset_names)) == 1, "Do not support mixed datasets training!"
    data_source = dataset_names[0]
    if data_source.lower() not in ('scannet', 'megadepth'):
        raise ValueError(f'Unknown data source: {data_source}')
    spvs_coarse(data, config)
||||||
|
|
||||||
|
|
||||||
|
############## ↓ Fine-Level supervision ↓ ############## |
||||||
|
|
||||||
|
@torch.no_grad()
def spvs_fine(data, config):
    """Compute the fine-level ground-truth offsets for the predicted coarse matches.

    For every coarse match (b, i, j), the offset between the warped point of
    image 0 and the matched grid point of image 1 is divided by the fine-level
    scale and by the window radius.

    Update:
        data (dict):{
            "expec_f_gt": [M, 2]}
    """
    # 1. misc
    w_pt0_i, pt1_i = data['spv_w_pt0_i'], data['spv_pt1_i']
    scale = config['LOFTR']['RESOLUTION'][1]
    radius = config['LOFTR']['FINE_WINDOW_SIZE'] // 2

    # 2. get coarse prediction
    b_ids, i_ids, j_ids = data['b_ids'], data['i_ids'], data['j_ids']

    # 3. compute gt
    # the offset is measured in image 1, so the guard checks the key that is actually read
    if 'scale1' in data:
        scale = scale * data['scale1'][b_ids]
    # `expec_f_gt` might exceed the window, i.e. abs(*) > 1, which would be filtered later
    expec_f_gt = (w_pt0_i[b_ids, i_ids] - pt1_i[b_ids, j_ids]) / scale / radius  # [M, 2]
    data.update({"expec_f_gt": expec_f_gt})
||||||
|
|
||||||
|
|
||||||
|
def compute_supervision_fine(data, config):
    """Dispatch fine-level supervision computation by dataset.

    Args:
        data (dict): batch dict; the first entry of ``data['dataset_name']``
            selects the data source.
        config: forwarded unchanged to ``spvs_fine``.

    Raises:
        NotImplementedError: if the data source is neither ScanNet nor
            MegaDepth (compared case-insensitively).
    """
    data_source = data['dataset_name'][0]
    if data_source.lower() in ['scannet', 'megadepth']:
        spvs_fine(data, config)
    else:
        # Same exception type as before, now naming the offending source.
        raise NotImplementedError(f'Unknown data source: {data_source}')
@ -0,0 +1,192 @@ |
|||||||
|
from loguru import logger |
||||||
|
|
||||||
|
import torch |
||||||
|
import torch.nn as nn |
||||||
|
|
||||||
|
|
||||||
|
class LoFTRLoss(nn.Module):
    """Combined coarse-level and fine-level loss.

    The coarse loss is a point-wise cross-entropy or focal loss on a
    confidence matrix; the fine loss is an L2 loss on predicted offsets,
    optionally weighted by a predicted std. Settings are read from
    ``config['loftr']['loss']`` and ``config['loftr']['match_coarse']``.
    """

    def __init__(self, config):
        super().__init__()
        self.config = config  # config under the global namespace
        self.loss_config = config['loftr']['loss']
        self.match_type = self.config['loftr']['match_coarse']['match_type']
        self.sparse_spvs = self.config['loftr']['match_coarse']['sparse_spvs']

        # coarse-level
        self.c_pos_w = self.loss_config['pos_weight']
        self.c_neg_w = self.loss_config['neg_weight']
        # fine-level
        self.correct_thr = self.loss_config['fine_correct_thr']
        self.fine_type = self.loss_config['fine_type']

    def compute_coarse_loss(self, conf, conf_gt, weight=None):
        """ Point-wise CE / Focal Loss with 0 / 1 confidence as gt.

        NOTE: in the corner cases below ``weight`` is edited in place
        (element [0, 0, 0] is zeroed) rather than copied.

        Args:
            conf (torch.Tensor): (N, HW0, HW1) / (N, HW0+1, HW1+1)
            conf_gt (torch.Tensor): (N, HW0, HW1)
            weight (torch.Tensor): (N, HW0, HW1)
        Returns:
            torch.Tensor: the scalar coarse-level loss.
        Raises:
            ValueError: if ``loss_config['coarse_type']`` is unknown.
        """
        pos_mask, neg_mask = conf_gt == 1, conf_gt == 0
        c_pos_w, c_neg_w = self.c_pos_w, self.c_neg_w
        # corner case: no gt coarse-level match at all
        if not pos_mask.any():  # assign a wrong gt
            pos_mask[0, 0, 0] = True
            if weight is not None:
                weight[0, 0, 0] = 0.
            c_pos_w = 0.
        if not neg_mask.any():
            neg_mask[0, 0, 0] = True
            if weight is not None:
                weight[0, 0, 0] = 0.
            c_neg_w = 0.

        if self.loss_config['coarse_type'] == 'cross_entropy':
            assert not self.sparse_spvs, 'Sparse Supervision for cross-entropy not implemented!'
            conf = torch.clamp(conf, 1e-6, 1-1e-6)
            loss_pos = - torch.log(conf[pos_mask])
            loss_neg = - torch.log(1 - conf[neg_mask])
            if weight is not None:
                loss_pos = loss_pos * weight[pos_mask]
                loss_neg = loss_neg * weight[neg_mask]
            return c_pos_w * loss_pos.mean() + c_neg_w * loss_neg.mean()
        elif self.loss_config['coarse_type'] == 'focal':
            conf = torch.clamp(conf, 1e-6, 1-1e-6)
            alpha = self.loss_config['focal_alpha']
            gamma = self.loss_config['focal_gamma']

            if self.sparse_spvs:
                # With sinkhorn, `conf` carries an extra dustbin row/column
                # that is stripped before indexing with the gt mask.
                pos_conf = conf[:, :-1, :-1][pos_mask] \
                    if self.match_type == 'sinkhorn' \
                    else conf[pos_mask]
                loss_pos = - alpha * torch.pow(1 - pos_conf, gamma) * pos_conf.log()
                # calculate losses for negative samples
                if self.match_type == 'sinkhorn':
                    # rows / columns without any gt match are supervised towards the dustbin
                    neg0, neg1 = conf_gt.sum(-1) == 0, conf_gt.sum(1) == 0
                    neg_conf = torch.cat([conf[:, :-1, -1][neg0], conf[:, -1, :-1][neg1]], 0)
                    loss_neg = - alpha * torch.pow(1 - neg_conf, gamma) * neg_conf.log()
                else:
                    # There is no dustbin for dual_softmax, so we leave unmatchable patches without supervision.
                    # we could also add 'pseudo negative-samples'
                    pass
                # handle loss weights
                if weight is not None:
                    # Different from dense-spvs, the loss w.r.t. padded regions isn't directly zeroed out,
                    # but only through manually setting corresponding regions in sim_matrix to '-inf'.
                    loss_pos = loss_pos * weight[pos_mask]
                    if self.match_type == 'sinkhorn':
                        neg_w0 = (weight.sum(-1) != 0)[neg0]
                        neg_w1 = (weight.sum(1) != 0)[neg1]
                        neg_mask = torch.cat([neg_w0, neg_w1], 0)
                        loss_neg = loss_neg[neg_mask]

                # positive and negative elements occupy similar proportions. => more balanced loss weights needed
                loss = c_pos_w * loss_pos.mean() + c_neg_w * loss_neg.mean() \
                    if self.match_type == 'sinkhorn' \
                    else c_pos_w * loss_pos.mean()
                return loss
            else:  # dense supervision (in the case of match_type=='sinkhorn', the dustbin is not supervised.)
                loss_pos = - alpha * torch.pow(1 - conf[pos_mask], gamma) * (conf[pos_mask]).log()
                loss_neg = - alpha * torch.pow(conf[neg_mask], gamma) * (1 - conf[neg_mask]).log()
                if weight is not None:
                    loss_pos = loss_pos * weight[pos_mask]
                    loss_neg = loss_neg * weight[neg_mask]
                # each negative element occupies a smaller proportion than positive elements.
                # => higher negative loss weight needed
                return c_pos_w * loss_pos.mean() + c_neg_w * loss_neg.mean()
        else:
            raise ValueError('Unknown coarse loss: {type}'.format(type=self.loss_config['coarse_type']))

    def compute_fine_loss(self, expec_f, expec_f_gt):
        """Dispatch to the fine-level loss selected by ``self.fine_type``.

        Returns:
            torch.Tensor | None: the loss, or None when no correct coarse
            match exists outside training mode.
        Raises:
            NotImplementedError: if ``self.fine_type`` is not supported.
        """
        if self.fine_type == 'l2_with_std':
            return self._compute_fine_loss_l2_std(expec_f, expec_f_gt)
        elif self.fine_type == 'l2':
            return self._compute_fine_loss_l2(expec_f, expec_f_gt)
        else:
            raise NotImplementedError(f'Unknown fine loss: {self.fine_type}')

    def _compute_fine_loss_l2(self, expec_f, expec_f_gt):
        """
        Args:
            expec_f (torch.Tensor): [M, 2] <x, y>
            expec_f_gt (torch.Tensor): [M, 2] <x, y>
        Returns:
            torch.Tensor | None: mean squared offset error over the pairs
            whose gt lies within ``self.correct_thr``; None when there is no
            such pair outside training mode.
        """
        correct_mask = torch.linalg.norm(expec_f_gt, ord=float('inf'), dim=1) < self.correct_thr
        is_false_spv = False
        if not correct_mask.any():
            if not self.training:
                return None
            # this seldom happens when training, since we pad prediction with gt
            logger.warning("assign a false supervision to avoid ddp deadlock")
            correct_mask[0] = True
            is_false_spv = True
        offset_l2 = ((expec_f_gt[correct_mask] - expec_f[correct_mask]) ** 2).sum(-1)
        loss = offset_l2.mean()
        # The false supervision only keeps the graph connected; zero-weight it
        # so it does not drive training, consistent with the l2_with_std variant.
        return loss * 0. if is_false_spv else loss

    def _compute_fine_loss_l2_std(self, expec_f, expec_f_gt):
        """
        Args:
            expec_f (torch.Tensor): [M, 3] <x, y, std>
            expec_f_gt (torch.Tensor): [M, 2] <x, y>
        Returns:
            torch.Tensor | None: std-weighted mean squared offset error; None
            when there is no correct pair outside training mode.
        """
        # correct_mask tells you which pair to compute fine-loss
        correct_mask = torch.linalg.norm(expec_f_gt, ord=float('inf'), dim=1) < self.correct_thr

        # use std as weight that measures uncertainty
        std = expec_f[:, 2]
        inverse_std = 1. / torch.clamp(std, min=1e-10)
        weight = (inverse_std / torch.mean(inverse_std)).detach()  # avoid minimizing loss through increasing std

        # corner case: no correct coarse match found
        if not correct_mask.any():
            if self.training:  # this seldom happens during training, since we pad prediction with gt
                # sometimes there is no coarse-level gt at all.
                logger.warning("assign a false supervision to avoid ddp deadlock")
                correct_mask[0] = True
                weight[0] = 0.
            else:
                return None

        # l2 loss with std
        offset_l2 = ((expec_f_gt[correct_mask] - expec_f[correct_mask, :2]) ** 2).sum(-1)
        loss = (offset_l2 * weight[correct_mask]).mean()

        return loss

    @torch.no_grad()
    def compute_c_weight(self, data):
        """ compute element-wise weights for computing coarse-level loss. """
        if 'mask0' in data:
            # outer product of the two flattened masks
            c_weight = (data['mask0'].flatten(-2)[..., None] * data['mask1'].flatten(-2)[:, None]).float()
        else:
            c_weight = None
        return c_weight

    def forward(self, data):
        """
        Reads 'conf_matrix' (or 'conf_matrix_with_bin' for sparse sinkhorn
        supervision), 'conf_matrix_gt', 'expec_f' and 'expec_f_gt' from data.

        Update:
            data (dict): update{
                'loss': [1] the reduced loss across a batch,
                'loss_scalars' (dict): loss scalars for tensorboard_record
            }
        """
        loss_scalars = {}
        # 0. compute element-wise loss weight
        c_weight = self.compute_c_weight(data)

        # 1. coarse-level loss
        loss_c = self.compute_coarse_loss(
            data['conf_matrix_with_bin'] if self.sparse_spvs and self.match_type == 'sinkhorn'
            else data['conf_matrix'],
            data['conf_matrix_gt'],
            weight=c_weight)
        loss = loss_c * self.loss_config['coarse_weight']
        loss_scalars.update({"loss_c": loss_c.clone().detach().cpu()})

        # 2. fine-level loss
        loss_f = self.compute_fine_loss(data['expec_f'], data['expec_f_gt'])
        if loss_f is not None:
            loss += loss_f * self.loss_config['fine_weight']
            loss_scalars.update({"loss_f": loss_f.clone().detach().cpu()})
        else:
            # a missing fine loss is only possible outside training mode
            assert self.training is False
            loss_scalars.update({'loss_f': torch.tensor(1.)})  # 1 is the upper bound

        loss_scalars.update({'loss': loss.clone().detach().cpu()})
        data.update({"loss": loss, "loss_scalars": loss_scalars})
@ -0,0 +1,42 @@ |
|||||||
|
import torch |
||||||
|
from torch.optim.lr_scheduler import MultiStepLR, CosineAnnealingLR, ExponentialLR |
||||||
|
|
||||||
|
|
||||||
|
def build_optimizer(model, config):
    """Create the optimizer named by ``config.TRAINER.OPTIMIZER``.

    Args:
        model: object exposing ``parameters()``.
        config: config exposing ``TRAINER.OPTIMIZER``, ``TRAINER.TRUE_LR`` and
            the weight decay of the selected optimizer
            (``TRAINER.ADAM_DECAY`` / ``TRAINER.ADAMW_DECAY``).

    Returns:
        torch.optim.Adam for "adam", torch.optim.AdamW for "adamw".

    Raises:
        ValueError: for any other optimizer name.
    """
    optim_name = config.TRAINER.OPTIMIZER
    learning_rate = config.TRAINER.TRUE_LR

    # Reject unknown names up front, then pick class and decay together.
    if optim_name not in ("adam", "adamw"):
        raise ValueError(f"TRAINER.OPTIMIZER = {optim_name} is not a valid optimizer!")

    if optim_name == "adam":
        optim_cls, decay = torch.optim.Adam, config.TRAINER.ADAM_DECAY
    else:
        optim_cls, decay = torch.optim.AdamW, config.TRAINER.ADAMW_DECAY
    return optim_cls(model.parameters(), lr=learning_rate, weight_decay=decay)
||||||
|
|
||||||
|
|
||||||
|
def build_scheduler(config, optimizer):
    """Create the LR scheduler named by ``config.TRAINER.SCHEDULER``.

    Supported names are 'MultiStepLR', 'CosineAnnealing' and 'ExponentialLR'.

    Returns:
        scheduler (dict):{
            'scheduler': lr_scheduler,
            'interval': 'step',  # or 'epoch'
            'monitor': 'val_f1', (optional)
            'frequency': x, (optional)
        }
        Only 'interval' and 'scheduler' are filled in here.

    Raises:
        NotImplementedError: for any other scheduler name.
    """
    scheduler = {'interval': config.TRAINER.SCHEDULER_INTERVAL}
    sched_name = config.TRAINER.SCHEDULER

    if sched_name == 'MultiStepLR':
        lr_scheduler = MultiStepLR(
            optimizer, config.TRAINER.MSLR_MILESTONES, gamma=config.TRAINER.MSLR_GAMMA)
    elif sched_name == 'CosineAnnealing':
        lr_scheduler = CosineAnnealingLR(optimizer, config.TRAINER.COSA_TMAX)
    elif sched_name == 'ExponentialLR':
        lr_scheduler = ExponentialLR(optimizer, config.TRAINER.ELR_GAMMA)
    else:
        raise NotImplementedError()

    scheduler['scheduler'] = lr_scheduler
    return scheduler
@ -0,0 +1 @@ |
|||||||
|
Subproject commit c0626d58c843ee0464b0fa1dd4de4059bfae0ab4 |
@ -0,0 +1,120 @@ |
|||||||
|
import math |
||||||
|
import argparse |
||||||
|
import pprint |
||||||
|
from distutils.util import strtobool |
||||||
|
from pathlib import Path |
||||||
|
from loguru import logger as loguru_logger |
||||||
|
|
||||||
|
import pytorch_lightning as pl |
||||||
|
from pytorch_lightning.utilities import rank_zero_only |
||||||
|
from pytorch_lightning.loggers import TensorBoardLogger |
||||||
|
from pytorch_lightning.callbacks import ModelCheckpoint, LearningRateMonitor |
||||||
|
from pytorch_lightning.plugins import DDPPlugin |
||||||
|
|
||||||
|
from src.config.default import get_cfg_defaults |
||||||
|
from src.utils.misc import get_rank_zero_only_logger, setup_gpus |
||||||
|
from src.utils.profiler import build_profiler |
||||||
|
from src.lightning.data import MultiSceneDataModule |
||||||
|
from src.lightning.lightning_loftr import PL_LoFTR |
||||||
|
|
||||||
|
# Rebind the module-level logger to the object returned by
# get_rank_zero_only_logger; presumably this silences logging on non-zero
# ranks -- confirm against src.utils.misc.
loguru_logger = get_rank_zero_only_logger(loguru_logger)
||||||
|
|
||||||
|
|
||||||
|
def parse_args():
    """Build the command-line parser and parse the process arguments.

    The script-specific options are declared first, then the pl.Trainer
    flags are appended to the same parser.

    Returns:
        argparse.Namespace: the parsed arguments.
    """
    # init a custom parser which will be added into pl.Trainer parser
    # check documentation: https://pytorch-lightning.readthedocs.io/en/latest/common/trainer.html#trainer-flags
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # NOTE(review): the two positional declarations below were truncated in the
    # reviewed text. Their names are taken from how main() reads them
    # (args.data_cfg_path / args.main_cfg_path); positional order and help
    # text must be confirmed against the original file.
    parser.add_argument(
        'data_cfg_path', type=str, help='data config path')
    parser.add_argument(
        'main_cfg_path', type=str, help='main config path')
    parser.add_argument(
        '--exp_name', type=str, default='default_exp_name')
    parser.add_argument(
        '--batch_size', type=int, default=4, help='batch_size per gpu')
    parser.add_argument(
        '--num_workers', type=int, default=4)
    # const=True makes a bare `--pin_memory` mean "enabled"; without it,
    # nargs='?' would store None when the flag is given without a value.
    parser.add_argument(
        '--pin_memory', type=lambda x: bool(strtobool(x)),
        nargs='?', const=True, default=True,
        help='whether loading data to pinned memory or not')
    # NOTE(review): the help text of --ckpt_path was truncated in the reviewed
    # text; the wording below is a placeholder to confirm.
    parser.add_argument(
        '--ckpt_path', type=str, default=None,
        help='pretrained checkpoint path')
    parser.add_argument(
        '--disable_ckpt', action='store_true',
        help='disable checkpoint saving (useful for debugging).')
    parser.add_argument(
        '--profiler_name', type=str, default=None,
        help='options: [inference, pytorch], or leave it unset')
    parser.add_argument(
        '--parallel_load_data', action='store_true',
        help='load datasets in with multiple processes.')

    parser = pl.Trainer.add_argparse_args(parser)
    return parser.parse_args()
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Entry point of the training script.

    Parses the arguments, merges the main and data config files into the
    defaults, derives the batch-size dependent trainer settings, builds the
    lightning module, data module, logger and callbacks, and starts training.
    """
    # parse arguments
    args = parse_args()
    rank_zero_only(pprint.pprint)(vars(args))

    # init default-cfg and merge it with the main- and data-cfg (in that order)
    config = get_cfg_defaults()
    for cfg_path in (args.main_cfg_path, args.data_cfg_path):
        config.merge_from_file(cfg_path)
    pl.seed_everything(config.TRAINER.SEED)  # reproducibility
    # TODO: Use different seeds for each dataloader workers
    # This is needed for data augmentation

    # scale lr and warmup-step automatically
    n_gpus = setup_gpus(args.gpus)
    args.gpus = n_gpus
    config.TRAINER.WORLD_SIZE = n_gpus * args.num_nodes
    config.TRAINER.TRUE_BATCH_SIZE = config.TRAINER.WORLD_SIZE * args.batch_size
    scaling = config.TRAINER.TRUE_BATCH_SIZE / config.TRAINER.CANONICAL_BS
    config.TRAINER.SCALING = scaling
    config.TRAINER.TRUE_LR = config.TRAINER.CANONICAL_LR * scaling
    config.TRAINER.WARMUP_STEP = math.floor(config.TRAINER.WARMUP_STEP / scaling)

    # lightning module
    profiler = build_profiler(args.profiler_name)
    model = PL_LoFTR(config, pretrained_ckpt=args.ckpt_path, profiler=profiler)
    loguru_logger.info("LoFTR LightningModule initialized!")

    # lightning data
    data_module = MultiSceneDataModule(args, config)
    loguru_logger.info("LoFTR DataModule initialized!")

    # TensorBoard Logger
    logger = TensorBoardLogger(save_dir='logs/tb_logs', name=args.exp_name, default_hp_metric=False)
    ckpt_dir = Path(logger.log_dir) / 'checkpoints'

    # Callbacks
    # TODO: update ModelCheckpoint to monitor multiple metrics
    ckpt_callback = ModelCheckpoint(
        monitor='auc@10',
        verbose=True,
        save_top_k=5,
        mode='max',
        save_last=True,
        dirpath=str(ckpt_dir),
        filename='{epoch}-{auc@5:.3f}-{auc@10:.3f}-{auc@20:.3f}')
    lr_monitor = LearningRateMonitor(logging_interval='step')
    # the checkpoint callback is only registered when saving is enabled
    callbacks = [lr_monitor] if args.disable_ckpt else [lr_monitor, ckpt_callback]

    # Lightning Trainer
    # the same sync-batchnorm flag goes to both the DDP plugin and the trainer
    use_sync_bn = config.TRAINER.WORLD_SIZE > 0
    ddp_plugin = DDPPlugin(find_unused_parameters=False,
                           num_nodes=args.num_nodes,
                           sync_batchnorm=use_sync_bn)
    trainer = pl.Trainer.from_argparse_args(
        args,
        plugins=ddp_plugin,
        gradient_clip_val=config.TRAINER.GRADIENT_CLIPPING,
        callbacks=callbacks,
        logger=logger,
        sync_batchnorm=use_sync_bn,
        replace_sampler_ddp=False,  # use custom sampler
        reload_dataloaders_every_epoch=False,  # avoid repeated samples!
        weights_summary='full',
        profiler=profiler)
    loguru_logger.info("Trainer initialized!")
    loguru_logger.info("Start training!")
    trainer.fit(model, datamodule=data_module)
||||||
|
|
||||||
|
|
||||||
|
# Start training only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Loading…
Reference in new issue