#!/usr/bin/python3

# Copyright (c) ByteDance, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

import datetime
import json
import logging
import os
import time
from collections import OrderedDict, defaultdict
from functools import partial
from pprint import pformat

import numpy as np
import torch
import detectron2.utils.comm as comm
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import get_cfg
from detectron2.data import MetadataCatalog
from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, hooks, launch, PeriodicWriter
from detectron2.evaluation import (
    CityscapesInstanceEvaluator,
    CityscapesSemSegEvaluator,
    COCOEvaluator,
    COCOPanopticEvaluator,
    DatasetEvaluators,
    LVISEvaluator,
    PascalVOCDetectionEvaluator,
    SemSegEvaluator,
    verify_results,
)
from detectron2.layers import get_norm
from detectron2.modeling import GeneralizedRCNNWithTTA
from detectron2.modeling.roi_heads import ROI_HEADS_REGISTRY, Res5ROIHeads
from detectron2.solver.build import maybe_add_gradient_clipping
from detectron2.utils.events import EventWriter

from lr_decay import get_default_optimizer_params, lr_factor_func


# [modification] for better logging
def _ex_repr(self):
    d = vars(self)
    ex = ', '.join(f'{k}={v}' for k, v in d.items() if not k.startswith('__') and k not in [
        'trainer', 'before_train', 'after_train', 'before_step', 'after_step', 'state_dict',
        '_model', '_data_loader', 'logger',
    ])
    return f'{type(self).__name__}({ex})'
hooks.HookBase.__repr__ = _ex_repr
EventWriter.__repr__ = _ex_repr
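
# With this patch, a printed hook reads like, e.g.,
#   PeriodicWriter(_writers=[...], _period=20)
# instead of `<detectron2.engine.hooks.PeriodicWriter object at 0x...>`.
# (Illustrative output only; the exact fields depend on each hook's attributes.)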


# [modification] add norm
@ROI_HEADS_REGISTRY.register()
class Res5ROIHeadsExtraNorm(Res5ROIHeads):
    """
    As described in the MoCo paper, there is an extra BN layer
    following the res5 stage.
    """

    def _build_res5_block(self, cfg):
        seq, out_channels = super()._build_res5_block(cfg)
        norm = cfg.MODEL.RESNETS.NORM
        norm = get_norm(norm, out_channels)
        seq.add_module("norm", norm)
        return seq, out_channels
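
# For example, with `MODEL.RESNETS.NORM = "BN"` (an illustrative setting),
# `get_norm` returns a BatchNorm2d over `out_channels`, so the res5 head
# ends with its conv blocks followed by one extra norm layer named "norm".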


class Trainer(DefaultTrainer):
    """
    We use the "DefaultTrainer" which contains pre-defined default logic for
    a standard training workflow. It may not work for you, especially if you
    are working on a new research project. In that case you can write your
    own training loop. You can use "tools/plain_train_net.py" as an example.
    """

    # [modification] override `build_optimizer` to support Adam(W) and layer-wise lr decay
    lr_decay_ratio: float = 1.0

    @classmethod
    def build_optimizer(cls, cfg, model):
        is_resnet50 = int(cfg.MODEL.RESNETS.DEPTH) == 50
        if comm.is_main_process():
            # debug print: group parameter names by their lr factor
            dbg = defaultdict(list)
            for module_name, module in model.named_modules():
                for module_param_name, value in module.named_parameters(recurse=False):
                    if not value.requires_grad:
                        continue
                    lrf = lr_factor_func(f"{module_name}.{module_param_name}", is_resnet50=is_resnet50, dec=cls.lr_decay_ratio, debug=True)
                    dbg[lrf].append(f"{module_name}.{module_param_name}")
            for k in sorted(dbg.keys()):
                print(f'[{k}] {sorted(dbg[k])}')
            print()

        params = get_default_optimizer_params(
            model,
            base_lr=cfg.SOLVER.BASE_LR,
            weight_decay_norm=cfg.SOLVER.WEIGHT_DECAY_NORM,
            bias_lr_factor=cfg.SOLVER.BIAS_LR_FACTOR,
            weight_decay_bias=cfg.SOLVER.WEIGHT_DECAY_BIAS,
            lr_factor_func=partial(lr_factor_func, is_resnet50=is_resnet50, dec=cls.lr_decay_ratio, debug=False),
        )

        opt_clz = {
            'sgd': partial(torch.optim.SGD, momentum=cfg.SOLVER.MOMENTUM, nesterov=cfg.SOLVER.NESTEROV),
            'adamw': torch.optim.AdamW,
            'adam': torch.optim.AdamW,  # note: 'adam' maps to AdamW as well (no plain Adam here)
        }[cfg.SOLVER.OPTIMIZER.lower()]
        return maybe_add_gradient_clipping(cfg, opt_clz)(params, lr=cfg.SOLVER.BASE_LR, weight_decay=cfg.SOLVER.WEIGHT_DECAY)
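
# A sketch of the layer-wise decay rule implemented in `lr_decay.lr_factor_func`
# (the exact layer indexing lives in that module; the formula below is the usual
# scheme, not its verbatim code): a parameter in depth-block `i` out of `N` gets
#   lr = SOLVER.BASE_LR * lr_decay_ratio ** (N - i)
# so shallower backbone layers are fine-tuned with smaller learning rates.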

    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        return build_evaluator(cfg, dataset_name, output_folder)

    @classmethod
    def test_with_TTA(cls, cfg, model):
        logger = logging.getLogger("detectron2.trainer")
        # At the end of training, run an evaluation with TTA.
        # Only supports some R-CNN models.
        logger.info("Running inference with test-time augmentation ...")
        model = GeneralizedRCNNWithTTA(cfg, model)
        evaluators = [
            cls.build_evaluator(
                cfg, name, output_folder=os.path.join(cfg.OUTPUT_DIR, "inference_TTA")
            )
            for name in cfg.DATASETS.TEST
        ]
        res = cls.test(cfg, model, evaluators)
        res = OrderedDict({k + "_TTA": v for k, v in res.items()})
        return res
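
# Note: `cls.test` returns a dict of metrics per task, so after the renaming
# above the keys read e.g. "bbox_TTA" / "segm_TTA" (illustrative key names).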


def setup(args):
    """
    Create the config and perform basic setup.
    """
    cfg = get_cfg()
    # [modification] add two new config keys; default to SGD with no lr decay
    cfg.SOLVER.OPTIMIZER, cfg.SOLVER.LR_DECAY = 'sgd', 1.0
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()
    default_setup(cfg, args)
    return cfg
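
# Example invocation (script name, config path, and override values are illustrative):
#   python train_net.py --config-file configs/coco_R_50_C4_1x.yaml --num-gpus 8 \
#       SOLVER.OPTIMIZER adamw SOLVER.LR_DECAY 0.7
# `SOLVER.OPTIMIZER` and `SOLVER.LR_DECAY` are the two keys added above; all
# other keys are standard detectron2 config options.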


def main(args):
    cfg = setup(args)
    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

    # [modification] for implementing lr decay and for logging
    Trainer.lr_decay_ratio = cfg.SOLVER.LR_DECAY

    if args.eval_only:
        model = Trainer.build_model(cfg)
        DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load(
            cfg.MODEL.WEIGHTS, resume=args.resume
        )
        res = Trainer.test(cfg, model)
        if cfg.TEST.AUG.ENABLED:
            res.update(Trainer.test_with_TTA(cfg, model))
        if comm.is_main_process():
            verify_results(cfg, res)
        return res

    # [modification] pre-trigger some deprecation warnings while a filter is active,
    # so they are registered as already seen and do not spam the training log later.
    # NOTE: the `np.int` / `np.bool` aliases were removed in NumPy >= 1.24, where they
    # raise AttributeError instead of warning; the try/except keeps this best-effort
    # warm-up from crashing there.
    import warnings
    comm.synchronize()
    warnings.filterwarnings('ignore', category=UserWarning)
    try:
        _ = np.arange(3, dtype=np.int).astype(np.bool)
        _ = np.array(torch.ones(3, dtype=torch.int32).numpy(), dtype=np.int)
        _ = np.array(torch.ones(3, dtype=torch.int64).numpy(), dtype=np.int)
        _ = np.array(torch.ones(3, dtype=torch.long).numpy(), dtype=np.int)
    except AttributeError:
        pass  # NumPy >= 1.24: the deprecated aliases are already gone
    _ = torch.rand(100) // 5
    _ = torch.meshgrid(torch.ones(1))
    warnings.resetwarnings()
    comm.synchronize()

    """
    If you'd like to do anything fancier than the standard training logic,
    consider writing your own training loop (see plain_train_net.py) or
    subclassing the trainer.
    """
    trainer = Trainer(cfg)
    trainer.resume_or_load(resume=args.resume)
    for h in trainer._hooks:
        if isinstance(h, PeriodicWriter):
            h._period = 1000  # [modification] write logs less frequently

    # [modification] register some extra hooks for logging
    is_local_master = comm.get_rank() % args.num_gpus == 0
    if comm.is_main_process():
        print(f'[default hooks] {pformat(trainer._hooks, indent=2, width=300)}')
    ex_hooks = [
        hooks.EvalHook(0, lambda: trainer.test_with_TTA(cfg, trainer.model)) if cfg.TEST.AUG.ENABLED else None,
        LogHook(cfg.TEST.EVAL_PERIOD, args.config_file, cfg.OUTPUT_DIR, is_local_master) if comm.is_main_process() else None,
    ]
    trainer.register_hooks(ex_hooks)  # `register_hooks` filters out the None entries
    if comm.is_main_process():
        print(f'[extra hooks] {pformat(ex_hooks, indent=2, width=300)}')

    return trainer.train()


# [modification] a hook that logs results to `cfg.OUTPUT_DIR/d2_coco_log.txt`
class LogHook(hooks.HookBase):
    def __init__(self, eval_period, config_file, output_dir, is_local_master):
        self.eval_period = eval_period
        self.log_period = eval_period // 4
        self.log = {}

        self.is_master = comm.is_main_process()
        self.is_local_master = is_local_master

        self.config_file = config_file
        self.out_dir = output_dir
        self.log_txt_name = os.path.join(self.out_dir, 'd2_coco_log.txt')

    def __write_to_log_file(self, d):
        if self.is_local_master:
            self.log.update(d)
            # rewrite the whole file each time: it always holds one JSON line with the latest status
            with open(self.log_txt_name, 'w') as fp:
                json.dump(self.log, fp)
                fp.write('\n')

    def update_and_write_to_local_log(self):
        stat = self.trainer.storage.latest()
        self.log['boxAP'], self.log['bAP50'], self.log['bAP75'] = stat['bbox/AP'][0], stat['bbox/AP50'][0], stat['bbox/AP75'][0]
        self.log['mskAP'], self.log['mAP50'], self.log['mAP75'] = stat['segm/AP'][0], stat['segm/AP50'][0], stat['segm/AP75'][0]
        self.log['bAP-l'], self.log['bAP-m'], self.log['bAP-s'] = stat['bbox/APl'][0], stat['bbox/APm'][0], stat['bbox/APs'][0]
        self.log['mAP-l'], self.log['mAP-m'], self.log['mAP-s'] = stat['segm/APl'][0], stat['segm/APm'][0], stat['segm/APs'][0]
        # rank per-category box APs to record the easiest and hardest classes
        all_ap = sorted([(v[0], k.split('AP-')[-1].strip()) for k, v in stat.items() if k.startswith('bbox/AP-')])
        all_ap = [tu[1] for tu in all_ap]
        self.log['easy'] = ' | '.join(all_ap[-7:])
        self.log['hard'] = ' | '.join(all_ap[:7])
        for k in self.log.keys():
            if 'AP' in k:
                self.log[k] = round(self.log[k], 3)
        self.__write_to_log_file({})

    def after_step(self):
        next_iter = self.trainer.iter + 1
        if self.eval_period > 0 and next_iter % self.eval_period == 0:
            self.update_and_write_to_local_log()

        if self.log_period > 0 and next_iter % self.log_period == 0:
            stat = self.trainer.storage.latest()
            remain_secs = round(stat['eta_seconds'][0])
            d = {
                'cfg': self.config_file,
                'rema': str(datetime.timedelta(seconds=remain_secs)),
                'fini': time.strftime("%m-%d %H:%M", time.localtime(time.time() + remain_secs)),
                'cur_iter': f'{next_iter}/{self.trainer.max_iter}',
            }
            self.__write_to_log_file(d)

    def after_train(self):
        self.update_and_write_to_local_log()
        last_boxAP, last_mskAP = round(self.log['boxAP'], 3), round(self.log['mskAP'], 3)
        self.__write_to_log_file({
            'rema': '-',
            'fini': time.strftime("%m-%d %H:%M", time.localtime(time.time() - 120)),
            'last_boxAP': last_boxAP,
            'last_mskAP': last_mskAP,
        })
        time.sleep(5)
        if self.is_master:
            print(f'\n[finished] ========== last_boxAP={last_boxAP}, last_mskAP={last_mskAP} ==========\n')
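
# The status file holds a single JSON line, roughly (all values illustrative):
#   {"cfg": "my_cfg.yaml", "rema": "1:23:45", "fini": "06-01 12:34",
#    "cur_iter": "30000/90000", "boxAP": 41.2, "mskAP": 37.1, ...}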


def build_evaluator(cfg, dataset_name, output_folder=None):
    """
    Create evaluator(s) for a given dataset.
    This uses the special metadata "evaluator_type" associated with each builtin dataset.
    For your own dataset, you can simply create an evaluator manually in your
    script and do not have to worry about the hacky if-else logic here.
    """
    if output_folder is None:
        output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")
    evaluator_list = []
    evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type
    if evaluator_type in ["sem_seg", "coco_panoptic_seg"]:
        evaluator_list.append(
            SemSegEvaluator(
                dataset_name,
                distributed=True,
                output_dir=output_folder,
            )
        )
    if evaluator_type in ["coco", "coco_panoptic_seg"]:
        evaluator_list.append(COCOEvaluator(dataset_name, output_dir=output_folder))
    if evaluator_type == "coco_panoptic_seg":
        evaluator_list.append(COCOPanopticEvaluator(dataset_name, output_folder))
    if evaluator_type == "cityscapes_instance":
        return CityscapesInstanceEvaluator(dataset_name)
    if evaluator_type == "cityscapes_sem_seg":
        return CityscapesSemSegEvaluator(dataset_name)
    if evaluator_type == "pascal_voc":
        return PascalVOCDetectionEvaluator(dataset_name)
    if evaluator_type == "lvis":
        return LVISEvaluator(dataset_name, output_dir=output_folder)
    if len(evaluator_list) == 0:
        raise NotImplementedError(
            "no Evaluator for the dataset {} with the type {}".format(dataset_name, evaluator_type)
        )
    if len(evaluator_list) == 1:
        return evaluator_list[0]
    return DatasetEvaluators(evaluator_list)


if __name__ == "__main__":
    args = default_argument_parser().parse_args()
    print("Command Line Args:", args)
    launch(
        main,
        args.num_gpus,
        num_machines=args.num_machines,
        machine_rank=args.machine_rank,
        dist_url=args.dist_url,
        args=(args,),
    )