#!/usr/bin/env python

import os
import os.path as osp
import argparse
from operator import itemgetter

import numpy as np
import paddle
from paddle.inference import Config
from paddle.inference import create_predictor
from paddle.inference import PrecisionType
from paddlers.tasks import load_model
from paddlers.utils import logging

from config_utils import parse_configs


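# argparse's `type=bool` treats any non-empty string (including "False") as
# True, so this helper is used to parse boolean command-line flags, e.g.
# _bool('false') -> False, _bool('True') -> True.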
class _bool(object):
    def __new__(cls, x):
        if isinstance(x, str):
            if x.lower() == 'false':
                return False
            elif x.lower() == 'true':
                return True
        return bool(x)


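# Wraps a Paddle Inference predictor for TIPC benchmarking: builds the
# inference config, runs batched prediction over a file list, and optionally
# collects timing statistics with auto_log.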
class TIPCPredictor(object):
    def __init__(self,
                 model_dir,
                 device='cpu',
                 gpu_id=0,
                 cpu_thread_num=1,
                 use_mkl=True,
                 mkl_thread_num=4,
                 use_trt=False,
                 memory_optimize=True,
                 trt_precision_mode='fp32',
                 benchmark=False,
                 model_name='',
                 batch_size=1):
        self.model_dir = model_dir
        # `with_net=False` skips building the dygraph network; inference runs
        # through the exported static-graph model files instead.
        self._model = load_model(model_dir, with_net=False)

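        # Map the textual precision mode to the corresponding Paddle
        # Inference enum value.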
        if trt_precision_mode.lower() == 'fp32':
            trt_precision_mode = PrecisionType.Float32
        elif trt_precision_mode.lower() == 'fp16':
            trt_precision_mode = PrecisionType.Float16
        else:
            logging.error(
                "TensorRT precision mode {} is invalid. Supported modes are fp32 and fp16."
                .format(trt_precision_mode),
                exit=True)

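        # Build the inference config and instantiate the predictor.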
        self.config = self.get_config(
            device=device,
            gpu_id=gpu_id,
            cpu_thread_num=cpu_thread_num,
            use_mkl=use_mkl,
            mkl_thread_num=mkl_thread_num,
            use_trt=use_trt,
            use_glog=False,
            memory_optimize=memory_optimize,
            max_trt_batch_size=1,
            trt_precision_mode=trt_precision_mode)
        self.predictor = create_predictor(self.config)

        self.batch_size = batch_size

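        # When benchmarking, wrap the run in an auto_log logger that records
        # per-stage (preprocess / inference / postprocess) timings.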
        if benchmark:
            import auto_log
            pid = os.getpid()
            self.autolog = auto_log.AutoLogger(
                model_name=model_name,
                model_precision=trt_precision_mode,
                batch_size=batch_size,
                data_shape='dynamic',
                save_path=None,
                inference_config=self.config,
                pids=pid,
                process_name=None,
                gpu_ids=0,
                time_keys=[
                    'preprocess_time', 'inference_time', 'postprocess_time'
                ],
                warmup=0,
                logger=logging)
        self.benchmark = benchmark

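    # Assemble a paddle.inference.Config for the requested device, toggling
    # IR optimization, TensorRT, and MKL-DNN according to model support.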
    def get_config(self, device, gpu_id, cpu_thread_num, use_mkl,
                   mkl_thread_num, use_trt, use_glog, memory_optimize,
                   max_trt_batch_size, trt_precision_mode):
        config = Config(
            osp.join(self.model_dir, 'model.pdmodel'),
            osp.join(self.model_dir, 'model.pdiparams'))

        if device == 'gpu':
            config.enable_use_gpu(200, gpu_id)
            config.switch_ir_optim(True)
            if use_trt:
                if self._model.model_type == 'segmenter':
                    logging.warning(
                        "Semantic segmentation models do not support TensorRT "
                        "acceleration, so TensorRT is forcibly disabled.")
                elif self._model.model_type == 'detector' and 'RCNN' in self._model.__class__.__name__:
                    logging.warning(
                        "RCNN models do not support TensorRT acceleration, "
                        "so TensorRT is forcibly disabled.")
                else:
                    config.enable_tensorrt_engine(
                        workspace_size=1 << 10,
                        max_batch_size=max_trt_batch_size,
                        min_subgraph_size=3,
                        precision_mode=trt_precision_mode,
                        use_static=False,
                        use_calib_mode=False)
        else:
            config.disable_gpu()
            config.set_cpu_math_library_num_threads(cpu_thread_num)
            if use_mkl:
                if self._model.__class__.__name__ == 'MaskRCNN':
                    logging.warning(
                        "MaskRCNN does not support MKL-DNN, so MKL-DNN is "
                        "forcibly disabled.")
                else:
                    try:
                        # Cache 10 different input shapes for MKL-DNN to
                        # avoid memory leaks.
                        config.set_mkldnn_cache_capacity(10)
                        config.enable_mkldnn()
                        config.set_cpu_math_library_num_threads(mkl_thread_num)
                    except Exception:
                        logging.warning(
                            "The current environment does not support MKL-DNN, "
                            "so MKL-DNN is disabled.")

        if not use_glog:
            config.disable_glog_info()
        if memory_optimize:
            config.enable_memory_optim()
        config.switch_use_feed_fetch_ops(False)
        return config

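    # Apply the task's test-time transforms and pack the results into the
    # input dict expected by the exported model; the keys depend on the task.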
    def preprocess(self, images, transforms):
        preprocessed_samples = self._model.preprocess(
            images, transforms, to_tensor=False)
        if self._model.model_type == 'classifier':
            preprocessed_samples = {'image': preprocessed_samples[0]}
        elif self._model.model_type == 'segmenter':
            preprocessed_samples = {
                'image': preprocessed_samples[0],
                'ori_shape': preprocessed_samples[1]
            }
        elif self._model.model_type == 'detector':
            # Detector inputs are left as returned by self._model.preprocess.
            pass
        elif self._model.model_type == 'change_detector':
            preprocessed_samples = {
                'image': preprocessed_samples[0],
                'image2': preprocessed_samples[1],
                'ori_shape': preprocessed_samples[2]
            }
        elif self._model.model_type == 'restorer':
            preprocessed_samples = {
                'image': preprocessed_samples[0],
                'tar_shape': preprocessed_samples[1]
            }
        else:
            logging.error(
                "Invalid model type {}.".format(self._model.model_type),
                exit=True)
        return preprocessed_samples

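    # Map raw network outputs to task-specific prediction dicts, delegating
    # to the loaded model's own postprocessing.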
    def postprocess(self,
                    net_outputs,
                    topk=1,
                    ori_shape=None,
                    tar_shape=None,
                    transforms=None):
        if self._model.model_type == 'classifier':
            # Clamp top-k to the number of classes.
            true_topk = min(self._model.num_classes, topk)
            if self._model.postprocess is None:
                self._model.build_postprocess_from_labels(true_topk)
            # XXX: Convert the ndarray to a tensor, as
            # self._model.postprocess requires tensor input.
            assert len(net_outputs) == 1
            net_outputs = paddle.to_tensor(net_outputs[0])
            outputs = self._model.postprocess(net_outputs)
            class_ids = map(itemgetter('class_ids'), outputs)
            scores = map(itemgetter('scores'), outputs)
            label_names = map(itemgetter('label_names'), outputs)
            preds = [{
                'class_ids_map': l,
                'scores_map': s,
                'label_names_map': n,
            } for l, s, n in zip(class_ids, scores, label_names)]
        elif self._model.model_type in ('segmenter', 'change_detector'):
            label_map, score_map = self._model.postprocess(
                net_outputs,
                batch_origin_shape=ori_shape,
                transforms=transforms.transforms)
            preds = [{
                'label_map': l,
                'score_map': s
            } for l, s in zip(label_map, score_map)]
        elif self._model.model_type == 'detector':
            net_outputs = {
                k: v
                for k, v in zip(['bbox', 'bbox_num', 'mask'], net_outputs)
            }
            preds = self._model.postprocess(net_outputs)
        elif self._model.model_type == 'restorer':
            res_maps = self._model.postprocess(
                net_outputs[0],
                batch_tar_shape=tar_shape,
                transforms=transforms.transforms)
            preds = [{'res_map': res_map} for res_map in res_maps]
        else:
            logging.error(
                "Invalid model type {}.".format(self._model.model_type),
                exit=True)

        return preds

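    # Single feed -> run -> fetch pass through the predictor, with optional
    # auto_log timing around each stage.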
    def _run(self, images, topk=1, transforms=None, time_it=False):
        if self.benchmark and time_it:
            self.autolog.times.start()

        preprocessed_input = self.preprocess(images, transforms)

        input_names = self.predictor.get_input_names()
        for name in input_names:
            input_tensor = self.predictor.get_input_handle(name)
            input_tensor.copy_from_cpu(preprocessed_input[name])

        if self.benchmark and time_it:
            self.autolog.times.stamp()

        self.predictor.run()

        output_names = self.predictor.get_output_names()
        net_outputs = []
        for name in output_names:
            output_tensor = self.predictor.get_output_handle(name)
            net_outputs.append(output_tensor.copy_to_cpu())

        if self.benchmark and time_it:
            self.autolog.times.stamp()

        res = self.postprocess(
            net_outputs,
            topk,
            ori_shape=preprocessed_input.get('ori_shape', None),
            tar_shape=preprocessed_input.get('tar_shape', None),
            transforms=transforms)

        if self.benchmark and time_it:
            self.autolog.times.end(stamp=True)

        return res

    def predict(self, data_dir, file_list, topk=1, warmup_iters=5):
        transforms = self._model.test_transforms

        # Warm up. The for-else construct re-reads the file list until
        # `warmup_iters` untimed batches have been run.
        iters = 0
        while True:
            for images in self._parse_lines(data_dir, file_list):
                if iters >= warmup_iters:
                    break
                self._run(
                    images=images,
                    topk=topk,
                    transforms=transforms,
                    time_it=False)
                iters += 1
            else:
                continue
            break

        results = []
        for images in self._parse_lines(data_dir, file_list):
            res = self._run(
                images=images, topk=topk, transforms=transforms, time_it=True)
            results.append(res)
        return results

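    # Read the evaluation file list and yield batches of image paths (pairs
    # of paths for change detection); a final partial batch is also yielded.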
    def _parse_lines(self, data_dir, file_list):
        with open(file_list, 'r') as f:
            batch = []
            for line in f:
                items = line.strip().split()
                items = [osp.join(data_dir, item) for item in items]
                if self._model.model_type == 'change_detector':
                    batch.append((items[0], items[1]))
                else:
                    batch.append(items[0])
                if len(batch) == self.batch_size:
                    yield batch
                    batch.clear()
            if 0 < len(batch) < self.batch_size:
                yield batch


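# Command-line entry point used by the TIPC scripts. A typical invocation
# (script name and paths are illustrative) might look like:
#
#   python infer.py --config configs/task.yaml --model_dir ./inference_model \
#       --device gpu --use_trt True --precision fp16 --benchmark True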
if __name__ == '__main__':
    parser = argparse.ArgumentParser()

    parser.add_argument('--config', type=str)
    parser.add_argument('--inherit_off', action='store_true')
    parser.add_argument('--model_dir', type=str, default='./')
    parser.add_argument(
        '--device', type=str, choices=['cpu', 'gpu'], default='cpu')
    parser.add_argument('--enable_mkldnn', type=_bool, default=False)
    parser.add_argument('--cpu_threads', type=int, default=10)
    parser.add_argument('--use_trt', type=_bool, default=False)
    parser.add_argument(
        '--precision', type=str, choices=['fp32', 'fp16'], default='fp16')
    parser.add_argument('--batch_size', type=int, default=1)
    parser.add_argument('--benchmark', type=_bool, default=False)
    parser.add_argument('--model_name', type=str, default='')

    args = parser.parse_args()

    cfg = parse_configs(args.config, not args.inherit_off)
    eval_dataset = cfg['datasets']['eval']
    data_dir = eval_dataset.args['data_dir']
    file_list = eval_dataset.args['file_list']

    predictor = TIPCPredictor(
        args.model_dir,
        device=args.device,
        cpu_thread_num=args.cpu_threads,
        use_mkl=args.enable_mkldnn,
        mkl_thread_num=args.cpu_threads,
        use_trt=args.use_trt,
        trt_precision_mode=args.precision,
        benchmark=args.benchmark,
        # Forward these so the auto_log report reflects the actual run.
        model_name=args.model_name,
        batch_size=args.batch_size)

    predictor.predict(data_dir, file_list)

    if args.benchmark:
        predictor.autolog.report()