PaddleRS/paddlers/deploy/predictor.py

# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os.path as osp
from operator import itemgetter

import numpy as np
import paddle
from paddle.inference import Config
from paddle.inference import create_predictor
from paddle.inference import PrecisionType

from paddlers.tasks import load_model
from paddlers.utils import logging, Timer


class Predictor(object):
    def __init__(self,
                 model_dir,
                 use_gpu=False,
                 gpu_id=0,
                 cpu_thread_num=1,
                 use_mkl=True,
                 mkl_thread_num=4,
                 use_trt=False,
                 use_glog=False,
                 memory_optimize=True,
                 max_trt_batch_size=1,
                 trt_precision_mode='float32'):
        """ 
        创建Paddle Predictor

        Args:
            model_dir: 模型路径（必须是导出的部署或量化模型）。
            use_gpu: 是否使用GPU，默认为False。
            gpu_id: 使用GPU的ID，默认为0。
            cpu_thread_num：使用cpu进行预测时的线程数，默认为1。
            use_mkl: 是否使用mkldnn计算库，CPU情况下使用，默认为False。
            mkl_thread_num: mkldnn计算线程数，默认为4。
            use_trt: 是否使用TensorRT，默认为False。
            use_glog: 是否启用glog日志, 默认为False。
            memory_optimize: 是否启动内存优化，默认为True。
            max_trt_batch_size: 在使用TensorRT时配置的最大batch size，默认为1。
            trt_precision_mode：在使用TensorRT时采用的精度，可选值['float32', 'float16']。默认为'float32'。
        """

        self.model_dir = model_dir
        self._model = load_model(model_dir, with_net=False)

        if trt_precision_mode.lower() == 'float32':
            trt_precision_mode = PrecisionType.Float32
        elif trt_precision_mode.lower() == 'float16':
            trt_precision_mode = PrecisionType.Float16
        else:
            logging.error(
                "TensorRT precision mode {} is invalid. Supported modes are float32 and float16."
                .format(trt_precision_mode),
                exit=True)

        self.predictor = self.create_predictor(
            use_gpu=use_gpu,
            gpu_id=gpu_id,
            cpu_thread_num=cpu_thread_num,
            use_mkl=use_mkl,
            mkl_thread_num=mkl_thread_num,
            use_trt=use_trt,
            use_glog=use_glog,
            memory_optimize=memory_optimize,
            max_trt_batch_size=max_trt_batch_size,
            trt_precision_mode=trt_precision_mode)
        self.timer = Timer()

    def create_predictor(self,
                         use_gpu=True,
                         gpu_id=0,
                         cpu_thread_num=1,
                         use_mkl=True,
                         mkl_thread_num=4,
                         use_trt=False,
                         use_glog=False,
                         memory_optimize=True,
                         max_trt_batch_size=1,
                         trt_precision_mode=PrecisionType.Float32):
        config = Config(
            osp.join(self.model_dir, 'model.pdmodel'),
            osp.join(self.model_dir, 'model.pdiparams'))

        if use_gpu:
            # 设置GPU初始显存(单位M)和Device ID
            config.enable_use_gpu(200, gpu_id)
            config.switch_ir_optim(True)
            if use_trt:
                if self._model.model_type == 'segmenter':
                    logging.warning(
                        "Semantic segmentation models do not support TensorRT acceleration, "
                        "TensorRT is forcibly disabled.")
                elif 'RCNN' in self._model.__class__.__name__:
                    logging.warning(
                        "RCNN models do not support TensorRT acceleration, "
                        "TensorRT is forcibly disabled.")
                else:
                    config.enable_tensorrt_engine(
                        workspace_size=1 << 10,
                        max_batch_size=max_trt_batch_size,
                        min_subgraph_size=3,
                        precision_mode=trt_precision_mode,
                        use_static=False,
                        use_calib_mode=False)
        else:
            config.disable_gpu()
            config.set_cpu_math_library_num_threads(cpu_thread_num)
            if use_mkl:
                if self._model.__class__.__name__ == 'MaskRCNN':
                    logging.warning(
                        "MaskRCNN does not support MKL-DNN, MKL-DNN is forcibly disabled"
                    )
                else:
                    try:
                        # cache 10 different shapes for mkldnn to avoid memory leak
                        config.set_mkldnn_cache_capacity(10)
                        config.enable_mkldnn()
                        config.set_cpu_math_library_num_threads(mkl_thread_num)
                    except Exception as e:
                        logging.warning(
                            "The current environment does not support MKL-DNN, MKL-DNN is disabled."
                        )
                        pass

        if not use_glog:
            config.disable_glog_info()
        if memory_optimize:
            config.enable_memory_optim()
        config.switch_use_feed_fetch_ops(False)
        predictor = create_predictor(config)
        return predictor

    def preprocess(self, images, transforms):
        preprocessed_samples = self._model._preprocess(
            images, transforms, to_tensor=False)
        if self._model.model_type == 'classifier':
            preprocessed_samples = {'image': preprocessed_samples[0]}
        elif self._model.model_type == 'segmenter':
            preprocessed_samples = {
                'image': preprocessed_samples[0],
                'ori_shape': preprocessed_samples[1]
            }
        elif self._model.model_type == 'detector':
            pass
        elif self._model.model_type == 'changedetector':
            preprocessed_samples = {
                'image': preprocessed_samples[0],
                'image2': preprocessed_samples[1],
                'ori_shape': preprocessed_samples[2]
            }
        else:
            logging.error(
                "Invalid model type {}".format(self._model.model_type),
                exit=True)
        return preprocessed_samples

    def postprocess(self, net_outputs, topk=1, ori_shape=None, transforms=None):
        if self._model.model_type == 'classifier':
            true_topk = min(self._model.num_classes, topk)
            if self._model._postprocess is None:
                self._model.build_postprocess_from_labels(topk)
            # XXX: Convert ndarray to tensor as self._model._postprocess requires
            net_outputs = paddle.to_tensor(net_outputs)
            assert net_outputs.shape[1] == 1
            outputs = self._model._postprocess(net_outputs.squeeze(1))
            class_ids = map(itemgetter('class_ids'), outputs)
            scores = map(itemgetter('scores'), outputs)
            label_names = map(itemgetter('label_names'), outputs)
            preds = [{
                'class_ids_map': l,
                'scores_map': s,
                'label_names_map': n,
            } for l, s, n in zip(class_ids, scores, label_names)]
        elif self._model.model_type in ('segmenter', 'changedetector'):
            label_map, score_map = self._model._postprocess(
                net_outputs,
                batch_origin_shape=ori_shape,
                transforms=transforms.transforms)
            preds = [{
                'label_map': l,
                'score_map': s
            } for l, s in zip(label_map, score_map)]
        elif self._model.model_type == 'detector':
            net_outputs = {
                k: v
                for k, v in zip(['bbox', 'bbox_num', 'mask'], net_outputs)
            }
            preds = self._model._postprocess(net_outputs)
        else:
            logging.error(
                "Invalid model type {}.".format(self._model.model_type),
                exit=True)

        return preds

    def raw_predict(self, inputs):
        """ 接受预处理过后的数据进行预测
            Args:
                inputs(dict): 预处理过后的数据
        """
        input_names = self.predictor.get_input_names()
        for name in input_names:
            input_tensor = self.predictor.get_input_handle(name)
            input_tensor.copy_from_cpu(inputs[name])

        self.predictor.run()
        output_names = self.predictor.get_output_names()
        net_outputs = list()
        for name in output_names:
            output_tensor = self.predictor.get_output_handle(name)
            net_outputs.append(output_tensor.copy_to_cpu())

        return net_outputs

    def _run(self, images, topk=1, transforms=None):
        self.timer.preprocess_time_s.start()
        preprocessed_input = self.preprocess(images, transforms)
        self.timer.preprocess_time_s.end(iter_num=len(images))

        self.timer.inference_time_s.start()
        net_outputs = self.raw_predict(preprocessed_input)
        self.timer.inference_time_s.end(iter_num=1)

        self.timer.postprocess_time_s.start()
        results = self.postprocess(
            net_outputs,
            topk,
            ori_shape=preprocessed_input.get('ori_shape', None),
            transforms=transforms)
        self.timer.postprocess_time_s.end(iter_num=len(images))

        return results

    def predict(self,
                img_file,
                topk=1,
                transforms=None,
                warmup_iters=0,
                repeats=1):
        """ 图片预测
            Args:
                img_file(List[str or tuple or np.ndarray], str, tuple, or np.ndarray):
                    对于场景分类、图像复原、目标检测和语义分割任务来说，该参数可为单一图像路径，或是解码后的、排列格式为（H, W, C）
                    且具有float32类型的BGR图像（表示为numpy的ndarray形式），或者是一组图像路径或np.ndarray对象构成的列表；对于变化检测
                    任务来说，该参数可以为图像路径二元组（分别表示前后两个时相影像路径），或是两幅图像组成的二元组，或者是上述两种二元组
                    之一构成的列表。
                topk(int): 场景分类模型预测时使用，表示预测前topk的结果。默认值为1。
                transforms (paddlers.transforms): 数据预处理操作。默认值为None, 即使用`model.yml`中保存的数据预处理操作。
                warmup_iters (int): 预热轮数，用于评估模型推理以及前后处理速度。若大于1，会预先重复预测warmup_iters，而后才开始正式的预测及其速度评估。默认为0。
                repeats (int): 重复次数，用于评估模型推理以及前后处理速度。若大于1，会预测repeats次取时间平均值。默认值为1。
        """
        if repeats < 1:
            logging.error("`repeats` must be greater than 1.", exit=True)
        if transforms is None and not hasattr(self._model, 'test_transforms'):
            raise Exception("Transforms need to be defined, now is None.")
        if transforms is None:
            transforms = self._model.test_transforms
        if isinstance(img_file, tuple) and len(img_file) != 2:
            raise ValueError(
                f"A change detection model accepts exactly two input images, but there are {len(img_file)}."
            )
        if isinstance(img_file, (str, np.ndarray, tuple)):
            images = [img_file]
        else:
            images = img_file

        for _ in range(warmup_iters):
            self._run(images=images, topk=topk, transforms=transforms)
        self.timer.reset()

        for _ in range(repeats):
            results = self._run(images=images, topk=topk, transforms=transforms)

        self.timer.repeats = repeats
        self.timer.img_num = len(images)
        self.timer.info(average=True)

        if isinstance(img_file, (str, np.ndarray, tuple)):
            results = results[0]

        return results

    def batch_predict(self, image_list, **params):
        return self.predict(img_file=image_list, **params)
[Feature] Add deployment code and docs (#43) 3 years ago			`# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`

			`import os.path as osp`
[Fix] Fix classification deploy code (#105) 2 years ago			`from operator import itemgetter`
[Feature] Add deployment code and docs (#43) 3 years ago
			`import numpy as np`
[Fix] Fix classification deploy code (#105) 2 years ago			`import paddle`
[Feature] Add deployment code and docs (#43) 3 years ago			`from paddle.inference import Config`
			`from paddle.inference import create_predictor`
			`from paddle.inference import PrecisionType`
add license 3 years ago
[Feature] Add deployment code and docs (#43) 3 years ago			`from paddlers.tasks import load_model`
			`from paddlers.utils import logging, Timer`


			`class Predictor(object):`
			`def __init__(self,`
			`model_dir,`
			`use_gpu=False,`
			`gpu_id=0,`
			`cpu_thread_num=1,`
			`use_mkl=True,`
			`mkl_thread_num=4,`
			`use_trt=False,`
			`use_glog=False,`
			`memory_optimize=True,`
			`max_trt_batch_size=1,`
			`trt_precision_mode='float32'):`
			`"""`
			`创建Paddle Predictor`

			`Args:`
			`model_dir: 模型路径（必须是导出的部署或量化模型）。`
			`use_gpu: 是否使用GPU，默认为False。`
			`gpu_id: 使用GPU的ID，默认为0。`
			`cpu_thread_num：使用cpu进行预测时的线程数，默认为1。`
			`use_mkl: 是否使用mkldnn计算库，CPU情况下使用，默认为False。`
			`mkl_thread_num: mkldnn计算线程数，默认为4。`
			`use_trt: 是否使用TensorRT，默认为False。`
			`use_glog: 是否启用glog日志, 默认为False。`
			`memory_optimize: 是否启动内存优化，默认为True。`
			`max_trt_batch_size: 在使用TensorRT时配置的最大batch size，默认为1。`
			`trt_precision_mode：在使用TensorRT时采用的精度，可选值['float32', 'float16']。默认为'float32'。`
			`"""`

			`self.model_dir = model_dir`
			`self._model = load_model(model_dir, with_net=False)`

			`if trt_precision_mode.lower() == 'float32':`
			`trt_precision_mode = PrecisionType.Float32`
			`elif trt_precision_mode.lower() == 'float16':`
			`trt_precision_mode = PrecisionType.Float16`
			`else:`
			`logging.error(`
			`"TensorRT precision mode {} is invalid. Supported modes are float32 and float16."`
			`.format(trt_precision_mode),`
			`exit=True)`

			`self.predictor = self.create_predictor(`
			`use_gpu=use_gpu,`
			`gpu_id=gpu_id,`
			`cpu_thread_num=cpu_thread_num,`
			`use_mkl=use_mkl,`
			`mkl_thread_num=mkl_thread_num,`
			`use_trt=use_trt,`
			`use_glog=use_glog,`
			`memory_optimize=memory_optimize,`
			`max_trt_batch_size=max_trt_batch_size,`
			`trt_precision_mode=trt_precision_mode)`
			`self.timer = Timer()`

			`def create_predictor(self,`
			`use_gpu=True,`
			`gpu_id=0,`
			`cpu_thread_num=1,`
			`use_mkl=True,`
			`mkl_thread_num=4,`
			`use_trt=False,`
			`use_glog=False,`
			`memory_optimize=True,`
			`max_trt_batch_size=1,`
			`trt_precision_mode=PrecisionType.Float32):`
			`config = Config(`
			`osp.join(self.model_dir, 'model.pdmodel'),`
			`osp.join(self.model_dir, 'model.pdiparams'))`

			`if use_gpu:`
			`# 设置GPU初始显存(单位M)和Device ID`
			`config.enable_use_gpu(200, gpu_id)`
			`config.switch_ir_optim(True)`
			`if use_trt:`
			`if self._model.model_type == 'segmenter':`
			`logging.warning(`
			`"Semantic segmentation models do not support TensorRT acceleration, "`
			`"TensorRT is forcibly disabled.")`
			`elif 'RCNN' in self._model.__class__.__name__:`
			`logging.warning(`
			`"RCNN models do not support TensorRT acceleration, "`
			`"TensorRT is forcibly disabled.")`
			`else:`
			`config.enable_tensorrt_engine(`
			`workspace_size=1 << 10,`
			`max_batch_size=max_trt_batch_size,`
			`min_subgraph_size=3,`
			`precision_mode=trt_precision_mode,`
			`use_static=False,`
			`use_calib_mode=False)`
			`else:`
			`config.disable_gpu()`
			`config.set_cpu_math_library_num_threads(cpu_thread_num)`
			`if use_mkl:`
			`if self._model.__class__.__name__ == 'MaskRCNN':`
			`logging.warning(`
			`"MaskRCNN does not support MKL-DNN, MKL-DNN is forcibly disabled"`
			`)`
			`else:`
			`try:`
			`# cache 10 different shapes for mkldnn to avoid memory leak`
			`config.set_mkldnn_cache_capacity(10)`
			`config.enable_mkldnn()`
			`config.set_cpu_math_library_num_threads(mkl_thread_num)`
			`except Exception as e:`
			`logging.warning(`
			`"The current environment does not support MKL-DNN, MKL-DNN is disabled."`
			`)`
			`pass`

			`if not use_glog:`
			`config.disable_glog_info()`
			`if memory_optimize:`
			`config.enable_memory_optim()`
			`config.switch_use_feed_fetch_ops(False)`
			`predictor = create_predictor(config)`
			`return predictor`

			`def preprocess(self, images, transforms):`
			`preprocessed_samples = self._model._preprocess(`
			`images, transforms, to_tensor=False)`
			`if self._model.model_type == 'classifier':`
			`preprocessed_samples = {'image': preprocessed_samples[0]}`
			`elif self._model.model_type == 'segmenter':`
			`preprocessed_samples = {`
			`'image': preprocessed_samples[0],`
			`'ori_shape': preprocessed_samples[1]`
			`}`
			`elif self._model.model_type == 'detector':`
			`pass`
			`elif self._model.model_type == 'changedetector':`
			`preprocessed_samples = {`
			`'image': preprocessed_samples[0],`
			`'image2': preprocessed_samples[1],`
			`'ori_shape': preprocessed_samples[2]`
			`}`
			`else:`
			`logging.error(`
			`"Invalid model type {}".format(self._model.model_type),`
			`exit=True)`
			`return preprocessed_samples`

			`def postprocess(self, net_outputs, topk=1, ori_shape=None, transforms=None):`
			`if self._model.model_type == 'classifier':`
			`true_topk = min(self._model.num_classes, topk)`
[Fix] Fix classification deploy code (#105) 2 years ago			`if self._model._postprocess is None:`
			`self._model.build_postprocess_from_labels(topk)`
			`# XXX: Convert ndarray to tensor as self._model._postprocess requires`
			`net_outputs = paddle.to_tensor(net_outputs)`
			`assert net_outputs.shape[1] == 1`
			`outputs = self._model._postprocess(net_outputs.squeeze(1))`
			`class_ids = map(itemgetter('class_ids'), outputs)`
			`scores = map(itemgetter('scores'), outputs)`
			`label_names = map(itemgetter('label_names'), outputs)`
			`preds = [{`
			`'class_ids_map': l,`
			`'scores_map': s,`
			`'label_names_map': n,`
			`} for l, s, n in zip(class_ids, scores, label_names)]`
[Feature] Add deployment code and docs (#43) 3 years ago			`elif self._model.model_type in ('segmenter', 'changedetector'):`
			`label_map, score_map = self._model._postprocess(`
			`net_outputs,`
			`batch_origin_shape=ori_shape,`
			`transforms=transforms.transforms)`
			`preds = [{`
			`'label_map': l,`
			`'score_map': s`
			`} for l, s in zip(label_map, score_map)]`
			`elif self._model.model_type == 'detector':`
			`net_outputs = {`
			`k: v`
			`for k, v in zip(['bbox', 'bbox_num', 'mask'], net_outputs)`
			`}`
			`preds = self._model._postprocess(net_outputs)`
			`else:`
			`logging.error(`
			`"Invalid model type {}.".format(self._model.model_type),`
			`exit=True)`

			`return preds`

			`def raw_predict(self, inputs):`
			`""" 接受预处理过后的数据进行预测`
			`Args:`
			`inputs(dict): 预处理过后的数据`
			`"""`
			`input_names = self.predictor.get_input_names()`
			`for name in input_names:`
			`input_tensor = self.predictor.get_input_handle(name)`
			`input_tensor.copy_from_cpu(inputs[name])`

			`self.predictor.run()`
			`output_names = self.predictor.get_output_names()`
			`net_outputs = list()`
			`for name in output_names:`
			`output_tensor = self.predictor.get_output_handle(name)`
			`net_outputs.append(output_tensor.copy_to_cpu())`

			`return net_outputs`

			`def _run(self, images, topk=1, transforms=None):`
			`self.timer.preprocess_time_s.start()`
			`preprocessed_input = self.preprocess(images, transforms)`
			`self.timer.preprocess_time_s.end(iter_num=len(images))`

			`self.timer.inference_time_s.start()`
			`net_outputs = self.raw_predict(preprocessed_input)`
			`self.timer.inference_time_s.end(iter_num=1)`

			`self.timer.postprocess_time_s.start()`
			`results = self.postprocess(`
			`net_outputs,`
			`topk,`
			`ori_shape=preprocessed_input.get('ori_shape', None),`
			`transforms=transforms)`
			`self.timer.postprocess_time_s.end(iter_num=len(images))`

			`return results`

			`def predict(self,`
			`img_file,`
			`topk=1,`
			`transforms=None,`
			`warmup_iters=0,`
			`repeats=1):`
			`""" 图片预测`
			`Args:`
[Docs] Update predictor.predict() doc string 3 years ago			`img_file(List[str or tuple or np.ndarray], str, tuple, or np.ndarray):`
[Feature] Add deployment code and docs (#43) 3 years ago			`对于场景分类、图像复原、目标检测和语义分割任务来说，该参数可为单一图像路径，或是解码后的、排列格式为（H, W, C）`
			`且具有float32类型的BGR图像（表示为numpy的ndarray形式），或者是一组图像路径或np.ndarray对象构成的列表；对于变化检测`
			`任务来说，该参数可以为图像路径二元组（分别表示前后两个时相影像路径），或是两幅图像组成的二元组，或者是上述两种二元组`
			`之一构成的列表。`
			`topk(int): 场景分类模型预测时使用，表示预测前topk的结果。默认值为1。`
Update predictor.py 3 years ago			transforms (paddlers.transforms): 数据预处理操作。默认值为None, 即使用`model.yml`中保存的数据预处理操作。
[Feature] Add deployment code and docs (#43) 3 years ago			`warmup_iters (int): 预热轮数，用于评估模型推理以及前后处理速度。若大于1，会预先重复预测warmup_iters，而后才开始正式的预测及其速度评估。默认为0。`
			`repeats (int): 重复次数，用于评估模型推理以及前后处理速度。若大于1，会预测repeats次取时间平均值。默认值为1。`
			`"""`
			`if repeats < 1:`
			logging.error("`repeats` must be greater than 1.", exit=True)
			`if transforms is None and not hasattr(self._model, 'test_transforms'):`
			`raise Exception("Transforms need to be defined, now is None.")`
			`if transforms is None:`
			`transforms = self._model.test_transforms`
			`if isinstance(img_file, tuple) and len(img_file) != 2:`
			`raise ValueError(`
			`f"A change detection model accepts exactly two input images, but there are {len(img_file)}."`
			`)`
			`if isinstance(img_file, (str, np.ndarray, tuple)):`
			`images = [img_file]`
			`else:`
			`images = img_file`

			`for _ in range(warmup_iters):`
			`self._run(images=images, topk=topk, transforms=transforms)`
			`self.timer.reset()`

			`for _ in range(repeats):`
			`results = self._run(images=images, topk=topk, transforms=transforms)`

			`self.timer.repeats = repeats`
			`self.timer.img_num = len(images)`
			`self.timer.info(average=True)`

[Fix] Fix predictor and imgdecoder 2 years ago			`if isinstance(img_file, (str, np.ndarray, tuple)):`
[Feature] Add deployment code and docs (#43) 3 years ago			`results = results[0]`

			`return results`

			`def batch_predict(self, image_list, **params):`
			`return self.predict(img_file=image_list, **params)`