You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

151 lines
5.8 KiB

# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from paddle.utils import try_import
from paddlers.models.ppdet.core.workspace import register, serializable
from paddlers.models.ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
def print_prune_params(model):
model_dict = model.state_dict()
for key in model_dict.keys():
weight_name = model_dict[key].name
logger.info('Parameter name: {}, shape: {}'.format(
weight_name, model_dict[key].shape))
@register
@serializable
class Pruner(object):
def __init__(self,
criterion,
pruned_params,
pruned_ratios,
print_params=False):
super(Pruner, self).__init__()
assert criterion in ['l1_norm', 'fpgm'], \
"unsupported prune criterion: {}".format(criterion)
self.criterion = criterion
self.pruned_params = pruned_params
self.pruned_ratios = pruned_ratios
self.print_params = print_params
def __call__(self, model):
# FIXME: adapt to network graph when Training and inference are
# inconsistent, now only supports prune inference network graph.
model.eval()
paddleslim = try_import('paddleslim')
from paddleslim.analysis import dygraph_flops as flops
input_spec = [{
"image": paddle.ones(
shape=[1, 3, 640, 640], dtype='float32'),
"im_shape": paddle.full(
[1, 2], 640, dtype='float32'),
"scale_factor": paddle.ones(
shape=[1, 2], dtype='float32')
}]
if self.print_params:
print_prune_params(model)
ori_flops = flops(model, input_spec) / (1000**3)
logger.info("FLOPs before pruning: {}GFLOPs".format(ori_flops))
if self.criterion == 'fpgm':
pruner = paddleslim.dygraph.FPGMFilterPruner(model, input_spec)
elif self.criterion == 'l1_norm':
pruner = paddleslim.dygraph.L1NormFilterPruner(model, input_spec)
logger.info("pruned params: {}".format(self.pruned_params))
pruned_ratios = [float(n) for n in self.pruned_ratios]
ratios = {}
for i, param in enumerate(self.pruned_params):
ratios[param] = pruned_ratios[i]
pruner.prune_vars(ratios, [0])
pruned_flops = flops(model, input_spec) / (1000**3)
logger.info("FLOPs after pruning: {}GFLOPs; pruned ratio: {}".format(
pruned_flops, (ori_flops - pruned_flops) / ori_flops))
return model
@register
@serializable
class PrunerQAT(object):
def __init__(self, criterion, pruned_params, pruned_ratios,
print_prune_params, quant_config, print_qat_model):
super(PrunerQAT, self).__init__()
assert criterion in ['l1_norm', 'fpgm'], \
"unsupported prune criterion: {}".format(criterion)
# Pruner hyperparameter
self.criterion = criterion
self.pruned_params = pruned_params
self.pruned_ratios = pruned_ratios
self.print_prune_params = print_prune_params
# QAT hyperparameter
self.quant_config = quant_config
self.print_qat_model = print_qat_model
def __call__(self, model):
# FIXME: adapt to network graph when Training and inference are
# inconsistent, now only supports prune inference network graph.
model.eval()
paddleslim = try_import('paddleslim')
from paddleslim.analysis import dygraph_flops as flops
input_spec = [{
"image": paddle.ones(
shape=[1, 3, 640, 640], dtype='float32'),
"im_shape": paddle.full(
[1, 2], 640, dtype='float32'),
"scale_factor": paddle.ones(
shape=[1, 2], dtype='float32')
}]
if self.print_prune_params:
print_prune_params(model)
ori_flops = flops(model, input_spec) / 1000
logger.info("FLOPs before pruning: {}GFLOPs".format(ori_flops))
if self.criterion == 'fpgm':
pruner = paddleslim.dygraph.FPGMFilterPruner(model, input_spec)
elif self.criterion == 'l1_norm':
pruner = paddleslim.dygraph.L1NormFilterPruner(model, input_spec)
logger.info("pruned params: {}".format(self.pruned_params))
pruned_ratios = [float(n) for n in self.pruned_ratios]
ratios = {}
for i, param in enumerate(self.pruned_params):
ratios[param] = pruned_ratios[i]
pruner.prune_vars(ratios, [0])
pruned_flops = flops(model, input_spec) / 1000
logger.info("FLOPs after pruning: {}GFLOPs; pruned ratio: {}".format(
pruned_flops, (ori_flops - pruned_flops) / ori_flops))
self.quanter = paddleslim.dygraph.quant.QAT(config=self.quant_config)
self.quanter.quantize(model)
if self.print_qat_model:
logger.info("Quantized model:")
logger.info(model)
return model
def save_quantized_model(self, layer, path, input_spec=None, **config):
self.quanter.save_quantized_model(
model=layer, path=path, input_spec=input_spec, **config)