You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

170 lines
6.6 KiB

# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import platform
import paddle
from ppcls.utils import logger
def retrieval_eval(engine, epoch_id=0):
engine.model.eval()
# step1. build gallery
if engine.gallery_query_dataloader is not None:
gallery_feas, gallery_img_id, gallery_unique_id = cal_feature(
engine, name='gallery_query')
query_feas, query_img_id, query_query_id = gallery_feas, gallery_img_id, gallery_unique_id
else:
gallery_feas, gallery_img_id, gallery_unique_id = cal_feature(
engine, name='gallery')
query_feas, query_img_id, query_query_id = cal_feature(
engine, name='query')
# step2. do evaluation
sim_block_size = engine.config["Global"].get("sim_block_size", 64)
sections = [sim_block_size] * (len(query_feas) // sim_block_size)
if len(query_feas) % sim_block_size:
sections.append(len(query_feas) % sim_block_size)
fea_blocks = paddle.split(query_feas, num_or_sections=sections)
if query_query_id is not None:
query_id_blocks = paddle.split(query_query_id, num_or_sections=sections)
image_id_blocks = paddle.split(query_img_id, num_or_sections=sections)
metric_key = None
if engine.eval_loss_func is None:
metric_dict = {metric_key: 0.}
else:
metric_dict = dict()
for block_idx, block_fea in enumerate(fea_blocks):
similarity_matrix = paddle.matmul(
block_fea, gallery_feas, transpose_y=True)
if query_query_id is not None:
query_id_block = query_id_blocks[block_idx]
query_id_mask = (query_id_block != gallery_unique_id.t())
image_id_block = image_id_blocks[block_idx]
image_id_mask = (image_id_block != gallery_img_id.t())
keep_mask = paddle.logical_or(query_id_mask, image_id_mask)
similarity_matrix = similarity_matrix * keep_mask.astype(
"float32")
else:
keep_mask = None
metric_tmp = engine.eval_metric_func(similarity_matrix,
image_id_blocks[block_idx],
gallery_img_id, keep_mask)
for key in metric_tmp:
if key not in metric_dict:
metric_dict[key] = metric_tmp[key] * block_fea.shape[
0] / len(query_feas)
else:
metric_dict[key] += metric_tmp[key] * block_fea.shape[
0] / len(query_feas)
metric_info_list = []
for key in metric_dict:
if metric_key is None:
metric_key = key
metric_info_list.append("{}: {:.5f}".format(key, metric_dict[key]))
metric_msg = ", ".join(metric_info_list)
logger.info("[Eval][Epoch {}][Avg]{}".format(epoch_id, metric_msg))
return metric_dict[metric_key]
def cal_feature(engine, name='gallery'):
all_feas = None
all_image_id = None
all_unique_id = None
has_unique_id = False
if name == 'gallery':
dataloader = engine.gallery_dataloader
elif name == 'query':
dataloader = engine.query_dataloader
elif name == 'gallery_query':
dataloader = engine.gallery_query_dataloader
else:
raise RuntimeError("Only support gallery or query dataset")
max_iter = len(dataloader) - 1 if platform.system() == "Windows" else len(
dataloader)
for idx, batch in enumerate(dataloader): # load is very time-consuming
if idx >= max_iter:
break
if idx % engine.config["Global"]["print_batch_step"] == 0:
logger.info(
f"{name} feature calculation process: [{idx}/{len(dataloader)}]")
if engine.use_dali:
batch = [
paddle.to_tensor(batch[0]['data']),
paddle.to_tensor(batch[0]['label'])
]
batch = [paddle.to_tensor(x) for x in batch]
batch[1] = batch[1].reshape([-1, 1]).astype("int64")
if len(batch) == 3:
has_unique_id = True
batch[2] = batch[2].reshape([-1, 1]).astype("int64")
out = engine.model(batch[0], batch[1])
if "Student" in out:
out = out["Student"]
batch_feas = out["features"]
# do norm
if engine.config["Global"].get("feature_normalize", True):
feas_norm = paddle.sqrt(
paddle.sum(paddle.square(batch_feas), axis=1, keepdim=True))
batch_feas = paddle.divide(batch_feas, feas_norm)
# do binarize
if engine.config["Global"].get("feature_binarize") == "round":
batch_feas = paddle.round(batch_feas).astype("float32") * 2.0 - 1.0
if engine.config["Global"].get("feature_binarize") == "sign":
batch_feas = paddle.sign(batch_feas).astype("float32")
if all_feas is None:
all_feas = batch_feas
if has_unique_id:
all_unique_id = batch[2]
all_image_id = batch[1]
else:
all_feas = paddle.concat([all_feas, batch_feas])
all_image_id = paddle.concat([all_image_id, batch[1]])
if has_unique_id:
all_unique_id = paddle.concat([all_unique_id, batch[2]])
if engine.use_dali:
dataloader.reset()
if paddle.distributed.get_world_size() > 1:
feat_list = []
img_id_list = []
unique_id_list = []
paddle.distributed.all_gather(feat_list, all_feas)
paddle.distributed.all_gather(img_id_list, all_image_id)
all_feas = paddle.concat(feat_list, axis=0)
all_image_id = paddle.concat(img_id_list, axis=0)
if has_unique_id:
paddle.distributed.all_gather(unique_id_list, all_unique_id)
all_unique_id = paddle.concat(unique_id_list, axis=0)
logger.info("Build {} done, all feat shape: {}, begin to eval..".format(
name, all_feas.shape))
return all_feas, all_image_id, all_unique_id