PaddleRS/paddlers/models/ppcls/engine/evaluation/retrieval.py

# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import platform
import paddle
from ppcls.utils import logger


def retrieval_eval(engine, epoch_id=0):
    """Run retrieval evaluation and return the primary metric value.

    Gallery and query features are extracted with ``cal_feature``; when
    ``engine.gallery_query_dataloader`` is set, the same features are used as
    both gallery and query. Queries are split into blocks of
    ``Global.sim_block_size`` rows (default 64), the similarity of each block
    against the whole gallery is computed, and the per-block metrics are
    combined as an average weighted by block size.

    Args:
        engine: Evaluation engine. This function reads its ``model``,
            ``config``, ``gallery_query_dataloader`` and ``eval_metric_func``
            attributes.
        epoch_id: Epoch number; used only in the log message.

    Returns:
        The value of the first metric reported by ``engine.eval_metric_func``,
        or ``0.`` when no metric function is configured.
    """
    engine.model.eval()
    # step1. build gallery
    if engine.gallery_query_dataloader is not None:
        gallery_feas, gallery_img_id, gallery_unique_id = cal_feature(
            engine, name='gallery_query')
        query_feas, query_img_id, query_query_id = (
            gallery_feas, gallery_img_id, gallery_unique_id)
    else:
        gallery_feas, gallery_img_id, gallery_unique_id = cal_feature(
            engine, name='gallery')
        query_feas, query_img_id, query_query_id = cal_feature(
            engine, name='query')

    # step2. do evaluation
    num_query = len(query_feas)
    sim_block_size = engine.config["Global"].get("sim_block_size", 64)
    sections = [sim_block_size] * (num_query // sim_block_size)
    if num_query % sim_block_size:
        sections.append(num_query % sim_block_size)
    fea_blocks = paddle.split(query_feas, num_or_sections=sections)
    image_id_blocks = paddle.split(query_img_id, num_or_sections=sections)
    query_id_blocks = None
    if query_query_id is not None:
        query_id_blocks = paddle.split(query_query_id, num_or_sections=sections)
    metric_key = None

    # Guard on the callable that is actually invoked below. The previous
    # check looked at ``engine.eval_loss_func``, which this function never
    # uses, so metrics were skipped (or ``None`` was called) depending on an
    # unrelated setting.
    eval_metric_func = getattr(engine, "eval_metric_func", None)
    if eval_metric_func is None:
        metric_dict = {metric_key: 0.}
    else:
        metric_dict = dict()
        for block_idx, block_fea in enumerate(fea_blocks):
            image_id_block = image_id_blocks[block_idx]
            similarity_matrix = paddle.matmul(
                block_fea, gallery_feas, transpose_y=True)
            if query_query_id is not None:
                query_id_block = query_id_blocks[block_idx]
                query_id_mask = (query_id_block != gallery_unique_id.t())
                image_id_mask = (image_id_block != gallery_img_id.t())

                # Keep a (query, gallery) pair unless both ids are equal;
                # the similarity of dropped pairs is zeroed out.
                keep_mask = paddle.logical_or(query_id_mask, image_id_mask)
                similarity_matrix = similarity_matrix * keep_mask.astype(
                    "float32")
            else:
                keep_mask = None

            metric_tmp = eval_metric_func(similarity_matrix, image_id_block,
                                          gallery_img_id, keep_mask)

            # Weight each block's metric by its share of the queries.
            block_size = block_fea.shape[0]
            for key in metric_tmp:
                weighted = metric_tmp[key] * block_size / num_query
                if key in metric_dict:
                    metric_dict[key] += weighted
                else:
                    metric_dict[key] = weighted

    metric_info_list = []
    for key in metric_dict:
        # The first metric encountered is the one returned to the caller.
        if metric_key is None:
            metric_key = key
        metric_info_list.append("{}: {:.5f}".format(key, metric_dict[key]))
    metric_msg = ", ".join(metric_info_list)
    logger.info("[Eval][Epoch {}][Avg]{}".format(epoch_id, metric_msg))

    return metric_dict[metric_key]


def cal_feature(engine, name='gallery'):
    """Extract features and ids for every batch of one evaluation dataloader.

    Each batch is run through ``engine.model``; the ``"features"`` output
    (taken from the ``"Student"`` entry when present) is optionally
    L2-normalized (``Global.feature_normalize``, default on) and binarized
    (``Global.feature_binarize`` set to ``"round"`` or ``"sign"``). When
    running with more than one process, results are all-gathered.

    Args:
        engine: Evaluation engine. This function reads its ``model``,
            ``config``, ``use_dali`` and the dataloader attribute selected
            by ``name``.
        name: Which dataloader to use: ``'gallery'``, ``'query'`` or
            ``'gallery_query'``.

    Returns:
        A tuple ``(all_feas, all_image_id, all_unique_id)`` of tensors
        concatenated along axis 0. ``all_unique_id`` is ``None`` when the
        batches carry no third element.

    Raises:
        RuntimeError: If ``name`` is not supported, or if no batch was
            processed.
    """
    if name == 'gallery':
        dataloader = engine.gallery_dataloader
    elif name == 'query':
        dataloader = engine.query_dataloader
    elif name == 'gallery_query':
        dataloader = engine.gallery_query_dataloader
    else:
        raise RuntimeError("Only support gallery or query dataset")

    global_cfg = engine.config["Global"]
    do_normalize = global_cfg.get("feature_normalize", True)
    binarize_mode = global_cfg.get("feature_binarize")

    num_batches = len(dataloader)
    # On Windows the final batch is skipped.
    max_iter = num_batches - 1 if platform.system() == "Windows" else num_batches

    # Collect per-batch tensors and concatenate once after the loop, instead
    # of re-copying the growing result on every iteration.
    feas_list = []
    image_id_list = []
    unique_id_list = []
    has_unique_id = False

    for idx, batch in enumerate(dataloader):  # load is very time-consuming
        if idx >= max_iter:
            break
        if idx % global_cfg["print_batch_step"] == 0:
            logger.info(
                f"{name} feature calculation process: [{idx}/{num_batches}]")
        if engine.use_dali:
            # DALI yields a list whose first item holds 'data' and 'label'.
            batch = [batch[0]['data'], batch[0]['label']]
        batch = [paddle.to_tensor(x) for x in batch]
        batch[1] = batch[1].reshape([-1, 1]).astype("int64")
        if len(batch) == 3:
            has_unique_id = True
            batch[2] = batch[2].reshape([-1, 1]).astype("int64")
        out = engine.model(batch[0], batch[1])
        if "Student" in out:
            out = out["Student"]
        batch_feas = out["features"]

        # do norm
        if do_normalize:
            feas_norm = paddle.sqrt(
                paddle.sum(paddle.square(batch_feas), axis=1, keepdim=True))
            batch_feas = paddle.divide(batch_feas, feas_norm)

        # do binarize
        if binarize_mode == "round":
            batch_feas = paddle.round(batch_feas).astype("float32") * 2.0 - 1.0
        elif binarize_mode == "sign":
            batch_feas = paddle.sign(batch_feas).astype("float32")

        feas_list.append(batch_feas)
        image_id_list.append(batch[1])
        if has_unique_id:
            unique_id_list.append(batch[2])

    if engine.use_dali:
        dataloader.reset()

    if not feas_list:
        # Fail with a clear message instead of an AttributeError on None.
        raise RuntimeError(
            "No batch was processed for the {} dataloader".format(name))

    all_feas = paddle.concat(feas_list)
    all_image_id = paddle.concat(image_id_list)
    all_unique_id = paddle.concat(unique_id_list) if has_unique_id else None

    if paddle.distributed.get_world_size() > 1:
        gathered_feas = []
        gathered_image_id = []
        paddle.distributed.all_gather(gathered_feas, all_feas)
        paddle.distributed.all_gather(gathered_image_id, all_image_id)
        all_feas = paddle.concat(gathered_feas, axis=0)
        all_image_id = paddle.concat(gathered_image_id, axis=0)
        if has_unique_id:
            gathered_unique_id = []
            paddle.distributed.all_gather(gathered_unique_id, all_unique_id)
            all_unique_id = paddle.concat(gathered_unique_id, axis=0)

    logger.info("Build {} done, all feat shape: {}, begin to eval..".format(
        name, all_feas.shape))
    return all_feas, all_image_id, all_unique_id
[Feature] Init add clas task (need to fix) 3 years ago			`# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`
			`from __future__ import absolute_import`
			`from __future__ import division`
			`from __future__ import print_function`

			`import platform`
			`import paddle`
			`from ppcls.utils import logger`


			`def retrieval_eval(engine, epoch_id=0):`
			`engine.model.eval()`
			`# step1. build gallery`
			`if engine.gallery_query_dataloader is not None:`
			`gallery_feas, gallery_img_id, gallery_unique_id = cal_feature(`
			`engine, name='gallery_query')`
			`query_feas, query_img_id, query_query_id = gallery_feas, gallery_img_id, gallery_unique_id`
			`else:`
			`gallery_feas, gallery_img_id, gallery_unique_id = cal_feature(`
			`engine, name='gallery')`
			`query_feas, query_img_id, query_query_id = cal_feature(`
			`engine, name='query')`

			`# step2. do evaluation`
			`sim_block_size = engine.config["Global"].get("sim_block_size", 64)`
			`sections = [sim_block_size] * (len(query_feas) // sim_block_size)`
			`if len(query_feas) % sim_block_size:`
			`sections.append(len(query_feas) % sim_block_size)`
			`fea_blocks = paddle.split(query_feas, num_or_sections=sections)`
			`if query_query_id is not None:`
Reformat all codes (#38) 3 years ago			`query_id_blocks = paddle.split(query_query_id, num_or_sections=sections)`
[Feature] Init add clas task (need to fix) 3 years ago			`image_id_blocks = paddle.split(query_img_id, num_or_sections=sections)`
			`metric_key = None`

			`if engine.eval_loss_func is None:`
			`metric_dict = {metric_key: 0.}`
			`else:`
			`metric_dict = dict()`
			`for block_idx, block_fea in enumerate(fea_blocks):`
			`similarity_matrix = paddle.matmul(`
			`block_fea, gallery_feas, transpose_y=True)`
			`if query_query_id is not None:`
			`query_id_block = query_id_blocks[block_idx]`
			`query_id_mask = (query_id_block != gallery_unique_id.t())`

			`image_id_block = image_id_blocks[block_idx]`
			`image_id_mask = (image_id_block != gallery_img_id.t())`

			`keep_mask = paddle.logical_or(query_id_mask, image_id_mask)`
			`similarity_matrix = similarity_matrix * keep_mask.astype(`
			`"float32")`
			`else:`
			`keep_mask = None`

			`metric_tmp = engine.eval_metric_func(similarity_matrix,`
			`image_id_blocks[block_idx],`
			`gallery_img_id, keep_mask)`

			`for key in metric_tmp:`
			`if key not in metric_dict:`
			`metric_dict[key] = metric_tmp[key] * block_fea.shape[`
			`0] / len(query_feas)`
			`else:`
			`metric_dict[key] += metric_tmp[key] * block_fea.shape[`
			`0] / len(query_feas)`

			`metric_info_list = []`
			`for key in metric_dict:`
			`if metric_key is None:`
			`metric_key = key`
			`metric_info_list.append("{}: {:.5f}".format(key, metric_dict[key]))`
			`metric_msg = ", ".join(metric_info_list)`
			`logger.info("[Eval][Epoch {}][Avg]{}".format(epoch_id, metric_msg))`

			`return metric_dict[metric_key]`


			`def cal_feature(engine, name='gallery'):`
			`all_feas = None`
			`all_image_id = None`
			`all_unique_id = None`
			`has_unique_id = False`

			`if name == 'gallery':`
			`dataloader = engine.gallery_dataloader`
			`elif name == 'query':`
			`dataloader = engine.query_dataloader`
			`elif name == 'gallery_query':`
			`dataloader = engine.gallery_query_dataloader`
			`else:`
			`raise RuntimeError("Only support gallery or query dataset")`

			`max_iter = len(dataloader) - 1 if platform.system() == "Windows" else len(`
			`dataloader)`
			`for idx, batch in enumerate(dataloader): # load is very time-consuming`
			`if idx >= max_iter:`
			`break`
			`if idx % engine.config["Global"]["print_batch_step"] == 0:`
			`logger.info(`
Reformat all codes (#38) 3 years ago			`f"{name} feature calculation process: [{idx}/{len(dataloader)}]")`
[Feature] Init add clas task (need to fix) 3 years ago			`if engine.use_dali:`
			`batch = [`
			`paddle.to_tensor(batch[0]['data']),`
			`paddle.to_tensor(batch[0]['label'])`
			`]`
			`batch = [paddle.to_tensor(x) for x in batch]`
			`batch[1] = batch[1].reshape([-1, 1]).astype("int64")`
			`if len(batch) == 3:`
			`has_unique_id = True`
			`batch[2] = batch[2].reshape([-1, 1]).astype("int64")`
			`out = engine.model(batch[0], batch[1])`
			`if "Student" in out:`
			`out = out["Student"]`
			`batch_feas = out["features"]`

			`# do norm`
			`if engine.config["Global"].get("feature_normalize", True):`
			`feas_norm = paddle.sqrt(`
			`paddle.sum(paddle.square(batch_feas), axis=1, keepdim=True))`
			`batch_feas = paddle.divide(batch_feas, feas_norm)`

			`# do binarize`
			`if engine.config["Global"].get("feature_binarize") == "round":`
			`batch_feas = paddle.round(batch_feas).astype("float32") * 2.0 - 1.0`

			`if engine.config["Global"].get("feature_binarize") == "sign":`
			`batch_feas = paddle.sign(batch_feas).astype("float32")`

			`if all_feas is None:`
			`all_feas = batch_feas`
			`if has_unique_id:`
			`all_unique_id = batch[2]`
			`all_image_id = batch[1]`
			`else:`
			`all_feas = paddle.concat([all_feas, batch_feas])`
			`all_image_id = paddle.concat([all_image_id, batch[1]])`
			`if has_unique_id:`
			`all_unique_id = paddle.concat([all_unique_id, batch[2]])`

			`if engine.use_dali:`
			`dataloader.reset()`

			`if paddle.distributed.get_world_size() > 1:`
			`feat_list = []`
			`img_id_list = []`
			`unique_id_list = []`
			`paddle.distributed.all_gather(feat_list, all_feas)`
			`paddle.distributed.all_gather(img_id_list, all_image_id)`
			`all_feas = paddle.concat(feat_list, axis=0)`
			`all_image_id = paddle.concat(img_id_list, axis=0)`
			`if has_unique_id:`
			`paddle.distributed.all_gather(unique_id_list, all_unique_id)`
			`all_unique_id = paddle.concat(unique_id_list, axis=0)`

			`logger.info("Build {} done, all feat shape: {}, begin to eval..".format(`
			`name, all_feas.shape))`
			`return all_feas, all_image_id, all_unique_id`