Merge branch 'det1' of https://github.com/juncaipeng/PaddleRS into det1

3 years ago · 806335ad29
parent dbc2490423 cb370b169b
commit 806335ad29
11 changed files with 358 additions and 16 deletions
--- a/paddlers/datasets/__init__.py
+++ b/paddlers/datasets/__init__.py
@ -1,2 +1,3 @@
 from .voc import VOCDetection
 from .seg_dataset import SegDataset
 from .raster import Raster
--- a/paddlers/datasets/raster.py
+++ b/paddlers/datasets/raster.py
@ -0,0 +1,139 @@
 # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #    http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import os.path as osp
 import numpy as np
 from typing import List, Tuple, Union
 from paddlers.utils import raster2uint8
 try:
    from osgeo import gdal
 except:
    import gdal
 class Raster:
     """Read raster imagery from a GDAL-readable file or a ``.npy`` array file.

     Files whose extension is ``.npy`` are loaded with ``np.load``; every other
     file is opened with ``gdal.Open``.

     NOTE(review): ``.npy`` arrays are assumed to be laid out as (height, width)
     or (height, width, bands) -- confirm against the files used in practice.

     Arrays returned by :meth:`getArray` have shape (height, width) for a
     single-band raster and (height, width, n_selected_bands) otherwise.

     Attributes set by the constructor: ``path``, ``bands``, ``width``,
     ``height``, ``to_uint8`` and ``band_list``.
     """

     def __init__(self,
                  path: str,
                  band_list: Union[List[int], Tuple[int], None]=None,
                  to_uint8: bool=False) -> None:
         """Open the raster at ``path``.

         Args:
             path (str): The path of raster.
             band_list (Union[List[int], Tuple[int], None], optional):
                 band list (start with 1) or None (all of bands). Defaults to None.
             to_uint8 (bool, optional):
                 Convert uint8 or return raw data. Defaults to False.

         Raises:
             ValueError: If ``path`` does not exist, the file cannot be opened
                 by GDAL, or ``band_list`` is invalid.
         """
         super(Raster, self).__init__()
         if not osp.exists(path):
             raise ValueError("The path {0} not exists.".format(path))
         self.path = path
         if osp.splitext(path)[-1] == ".npy":
             self.__src_data = np.load(path)
         else:
             self.__src_data = gdal.Open(path)
             # Without gdal.UseExceptions(), gdal.Open signals failure by
             # returning None; fail here instead of on the first attribute use.
             if self.__src_data is None:
                 raise ValueError(
                     "The file {0} can not be opened by GDAL.".format(path))
         self.__getInfo()
         self.to_uint8 = to_uint8
         self.setBands(band_list)

     def setBands(self,
                  band_list: Union[List[int], Tuple[int], None]) -> None:
         """Select the bands returned by :meth:`getArray`.

         Args:
             band_list (Union[List[int], Tuple[int], None]):
                 band list (start with 1) or None (all of bands).

         Raises:
             ValueError: If ``band_list`` is empty, longer than the number of
                 bands, or contains an index outside [1, bands].
         """
         if band_list is not None:
             if len(band_list) == 0:
                 raise ValueError("band_list must not be empty.")
             if len(band_list) > self.bands:
                 raise ValueError(
                     "The length of band_list must not exceed {0}.".format(
                         str(self.bands)))
             if max(band_list) > self.bands or min(band_list) < 1:
                 raise ValueError(
                     "The range of band_list must within [1, {0}].".format(
                         str(self.bands)))
         self.band_list = band_list

     def getArray(self,
                  start_loc: Union[List[int], Tuple[int], None]=None,
                  block_size: Union[List[int], Tuple[int]]=[512, 512]) -> np.ndarray:
         """Read the whole image or one block of it.

         Args:
             start_loc (Union[List[int], Tuple[int], None], optional):
                 (x, y) of the upper left corner of the block; None returns
                 the full image. Defaults to None.
             block_size (Union[List[int], Tuple[int]], optional):
                 (x size, y size) of the block. Defaults to [512, 512].
                 The default list is only read, never modified.

         Returns:
             np.ndarray: The full image, or a block of exactly ``block_size``
                 pixels that is zero-padded where it extends past the image.
         """
         if start_loc is None:
             return self.__readArray()
         return self.__getBlock(start_loc, block_size)

     def __getInfo(self) -> None:
         """Fill ``bands``, ``width`` and ``height`` from the opened source."""
         src = self.__src_data
         if isinstance(src, np.ndarray):
             if src.ndim == 2:
                 self.height, self.width = src.shape
                 self.bands = 1
             elif src.ndim == 3:
                 self.height, self.width, self.bands = src.shape
             else:
                 raise ValueError(
                     "The npy array must have 2 or 3 dimensions, not {0}.".format(
                         src.ndim))
         else:
             self.bands = src.RasterCount
             self.width = src.RasterXSize
             self.height = src.RasterYSize

     def __readNumpy(self, window: Union[None, List[int], Tuple[int]]) -> np.ndarray:
         """Return the (windowed, band-selected) data of a ``.npy`` source as a copy."""
         ima = self.__src_data
         if window is not None:
             xoff, yoff, xsize, ysize = window
             ima = ima[yoff:yoff + ysize, xoff:xoff + xsize]
         if ima.ndim == 3:
             if self.bands == 1:
                 # Single-band rasters are always returned without a band axis.
                 return ima[:, :, 0].copy()
             if self.band_list is not None:
                 # band_list is 1-based; list indexing already yields a copy.
                 return ima[:, :, [b - 1 for b in self.band_list]]
         return ima.copy()

     def __readGdal(self, window: Union[None, List[int], Tuple[int]]) -> np.ndarray:
         """Return the (windowed, band-selected) data of a GDAL dataset."""
         src = self.__src_data
         args = () if window is None else tuple(window)
         if self.band_list is None:
             ima = src.ReadAsArray(*args)
         else:
             ima = np.stack(
                 [src.GetRasterBand(b).ReadAsArray(*args) for b in self.band_list],
                 axis=0)
         if self.bands == 1:
             # Stacking a selected band adds a leading axis; drop it so the
             # result is (height, width) whether or not band_list was given.
             return ima[0] if ima.ndim == 3 else ima
         # GDAL delivers (bands, height, width); callers get channels last.
         return ima.transpose((1, 2, 0))

     def __readArray(self, window: Union[None, List[int], Tuple[int]]=None) -> np.ndarray:
         """Read the full image, or ``window`` = [xoff, yoff, xsize, ysize]."""
         if isinstance(self.__src_data, np.ndarray):
             ima = self.__readNumpy(window)
         else:
             ima = self.__readGdal(window)
         # Complex samples in a single-band raster are treated as SAR data and
         # replaced by their magnitude.
         if self.bands == 1 and np.iscomplexobj(ima):
             ima = np.abs(ima)
         if self.to_uint8 is True:
             ima = raster2uint8(ima)
         return ima

     def __getBlock(self,
                    start_loc: Union[List[int], Tuple[int]],
                    block_size: Union[List[int], Tuple[int]]=[512, 512]) -> np.ndarray:
         """Read one block and zero-pad it to ``block_size``.

         Raises:
             ValueError: If an argument does not have two items, ``start_loc``
                 lies outside the image, or ``block_size`` is not positive.
         """
         if len(start_loc) != 2 or len(block_size) != 2:
             raise ValueError("The length start_loc/block_size must be 2.")
         xoff, yoff = start_loc
         xsize, ysize = block_size
         # An offset equal to the width/height would produce an empty read.
         if not (0 <= xoff < self.width and 0 <= yoff < self.height):
             raise ValueError(
                 "start_loc must be within [0-{0}, 0-{1}].".format(
                     str(self.width), str(self.height)))
         xoff, yoff, xsize, ysize = int(xoff), int(yoff), int(xsize), int(ysize)
         if xsize <= 0 or ysize <= 0:
             raise ValueError("block_size must be positive.")
         # Clip the read window to the image; the remainder stays zero.
         read_w = min(xsize, self.width - xoff)
         read_h = min(ysize, self.height - yoff)
         ima = self.__readArray([xoff, yoff, read_w, read_h])
         h, w = ima.shape[:2]
         # Rows correspond to y and columns to x; the channel axis (if any) is
         # taken from the data so that a band selection is respected.
         tmp = np.zeros((ysize, xsize) + ima.shape[2:], dtype=ima.dtype)
         tmp[:h, :w] = ima
         return tmp
--- a/paddlers/tools/yolo_cluster.py
+++ b/paddlers/tools/yolo_cluster.py
@ -99,7 +99,7 @@ class YOLOAnchorCluster(BaseAnchorCluster):
            num_anchors (int): number of clusters
            dataset (DataSet): DataSet instance, VOC or COCO
            image_size (list or int): [h, w], being an int means image height and image width are the same.
-            cache (bool): whether using cache。 Defaults to True.
+            cache (bool): whether using cache. Defaults to True.
            cache_path (str or None, optional): cache directory path. If None, use `data_dir` of dataset. Defaults to None.
            iters (int, optional): iters of kmeans algorithm. Defaults to 300.
            gen_iters (int, optional): iters of genetic algorithm. Defaults to 1000.
--- a/paddlers/transforms/batch_operators.py
+++ b/paddlers/transforms/batch_operators.py
@ -69,7 +69,7 @@ class BatchRandomResize(Transform):
    """
    Resize a batch of input to random sizes.
-    Attention：If interp is 'RANDOM', the interpolation method will be chose randomly.
+    Attention: If interp is 'RANDOM', the interpolation method will be chose randomly.
    Args:
        target_sizes (List[int], List[list or tuple] or Tuple[list or tuple]):
@ -108,7 +108,7 @@ class BatchRandomResize(Transform):
 class BatchRandomResizeByShort(Transform):
    """Resize a batch of input to random sizes with keeping the aspect ratio.
-    Attention：If interp is 'RANDOM', the interpolation method will be chose randomly.
+    Attention: If interp is 'RANDOM', the interpolation method will be chose randomly.
    Args:
        short_sizes (List[int], Tuple[int]): Target sizes of the shorter side of the image(s).
--- a/paddlers/transforms/img_decoder.py
+++ b/paddlers/transforms/img_decoder.py
@ -1,5 +1,3 @@
 # copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@ -21,6 +19,7 @@ import copy
 import random
 import imghdr
 from PIL import Image
 try:
    from collections.abc import Sequence
 except Exception:
@ -103,7 +102,7 @@ class ImgDecode(Transform):
                return cv2.imread(img_path, cv2.IMREAD_ANYDEPTH |
                                  cv2.IMREAD_ANYCOLOR | cv2.IMREAD_COLOR)
            else:
-                return cv2.imread(im_file, cv2.IMREAD_ANYDEPTH |
+                return cv2.imread(img_path, cv2.IMREAD_ANYDEPTH |
                                  cv2.IMREAD_ANYCOLOR)
        elif ext == '.npy':
            return np.load(img_path)
--- a/paddlers/transforms/operators.py
+++ b/paddlers/transforms/operators.py
@ -236,9 +236,9 @@ class Resize(Transform):
    """
    Resize input.
-    - If target_size is an int，resize the image(s) to (target_size, target_size).
+    - If target_size is an int, resize the image(s) to (target_size, target_size).
    - If target_size is a list or tuple, resize the image(s) to target_size.
-    Attention：If interp is 'RANDOM', the interpolation method will be chose randomly.
+    Attention: If interp is 'RANDOM', the interpolation method will be chose randomly.
    Args:
        target_size (int, List[int] or Tuple[int]): Target size. If int, the height and width share the same target_size.
@ -347,7 +347,7 @@ class RandomResize(Transform):
    """
    Resize input to random sizes.
-    Attention：If interp is 'RANDOM', the interpolation method will be chose randomly.
+    Attention: If interp is 'RANDOM', the interpolation method will be chose randomly.
    Args:
        target_sizes (List[int], List[list or tuple] or Tuple[list or tuple]):
@ -388,7 +388,7 @@ class ResizeByShort(Transform):
    """
    Resize input with keeping the aspect ratio.
-    Attention：If interp is 'RANDOM', the interpolation method will be chose randomly.
+    Attention: If interp is 'RANDOM', the interpolation method will be chose randomly.
    Args:
        short_size (int): Target size of the shorter side of the image(s).
@ -427,7 +427,7 @@ class RandomResizeByShort(Transform):
    """
    Resize input to random sizes with keeping the aspect ratio.
-    Attention：If interp is 'RANDOM', the interpolation method will be chose randomly.
+    Attention: If interp is 'RANDOM', the interpolation method will be chose randomly.
    Args:
        short_sizes (List[int]): Target size of the shorter side of the image(s).
@ -865,8 +865,8 @@ class RandomCrop(Transform):
 class RandomScaleAspect(Transform):
    """
    Crop input image(s) and resize back to original sizes.
-    Args：
+    Args: 
-        min_scale (float)：Minimum ratio between the cropped region and the original image.
+        min_scale (float): Minimum ratio between the cropped region and the original image.
            If 0, image(s) will not be cropped. Defaults to .5.
        aspect_ratio (float): Aspect ratio of cropped region. Defaults to .33.
    """
@ -1262,7 +1262,7 @@ class RandomBlur(Transform):
    """
    Randomly blur input image(s).
-    Args：
+    Args: 
        prob (float): Probability of blurring.
    """
--- a/paddlers/utils/__init__.py
+++ b/paddlers/utils/__init__.py
@ -22,3 +22,4 @@ from .env import get_environ_info, get_num_workers, init_parallel_env
 from .download import download_and_decompress, decompress
 from .stats import SmoothedValue, TrainingStats
 from .shm import _get_shared_memory_size_in_M
 from .convert import raster2uint8
--- a/paddlers/utils/convert.py
+++ b/paddlers/utils/convert.py
@ -0,0 +1,95 @@
 # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #    http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import numpy as np
 import operator
 from functools import reduce
 def raster2uint8(image: np.ndarray) -> np.ndarray:
     """Convert a raster image to uint8.

     uint8 input is returned as-is (the same object). uint16 input gets a
     per-band 2%-98% linear stretch. float32 input is first histogram-equalized
     per band and then stretched the same way. The input array is never
     modified.

     Args:
         image (np.ndarray): Image of shape (H, W) or (H, W, C).

     Returns:
         np.ndarray: image on uint8, same shape as the input.

     Raises:
         ValueError: If the dtype is not uint8, uint16 or float32.
     """
     dtype = image.dtype.name
     if dtype not in ("uint8", "uint16", "float32"):
         raise ValueError(f"'dtype' must be uint8/uint16/float32, not {dtype}.")
     if dtype == "uint8":
         return image
     if image.size == 0:
         # Percentiles and min/max are undefined for an empty image.
         return image.astype(np.uint8)
     if dtype == "float32":
         image = _sample_norm(image)
     return _two_percentLinear(image)


 def _two_percentLinear(image: np.ndarray, max_out: int=255, min_out: int=0) -> np.ndarray:
     """Apply a 2% linear stretch to every band.

     Values are clipped to the [2nd, 98th] percentile range of their band and
     mapped linearly onto [min_out, max_out]; the result is truncated to uint8.
     A band whose two percentiles coincide is mapped to ``min_out`` entirely,
     which avoids a division by zero.
     """

     def _gray_process(gray):
         gray = gray.astype(np.float64)
         high_value = np.percentile(gray, 98)
         low_value = np.percentile(gray, 2)
         if not high_value > low_value:
             return np.full(gray.shape, min_out, dtype=np.float64)
         truncated_gray = np.clip(gray, low_value, high_value)
         scale = (max_out - min_out) / (high_value - low_value)
         return (truncated_gray - low_value) * scale + min_out

     if len(image.shape) == 3:
         bands = [_gray_process(image[:, :, b]) for b in range(image.shape[-1])]
         result = np.stack(bands, axis=2)
     else:  # if len(image.shape) == 2
         result = _gray_process(image)
     return np.uint8(result)


 def _sample_norm(image: np.ndarray, NUMS: int=65536) -> np.ndarray:
     """Histogram-equalize every band and return the result as uint8.

     Each band is rescaled from its own [min, max] range onto [0, NUMS - 1] so
     that data of any value range is spread over the NUMS histogram bins, then
     equalized with ``_stretch`` and normalised by NUMS. A constant band (or one
     whose min/max comparison fails, e.g. because of NaN) becomes all zeros.
     """

     def _equalize(band):
         # astype copies, so the caller's image is left untouched.
         band = band.astype(np.float64)
         low, high = band.min(), band.max()
         if high > low:
             band = (band - low) / (high - low) * (NUMS - 1)
         else:
             band = np.zeros_like(band)
         return _stretch(band, NUMS) / float(NUMS)

     if len(image.shape) == 3:
         bands = [_equalize(image[:, :, b]) for b in range(image.shape[-1])]
         stretched_img = np.stack(bands, axis=2)
     else:  # if len(image.shape) == 2
         stretched_img = _equalize(image)
     return np.uint8(stretched_img * 255)


 def _stretch(ima: np.ndarray, NUMS: int) -> np.ndarray:
     """Histogram-equalize ``ima`` over NUMS unit-width bins.

     Returns a new float64 array with values in [0, NUMS - 1]; ``ima`` itself
     is not modified. A sample's bin is ``floor(value)`` clipped to
     [0, NUMS - 1], so non-integer input is supported.
     """
     hist = _histogram(ima, NUMS)
     total = hist.sum()
     if NUMS < 2 or total == 0:
         return np.zeros(ima.shape, dtype=np.float64)
     # step size
     step = total / (NUMS - 1)
     # Balanced lookup table: lut[i] = (samples in bins below i) / step.
     lut = (np.cumsum(hist) - hist) / step
     index = np.clip(np.floor(ima), 0, NUMS - 1).astype(np.intp)
     return lut[index]


 def _histogram(ima: np.ndarray, NUMS: int) -> np.ndarray:
     """Count the samples of ``ima`` per unit-width bin.

     ``hist[i]`` is the number of values in [i, i + 1); the last bin also
     collects every value >= NUMS - 1. Values below 0 are not counted.
     """
     ordered = np.sort(ima, axis=None)
     # edges[i] = number of samples strictly below i
     edges = np.searchsorted(ordered, np.arange(NUMS))
     edges = np.append(edges, ordered.size)
     return np.diff(edges)
--- a/requirements.txt
+++ b/requirements.txt
@ -8,10 +8,10 @@ paddleslim == 2.2.1
 shapely
 paddlepaddle-gpu >= 2.2.0
 opencv-python
-scikit-learn==0.20.3
+scikit-learn == 0.20.3
 lap
 motmetrics
 matplotlib
 chardet
 openpyxl
-gdal
+GDAL >= 3.2.2
--- a/tutorials/train/README.md
+++ b/tutorials/train/README.md
@ -0,0 +1,53 @@
 # 使用教程——训练模型
 本目录下整理了使用PaddleRS训练模型的示例代码，代码中均提供了示例数据的自动下载，并均使用单张GPU卡进行训练。
 |代码 | 模型任务 | 数据 |
 |------|--------|---------|
 |object_detection/ppyolo.py | 目标检测PPYOLO | 昆虫检测 |
 |semantic_segmentation/deeplabv3p_resnet50_vd.py | 语义分割DeepLabV3+ | 视盘分割 |
 <!-- 可参考API接口说明了解示例代码中的API：
 * [数据集读取API](../../docs/apis/datasets.md)
 * [数据预处理和数据增强API](../../docs/apis/transforms/transforms.md)
 * [模型API/模型加载API](../../docs/apis/models/README.md)
 * [预测结果可视化API](../../docs/apis/visualize.md) -->
 ## 环境准备
 - [PaddlePaddle安装](https://www.paddlepaddle.org.cn/install/quick)
 * 版本要求：PaddlePaddle>=2.2.0
 <!-- - [PaddleRS安装](../../docs/install.md) -->
 ## 开始训练
 * 修改tutorials/train/semantic_segmentation/deeplabv3p_resnet50_vd.py中sys.path路径
 ```
 sys.path.append("your/PaddleRS/path")
 ```
 * 在安装PaddleRS后，使用如下命令开始训练，代码会自动下载训练数据, 并均使用单张GPU卡进行训练。
 ```commandline
 export CUDA_VISIBLE_DEVICES=0
 python tutorials/train/semantic_segmentation/deeplabv3p_resnet50_vd.py
 ```
 * 若需使用多张GPU卡进行训练，例如使用2张卡时执行：
 ```commandline
 python -m paddle.distributed.launch --gpus 0,1 tutorials/train/semantic_segmentation/deeplabv3p_resnet50_vd.py
 ```
 使用多卡时，参考[训练参数调整](../../docs/parameters.md)调整学习率和批量大小。
 ## VisualDL可视化训练指标
 在模型训练过程中，将`train`函数的`use_vdl`参数设为True，训练日志会自动以VisualDL的格式保存在`save_dir`（用户自己指定的路径）下的`vdl_log`目录中。用户可以使用如下命令启动VisualDL服务，查看可视化指标：
 ```commandline
 visualdl --logdir output/deeplabv3p_resnet50_vd/vdl_log --port 8001
 ```
 服务启动后，使用浏览器打开 http://0.0.0.0:8001 或 http://localhost:8001
--- a/tutorials/train/object_detection/ppyolo.py
+++ b/tutorials/train/object_detection/ppyolo.py
@ -0,0 +1,54 @@
 # Tutorial script: train a PP-YOLO detector (ResNet50_vd_dcn backbone) on a
 # VOC-format dataset read from the `insect_det` directory.
 # The dataset paths below are relative to the current working directory and
 # this script does not download the data itself.
 import os.path as osp
 import sys

 # Make the repository root importable without editing a machine-specific
 # path: this file lives in <repo>/tutorials/train/object_detection/.
 sys.path.append(
     osp.abspath(osp.join(osp.dirname(__file__), "..", "..", "..")))

 import paddlers as pdrs
 from paddlers import transforms as T

 # Augmentation pipeline applied to the training samples.
 train_transforms = T.Compose([
     T.MixupImage(mixup_epoch=-1),
     T.RandomDistort(),
     T.RandomExpand(im_padding_value=[123.675, 116.28, 103.53]),
     T.RandomCrop(),
     T.RandomHorizontalFlip(),
     T.BatchRandomResize(
         target_sizes=[320, 352, 384, 416, 448, 480, 512, 544, 576, 608],
         interp='RANDOM'),
     T.Normalize(
         mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
 ])

 # Evaluation uses a fixed input size and the same normalization.
 eval_transforms = T.Compose([
     T.Resize(
         target_size=608, interp='CUBIC'),
     T.Normalize(
         mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
 ])

 train_dataset = pdrs.datasets.VOCDetection(
     data_dir='insect_det',
     file_list='insect_det/train_list.txt',
     label_list='insect_det/labels.txt',
     transforms=train_transforms,
     shuffle=True)

 eval_dataset = pdrs.datasets.VOCDetection(
     data_dir='insect_det',
     file_list='insect_det/val_list.txt',
     label_list='insect_det/labels.txt',
     transforms=eval_transforms,
     shuffle=False)

 # The number of classes is taken from the label list of the training set.
 num_classes = len(train_dataset.labels)
 model = pdrs.tasks.det.PPYOLO(num_classes=num_classes, backbone='ResNet50_vd_dcn')

 # Checkpoints and VisualDL logs are written below `save_dir`.
 model.train(
     num_epochs=200,
     train_dataset=train_dataset,
     train_batch_size=8,
     eval_dataset=eval_dataset,
     pretrain_weights='COCO',
     learning_rate=0.005 / 12,
     warmup_steps=500,
     warmup_start_lr=0.0,
     save_interval_epochs=5,
     lr_decay_epochs=[85, 135],
     save_dir='output/ppyolo_r50vd_dcn',
     use_vdl=True)