[Feature] Update tools and add geojson2mask (#77)

3 years ago · 5c2084646a
parent 186c82fef0
commit 5c2084646a
6 changed files with 122 additions and 15 deletions
--- a/docs/data/tools.md
+++ b/docs/data/tools.md
@ -5,6 +5,7 @@
 - `coco2mask`：用于将geojson格式的分割标注标签转换为png格式。
 - `mask2shp`：用于对推理得到的png提取shapefile。
 - `mask2geojson`：用于对推理得到的png提取geojson。
 - `geojson2mask`：用于从geojson和原图中提取mask作为训练标签。
 - `matcher`：用于在推理前匹配两个时段的影像。
 - `spliter`：用于将大图数据进行分割以作为训练数据。
 - `coco_tools`：用于统计处理coco类标注文件。
@ -62,6 +63,19 @@ python mask2geojson.py --mask_path xxx.tif --save_path xxx.json [--epsilon 0]
 - `save_path`：保存geojson的路径。
 - `epsilon`：opencv的简化参数，默认为0。
 ### geojson2mask
 `geojson2mask`的主要功能是从原图和geojson文件中提取mask图像。使用代码如下：
 ```shell
 python geojson2mask.py --image_path xxx.tif --geojson_path xxx.json
 ```
 其中：
 - `image_path`：原图像的路径。
 - `geojson_path`：geojson的路径。
 ### matcher
 `matcher`的主要功能是在进行变化检测的推理前，匹配两期影像的位置，并将转换后的`im2`图像保存在原地址下，命名为`im2_M.tif`。使用代码如下：
@ -82,12 +96,13 @@ python matcher.py --im1_path xxx.tif --im2_path xxx.xxx [--im1_bands 1 2 3] [--i
 `spliter`的主要功能是将大的遥感图像划分为图像块，便于进行训练。使用代码如下：
 ```shell
-python spliter.py --image_path xxx.tif [--block_size 512] [--save_folder output]
+python spliter.py --image_path xxx.tif [--mask_path None] [--block_size 512] [--save_folder dataset]
 ```
 其中：
 - `image_path`：需要切分的图像的路径。
 - `mask_path`：一同切分的标签图像路径，默认没有。
 - `block_size`：切分图像块大小，默认为512。
 - `save_folder`：保存切分后结果的文件夹路径，默认为`dataset`。
--- a/tools/geojson2mask.py
+++ b/tools/geojson2mask.py
@ -0,0 +1,63 @@
 # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #    http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import codecs
 import cv2
 import numpy as np
 import argparse
 import geojson
 from tqdm import tqdm
 from utils import Raster, save_mask_geotiff, Timer
 def _gt_convert(x_geo, y_geo, geotf):
    a = np.array([[geotf[1], geotf[2]], [geotf[4], geotf[5]]])
    b = np.array([x_geo - geotf[0], y_geo - geotf[3]])
    return np.round(np.linalg.solve(a, b)).tolist()  # 解一元二次方程
@Timer
def convert_data(image_path, geojson_path):
    """Rasterize the polygons of a GeoJSON file into a mask aligned with an image.

    The mask has the height and width of the raster opened from `image_path`.
    Every pixel covered by a polygon is set to 1 and all other pixels stay 0.
    The mask is saved next to the GeoJSON file, with the file extension
    replaced by `.tif`, using the projection and geotransform of the raster.

    Args:
        image_path (str): Path of the original image.
        geojson_path (str): Path of the GeoJSON file.

    Raises:
        TypeError: If a feature's geometry type is neither `Polygon` nor
            `MultiPolygon`.
    """
    import os.path as osp  # local import: only needed to build the output path

    raster = Raster(image_path)
    tmp_img = np.zeros((raster.height, raster.width), dtype=np.int32)
    # Use a context manager so the file handle is always closed.
    with codecs.open(geojson_path, "r", encoding="utf-8") as geo_reader:
        feats = geojson.loads(geo_reader.read())["features"]  # all features
    for feat in tqdm(feats):
        geo = feat["geometry"]
        # Normalize both geometry types to a list of polygons, where each
        # polygon is a list of rings (exterior ring first, then any holes).
        if geo["type"] == "Polygon":
            polygons = [geo["coordinates"]]
        elif geo["type"] == "MultiPolygon":
            polygons = geo["coordinates"]
        else:
            raise TypeError("Geometry type must be `Polygon` or `MultiPolygon`, not {}.".format(geo["type"]))
        for rings in polygons:
            xy_rings = [
                np.array([
                    _gt_convert(point[0], point[1], raster.geot)
                    for point in ring
                ]).astype(np.int32)
                for ring in rings
            ]
            if not xy_rings:
                continue  # empty polygon: nothing to draw
            # TODO: Label category
            # Passing all rings of one polygon in a single call lets OpenCV
            # leave interior rings (holes) unfilled.
            cv2.fillPoly(tmp_img, xy_rings, 1)
    # Swap only the final extension; `str.replace` would also rewrite any
    # earlier occurrence of the extension text inside the path.
    save_path = osp.splitext(geojson_path)[0] + ".tif"
    save_mask_geotiff(tmp_img, save_path, raster.proj, raster.geot)
 # Command-line interface: both paths are mandatory.
 parser = argparse.ArgumentParser(description="input parameters")
 parser.add_argument(
     "--image_path",
     type=str,
     required=True,
     help="The path of original image.")
 parser.add_argument(
     "--geojson_path",
     type=str,
     required=True,
     help="The path of geojson.")

 if __name__ == "__main__":
     # Parse the command line and run the conversion.
     args = parser.parse_args()
     convert_data(args.image_path, args.geojson_path)
--- a/tools/mask2geojson.py
+++ b/tools/mask2geojson.py
@ -33,6 +33,9 @@ def _gt_convert(x, y, geotf):
 def convert_data(mask_path, save_path, epsilon=0):
    raster = Raster(mask_path)
    img = raster.getArray()
    ext = save_path.split(".")[-1]
    if ext != "json" and ext != "geojson":
        raise ValueError("The ext of `save_path` must be `json` or `geojson`, not {}.".format(ext))
    geo_writer = codecs.open(save_path, "w", encoding="utf-8")
    clas = np.unique(img)
    cv2_v = (cv2.__version__.split(".")[0] == "3")
@ -69,7 +72,7 @@ parser = argparse.ArgumentParser(description="input parameters")
 parser.add_argument("--mask_path", type=str, required=True, \
                    help="The path of mask tif.")
 parser.add_argument("--save_path", type=str, required=True, \
-                    help="The path to save the results, file suffix is `*.json`.")
+                    help="The path to save the results, file suffix is `*.json/geojson`.")
 parser.add_argument("--epsilon", type=float, default=0, \
                    help="The CV2 simplified parameters, `0` is the default.")
--- a/tools/spliter.py
+++ b/tools/spliter.py
@ -23,33 +23,47 @@ from utils import Raster, Timer
@Timer
def split_data(image_path, mask_path, block_size, save_folder):
    """Split a large image, and optionally its label mask, into square blocks.

    Image blocks are saved as JPEG files under `<save_folder>/images` and, when
    a mask is given, mask blocks are saved as PNG files under
    `<save_folder>/masks`. Files are named `<image name>_<row>_<col>`.

    Args:
        image_path (str): Path of the large image to split.
        mask_path (str|None): Path of the label image to split alongside the
            image, or None to split the image only.
        block_size (int): Side length of each block.
        save_folder (str): Folder in which the results are saved.

    Raises:
        ValueError: If a mask is given and its width or height differs from
            the image's.
    """
    # `exist_ok=True` also covers a pre-existing `save_folder` that lacks
    # the sub-folders.
    os.makedirs(osp.join(save_folder, "images"), exist_ok=True)
    if mask_path is not None:
        os.makedirs(osp.join(save_folder, "masks"), exist_ok=True)
    image_name = image_path.replace("\\", "/").split("/")[-1].split(".")[0]
    image = Raster(image_path, to_uint8=True)
    mask = Raster(mask_path) if mask_path is not None else None
    # Only compare shapes when a mask was actually supplied; `mask` is None
    # in the default (image-only) mode.
    if mask is not None and (image.width != mask.width or
                             image.height != mask.height):
        raise ValueError("image's shape must equal mask's shape.")
    rows = ceil(image.height / block_size)
    cols = ceil(image.width / block_size)
    total_number = int(rows * cols)
    block_shape = (block_size, block_size)
    for r in range(rows):
        for c in range(cols):
            loc_start = (c * block_size, r * block_size)
            tile_name = image_name + "_" + str(r) + "_" + str(c)
            image_tile = Image.fromarray(
                image.getArray(loc_start, block_shape)).convert("RGB")
            image_tile.save(
                osp.join(save_folder, "images", tile_name + ".jpg"), "JPEG")
            if mask is not None:
                mask_tile = Image.fromarray(
                    mask.getArray(loc_start, block_shape)).convert("L")
                mask_tile.save(
                    osp.join(save_folder, "masks", tile_name + ".png"), "PNG")
            print("-- {:d}/{:d} --".format(int(r * cols + c + 1), total_number))
 # Command-line interface of the splitter; only `--image_path` is mandatory.
 parser = argparse.ArgumentParser(description="input parameters")
 parser.add_argument("--image_path", type=str, required=True,
                     help="The path of big image data.")
 parser.add_argument("--mask_path", type=str, default=None,
                     help="The path of big image label data.")
 parser.add_argument("--block_size", type=int, default=512,
                     help="The size of image block, `512` is the default.")
 parser.add_argument("--save_folder", type=str, default="dataset",
                     help="The folder path to save the results, `dataset` is the default.")

 if __name__ == "__main__":
     args = parser.parse_args()
     # Argument order matches `split_data(image_path, mask_path, block_size, save_folder)`.
     split_data(args.image_path, args.mask_path, args.block_size, args.save_folder)
--- a/tools/utils/init.py
+++ b/tools/utils/init.py
@ -16,5 +16,5 @@ import sys
 import os.path as osp
 sys.path.insert(0, osp.abspath(".."))  # add workspace
-from .raster import Raster, raster2uint8
+from .raster import Raster, save_mask_geotiff, raster2uint8
 from .timer import Timer
--- a/tools/utils/raster.py
+++ b/tools/utils/raster.py
@ -192,3 +192,15 @@ class Raster:
            tmp = np.zeros((block_size[0], block_size[1]), dtype=ima.dtype)
            tmp[:h, :w] = ima
        return tmp
 def save_mask_geotiff(mask: np.ndarray, save_path: str, proj: str, geotf: Tuple) -> None:
     """Write a 2-D mask to a single-band, georeferenced GeoTIFF file.

     The file is created with one band of type `gdal.GDT_UInt16`, so the mask
     values are stored as unsigned 16-bit integers.

     Args:
         mask (np.ndarray): 2-D array of shape (height, width) to write.
         save_path (str): Path of the GeoTIFF file to create.
         proj (str): Projection passed to `SetProjection`.
         geotf (Tuple): Geotransform passed to `SetGeoTransform`.

     Raises:
         ValueError: If `mask` is not 2-dimensional.
         RuntimeError: If the output dataset cannot be created.
     """
     if mask.ndim != 2:
         raise ValueError(
             "`mask` must be a 2-D array, but got {} dimension(s).".format(mask.ndim))
     height, width = mask.shape
     driver = gdal.GetDriverByName("GTiff")
     dst_ds = driver.Create(save_path, width, height, 1, gdal.GDT_UInt16)
     # When GDAL exceptions are not enabled, `Create` reports failure by
     # returning None; fail here with a clear message instead of an
     # AttributeError on the next call.
     if dst_ds is None:
         raise RuntimeError("Failed to create GeoTIFF file: {}".format(save_path))
     dst_ds.SetGeoTransform(geotf)
     dst_ds.SetProjection(proj)
     band = dst_ds.GetRasterBand(1)
     band.WriteArray(mask)
     dst_ds.FlushCache()
     # Drop the references so that GDAL can close the dataset.
     band = None
     dst_ds = None