Merge pull request #2 from Bobholamovic/unittest

[Test] Add Unittests
cc committed 3 years ago (committed via GitHub)
commit dce23e4a6e
Changed files (number of changed lines in parentheses):
  1. .github/workflows/build_and_test.yaml (81)
  2. .github/workflows/lint.yaml (25)
  3. README.md (17)
  4. deploy/export/README.md (1)
  5. deploy/export/export_model.py (36)
  6. docs/README.md (4)
  7. docs/apis/model_zoo.md (12)
  8. docs/apis/transforms.md (10)
  9. docs/data/coco_tools_cn.md (4)
  10. docs/data/dataset_summary.md (2)
  11. docs/quick_start.md (2)
  12. paddlers/__init__.py (2)
  13. paddlers/custom_models/__init__.py (2)
  14. paddlers/custom_models/cd/backbones/__init__.py (2)
  15. paddlers/custom_models/cd/bit.py (24)
  16. paddlers/custom_models/cd/fc_ef.py (12)
  17. paddlers/custom_models/cd/fc_siam_conc.py (16)
  18. paddlers/custom_models/cd/fc_siam_diff.py (16)
  19. paddlers/custom_models/cd/layers/__init__.py (2)
  20. paddlers/custom_models/cd/layers/blocks.py (6)
  21. paddlers/custom_models/cd/snunet.py (2)
  22. paddlers/custom_models/cls/condensenet_v2.py (9)
  23. paddlers/custom_models/gan/generators/rcan.py (17)
  24. paddlers/custom_models/gan/rcan_model.py (13)
  25. paddlers/custom_models/seg/farseg.py (33)
  26. paddlers/datasets/__init__.py (2)
  27. paddlers/datasets/cd_dataset.py (1)
  28. paddlers/datasets/clas_dataset.py (1)
  29. paddlers/datasets/coco.py (6)
  30. paddlers/datasets/voc.py (6)
  31. paddlers/deploy/predictor.py (32)
  32. paddlers/models/__init__.py (2)
  33. paddlers/models/ppcls/__init__.py (2)
  34. paddlers/models/ppcls/arch/backbone/model_zoo/xception.py (33)
  35. paddlers/models/ppcls/data/utils/__init__.py (2)
  36. paddlers/models/ppcls/loss/deephashloss.py (12)
  37. paddlers/models/ppcls/loss/googlenetloss.py (5)
  38. paddlers/models/ppcls/loss/multilabelloss.py (6)
  39. paddlers/models/ppcls/loss/pairwisecosface.py (15)
  40. paddlers/models/ppcls/loss/supconloss.py (3)
  41. paddlers/models/ppcls/metric/__init__.py (3)
  42. paddlers/models/ppcls/utils/download.py (3)
  43. paddlers/models/ppdet/metrics/json_results.py (4)
  44. paddlers/models/ppdet/modeling/architectures/meta_arch.py (6)
  45. paddlers/models/ppdet/modeling/backbones/vgg.py (16)
  46. paddlers/models/ppdet/modeling/bbox_utils.py (13)
  47. paddlers/models/ppdet/modeling/layers.py (44)
  48. paddlers/models/ppdet/modeling/ops.py (30)
  49. paddlers/models/ppdet/modeling/post_process.py (2)
  50. paddlers/models/ppdet/modeling/proposal_generator/anchor_generator.py (8)
  51. paddlers/models/ppdet/modeling/proposal_generator/target.py (24)
  52. paddlers/models/ppdet/utils/download.py (6)
  53. paddlers/models/ppgan/apps/__init__.py (2)
  54. paddlers/models/ppgan/apps/drn_predictor.py (33)
  55. paddlers/models/ppgan/apps/esrgan_predictor.py (3)
  56. paddlers/models/ppgan/apps/lesrcnn_predictor.py (31)
  57. paddlers/models/ppgan/apps/midas/blocks.py (102)
  58. paddlers/models/ppgan/apps/midas/midas_net.py (13)
  59. paddlers/models/ppgan/apps/midas/transforms.py (33)
  60. paddlers/models/ppgan/apps/midas/utils.py (4)
  61. paddlers/models/ppgan/apps/mpr_predictor.py (8)
  62. paddlers/models/ppgan/apps/pan_predictor.py (30)
  63. paddlers/models/ppgan/datasets/animeganv2_dataset.py (26)
  64. paddlers/models/ppgan/datasets/base_dataset.py (6)
  65. paddlers/models/ppgan/datasets/base_sr_dataset.py (1)
  66. paddlers/models/ppgan/datasets/builder.py (31)
  67. paddlers/models/ppgan/datasets/common_vision_dataset.py (1)
  68. paddlers/models/ppgan/datasets/firstorder_dataset.py (55)
  69. paddlers/models/ppgan/datasets/image_folder.py (4)
  70. paddlers/models/ppgan/datasets/paired_dataset.py (1)
  71. paddlers/models/ppgan/datasets/preprocess/__init__.py (9)
  72. paddlers/models/ppgan/datasets/preprocess/builder.py (1)
  73. paddlers/models/ppgan/datasets/preprocess/io.py (3)
  74. paddlers/models/ppgan/datasets/preprocess/transforms.py (35)
  75. paddlers/models/ppgan/datasets/single_dataset.py (1)
  76. paddlers/models/ppgan/datasets/starganv2_dataset.py (9)
  77. paddlers/models/ppgan/datasets/unpaired_dataset.py (1)
  78. paddlers/models/ppgan/engine/trainer.py (34)
  79. paddlers/models/ppgan/faceutils/dlibutils/face_align.py (12)
  80. paddlers/models/ppgan/faceutils/face_detection/detection/blazeface/detect.py (3)
  81. paddlers/models/ppgan/faceutils/face_detection/detection/blazeface/net_blazeface.py (60)
  82. paddlers/models/ppgan/faceutils/face_detection/detection/sfd/detect.py (4)
  83. paddlers/models/ppgan/faceutils/face_detection/detection/sfd/net_s3fd.py (90)
  84. paddlers/models/ppgan/faceutils/face_detection/utils.py (16)
  85. paddlers/models/ppgan/faceutils/face_enhancement/face_enhance.py (28)
  86. paddlers/models/ppgan/faceutils/face_segmentation/face_seg.py (7)
  87. paddlers/models/ppgan/metrics/lpips.py (58)
  88. paddlers/models/ppgan/metrics/psnr_ssim.py (5)
  89. paddlers/models/ppgan/models/animeganv2_model.py (10)
  90. paddlers/models/ppgan/models/base_model.py (21)
  91. paddlers/models/ppgan/models/basicvsr_model.py (1)
  92. paddlers/models/ppgan/models/criterions/gan_loss.py (1)
  93. paddlers/models/ppgan/models/criterions/photopen_perceptual_loss.py (26)
  94. paddlers/models/ppgan/models/criterions/pixel_loss.py (32)
  95. paddlers/models/ppgan/models/cycle_gan_model.py (1)
  96. paddlers/models/ppgan/models/dc_gan_model.py (1)
  97. paddlers/models/ppgan/models/discriminators/discriminator_animegan.py (51)
  98. paddlers/models/ppgan/models/discriminators/discriminator_firstorder.py (40)
  99. paddlers/models/ppgan/models/discriminators/discriminator_lapstyle.py (24)
  100. paddlers/models/ppgan/models/discriminators/discriminator_photopen.py (68)
Some files were not shown because too many files have changed in this diff.

@@ -0,0 +1,81 @@
name: build and test
on:
  push:
    branches:
      - develop
      - "tests/**"
    paths-ignore:
      - "docs/**"
      - "README.md"
  pull_request:
    branches:
      - develop
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
jobs:
  build_and_test_cpu:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest, windows-latest]
        python-version: ["3.7", "3.8"]
        include:
          - python-version: "3.7"
            os: windows-latest
            gdal-whl-url: https://download.lfd.uci.edu/pythonlibs/archived/cp37/GDAL-3.3.3-cp37-cp37m-win_amd64.whl
          - python-version: "3.7"
            os: ubuntu-latest
            gdal-whl-url: https://versaweb.dl.sourceforge.net/project/gdal-wheels-for-linux/GDAL-3.4.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl
          - python-version: "3.8"
            os: windows-latest
            gdal-whl-url: https://download.lfd.uci.edu/pythonlibs/archived/GDAL-3.3.3-cp38-cp38-win_amd64.whl
          - python-version: "3.8"
            os: ubuntu-latest
            gdal-whl-url: https://versaweb.dl.sourceforge.net/project/gdal-wheels-for-linux/GDAL-3.4.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl
      fail-fast: false
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      - name: Upgrade pip
        run: python -m pip install pip --upgrade --user
      - name: Install PaddlePaddle
        run: python -m pip install paddlepaddle==2.3.1 -i https://mirror.baidu.com/pypi/simple
      - name: Install PaddleRS
        run: |
          python -m pip install -r requirements.txt
          python -m pip install -e .
      - name: Install GDAL
        run: python -m pip install ${{ matrix.gdal-whl-url }}
      - name: Run unittests
        run: |
          cd tests
          bash run_fast_tests.sh
        shell: bash
  build_and_test_cuda102:
    runs-on: ubuntu-18.04
    container:
      image: registry.baidubce.com/paddlepaddle/paddle:2.3.1-gpu-cuda10.2-cudnn7
    steps:
      - uses: actions/checkout@v3
      - name: Upgrade pip
        run: python3.7 -m pip install pip --upgrade --user
      - name: Install PaddleRS
        run: |
          python3.7 -m pip install -r requirements.txt
          python3.7 -m pip install -e .
      - name: Install GDAL
        run: python3.7 -m pip install https://versaweb.dl.sourceforge.net/project/gdal-wheels-for-linux/GDAL-3.4.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl
      # Do not run unittests, because there is NO GPU in the machine.
      # - name: Run unittests
      #   run: |
      #     cd tests
      #     bash run_fast_tests.sh
      #   shell: bash

@@ -0,0 +1,25 @@
name: lint
on: [push, pull_request]
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python 3.7
        uses: actions/setup-python@v4
        with:
          python-version: 3.7
      - name: Upgrade pip
        run: python -m pip install pip --upgrade --user
      - name: Install pre-commit hooks
        run: |
          pip install pre-commit
          pre-commit install
      - name: Lint
        run: pre-commit run --all-files

@@ -6,9 +6,9 @@
**A high-performance remote sensing image processing development kit built on the PaddlePaddle framework, enabling end-to-end remote sensing deep learning applications, from training through deployment.**
<!-- [![Build Status](https://travis-ci.org/PaddleCV-SIG/PaddleRS.svg?branch=release/0.1)](https://travis-ci.org/PaddleCV-SIG/PaddleRS) -->
<!-- [![Version](https://img.shields.io/github/release/PaddleCV-SIG/PaddleRS.svg)](https://github.com/PaddleCV-SIG/PaddleRS/releases) -->
[![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE)
<!-- [![version](https://img.shields.io/github/release/PaddleCV-SIG/PaddleRS.svg)](https://github.com/PaddleCV-SIG/PaddleRS/releases) -->
[![license](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE)
[![build status](https://github.com/PaddleCV-SIG/PaddleRS/workflows/build_and_test.yaml/badge.svg?branch=develop)](https://github.com/PaddleCV-SIG/PaddleRS/actions)
![python version](https://img.shields.io/badge/python-3.7+-orange.svg)
![support os](https://img.shields.io/badge/os-linux%2C%20win%2C%20mac-yellow.svg)
</div>
@@ -92,7 +92,7 @@ PaddleRS是遥感科研院所、相关高校共同基于飞桨开发的遥感处
<li>ResizeByShort</li>
<li>RandomResizeByShort</li>
<li>ResizeByLong</li>
<li>RandomFlipOrRotation</li>
<li>RandomFlipOrRotate</li>
<li>RandomHorizontalFlip</li>
<li>RandomVerticalFlip</li>
<li>Normalize</li>
@@ -100,13 +100,13 @@ PaddleRS是遥感科研院所、相关高校共同基于飞桨开发的遥感处
<li>RandomCrop</li>
<li>RandomScaleAspect</li>
<li>RandomExpand</li>
<li>Padding</li>
<li>Pad</li>
<li>MixupImage</li>
<li>RandomDistort</li>
<li>RandomBlur</li>
<li>Defogging</li>
<li>DimReducing</li>
<li>BandSelecting</li>
<li>Dehaze</li>
<li>ReduceDim</li>
<li>SelectBand</li>
<li>RandomSwap</li>
</ul>
</td>
@@ -223,4 +223,3 @@ PaddleRS是遥感科研院所、相关高校共同基于飞桨开发的遥感处
year={2022}
}
```

@@ -60,4 +60,3 @@ python deploy/export_model.py --model_dir=./output/deeplabv3p/best_model/ --save
- For YOLO/PPYOLO-family detection models, make sure the input image's `w` and `h` take the same value and are both multiples of 32; when `--fixed_input_shape` is specified, `w` and `h` of R-CNN models must also be multiples of 32.
- When specifying `[w,h]`, separate `w` and `h` with a half-width comma (`,`); no spaces or other characters are allowed between them.
- The larger `w` and `h` are set, the more time and memory/GPU memory the model consumes during inference. However, if `w` and `h` are too small, model accuracy may suffer significantly.
- For the change detection model BIT, make sure `--fixed_input_shape` is specified and contains no negative values: BIT uses spatial attention and needs to read the `b,c,h,w` attributes from tensors, so negative values raise errors.
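A minimal sketch of how such a shape string can be validated before export, mirroring the rules above (this helper is illustrative, not part of PaddleRS):

```python
from ast import literal_eval

def check_fixed_input_shape(shape_str, require_mult_of_32=False):
    """Validate a --fixed_input_shape string such as '[256,256]' or '[1,3,256,256]'.

    Set require_mult_of_32=True for YOLO/PPYOLO models (and for R-CNN models
    when a fixed shape is used), per the notes above.
    """
    shape = literal_eval(shape_str)
    if not isinstance(shape, list) or len(shape) not in (2, 4):
        raise ValueError("Expected [w,h] or [n,c,w,h].")
    w, h = shape[-2], shape[-1]
    if w <= 0 or h <= 0:
        raise ValueError("w and h must be positive integers.")
    if require_mult_of_32 and (w % 32 != 0 or h % 32 != 0):
        raise ValueError("w and h must be multiples of 32.")
    return shape

print(check_fixed_input_shape("[1,3,256,256]", require_mult_of_32=True))
```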

@@ -21,9 +21,23 @@ from paddlers.tasks import load_model
def get_parser():
parser = argparse.ArgumentParser()
parser.add_argument('--model_dir', '-m', type=str, default=None, help='model directory path')
parser.add_argument('--save_dir', '-s', type=str, default=None, help='path to save inference model')
parser.add_argument('--fixed_input_shape', '-fs', type=str, default=None,
parser.add_argument(
'--model_dir',
'-m',
type=str,
default=None,
help='model directory path')
parser.add_argument(
'--save_dir',
'-s',
type=str,
default=None,
help='path to save inference model')
parser.add_argument(
'--fixed_input_shape',
'-fs',
type=str,
default=None,
help="export inference model with fixed input shape: [w,h] or [n,c,w,h]")
return parser
@@ -39,13 +53,17 @@ if __name__ == '__main__':
fixed_input_shape = literal_eval(args.fixed_input_shape)
# Check validity
if not isinstance(fixed_input_shape, list):
raise ValueError("fixed_input_shape should be of None or list type.")
raise ValueError(
"fixed_input_shape should be of None or list type.")
if len(fixed_input_shape) not in (2, 4):
raise ValueError("fixed_input_shape contains an incorrect number of elements.")
raise ValueError(
"fixed_input_shape contains an incorrect number of elements.")
if fixed_input_shape[-1] <= 0 or fixed_input_shape[-2] <= 0:
raise ValueError("the input width and height must be positive integers.")
if len(fixed_input_shape)==4 and fixed_input_shape[1] <= 0:
raise ValueError("the number of input channels must be a positive integer.")
raise ValueError(
"Input width and height must be positive integers.")
if len(fixed_input_shape) == 4 and fixed_input_shape[1] <= 0:
raise ValueError(
"The number of input channels must be a positive integer.")
# Set environment variables
os.environ['PADDLEX_EXPORT_STAGE'] = 'True'
@@ -56,4 +74,4 @@ if __name__ == '__main__':
# Do dynamic-to-static cast
# XXX: Invoke a protected (single underscore) method outside of subclasses.
model._export_inference_model(args.save_dir, fixed_input_shape)

@@ -1,5 +1,3 @@
PaddleSeg commit fec42fd869b6f796c74cd510671595e3512bc8e9
# Development Guidelines
Please note: in the paddlers/models/ppxxx series, do not add, delete, or modify any code beyond changing import paths and supporting multi-channel models.
New models should be placed under the seg, det, cls, and cd directories in paddlers/models/.

@@ -4,18 +4,18 @@ PaddleRS的基础模型库来自[PaddleClas](https://github.com/PaddlePaddle/Pad
## Custom Model Library
| Model Name | Purpose |
| --------------- | -------- |
| FarSeg | Semantic segmentation |
| BIT | Change detection |
| CDNet | Change detection |
| DSIFN | Change detection |
| STANet | Change detection |
| SNUNet | Change detection |
| DSAMNet | Change detection |
| FCEarlyFusion | Change detection |
| FCSiamConc | Change detection |
| FCSiamDiff | Change detection |
## How to Import

@@ -1,6 +1,6 @@
# Data Augmentation
PaddleRS organically integrates the data augmentations required by multiple tasks, all of which are used through `Compose`. For data loading, `ImgDecoder` can read not only three-channel RGB images but also SAR and multi-channel images, with an option to convert to `uint8`. In addition, the following data augmentation methods are provided.
PaddleRS organically integrates the data augmentations required by multiple tasks, all of which are used through `Compose`. For data loading, `DecodeImg` can read not only three-channel RGB images but also SAR and multi-channel images, with an option to convert to `uint8`. In addition, the following data augmentation methods are provided (a usage sketch follows the table).
| Augmentation | Purpose | Task | ... |
| -------------------- | ----------------------------------------------- | -------- | ---- |
@@ -16,13 +16,13 @@ PaddleRS将多种任务需要的数据增强进行了有机整合,均通过`Co
| RandomCrop | Randomly crop the input | All | ... |
| RandomScaleAspect | Crop the input and resize it back to its original size | All | ... |
| RandomExpand | Randomly expand the input by padding with random offsets | All | ... |
| Padding | Pad the input to a specified size | All | ... |
| Pad | Pad the input to a specified size | All | ... |
| MixupImage | Mix two images together with their `gt_bbbox/gt_score` | Object detection | ... |
| RandomDistort | Apply random color distortion to the input | All | ... |
| RandomBlur | Apply random blur to the input | All | ... |
| Defogging | Dehaze the input image | All | ... |
| DimReducing | Reduce the dimensionality of the input image | All | ... |
| BandSelecting | Select bands of the input image | All | ... |
| Dehaze | Dehaze the input image | All | ... |
| ReduceDim | Reduce the dimensionality of the input image | All | ... |
| SelectBand | Select bands of the input image | All | ... |
| RandomSwap | Randomly swap the two input images | Change detection | ... |
| ... | ... | | ... |
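To make the renames above concrete, here is a hedged usage sketch; the constructor arguments are assumptions for illustration, not the verified API:

```python
import paddlers.transforms as T

# Argument names below are assumed for illustration only.
train_transforms = T.Compose([
    T.DecodeImg(to_rgb=False),  # reads RGB, SAR, and multi-band imagery
    T.SelectBand([1, 2, 3]),    # renamed from BandSelecting
    T.RandomFlipOrRotate(),     # renamed from RandomFlipOrRotation
    T.Pad(target_size=256),     # renamed from Padding
    T.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])
```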

@@ -85,7 +85,7 @@ Args_show = True
------------------------------------------------Info------------------------------------------------
json read...
json keys: dict_keys(['info', 'licenses', 'images', 'annotations', 'categories'])
***********************info***********************
Content Type: dict
@@ -409,7 +409,7 @@ Args_show = True
------------------------------------------------Info------------------------------------------------
json read...
json keys: dict_keys(['images', 'categories'])
**********************images**********************
Content Type: list

@@ -215,4 +215,4 @@
| [9-5](https://aistudio.baidu.com/aistudio/datasetdetail/136567) | [WHU TCL SatMVS 1.0](http://gpcv.whu.edu.cn/data/whu_tlc.html) | Image generation | 5120 * 5120 | 1 | 300 | __ | tif, jpg | __ | 2.1m, 2.5m | __ | Satellite imagery | ZY3 | 2021 | Wuhan University | http://gpcv.whu.edu.cn/data/whu_tlc.html | https://aistudio.baidu.com/aistudio/datasetdetail/136567 |
| [9-6](https://aistudio.baidu.com/aistudio/datasetdetail/136567) | [WHU TCL SatMVS 2.0](http://gpcv.whu.edu.cn/data/whu_tlc.html) | Image generation | 768 * 384 | 1 | 5011 | __ | tif | __ | 2.1m, 2.5m | __ | Satellite imagery | ZY3 | 2021 | Wuhan University | http://gpcv.whu.edu.cn/data/whu_tlc.html | https://aistudio.baidu.com/aistudio/datasetdetail/136567 |
| 9-7 | [DLR-ACD](https://www.dlr.de/eoc/en/desktopdefault.aspx/tabid-12760/22294_read-58354/) | Image generation | 3619 * 5226 | 3 | 33 | 1 | __ | __ | 0.045~ 0.15m | __ | Aerial imagery | Aerial imagery | 2019 | German Aerospace Center | https://www.dlr.de/eoc/en/desktopdefault.aspx/tabid-12760/22294_read-58354/ | |
| 9-8 | [SEN12MS-CR](https://mediatum.ub.tum.de/1554803) | Image generation | 256 * 256 | 13, 2 | 122218 | __ | __ | __ | __ | __ | Satellite imagery | Sentinel1, Sentinel2 | 2020 | TUM | https://mediatum.ub.tum.de/1554803 | |

@@ -39,4 +39,4 @@ python -m paddle.distributed.launch --gpus 0,1 tutorials/train/semantic_segmenta
visualdl --logdir output/deeplabv3p_resnet50_multi_channel/vdl_log --port 8001
```
After the service starts, open https://0.0.0.0:8001 or https://localhost:8001 in a browser.

@@ -21,4 +21,4 @@ env_info = get_environ_info()
log_level = 2
from . import tasks, datasets, transforms, utils, tools, models, deploy

@@ -12,4 +12,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from . import cls, det, seg, gan, cd

@@ -10,4 +10,4 @@
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

@@ -22,6 +22,15 @@ from .layers import Conv3x3, Conv1x1, get_norm_layer, Identity
from .param_init import KaimingInitMixin
def calc_product(*args):
if len(args) < 1:
raise ValueError
ret = args[0]
for arg in args[1:]:
ret *= arg
return ret
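# For example, calc_product(32, 32) == 1024. Computing an explicit size here,
# rather than reshaping with -1, keeps tensor shapes fully static, which the
# dynamic-to-static pass behind --fixed_input_shape export relies on.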
class BIT(nn.Layer):
"""
The BIT implementation based on PaddlePaddle.
@@ -131,9 +140,10 @@ class BIT(nn.Layer):
def _get_semantic_tokens(self, x):
b, c = x.shape[:2]
att_map = self.conv_att(x)
att_map = att_map.reshape((b, self.token_len, 1, -1))
att_map = att_map.reshape(
(b, self.token_len, 1, calc_product(*att_map.shape[2:])))
att_map = F.softmax(att_map, axis=-1)
x = x.reshape((b, 1, c, -1))
x = x.reshape((b, 1, c, att_map.shape[-1]))
tokens = (x * att_map).sum(-1)
return tokens
@@ -172,7 +182,7 @@
else:
token1 = self._get_reshaped_tokens(x1)
token2 = self._get_reshaped_tokens(x2)
# Transformer encoder forward
token = paddle.concat([token1, token2], axis=1)
token = self.encode(token)
@@ -253,6 +263,7 @@ class CrossAttention(nn.Layer):
inner_dim = head_dim * n_heads
self.n_heads = n_heads
self.head_dim = head_dim
self.scale = dim**-0.5
self.apply_softmax = apply_softmax
@@ -272,9 +283,10 @@
k = self.fc_k(ref)
v = self.fc_v(ref)
q = q.reshape((b, n, h, -1)).transpose((0, 2, 1, 3))
k = k.reshape((b, paddle.shape(ref)[1], h, -1)).transpose((0, 2, 1, 3))
v = v.reshape((b, paddle.shape(ref)[1], h, -1)).transpose((0, 2, 1, 3))
q = q.reshape((b, n, h, self.head_dim)).transpose((0, 2, 1, 3))
rn = ref.shape[1]
k = k.reshape((b, rn, h, self.head_dim)).transpose((0, 2, 1, 3))
v = v.reshape((b, rn, h, self.head_dim)).transpose((0, 2, 1, 3))
mult = paddle.matmul(q, k, transpose_y=True) * self.scale
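This hunk applies the same static-shape idea as `_get_semantic_tokens`: `-1` and `paddle.shape()` lookups are replaced by explicit sizes so the exporter sees fully determined shapes. A toy sketch of the multi-head reshape:

```python
import paddle

b, n, h, head_dim = 2, 5, 4, 8
q = paddle.rand([b, n, h * head_dim])
# An explicit head_dim instead of -1 keeps the target shape fully static.
q = q.reshape((b, n, h, head_dim)).transpose((0, 2, 1, 3))
print(q.shape)  # [2, 4, 5, 8]
```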

@@ -131,8 +131,7 @@ class FCEarlyFusion(nn.Layer):
# Stage 4d
x4d = self.upconv4(x4p)
pad4 = (0, paddle.shape(x43)[3] - paddle.shape(x4d)[3], 0,
paddle.shape(x43)[2] - paddle.shape(x4d)[2])
pad4 = (0, x43.shape[3] - x4d.shape[3], 0, x43.shape[2] - x4d.shape[2])
x4d = paddle.concat([F.pad(x4d, pad=pad4, mode='replicate'), x43], 1)
x43d = self.do43d(self.conv43d(x4d))
x42d = self.do42d(self.conv42d(x43d))
@@ -140,8 +139,7 @@
# Stage 3d
x3d = self.upconv3(x41d)
pad3 = (0, paddle.shape(x33)[3] - paddle.shape(x3d)[3], 0,
paddle.shape(x33)[2] - paddle.shape(x3d)[2])
pad3 = (0, x33.shape[3] - x3d.shape[3], 0, x33.shape[2] - x3d.shape[2])
x3d = paddle.concat([F.pad(x3d, pad=pad3, mode='replicate'), x33], 1)
x33d = self.do33d(self.conv33d(x3d))
x32d = self.do32d(self.conv32d(x33d))
@@ -149,16 +147,14 @@
# Stage 2d
x2d = self.upconv2(x31d)
pad2 = (0, paddle.shape(x22)[3] - paddle.shape(x2d)[3], 0,
paddle.shape(x22)[2] - paddle.shape(x2d)[2])
pad2 = (0, x22.shape[3] - x2d.shape[3], 0, x22.shape[2] - x2d.shape[2])
x2d = paddle.concat([F.pad(x2d, pad=pad2, mode='replicate'), x22], 1)
x22d = self.do22d(self.conv22d(x2d))
x21d = self.do21d(self.conv21d(x22d))
# Stage 1d
x1d = self.upconv1(x21d)
pad1 = (0, paddle.shape(x12)[3] - paddle.shape(x1d)[3], 0,
paddle.shape(x12)[2] - paddle.shape(x1d)[2])
pad1 = (0, x12.shape[3] - x1d.shape[3], 0, x12.shape[2] - x1d.shape[2])
x1d = paddle.concat([F.pad(x1d, pad=pad1, mode='replicate'), x12], 1)
x12d = self.do12d(self.conv12d(x1d))
x11d = self.conv11d(x12d)

@@ -154,8 +154,8 @@ class FCSiamConc(nn.Layer):
# Decode
# Stage 4d
x4d = self.upconv4(x4p)
pad4 = (0, paddle.shape(x43_1)[3] - paddle.shape(x4d)[3], 0,
paddle.shape(x43_1)[2] - paddle.shape(x4d)[2])
pad4 = (0, x43_1.shape[3] - x4d.shape[3], 0,
x43_1.shape[2] - x4d.shape[2])
x4d = paddle.concat(
[F.pad(x4d, pad=pad4, mode='replicate'), x43_1, x43_2], 1)
x43d = self.do43d(self.conv43d(x4d))
@@ -164,8 +164,8 @@
# Stage 3d
x3d = self.upconv3(x41d)
pad3 = (0, paddle.shape(x33_1)[3] - paddle.shape(x3d)[3], 0,
paddle.shape(x33_1)[2] - paddle.shape(x3d)[2])
pad3 = (0, x33_1.shape[3] - x3d.shape[3], 0,
x33_1.shape[2] - x3d.shape[2])
x3d = paddle.concat(
[F.pad(x3d, pad=pad3, mode='replicate'), x33_1, x33_2], 1)
x33d = self.do33d(self.conv33d(x3d))
@@ -174,8 +174,8 @@
# Stage 2d
x2d = self.upconv2(x31d)
pad2 = (0, paddle.shape(x22_1)[3] - paddle.shape(x2d)[3], 0,
paddle.shape(x22_1)[2] - paddle.shape(x2d)[2])
pad2 = (0, x22_1.shape[3] - x2d.shape[3], 0,
x22_1.shape[2] - x2d.shape[2])
x2d = paddle.concat(
[F.pad(x2d, pad=pad2, mode='replicate'), x22_1, x22_2], 1)
x22d = self.do22d(self.conv22d(x2d))
@@ -183,8 +183,8 @@
# Stage 1d
x1d = self.upconv1(x21d)
pad1 = (0, paddle.shape(x12_1)[3] - paddle.shape(x1d)[3], 0,
paddle.shape(x12_1)[2] - paddle.shape(x1d)[2])
pad1 = (0, x12_1.shape[3] - x1d.shape[3], 0,
x12_1.shape[2] - x1d.shape[2])
x1d = paddle.concat(
[F.pad(x1d, pad=pad1, mode='replicate'), x12_1, x12_2], 1)
x12d = self.do12d(self.conv12d(x1d))

@@ -154,8 +154,8 @@ class FCSiamDiff(nn.Layer):
# Decode
# Stage 4d
x4d = self.upconv4(x4p)
pad4 = (0, paddle.shape(x43_1)[3] - paddle.shape(x4d)[3], 0,
paddle.shape(x43_1)[2] - paddle.shape(x4d)[2])
pad4 = (0, x43_1.shape[3] - x4d.shape[3], 0,
x43_1.shape[2] - x4d.shape[2])
x4d = F.pad(x4d, pad=pad4, mode='replicate')
x4d = paddle.concat([x4d, paddle.abs(x43_1 - x43_2)], 1)
x43d = self.do43d(self.conv43d(x4d))
@@ -164,8 +164,8 @@
# Stage 3d
x3d = self.upconv3(x41d)
pad3 = (0, paddle.shape(x33_1)[3] - paddle.shape(x3d)[3], 0,
paddle.shape(x33_1)[2] - paddle.shape(x3d)[2])
pad3 = (0, x33_1.shape[3] - x3d.shape[3], 0,
x33_1.shape[2] - x3d.shape[2])
x3d = F.pad(x3d, pad=pad3, mode='replicate')
x3d = paddle.concat([x3d, paddle.abs(x33_1 - x33_2)], 1)
x33d = self.do33d(self.conv33d(x3d))
@@ -174,8 +174,8 @@
# Stage 2d
x2d = self.upconv2(x31d)
pad2 = (0, paddle.shape(x22_1)[3] - paddle.shape(x2d)[3], 0,
paddle.shape(x22_1)[2] - paddle.shape(x2d)[2])
pad2 = (0, x22_1.shape[3] - x2d.shape[3], 0,
x22_1.shape[2] - x2d.shape[2])
x2d = F.pad(x2d, pad=pad2, mode='replicate')
x2d = paddle.concat([x2d, paddle.abs(x22_1 - x22_2)], 1)
x22d = self.do22d(self.conv22d(x2d))
@@ -183,8 +183,8 @@
# Stage 1d
x1d = self.upconv1(x21d)
pad1 = (0, paddle.shape(x12_1)[3] - paddle.shape(x1d)[3], 0,
paddle.shape(x12_1)[2] - paddle.shape(x1d)[2])
pad1 = (0, x12_1.shape[3] - x1d.shape[3], 0,
x12_1.shape[2] - x1d.shape[2])
x1d = F.pad(x1d, pad=pad1, mode='replicate')
x1d = paddle.concat([x1d, paddle.abs(x12_1 - x12_2)], 1)
x12d = self.do12d(self.conv12d(x1d))
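All four decoder stages of FC-EF, FC-Siam-conc, and FC-Siam-diff follow the same align-then-fuse pattern: pad the upsampled map on the right and bottom so it matches the encoder feature, then concatenate (optionally with the absolute difference of the two temporal branches). A standalone sketch, assuming NCHW tensors:

```python
import paddle
import paddle.nn.functional as F

# Toy feature maps whose spatial sizes differ by one pixel, as happens
# after pooling odd-sized inputs and upsampling back.
x_dec = paddle.rand([1, 16, 15, 15])  # upsampled decoder feature
x_enc = paddle.rand([1, 16, 16, 16])  # skip connection from the encoder

# For 4-D NCHW input, pad is (left, right, top, bottom).
pad = [0, x_enc.shape[3] - x_dec.shape[3], 0, x_enc.shape[2] - x_dec.shape[2]]
x_dec = F.pad(x_dec, pad=pad, mode='replicate')
out = paddle.concat([x_dec, x_enc], axis=1)
print(out.shape)  # [1, 32, 16, 16]
```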

@@ -13,4 +13,4 @@
# limitations under the License.
from .blocks import *
from .attention import ChannelAttention, SpatialAttention, CBAM

@@ -140,12 +140,14 @@ class Conv7x7(BasicConv):
class MaxPool2x2(nn.MaxPool2D):
def __init__(self, **kwargs):
super(MaxPool2x2, self).__init__(kernel_size=2, stride=(2, 2), padding=(0, 0), **kwargs)
super(MaxPool2x2, self).__init__(
kernel_size=2, stride=(2, 2), padding=(0, 0), **kwargs)
class MaxUnPool2x2(nn.MaxUnPool2D):
def __init__(self, **kwargs):
super(MaxUnPool2x2, self).__init__(kernel_size=2, stride=(2, 2), padding=(0, 0), **kwargs)
super(MaxUnPool2x2, self).__init__(
kernel_size=2, stride=(2, 2), padding=(0, 0), **kwargs)
class ConvTransposed3x3(nn.Layer):

@@ -132,7 +132,7 @@ class SNUNet(nn.Layer, KaimingInitMixin):
out = paddle.concat([x0_1, x0_2, x0_3, x0_4], 1)
intra = paddle.sum(paddle.stack([x0_1, x0_2, x0_3, x0_4]), axis=0)
intra = x0_1 + x0_2 + x0_3 + x0_4
m_intra = self.ca_intra(intra)
out = self.ca_inter(out) * (out + paddle.tile(m_intra, (1, 4, 1, 1)))
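The replacement is numerically identical: summing a stacked tensor along axis 0 equals adding the tensors directly, and the direct sum avoids materializing the stacked intermediate (which also exports more cleanly). For example:

```python
import paddle

a = paddle.ones([1, 2, 2])
b = paddle.full([1, 2, 2], 2.0)
lhs = paddle.sum(paddle.stack([a, b]), axis=0)
rhs = a + b
print(bool(paddle.all(lhs == rhs)))  # True
```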

@@ -39,7 +39,7 @@ class SELayer(nn.Layer):
b, c, _, _ = x.shape
y = self.avg_pool(x).reshape((b, c))
y = self.fc(y).reshape((b, c, 1, 1))
return x * y.expand_as(x)
return x * paddle.expand(y, shape=x.shape)
class HS(nn.Layer):
@@ -92,7 +92,7 @@ def ShuffleLayer(x, groups):
# transpose
x = x.transpose((0, 2, 1, 3, 4))
# reshape
x = x.reshape((batchsize, -1, height, width))
x = x.reshape((batchsize, groups * channels_per_group, height, width))
return x
@@ -104,7 +104,7 @@ def ShuffleLayerTrans(x, groups):
# transpose
x = x.transpose((0, 2, 1, 3, 4))
# reshape
x = x.reshape((batchsize, -1, height, width))
x = x.reshape((batchsize, channels_per_group * groups, height, width))
return x
@@ -374,7 +374,8 @@ class CondenseNetV2(nn.Layer):
def forward(self, x):
features = self.features(x)
out = features.reshape((features.shape[0], -1))
out = features.reshape((features.shape[0], features.shape[1] *
features.shape[2] * features.shape[3]))
out = self.fc(out)
out = self.fc_act(out)

@@ -8,14 +8,15 @@ from .builder import GENERATORS
def default_conv(in_channels, out_channels, kernel_size, bias=True):
weight_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.XavierUniform(),
need_clip =True)
return nn.Conv2D(in_channels,
out_channels,
kernel_size,
padding=(kernel_size // 2),
weight_attr=weight_attr,
bias_attr=bias)
weight_attr = paddle.ParamAttr(
initializer=paddle.nn.initializer.XavierUniform(), need_clip=True)
return nn.Conv2D(
in_channels,
out_channels,
kernel_size,
padding=(kernel_size // 2),
weight_attr=weight_attr,
bias_attr=bias)
class MeanShift(nn.Conv2D):

@@ -27,6 +27,7 @@ from ...models.ppgan.modules.init import reset_parameters
class RCANModel(BaseModel):
"""Base SR model for single image super-resolution.
"""
def __init__(self, generator, pixel_criterion=None, use_init_weight=False):
"""
Args:
@@ -69,16 +70,14 @@ class RCANModel(BaseModel):
loss_pixel.backward()
optims['optim'].step()
else:
print('Skip this batch {}! (Loss: {})'.format(
self.batch + 1, loss_pixel.item()
))
print('Skip this batch {}! (Loss: {})'.format(self.batch + 1,
loss_pixel.item()))
self.batch += 1
if self.batch % 1000 == 0:
self.error_last = loss_pixel.item()/1000
self.error_last = loss_pixel.item() / 1000
print("update error_last:{}".format(self.error_last))
def test_iter(self, metrics=None):
self.nets['generator'].eval()
with paddle.no_grad():
@@ -99,8 +98,8 @@ class RCANModel(BaseModel):
def init_sr_weight(net):
def reset_func(m):
if hasattr(m, 'weight') and (not isinstance(
m, (nn.BatchNorm, nn.BatchNorm2D))):
if hasattr(m, 'weight') and (
not isinstance(m, (nn.BatchNorm, nn.BatchNorm2D))):
reset_parameters(m)
net.apply(reset_func)

@@ -32,7 +32,7 @@ class FPN(nn.Layer):
"""
Module that adds FPN on top of a list of feature maps.
The feature maps are currently supposed to be in increasing depth
order, and must be consecutive
"""
def __init__(self,
@@ -41,38 +41,35 @@ class FPN(nn.Layer):
conv_block=ConvReLU,
top_blocks=None):
super(FPN, self).__init__()
self.inner_blocks = []
self.layer_blocks = []
inner_blocks = []
layer_blocks = []
for idx, in_channels in enumerate(in_channels_list, 1):
inner_block = "fpn_inner{}".format(idx)
layer_block = "fpn_layer{}".format(idx)
if in_channels == 0:
continue
inner_block_module = conv_block(in_channels, out_channels, 1)
layer_block_module = conv_block(out_channels, out_channels, 3, 1)
self.add_sublayer(inner_block, inner_block_module)
self.add_sublayer(layer_block, layer_block_module)
for module in [inner_block_module, layer_block_module]:
for m in module.sublayers():
if isinstance(m, nn.Conv2D):
kaiming_normal_init(m.weight)
self.inner_blocks.append(inner_block)
self.layer_blocks.append(layer_block)
inner_blocks.append(inner_block_module)
layer_blocks.append(layer_block_module)
self.inner_blocks = nn.LayerList(inner_blocks)
self.layer_blocks = nn.LayerList(layer_blocks)
self.top_blocks = top_blocks
def forward(self, x):
last_inner = getattr(self, self.inner_blocks[-1])(x[-1])
results = [getattr(self, self.layer_blocks[-1])(last_inner)]
for feature, inner_block, layer_block in zip(
x[:-1][::-1], self.inner_blocks[:-1][::-1],
self.layer_blocks[:-1][::-1]):
if not inner_block:
continue
last_inner = self.inner_blocks[-1](x[-1])
results = [self.layer_blocks[-1](last_inner)]
for i, feature in enumerate(x[-2::-1]):
inner_block = self.inner_blocks[len(self.inner_blocks) - 2 - i]
layer_block = self.layer_blocks[len(self.layer_blocks) - 2 - i]
inner_top_down = F.interpolate(
last_inner, scale_factor=2, mode="nearest")
inner_lateral = getattr(self, inner_block)(feature)
inner_lateral = inner_block(feature)
last_inner = inner_lateral + inner_top_down
results.insert(0, getattr(self, layer_block)(last_inner))
results.insert(0, layer_block(last_inner))
if isinstance(self.top_blocks, LastLevelP6P7):
last_results = self.top_blocks(x[-1], results[-1])
results.extend(last_results)
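The refactor stores the FPN sublayers in `paddle.nn.LayerList` instead of registering string names with `add_sublayer` and looking them up via `getattr`. A minimal sketch of why `LayerList` matters: unlike a plain Python list, it registers each sublayer so its parameters are tracked automatically.

```python
import paddle.nn as nn

class Toy(nn.Layer):
    def __init__(self):
        super().__init__()
        # A plain Python list would NOT register these layers;
        # nn.LayerList registers each sublayer and its parameters.
        self.blocks = nn.LayerList([nn.Linear(4, 4) for _ in range(3)])

    def forward(self, x):
        for block in self.blocks:
            x = block(x)
        return x

model = Toy()
print(len(model.parameters()))  # 6: a weight and a bias for each of 3 layers
```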

@@ -17,4 +17,4 @@ from .coco import COCODetection
from .seg_dataset import SegDataset
from .cd_dataset import CDDataset
from .clas_dataset import ClasDataset
from .sr_dataset import SRdataset, ComposeTrans

@@ -17,6 +17,7 @@ from enum import IntEnum
import os.path as osp
from paddle.io import Dataset
from paddlers.utils import logging, get_num_workers, get_encoding, path_normalization, is_pic

@@ -16,6 +16,7 @@ import os.path as osp
import copy
from paddle.io import Dataset
from paddlers.utils import logging, get_num_workers, get_encoding, path_normalization, is_pic

@@ -23,7 +23,7 @@ import numpy as np
from paddle.io import Dataset
from paddlers.utils import logging, get_num_workers, get_encoding, path_normalization, is_pic
from paddlers.transforms import ImgDecoder, MixupImage
from paddlers.transforms import DecodeImg, MixupImage
from paddlers.tools import YOLOAnchorCluster
@@ -256,8 +256,8 @@ class COCODetection(Dataset):
if self.data_fields is not None:
sample_mix = {k: sample_mix[k] for k in self.data_fields}
sample = self.mixup_op(sample=[
ImgDecoder(to_rgb=False)(sample),
ImgDecoder(to_rgb=False)(sample_mix)
DecodeImg(to_rgb=False)(sample),
DecodeImg(to_rgb=False)(sample_mix)
])
sample = self.transforms(sample)
return sample

@@ -25,7 +25,7 @@ import numpy as np
from paddle.io import Dataset
from paddlers.utils import logging, get_num_workers, get_encoding, path_normalization, is_pic
from paddlers.transforms import ImgDecoder, MixupImage
from paddlers.transforms import DecodeImg, MixupImage
from paddlers.tools import YOLOAnchorCluster
@@ -320,8 +320,8 @@ class VOCDetection(Dataset):
if self.data_fields is not None:
sample_mix = {k: sample_mix[k] for k in self.data_fields}
sample = self.mixup_op(sample=[
ImgDecoder(to_rgb=False)(sample),
ImgDecoder(to_rgb=False)(sample_mix)
DecodeImg(to_rgb=False)(sample),
DecodeImg(to_rgb=False)(sample_mix)
])
sample = self.transforms(sample)
return sample

@@ -175,9 +175,9 @@ class Predictor(object):
if self._model._postprocess is None:
self._model.build_postprocess_from_labels(topk)
# XXX: Convert ndarray to tensor as self._model._postprocess requires
net_outputs = paddle.to_tensor(net_outputs)
assert net_outputs.shape[1] == 1
outputs = self._model._postprocess(net_outputs.squeeze(1))
assert len(net_outputs) == 1
net_outputs = paddle.to_tensor(net_outputs[0])
outputs = self._model._postprocess(net_outputs)
class_ids = map(itemgetter('class_ids'), outputs)
scores = map(itemgetter('scores'), outputs)
label_names = map(itemgetter('label_names'), outputs)
@@ -252,22 +252,26 @@ class Predictor(object):
transforms=None,
warmup_iters=0,
repeats=1):
""" 图片预测
"""
Do prediction.
Args:
img_file(List[str or tuple or np.ndarray], str, tuple, or np.ndarray):
对于场景分类图像复原目标检测和语义分割任务来说该参数可为单一图像路径或是解码后的排列格式为H, W, C
且具有float32类型的BGR图像表示为numpy的ndarray形式或者是一组图像路径或np.ndarray对象构成的列表对于变化检测
任务来说该参数可以为图像路径二元组分别表示前后两个时相影像路径或是两幅图像组成的二元组或者是上述两种二元组
之一构成的列表
topk(int): 场景分类模型预测时使用表示预测前topk的结果默认值为1
transforms (paddlers.transforms): 数据预处理操作默认值为None, 即使用`model.yml`中保存的数据预处理操作
warmup_iters (int): 预热轮数用于评估模型推理以及前后处理速度若大于1会预先重复预测warmup_iters而后才开始正式的预测及其速度评估默认为0
repeats (int): 重复次数用于评估模型推理以及前后处理速度若大于1会预测repeats次取时间平均值默认值为1
img_file(list[str | tuple | np.ndarray] | str | tuple | np.ndarray): For scene classification, image restoration,
object detection and semantic segmentation tasks, `img_file` should be either the path of the image to predict
, a decoded image (a `np.ndarray`, which should be consistent with what you get from passing image path to
`paddlers.transforms.decode_image()`), or a list of image paths or decoded images. For change detection tasks,
`img_file` should be a tuple of image paths, a tuple of decoded images, or a list of tuples.
topk(int, optional): Top-k values to reserve in a classification result. Defaults to 1.
transforms (paddlers.transforms.Compose | None, optional): Pipeline of data preprocessing. If None, load transforms
from `model.yml`. Defaults to None.
warmup_iters (int, optional): Warm-up iterations before measuring the execution time. Defaults to 0.
repeats (int, optional): Number of repetitions to evaluate model inference and data processing speed. If greater than
1, the reported time consumption is the average of all repeats. Defaults to 1.
"""
if repeats < 1:
logging.error("`repeats` must be greater than or equal to 1.", exit=True)
if transforms is None and not hasattr(self._model, 'test_transforms'):
raise Exception("Transforms need to be defined, now is None.")
raise ValueError("Transforms need to be defined, now is None.")
if transforms is None:
transforms = self._model.test_transforms
if isinstance(img_file, tuple) and len(img_file) != 2:
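A hedged usage sketch of the API this docstring describes; the constructor argument below is an assumption based on the exported-model layout, not a verified signature:

```python
from paddlers.deploy import Predictor

# Hypothetical paths for illustration.
predictor = Predictor("./inference_model")

# Change detection: a tuple of the two temporal images.
result = predictor.predict(("T1.tif", "T2.tif"), warmup_iters=5, repeats=10)

# Scene classification: a single path, keeping the top 3 classes.
result = predictor.predict("scene.tif", topk=3)
```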

@@ -12,4 +12,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from . import ppcls, ppdet, ppseg, ppgan

@@ -20,4 +20,4 @@ from . import optimizer
from .arch import *
from .optimizer import *
from .data import *
from .utils import *

@@ -201,22 +201,14 @@ class MiddleFlow(nn.Layer):
super(MiddleFlow, self).__init__()
self.block_num = block_num
self._conv_0 = MiddleFlowBottleneckBlock(
728, 728, name="middle_flow_0")
self._conv_1 = MiddleFlowBottleneckBlock(
728, 728, name="middle_flow_1")
self._conv_2 = MiddleFlowBottleneckBlock(
728, 728, name="middle_flow_2")
self._conv_3 = MiddleFlowBottleneckBlock(
728, 728, name="middle_flow_3")
self._conv_4 = MiddleFlowBottleneckBlock(
728, 728, name="middle_flow_4")
self._conv_5 = MiddleFlowBottleneckBlock(
728, 728, name="middle_flow_5")
self._conv_6 = MiddleFlowBottleneckBlock(
728, 728, name="middle_flow_6")
self._conv_7 = MiddleFlowBottleneckBlock(
728, 728, name="middle_flow_7")
self._conv_0 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_0")
self._conv_1 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_1")
self._conv_2 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_2")
self._conv_3 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_3")
self._conv_4 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_4")
self._conv_5 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_5")
self._conv_6 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_6")
self._conv_7 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_7")
if block_num == 16:
self._conv_8 = MiddleFlowBottleneckBlock(
728, 728, name="middle_flow_8")
@@ -297,8 +289,7 @@ class ExitFlow(nn.Layer):
name = "exit_flow"
self._conv_0 = ExitFlowBottleneckBlock(
728, 728, 1024, name=name + "_1")
self._conv_0 = ExitFlowBottleneckBlock(728, 728, 1024, name=name + "_1")
self._conv_1 = SeparableConv(1024, 1536, stride=1, name=name + "_2")
self._conv_2 = SeparableConv(1536, 2048, stride=1, name=name + "_3")
self._pool = AdaptiveAvgPool2D(1)
@@ -362,16 +353,14 @@ def Xception41(pretrained=False, use_ssld=False, **kwargs):
def Xception65(pretrained=False, use_ssld=False, **kwargs):
model = Xception(
entry_flow_block_num=3, middle_flow_block_num=16, **kwargs)
model = Xception(entry_flow_block_num=3, middle_flow_block_num=16, **kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["Xception65"], use_ssld=use_ssld)
return model
def Xception71(pretrained=False, use_ssld=False, **kwargs):
model = Xception(
entry_flow_block_num=5, middle_flow_block_num=16, **kwargs)
model = Xception(entry_flow_block_num=5, middle_flow_block_num=16, **kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["Xception71"], use_ssld=use_ssld)
return model

@@ -10,4 +10,4 @@
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

@@ -15,6 +15,7 @@
import paddle
import paddle.nn as nn
class DSHSDLoss(nn.Layer):
"""
# DSHSD(IEEE ACCESS 2019)
@@ -23,6 +24,7 @@ class DSHSDLoss(nn.Layer):
# [DSHSD] epoch:250, bit:48, dataset:nuswide_21, MAP:0.809, Best MAP: 0.815
# [DSHSD] epoch:135, bit:48, dataset:imagenet, MAP:0.647, Best MAP: 0.647
"""
def __init__(self, alpha, multi_label=False):
super(DSHSDLoss, self).__init__()
self.alpha = alpha
@@ -65,6 +67,7 @@ class LCDSHLoss(nn.Layer):
# [LCDSH] epoch:145, bit:48, dataset:cifar10-1, MAP:0.798, Best MAP: 0.798
# [LCDSH] epoch:183, bit:48, dataset:nuswide_21, MAP:0.833, Best MAP: 0.834
"""
def __init__(self, n_class, _lambda):
super(LCDSHLoss, self).__init__()
self._lambda = _lambda
@@ -75,9 +78,11 @@ class LCDSHLoss(nn.Layer):
# label to one-hot
label = paddle.flatten(label)
label = paddle.nn.functional.one_hot(label, self.n_class).astype("float32")
s = 2 * (paddle.matmul(label, label, transpose_y=True) > 0).astype("float32") - 1
label = paddle.nn.functional.one_hot(label,
self.n_class).astype("float32")
s = 2 * (paddle.matmul(
label, label, transpose_y=True) > 0).astype("float32") - 1
inner_product = paddle.matmul(feature, feature, transpose_y=True) * 0.5
inner_product = inner_product.clip(min=-50, max=50)
@@ -89,4 +94,3 @@ class LCDSHLoss(nn.Layer):
L2 = (sigmoid(inner_product) - sigmoid(inner_product_)).pow(2).mean()
return {"lcdshloss": L1 + self._lambda * L2}

@@ -19,10 +19,11 @@ class GoogLeNetLoss(nn.Layer):
"""
Cross entropy loss used after googlenet
"""
def __init__(self, epsilon=None):
super().__init__()
assert (epsilon is None or epsilon <= 0 or epsilon >= 1), "GoogLeNet does not support label_smooth"
assert (epsilon is None or epsilon <= 0 or
epsilon >= 1), "GoogLeNet does not support label_smooth"
def forward(self, inputs, label):
input0, input1, input2 = inputs

@@ -26,11 +26,9 @@ class MultiLabelLoss(nn.Layer):
def _binary_crossentropy(self, input, target, class_num):
if self.epsilon is not None:
target = self._labelsmoothing(target, class_num)
cost = F.binary_cross_entropy_with_logits(
logit=input, label=target)
cost = F.binary_cross_entropy_with_logits(logit=input, label=target)
else:
cost = F.binary_cross_entropy_with_logits(
logit=input, label=target)
cost = F.binary_cross_entropy_with_logits(logit=input, label=target)
return cost

@@ -36,8 +36,10 @@ class PairwiseCosface(nn.Layer):
dist_mat = paddle.matmul(embedding, embedding, transpose_y=True)
N = dist_mat.shape[0]
is_pos = targets.reshape([N,1]).expand([N,N]).equal(paddle.t(targets.reshape([N,1]).expand([N,N]))).astype('float')
is_neg = targets.reshape([N,1]).expand([N,N]).not_equal(paddle.t(targets.reshape([N,1]).expand([N,N]))).astype('float')
is_pos = targets.reshape([N, 1]).expand([N, N]).equal(
paddle.t(targets.reshape([N, 1]).expand([N, N]))).astype('float')
is_neg = targets.reshape([N, 1]).expand([N, N]).not_equal(
paddle.t(targets.reshape([N, 1]).expand([N, N]))).astype('float')
# Mask scores related to itself
is_pos = is_pos - paddle.eye(N, N)
@@ -48,8 +50,9 @@ class PairwiseCosface(nn.Layer):
logit_p = -self.gamma * s_p + (-99999999.) * (1 - is_pos)
logit_n = self.gamma * (s_n + self.margin) + (-99999999.) * (1 - is_neg)
loss = F.softplus(paddle.logsumexp(logit_p, axis=1) + paddle.logsumexp(logit_n, axis=1)).mean()
return {"PairwiseCosface": loss}
loss = F.softplus(
paddle.logsumexp(
logit_p, axis=1) + paddle.logsumexp(
logit_n, axis=1)).mean()
return {"PairwiseCosface": loss}

@@ -58,8 +58,7 @@ class SupConLoss(nn.Layer):
elif labels is not None:
labels = labels.reshape([-1, 1])
if labels.shape[0] != batch_size:
raise ValueError(
'Num of labels does not match num of features')
raise ValueError('Num of labels does not match num of features')
mask = paddle.cast(
paddle.equal(labels, paddle.t(labels)), 'float32')
else:

@@ -34,8 +34,7 @@ class CombinedMetrics(nn.Layer):
metric_name = list(config)[0]
metric_params = config[metric_name]
if metric_params is not None:
self.metric_func_list.append(
eval(metric_name)(**metric_params))
self.metric_func_list.append(eval(metric_name)(**metric_params))
else:
self.metric_func_list.append(eval(metric_name)())

@@ -120,8 +120,7 @@ def get_path_from_url(url,
# Mainly used to solve the problem of downloading data from different
# machines in the case of multiple machines. Different ips will download
# data, and the same ip will only download data once.
unique_endpoints = _get_unique_endpoints(ParallelEnv()
.trainer_endpoints[:])
unique_endpoints = _get_unique_endpoints(ParallelEnv().trainer_endpoints[:])
if osp.exists(fullpath) and check_exist and _md5check(fullpath, md5sum):
logger.info("Found {}".format(fullpath))
else:

@@ -141,8 +141,8 @@ def get_keypoint_res(results, im_id):
}
x = kpt[0::3]
y = kpt[1::3]
x0, x1, y0, y1 = np.min(x).item(), np.max(x).item(), np.min(
y).item(), np.max(y).item()
x0, x1, y0, y1 = np.min(x).item(), np.max(x).item(), np.min(y).item(
), np.max(y).item()
ann['area'] = (x1 - x0) * (y1 - y0)
ann['bbox'] = [x0, y0, x1 - x0, y1 - y0]
anns.append(ann)

@@ -25,8 +25,7 @@ class BaseArch(nn.Layer):
self.scale = 1.
self.mean = paddle.to_tensor([0.485, 0.456, 0.406]).reshape(
(1, 3, 1, 1))
self.std = paddle.to_tensor([0.229, 0.224, 0.225]).reshape(
(1, 3, 1, 1))
self.std = paddle.to_tensor([0.229, 0.224, 0.225]).reshape((1, 3, 1, 1))
for item in cfg_transform:
if 'NormalizeImage' in item:
self.mean = paddle.to_tensor(item['NormalizeImage'][
@@ -83,8 +82,7 @@ class BaseArch(nn.Layer):
nms_threshold = 0.5
keep_top_k = 100
if self.__class__.__name__ in ('CascadeRCNN', 'FasterRCNN', 'MaskRCNN'
):
if self.__class__.__name__ in ('CascadeRCNN', 'FasterRCNN', 'MaskRCNN'):
num_classes = self.bbox_head.num_classes
keep_top_k = self.bbox_post_process.nms.keep_top_k
nms_threshold = self.bbox_post_process.nms.nms_threshold

@@ -109,12 +109,12 @@ class L2NormScale(nn.Layer):
@register
@serializable
class VGG(nn.Layer):
def __init__(
self,
depth=16,
normalizations=[20., -1, -1, -1, -1, -1],
extra_block_filters=[[256, 512, 1, 2, 3], [128, 256, 1, 2, 3],
[128, 256, 0, 1, 3], [128, 256, 0, 1, 3]]):
def __init__(self,
depth=16,
normalizations=[20., -1, -1, -1, -1, -1],
extra_block_filters=[[256, 512, 1, 2, 3], [128, 256, 1, 2, 3],
[128, 256, 0, 1, 3],
[128, 256, 0, 1, 3]]):
super(VGG, self).__init__()
assert depth in [16, 19], \
@@ -159,8 +159,8 @@ class VGG(nn.Layer):
for i, v in enumerate(self.extra_block_filters):
assert len(v) == 5, "extra_block_filters size not fix"
extra_conv = self.add_sublayer("conv{}".format(6 + i),
ExtraBlock(last_channels, v[0],
v[1], v[2], v[3], v[4]))
ExtraBlock(last_channels, v[0], v[1],
v[2], v[3], v[4]))
last_channels = v[1]
self.extra_convs.append(extra_conv)
self._out_channels.append(last_channels)

@@ -265,8 +265,7 @@ def decode_yolo(box, anchor, downsample_ratio):
"""
x, y, w, h = box
na, grid_h, grid_w = x.shape[1:4]
grid = make_grid(grid_h, grid_w, x.dtype).reshape(
(1, 1, grid_h, grid_w, 2))
grid = make_grid(grid_h, grid_w, x.dtype).reshape((1, 1, grid_h, grid_w, 2))
x1 = (x + grid[:, :, :, :, 0:1]) / grid_w
y1 = (y + grid[:, :, :, :, 1:2]) / grid_h
@@ -345,8 +344,7 @@ def bbox_iou(box1, box2, giou=False, diou=False, ciou=False, eps=1e-9):
# convex diagonal squared
c2 = cw**2 + ch**2 + eps
# center distance
rho2 = (
(px1 + px2 - gx1 - gx2)**2 + (py1 + py2 - gy1 - gy2)**2) / 4
rho2 = ((px1 + px2 - gx1 - gx2)**2 + (py1 + py2 - gy1 - gy2)**2) / 4
if diou:
return iou - rho2 / c2
else:
@@ -461,8 +459,8 @@ def rbox2delta(proposals, gt, means=[0, 0, 0, 0, 0], stds=[1, 1, 1, 1, 1]):
coord = gt[..., 0:2] - proposals[..., 0:2]
dx = (np.cos(proposals[..., 4]) * coord[..., 0] + np.sin(proposals[..., 4])
* coord[..., 1]) / proposals_widths
dy = (-np.sin(proposals[..., 4]) * coord[..., 0] +
np.cos(proposals[..., 4]) * coord[..., 1]) / proposals_heights
dy = (-np.sin(proposals[..., 4]) * coord[..., 0] + np.cos(proposals[..., 4])
* coord[..., 1]) / proposals_heights
dw = np.log(gt_widths / proposals_widths)
dh = np.log(gt_heights / proposals_heights)
da = (gt_angle - proposals_angle)
@@ -553,8 +551,7 @@ def poly2rbox(polys):
def cal_line_length(point1, point2):
import math
return math.sqrt(
math.pow(point1[0] - point2[0], 2) + math.pow(point1[1] - point2[1],
2))
math.pow(point1[0] - point2[0], 2) + math.pow(point1[1] - point2[1], 2))
def get_best_begin_point_single(coordinate):

@@ -170,12 +170,10 @@ class ConvNormLayer(nn.Layer):
norm_lr = 0. if freeze_norm else 1.
param_attr = ParamAttr(
learning_rate=norm_lr,
regularizer=L2Decay(norm_decay)
if norm_decay is not None else None)
regularizer=L2Decay(norm_decay) if norm_decay is not None else None)
bias_attr = ParamAttr(
learning_rate=norm_lr,
regularizer=L2Decay(norm_decay)
if norm_decay is not None else None)
regularizer=L2Decay(norm_decay) if norm_decay is not None else None)
if norm_type in ['bn', 'sync_bn']:
self.norm = nn.BatchNorm2D(
ch_out, weight_attr=param_attr, bias_attr=bias_attr)
@@ -293,19 +291,18 @@ class DropBlock(nn.Layer):
@register
@serializable
class AnchorGeneratorSSD(object):
def __init__(
self,
steps=[8, 16, 32, 64, 100, 300],
aspect_ratios=[[2.], [2., 3.], [2., 3.], [2., 3.], [2.], [2.]],
min_ratio=15,
max_ratio=90,
base_size=300,
min_sizes=[30.0, 60.0, 111.0, 162.0, 213.0, 264.0],
max_sizes=[60.0, 111.0, 162.0, 213.0, 264.0, 315.0],
offset=0.5,
flip=True,
clip=False,
min_max_aspect_ratios_order=False):
def __init__(self,
steps=[8, 16, 32, 64, 100, 300],
aspect_ratios=[[2.], [2., 3.], [2., 3.], [2., 3.], [2.], [2.]],
min_ratio=15,
max_ratio=90,
base_size=300,
min_sizes=[30.0, 60.0, 111.0, 162.0, 213.0, 264.0],
max_sizes=[60.0, 111.0, 162.0, 213.0, 264.0, 315.0],
offset=0.5,
flip=True,
clip=False,
min_max_aspect_ratios_order=False):
self.steps = steps
self.aspect_ratios = aspect_ratios
self.min_ratio = min_ratio
@@ -1035,19 +1032,16 @@ class MaskMatrixNMS(object):
seg_masks = paddle.flatten(seg_masks, start_axis=1, stop_axis=-1)
# inter.
inter_matrix = paddle.mm(seg_masks,
paddle.transpose(seg_masks, [1, 0]))
inter_matrix = paddle.mm(seg_masks, paddle.transpose(seg_masks, [1, 0]))
n_samples = paddle.shape(cate_labels)
# union.
sum_masks_x = paddle.expand(sum_masks, shape=[n_samples, n_samples])
# iou.
iou_matrix = (inter_matrix / (
sum_masks_x + paddle.transpose(sum_masks_x, [1, 0]) - inter_matrix)
)
sum_masks_x + paddle.transpose(sum_masks_x, [1, 0]) - inter_matrix))
iou_matrix = paddle.triu(iou_matrix, diagonal=1)
# label_specific matrix.
cate_labels_x = paddle.expand(
cate_labels, shape=[n_samples, n_samples])
cate_labels_x = paddle.expand(cate_labels, shape=[n_samples, n_samples])
label_matrix = paddle.cast(
(cate_labels_x == paddle.transpose(cate_labels_x, [1, 0])),
'float32')
@@ -1304,8 +1298,8 @@ class MultiHeadAttention(nn.Layer):
if self._qkv_same_embed_dim:
tensor = F.linear(
x=tensor,
weight=self.in_proj_weight[:, index * self.embed_dim:(
index + 1) * self.embed_dim],
weight=self.in_proj_weight[:, index * self.embed_dim:(index + 1)
* self.embed_dim],
bias=self.in_proj_bias[index * self.embed_dim:(index + 1) *
self.embed_dim]
if self.in_proj_bias is not None else None)

@@ -690,13 +690,12 @@ def yolo_box(
if not isinstance(class_num, int):
raise TypeError("Attr class_num of yolo_box must be an integer")
if not isinstance(conf_thresh, float):
raise TypeError(
"Attr ignore_thresh of yolo_box must be a float number")
raise TypeError("Attr ignore_thresh of yolo_box must be a float number")
if in_dygraph_mode():
attrs = ('anchors', anchors, 'class_num', class_num, 'conf_thresh',
conf_thresh, 'downsample_ratio', downsample_ratio,
'clip_bbox', clip_bbox, 'scale_x_y', scale_x_y)
conf_thresh, 'downsample_ratio', downsample_ratio, 'clip_bbox',
clip_bbox, 'scale_x_y', scale_x_y)
boxes, scores = core.ops.yolo_box(x, origin_shape, *attrs)
return boxes, scores
else:
@@ -978,8 +977,8 @@ def multiclass_nms(bboxes,
score_threshold, 'nms_top_k', nms_top_k, 'nms_threshold',
nms_threshold, 'keep_top_k', keep_top_k, 'nms_eta', nms_eta,
'normalized', normalized)
output, index, nms_rois_num = core.ops.multiclass_nms3(
bboxes, scores, rois_num, *attrs)
output, index, nms_rois_num = core.ops.multiclass_nms3(bboxes, scores,
rois_num, *attrs)
if not return_index:
index = None
return output, nms_rois_num, index
@@ -1116,10 +1115,10 @@ def matrix_nms(bboxes,
if in_dygraph_mode():
attrs = ('background_label', background_label, 'score_threshold',
score_threshold, 'post_threshold', post_threshold,
'nms_top_k', nms_top_k, 'gaussian_sigma', gaussian_sigma,
'use_gaussian', use_gaussian, 'keep_top_k', keep_top_k,
'normalized', normalized)
score_threshold, 'post_threshold', post_threshold, 'nms_top_k',
nms_top_k, 'gaussian_sigma', gaussian_sigma, 'use_gaussian',
use_gaussian, 'keep_top_k', keep_top_k, 'normalized',
normalized)
out, index, rois_num = core.ops.matrix_nms(bboxes, scores, *attrs)
if not return_index:
index = None
@@ -1503,9 +1502,9 @@ def generate_proposals(scores,
"""
if in_dygraph_mode():
assert return_rois_num, "return_rois_num should be True in dygraph mode."
attrs = ('pre_nms_topN', pre_nms_top_n, 'post_nms_topN',
post_nms_top_n, 'nms_thresh', nms_thresh, 'min_size',
min_size, 'eta', eta, 'pixel_offset', pixel_offset)
attrs = ('pre_nms_topN', pre_nms_top_n, 'post_nms_topN', post_nms_top_n,
'nms_thresh', nms_thresh, 'min_size', min_size, 'eta', eta,
'pixel_offset', pixel_offset)
rpn_rois, rpn_roi_probs, rpn_rois_num = core.ops.generate_proposals_v2(
scores, bbox_deltas, im_shape, anchors, variances, *attrs)
return rpn_rois, rpn_roi_probs, rpn_rois_num
@@ -1575,10 +1574,7 @@ def sigmoid_cross_entropy_with_logits(input,
return output
def smooth_l1(input,
label,
inside_weight=None,
outside_weight=None,
def smooth_l1(input, label, inside_weight=None, outside_weight=None,
sigma=None):
input_new = paddle.multiply(input, inside_weight)
label_new = paddle.multiply(label, inside_weight)

@@ -209,7 +209,7 @@ class MaskPostProcess(object):
# TODO: support bs > 1 and mask output dtype is bool
pred_result = paddle.zeros(
[num_mask, origin_shape[0][0], origin_shape[0][1]], dtype='int32')
if bbox_num == 1 and bboxes[0][0] == -1:
if (len(bbox_num) == 1 and bbox_num[0] == 1) and bboxes[0][0] == -1:
return pred_result
# TODO: optimize chunk paste

@@ -77,8 +77,7 @@ class AnchorGenerator(nn.Layer):
def _calculate_anchors(self, num_features):
sizes = self._broadcast_params(self.anchor_sizes, num_features)
aspect_ratios = self._broadcast_params(self.aspect_ratios,
num_features)
aspect_ratios = self._broadcast_params(self.aspect_ratios, num_features)
cell_anchors = [
self.generate_cell_anchors(s, a)
for s, a in zip(sizes, aspect_ratios)
@@ -94,10 +93,7 @@ class AnchorGenerator(nn.Layer):
shifts_x = paddle.arange(
offset * stride, grid_width * stride, step=stride, dtype='float32')
shifts_y = paddle.arange(
offset * stride,
grid_height * stride,
step=stride,
dtype='float32')
offset * stride, grid_height * stride, step=stride, dtype='float32')
shift_y, shift_x = paddle.meshgrid(shifts_y, shifts_x)
shift_x = paddle.reshape(shift_x, [-1])
shift_y = paddle.reshape(shift_y, [-1])
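For reference, the shift-grid computation this hunk reflows can be checked standalone; with stride 16 and offset 0.5, cell centers start at 8 and step by 16:

```python
import paddle

stride, offset, grid_w, grid_h = 16, 0.5, 4, 4
shifts_x = paddle.arange(offset * stride, grid_w * stride, step=stride, dtype='float32')
shifts_y = paddle.arange(offset * stride, grid_h * stride, step=stride, dtype='float32')
shift_y, shift_x = paddle.meshgrid(shifts_y, shifts_x)
print(paddle.reshape(shift_x, [-1]).numpy())
# [ 8. 24. 40. 56.  8. 24. 40. 56. ...]  one x-offset per grid cell
```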

@@ -40,14 +40,12 @@ def rpn_anchor_target(anchors,
anchors, gt_bbox, rpn_positive_overlap, rpn_negative_overlap, True,
ignore_thresh, is_crowd_i, assign_on_cpu)
# Step2: sample anchor
fg_inds, bg_inds = subsample_labels(match_labels,
rpn_batch_size_per_im,
fg_inds, bg_inds = subsample_labels(match_labels, rpn_batch_size_per_im,
rpn_fg_fraction, 0, use_random)
# Fill with the ignore label (-1), then set positive and negative labels
labels = paddle.full(match_labels.shape, -1, dtype='int32')
if bg_inds.shape[0] > 0:
labels = paddle.scatter(labels, bg_inds,
paddle.zeros_like(bg_inds))
labels = paddle.scatter(labels, bg_inds, paddle.zeros_like(bg_inds))
if fg_inds.shape[0] > 0:
labels = paddle.scatter(labels, fg_inds, paddle.ones_like(fg_inds))
# Step3: make output
@@ -261,15 +259,14 @@ def sample_bbox(matches,
paddle.ones_like(gt_classes) * num_classes,
gt_classes)
gt_classes = paddle.where(match_labels == -1,
paddle.ones_like(gt_classes) * -1,
gt_classes)
paddle.ones_like(gt_classes) * -1, gt_classes)
if is_cascade:
index = paddle.arange(matches.shape[0])
return index, gt_classes
rois_per_image = int(batch_size_per_im)
fg_inds, bg_inds = subsample_labels(gt_classes, rois_per_image,
fg_fraction, num_classes, use_random)
fg_inds, bg_inds = subsample_labels(gt_classes, rois_per_image, fg_fraction,
num_classes, use_random)
if fg_inds.shape[0] == 0 and bg_inds.shape[0] == 0:
# fake output labeled with -1 when all boxes are neither
# foreground nor background
@@ -364,9 +361,7 @@ def generate_mask_target(gt_segms, rois, labels_int32, sampled_gt_inds,
rasterize_polygons_within_box(new_segm[j], boxes[j],
resolution))
else:
results.append(
paddle.ones(
[resolution, resolution], dtype='int32'))
results.append(paddle.ones([resolution, resolution], dtype='int32'))
fg_classes = paddle.gather(labels_per_im, fg_inds)
weight = paddle.ones([fg_rois.shape[0]], dtype='float32')
@@ -484,8 +479,8 @@ def libra_sample_neg(max_overlaps,
if floor_thr > 0:
floor_set = set(
np.where(
np.logical_and(max_overlaps >= 0, max_overlaps <
floor_thr))[0])
np.logical_and(max_overlaps >= 0, max_overlaps < floor_thr))
[0])
iou_sampling_set = set(np.where(max_overlaps >= floor_thr)[0])
elif floor_thr == 0:
floor_set = set(np.where(max_overlaps == 0)[0])
@@ -614,8 +609,7 @@ def libra_sample_bbox(matches,
paddle.ones_like(gt_classes) * num_classes,
gt_classes)
gt_classes = paddle.where(match_labels == -1,
paddle.ones_like(gt_classes) * -1,
gt_classes)
paddle.ones_like(gt_classes) * -1, gt_classes)
sampled_gt_classes = paddle.gather(gt_classes, sampled_inds)
return sampled_inds, sampled_gt_classes

@@ -170,9 +170,9 @@ def get_dataset_path(path, annotation, image_dir):
if _dataset_exists(path, annotation, image_dir):
return path
logger.info(
"Dataset {} is not valid for reason above, try searching {} or "
"downloading dataset...".format(osp.realpath(path), DATASET_HOME))
logger.info("Dataset {} is not valid for reason above, try searching {} or "
"downloading dataset...".format(
osp.realpath(path), DATASET_HOME))
data_name = os.path.split(path.strip().lower())[-1]
for name, dataset in DATASETS.items():

@@ -17,4 +17,4 @@ from .mpr_predictor import MPRPredictor
from .drn_predictor import DRNPredictor
from .pan_predictor import PANPredictor
from .lesrcnn_predictor import LESRCNNPredictor
from .esrgan_predictor import ESRGANPredictor

@@ -16,7 +16,7 @@ import os
import numpy as np
from PIL import Image
import paddle
from ppgan.models.generators import DRNGenerator
from ppgan.utils.download import get_path_from_url
from ppgan.utils.logger import get_logger
@ -25,21 +25,25 @@ from .base_predictor import BasePredictor
REALSR_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/models/DRNSx4.pdparams'
class DRNPredictor(BasePredictor):
def __init__(self, output='output', weight_path=None):
self.input = input
self.output = os.path.join(output,
'DRN')  # where SR results are saved: the output path plus a folder named after the model
self.model = DRNGenerator((2, 4))  # instantiate the model
if weight_path is None:
weight_path = get_path_from_url(REALSR_WEIGHT_URL)
state_dict = paddle.load(weight_path)  # load weights
state_dict = state_dict['generator']
self.model.load_dict(state_dict)
self.model.eval()
# normalize
def norm(self, img):
img = np.array(img).transpose([2, 0, 1]).astype('float32') / 1.0
return img.astype('float32')
# denormalize
def denorm(self, img):
img = img.transpose((1, 2, 0))
@ -54,14 +58,15 @@ class DRNPredictor(BasePredictor):
elif isinstance(img, Image.Image):
ori_img = img
img = self.norm(ori_img)  # normalize the image
x = paddle.to_tensor(img[np.newaxis, ...])  # convert to a tensor
with paddle.no_grad():
out = self.model(
x
)[2]  # run inference; DRN outputs three tensors: the original low-res image, a 2x upscale, and the final result we want
pred_img = self.denorm(out.numpy()[0])  # tensor to numpy array, then denormalize
pred_img = Image.fromarray(pred_img)  # array to image
return pred_img
# input image file path
@ -70,15 +75,15 @@ class DRNPredictor(BasePredictor):
if not os.path.exists(self.output):
os.makedirs(self.output)
pred_img = self.run_image(input)  # run prediction on the input image
out_path = None
if self.output:
try:
base_name = os.path.splitext(os.path.basename(input))[0]
except:
base_name = 'result'
out_path = os.path.join(self.output, base_name + '.png')  # save path
pred_img.save(out_path)  # save the output image
logger = get_logger()
logger.info('Image saved to {}'.format(out_path))

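A minimal usage sketch of the predictor refactored above (assumes ppgan is importable; the input filename is hypothetical, and run() is expected to return the predicted image and its save path, as the sibling predictors below do):

from ppgan.apps import DRNPredictor

predictor = DRNPredictor(output='output')  # results land in output/DRN
pred_img, out_path = predictor.run('low_res.png')  # hypothetical input image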
@ -28,6 +28,7 @@ from .base_predictor import BasePredictor
SR_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/models/esrgan_x4.pdparams'
class ESRGANPredictor(BasePredictor):
def __init__(self, output='output', weight_path=None):
self.input = input
@ -83,4 +84,4 @@ class ESRGANPredictor(BasePredictor):
logger = get_logger()
logger.info('Image saved to {}'.format(out_path))
return pred_img, out_path

@ -16,7 +16,7 @@ import os
import numpy as np
from PIL import Image
import paddle
from ppgan.models.generators import LESRCNNGenerator
from ppgan.utils.download import get_path_from_url
from ppgan.utils.logger import get_logger
@ -25,21 +25,25 @@ from .base_predictor import BasePredictor
REALSR_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/models/lesrcnn_x4.pdparams'
class LESRCNNPredictor(BasePredictor):
def __init__(self, output='output', weight_path=None):
self.input = input
self.output = os.path.join(output,
'LESRCNN')  # where SR results are saved: the output path plus a folder named after the model
self.model = LESRCNNGenerator()  # instantiate the model
if weight_path is None:
weight_path = get_path_from_url(REALSR_WEIGHT_URL)
state_dict = paddle.load(weight_path)  # load weights
state_dict = state_dict['generator']
self.model.load_dict(state_dict)
self.model.eval()
# normalize
def norm(self, img):
img = np.array(img).transpose([2, 0, 1]).astype('float32') / 255.0
return img.astype('float32')
# denormalize
def denorm(self, img):
img = img.transpose((1, 2, 0))
@ -54,14 +58,13 @@ class LESRCNNPredictor(BasePredictor):
elif isinstance(img, Image.Image):
ori_img = img
img = self.norm(ori_img)  # normalize the image
x = paddle.to_tensor(img[np.newaxis, ...])  # convert to a tensor
with paddle.no_grad():
out = self.model(x)
pred_img = self.denorm(out.numpy()[0])  # tensor to numpy array, then denormalize
pred_img = Image.fromarray(pred_img)  # array to image
return pred_img
# input image file path
@ -70,16 +73,16 @@ class LESRCNNPredictor(BasePredictor):
if not os.path.exists(self.output):
os.makedirs(self.output)
pred_img = self.run_image(input)  # run prediction on the input image
out_path = None
if self.output:
try:
base_name = os.path.splitext(os.path.basename(input))[0]
except:
base_name = 'result'
out_path = os.path.join(self.output, base_name + '.png')  # save path
pred_img.save(out_path)  # save the output image
logger = get_logger()
logger.info('Image saved to {}'.format(out_path))
return pred_img, out_path

@ -13,10 +13,8 @@ def _make_encoder(backbone,
if backbone == "resnext101_wsl":
# resnext101_wsl
pretrained = _make_pretrained_resnext101_wsl(use_pretrained)
scratch = _make_scratch(
[256, 512, 1024, 2048], features, groups=groups, expand=expand)
else:
print(f"Backbone '{backbone}' not implemented")
assert False
@ -36,34 +34,38 @@ def _make_scratch(in_shape, out_shape, groups=1, expand=False):
out_shape3 = out_shape * 4
out_shape4 = out_shape * 8
scratch.layer1_rn = nn.Conv2D(
in_shape[0],
out_shape1,
kernel_size=3,
stride=1,
padding=1,
bias_attr=False,
groups=groups)
scratch.layer2_rn = nn.Conv2D(
in_shape[1],
out_shape2,
kernel_size=3,
stride=1,
padding=1,
bias_attr=False,
groups=groups)
scratch.layer3_rn = nn.Conv2D(
in_shape[2],
out_shape3,
kernel_size=3,
stride=1,
padding=1,
bias_attr=False,
groups=groups)
scratch.layer4_rn = nn.Conv2D(
in_shape[3],
out_shape4,
kernel_size=3,
stride=1,
padding=1,
bias_attr=False,
groups=groups)
return scratch
@ -89,6 +91,7 @@ def _make_pretrained_resnext101_wsl(use_pretrained):
class ResidualConvUnit(nn.Layer):
"""Residual convolution module.
"""
def __init__(self, features):
"""Init.
@ -97,19 +100,21 @@ class ResidualConvUnit(nn.Layer):
"""
super().__init__()
self.conv1 = nn.Conv2D(
features,
features,
kernel_size=3,
stride=1,
padding=1,
bias_attr=True)
self.conv2 = nn.Conv2D(
features,
features,
kernel_size=3,
stride=1,
padding=1,
bias_attr=True)
self.relu = nn.ReLU()
@ -133,6 +138,7 @@ class ResidualConvUnit(nn.Layer):
class FeatureFusionBlock(nn.Layer):
"""Feature fusion block.
"""
def __init__(self, features):
"""Init.
@ -156,9 +162,7 @@ class FeatureFusionBlock(nn.Layer):
output += self.resConfUnit1(xs[1])
output = self.resConfUnit2(output)
output = nn.functional.interpolate(
output, scale_factor=2, mode="bilinear", align_corners=True)
return output

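The fusion block reformatted above adds two feature maps, refines the sum with the residual units, and upsamples by 2x; a quick shape check of the interpolate call (shapes are illustrative):

import paddle
import paddle.nn as nn

x = paddle.rand([1, 256, 24, 24])
y = nn.functional.interpolate(
x, scale_factor=2, mode="bilinear", align_corners=True)
print(y.shape)  # [1, 256, 48, 48]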
@ -22,6 +22,7 @@ class BaseModel(paddle.nn.Layer):
class MidasNet(BaseModel):
"""Network for monocular depth estimation.
"""
def __init__(self, path=None, features=256, non_negative=True):
"""Init.
@ -47,11 +48,15 @@ class MidasNet(BaseModel):
self.scratch.refinenet1 = FeatureFusionBlock(features)
output_conv = [
nn.Conv2D(
features, 128, kernel_size=3, stride=1, padding=1),
nn.Upsample(
scale_factor=2, mode="bilinear"),
nn.Conv2D(
128, 32, kernel_size=3, stride=1, padding=1),
nn.ReLU(),
nn.Conv2D(
32, 1, kernel_size=1, stride=1, padding=0),
nn.ReLU() if non_negative else nn.Identity(),
]
if non_negative:

@ -8,6 +8,7 @@ import math
class Resize(object):
"""Resize sample to given size (width, height).
"""
def __init__(self,
width,
height,
@ -96,15 +97,15 @@ class Resize(object):
f"resize_method {self.__resize_method} not implemented")
if self.__resize_method == "lower_bound":
new_height = self.constrain_to_multiple_of(
scale_height * height, min_val=self.__height)
new_width = self.constrain_to_multiple_of(
scale_width * width, min_val=self.__width)
elif self.__resize_method == "upper_bound":
new_height = self.constrain_to_multiple_of(
scale_height * height, max_val=self.__height)
new_width = self.constrain_to_multiple_of(
scale_width * width, max_val=self.__width)
elif self.__resize_method == "minimal":
new_height = self.constrain_to_multiple_of(scale_height * height)
new_width = self.constrain_to_multiple_of(scale_width * width)
@ -122,26 +123,24 @@ class Resize(object):
sample["image"] = cv2.resize(
sample["image"],
(width, height),
interpolation=self.__image_interpolation_method, )
if self.__resize_target:
if "disparity" in sample:
sample["disparity"] = cv2.resize(
sample["disparity"],
(width, height),
interpolation=cv2.INTER_NEAREST, )
if "depth" in sample:
sample["depth"] = cv2.resize(sample["depth"], (width, height),
interpolation=cv2.INTER_NEAREST)
sample["depth"] = cv2.resize(
sample["depth"], (width, height),
interpolation=cv2.INTER_NEAREST)
sample["mask"] = cv2.resize(
sample["mask"].astype(np.float32),
(width, height),
interpolation=cv2.INTER_NEAREST, )
sample["mask"] = sample["mask"].astype(bool)
return sample
@ -150,6 +149,7 @@ class Resize(object):
class NormalizeImage(object):
"""Normlize image by given mean and std.
"""
def __init__(self, mean, std):
self.__mean = mean
self.__std = std
@ -163,6 +163,7 @@ class NormalizeImage(object):
class PrepareForNet(object):
"""Prepare sample for usage as network input.
"""
def __init__(self):
pass

@ -26,8 +26,8 @@ def write_pfm(path, image, scale=1):
if len(image.shape) == 3 and image.shape[2] == 3: # color image
color = True
elif (len(image.shape) == 2 or len(image.shape) == 3 and
image.shape[2] == 1): # greyscale
color = False
else:
raise Exception(

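The condition reformatted above accepts both (H, W) and (H, W, 1) arrays as greyscale, for example:

import numpy as np

print(len(np.zeros((4, 4)).shape) == 2)   # True: a 2-D array counts as greyscale
print(np.zeros((4, 4, 1)).shape[2] == 1)  # True: so does a single-channel 3-D array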
@ -91,10 +91,10 @@ class MPRPredictor(BasePredictor):
def get_images(self, images_path):
if os.path.isdir(images_path):
return natsorted(
glob(os.path.join(images_path, '*.jpg')) + glob(
os.path.join(images_path, '*.JPG')) + glob(
os.path.join(images_path, '*.png')) + glob(
os.path.join(images_path, '*.PNG')))
else:
return [images_path]

@ -16,7 +16,7 @@ import os
import numpy as np
from PIL import Image
import paddle
from ppgan.models.generators import PAN
from ppgan.utils.download import get_path_from_url
from ppgan.utils.logger import get_logger
@ -25,21 +25,25 @@ from .base_predictor import BasePredictor
REALSR_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/models/pan_x4.pdparams'
class PANPredictor(BasePredictor):
def __init__(self, output='output', weight_path=None):
self.input = input
self.output = os.path.join(output,
'PAN')  # where SR results are saved: the output path plus a folder named after the model
self.model = PAN(3, 3, 40, 24, 16)  # instantiate the model
if weight_path is None:
weight_path = get_path_from_url(REALSR_WEIGHT_URL)
state_dict = paddle.load(weight_path)  # load weights
state_dict = state_dict['generator']
self.model.load_dict(state_dict)
self.model.eval()
# normalize
def norm(self, img):
img = np.array(img).transpose([2, 0, 1]).astype('float32') / 255.0
return img.astype('float32')
# denormalize
def denorm(self, img):
img = img.transpose((1, 2, 0))
@ -54,14 +58,13 @@ class PANPredictor(BasePredictor):
elif isinstance(img, Image.Image):
ori_img = img
img = self.norm(ori_img)  # normalize the image
x = paddle.to_tensor(img[np.newaxis, ...])  # convert to a tensor
with paddle.no_grad():
out = self.model(x)
pred_img = self.denorm(out.numpy()[0])  # tensor to numpy array, then denormalize
pred_img = Image.fromarray(pred_img)  # array to image
return pred_img
# input image file path
@ -70,17 +73,16 @@ class PANPredictor(BasePredictor):
if not os.path.exists(self.output):
os.makedirs(self.output)
pred_img = self.run_image(input)  # run prediction on the input image
out_path = None
if self.output:
try:
base_name = os.path.splitext(os.path.basename(input))[0]
except:
base_name = 'result'
out_path = os.path.join(self.output, base_name + '.png')  # save path
pred_img.save(out_path)  # save the output image
logger = get_logger()
logger.info('Image saved to {}'.format(out_path))
return pred_img, out_path

@ -27,6 +27,7 @@ from .preprocess.builder import build_transforms
class AnimeGANV2Dataset(paddle.io.Dataset):
"""
"""
def __init__(self,
dataroot,
style,
@ -50,18 +51,14 @@ class AnimeGANV2Dataset(paddle.io.Dataset):
self.anime_root = os.path.join(self.root, f'{self.style}', 'style')
self.smooth_root = os.path.join(self.root, f'{self.style}', 'smooth')
self.real = ImageFolder(
self.real_root, transform=self.transform_real, loader=self.loader)
self.anime = ImageFolder(
self.anime_root, transform=self.transform_anime, loader=self.loader)
self.anime_gray = ImageFolder(
self.anime_root, transform=self.transform_gray, loader=self.loader)
self.smooth_gray = ImageFolder(
self.smooth_root, transform=self.transform_gray, loader=self.loader)
self.sizes = [
len(fold) for fold in [self.real, self.anime, self.smooth_gray]
]
@ -70,8 +67,9 @@ class AnimeGANV2Dataset(paddle.io.Dataset):
@staticmethod
def loader(path):
return cv2.cvtColor(
cv2.imread(
path, flags=cv2.IMREAD_COLOR), cv2.COLOR_BGR2RGB)
def reshuffle(self):
indexs = []

@ -57,9 +57,8 @@ def scandir(dir_path, suffix=None, recursive=False):
yield rel_path
else:
if recursive:
yield from _scandir(
entry.path, suffix=suffix, recursive=recursive)
else:
continue
@ -79,6 +78,7 @@ class BaseDataset(Dataset, metaclass=ABCMeta):
preprocess (list[dict]): A sequence of data preprocess config.
"""
def __init__(self, preprocess=None):
super(BaseDataset, self).__init__()

@ -23,6 +23,7 @@ from .builder import DATASETS
@DATASETS.register()
class SRDataset(BaseDataset):
"""Base super resulotion dataset for image restoration."""
def __init__(self,
lq_folder,
gt_folder,

@ -48,21 +48,24 @@ def build_dataloader(cfg, is_train=True, distributed=True):
dataset = build_dataset(cfg_)
if distributed:
sampler = DistributedBatchSampler(
dataset,
batch_size=batch_size,
shuffle=True if is_train else False,
drop_last=True if is_train else False)
dataloader = paddle.io.DataLoader(
dataset,
batch_sampler=sampler,
num_workers=num_workers,
use_shared_memory=use_shared_memory)
else:
dataloader = paddle.io.DataLoader(
dataset,
batch_size=batch_size,
shuffle=True if is_train else False,
drop_last=True if is_train else False,
use_shared_memory=use_shared_memory,
num_workers=num_workers)
return dataloader

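Both branches above produce a batched loader; in the distributed case a DistributedBatchSampler shards batches across ranks. A self-contained sketch with a stand-in dataset:

import paddle
from paddle.io import DataLoader, DistributedBatchSampler, TensorDataset

dataset = TensorDataset([paddle.rand([8, 3, 4, 4])])
sampler = DistributedBatchSampler(
dataset, batch_size=2, shuffle=True, drop_last=True)
loader = DataLoader(dataset, batch_sampler=sampler, num_workers=0)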
@ -25,6 +25,7 @@ class CommonVisionDataset(paddle.io.Dataset):
"""
Dataset for using paddle vision default datasets, such as mnist, flowers.
"""
def __init__(self,
dataset_name,
transforms=None,

@ -48,13 +48,13 @@ class FirstOrderDataset(Dataset):
file_idx_set = list(file_idx_set)
if len(file_idx_set) != 0:
if POOL_SIZE == 0:
for idx in tqdm.tqdm(
file_idx_set, desc='Extracting frames'):
_ = self.frameDataset[idx]
else:
# multiprocessing
bar = tqdm.tqdm(
total=len(file_idx_set), desc='Extracting frames')
with Pool(POOL_SIZE) as pl:
_p = 0
while _p <= len(file_idx_set) - 1:
@ -90,10 +90,10 @@ def read_video(name: Path, frame_shape=tuple([256, 256, 3]), saveto='folder'):
- folder with videos
"""
if name.is_dir():
frames = sorted(
name.iterdir(), key=lambda x: int(x.with_suffix('').name))
video_array = np.array(
[imread(path) for path in frames], dtype='float32')
return video_array
elif name.suffix.lower() in ['.gif', '.mp4', '.mov']:
try:
@ -123,7 +123,8 @@ def read_video(name: Path, frame_shape=tuple([256, 256, 3]), saveto='folder'):
except FileExistsError:
pass
for idx, img in enumerate(video_array_reshape):
cv2.imwrite(
str(sub_dir.joinpath('%i.png' % idx)), img[:, :, [2, 1, 0]])
name.unlink()
return video_array_reshape
else:
@ -138,6 +139,7 @@ class FramesDataset(Dataset):
- folder with all frames
FramesDataset[i]: obtain sample from i-th video in self.videos
"""
def __init__(self, cfg):
self.root_dir = Path(cfg['dataroot'])
self.videos = None
@ -161,8 +163,8 @@ class FramesDataset(Dataset):
else:
train_videos = list(self.root_dir.joinpath('train').iterdir())
test_videos = list(self.root_dir.joinpath('test').iterdir())
self.root_dir = self.root_dir.joinpath('train'
if self.is_train else 'test')
if self.is_train:
self.videos = train_videos
@ -184,23 +186,22 @@ class FramesDataset(Dataset):
path = self.videos[idx]
video_name = path.name
if self.is_train and path.is_dir():
frames = sorted(
path.iterdir(), key=lambda x: int(x.with_suffix('').name))
num_frames = len(frames)
frame_idx = np.sort(
np.random.choice(
num_frames, replace=True, size=2))
video_array = [imread(str(frames[idx])) for idx in frame_idx]
else:
if self.create_frames_folder:
video_array = read_video(
path, frame_shape=self.frame_shape, saveto='folder')
self.videos[idx] = path.with_suffix(
'') # rename /xx/xx/xx.gif -> /xx/xx/xx
else:
video_array = read_video(
path, frame_shape=self.frame_shape, saveto=None)
num_frames = len(video_array)
frame_idx = np.sort(
np.random.choice(
@ -220,13 +221,14 @@ class FramesDataset(Dataset):
if self.is_train:
if self.transform is not None: #modify
t = self.transform(tuple(video_array))
out['driving'] = t[0].transpose(2, 0,
1).astype(np.float32) / 255.0
out['source'] = t[1].transpose(2, 0,
1).astype(np.float32) / 255.0
else:
source = np.array(
video_array[0],
dtype='float32') / 255.0  # shape is [H, W, C]
driving = np.array(
video_array[1],
dtype='float32') / 255.0 # shape is [H, W, C]
@ -250,6 +252,7 @@ class DatasetRepeater(Dataset):
"""
Pass several times over the same dataset for better I/O performance
"""
def __init__(self, dataset, num_repeats=100):
self.dataset = dataset
self.num_repeats = num_repeats

@ -70,8 +70,8 @@ class ImageFolder(Dataset):
imgs = make_dataset(root)
if len(imgs) == 0:
raise (RuntimeError("Found 0 images in: " + root + "\n"
"Supported image extensions are: " +
",".join(IMG_EXTENSIONS)))
"Supported image extensions are: " + ",".join(
IMG_EXTENSIONS)))
self.root = root
self.imgs = imgs

@ -20,6 +20,7 @@ from .base_dataset import BaseDataset
class PairedDataset(BaseDataset):
"""A dataset class for paired image dataset.
"""
def __init__(self, dataroot, preprocess):
"""Initialize this dataset class.

@ -1,8 +1,7 @@
from .io import LoadImageFromFile, ReadImageSequence, GetNeighboringFramesIdx
from .transforms import (
PairedRandomCrop, PairedRandomHorizontalFlip, PairedRandomVerticalFlip,
PairedRandomTransposeHW, SRPairedRandomCrop, SplitPairedImage, SRNoise,
NormalizeSequence, MirrorVideoSequence, TransposeSequence)
from .builder import build_preprocess

@ -35,6 +35,7 @@ class Compose(object):
object will call each given :attr:`transforms` sequentially.
"""
def __init__(self, functions):
self.functions = functions

@ -18,6 +18,7 @@ class LoadImageFromFile(object):
`datas` dict with name of `f'ori_{key}'`. Default: False.
kwargs (dict): Args for file client.
"""
def __init__(self,
key='image',
flag=-1,
@ -74,6 +75,7 @@ class ReadImageSequence(LoadImageFromFile):
`datas` dict with name of `f'ori_{key}'`. Default: False.
kwargs (dict): Args for file client.
"""
def __call__(self, datas):
"""Call function.
@ -130,6 +132,7 @@ class GetNeighboringFramesIdx:
sequence. Default: 0.
filename_tmpl (str): Template for file name. Default: '{:08d}.png'.
"""
def __init__(self, interval_list, start_idx=0, filename_tmpl='{:08d}.png'):
self.interval_list = interval_list
self.filename_tmpl = filename_tmpl

@ -61,8 +61,8 @@ class Transforms():
data = tuple(data)
for transform in self.transforms:
data = transform(data)
if hasattr(transform, 'params') and isinstance(transform.params,
dict):
datas.update(transform.params)
if len(self.input_keys) > 1:
@ -176,6 +176,7 @@ class PairedRandomTransposeHW(T.BaseTransform):
prob (float): The probability to transpose the images.
keys (list[str]): The images to be transposed.
"""
def __init__(self, prob=0.5, keys=None):
self.keys = keys
self.prob = prob
@ -220,6 +221,7 @@ class TransposeSequence(T.Transpose):
fake_img_seq = transform(fake_img_seq)
"""
def _apply_image(self, img):
if isinstance(img, list):
imgs = []
@ -277,6 +279,7 @@ class NormalizeSequence(T.Normalize):
fake_img_seq = normalize_seq(fake_img_seq)
"""
def _apply_image(self, img):
if isinstance(img, list):
imgs = [
@ -302,6 +305,7 @@ class SRPairedRandomCrop(T.BaseTransform):
scale (int): model upscale factor.
gt_patch_size (int): cropped gt patch size.
"""
def __init__(self, scale, gt_patch_size, scale_list=False, keys=None):
self.gt_patch_size = gt_patch_size
self.scale = scale
@ -339,16 +343,16 @@ class SRPairedRandomCrop(T.BaseTransform):
]
top_gt, left_gt = int(top * scale), int(left * scale)
gt = [
v[top_gt:top_gt + self.gt_patch_size, left_gt:left_gt +
self.gt_patch_size, ...] for v in gt
]
else:
# crop lq patch
lq = lq[top:top + lq_patch_size, left:left + lq_patch_size, ...]
# crop corresponding gt patch
top_gt, left_gt = int(top * scale), int(left * scale)
gt = gt[top_gt:top_gt + self.gt_patch_size, left_gt:left_gt +
self.gt_patch_size, ...]
if self.scale_list and self.scale == 4:
lqx2 = F.resize(gt, (lq_patch_size * 2, lq_patch_size * 2),
@ -368,14 +372,14 @@ class SRNoise(T.BaseTransform):
noise_path (str): directory of noise image.
size (int): cropped noise patch size.
"""
def __init__(self, noise_path, size, keys=None):
self.noise_path = noise_path
self.noise_imgs = sorted(glob.glob(noise_path + '*.png'))
self.size = size
self.keys = keys
self.transform = T.Compose([
T.RandomCrop(size), T.Transpose(),
T.Normalize([0., 0., 0.], [255., 255., 255.])
])
@ -396,6 +400,7 @@ class RandomResizedCropProb(T.RandomResizedCrop):
prob (float): probability of using random-resized cropping.
size (int): cropped size.
"""
def __init__(self, prob, size, scale, ratio, interpolation, keys=None):
super().__init__(size, scale, ratio, interpolation)
self.prob = prob
@ -480,21 +485,14 @@ class ResizeToScale(T.BaseTransform):
@TRANSFORMS.register()
class PairedColorJitter(T.BaseTransform):
def __init__(self, brightness=0, contrast=0, saturation=0, hue=0,
keys=None):
super().__init__(keys=keys)
self.brightness = T.transforms._check_input(brightness, 'brightness')
self.contrast = T.transforms._check_input(contrast, 'contrast')
self.saturation = T.transforms._check_input(saturation, 'saturation')
self.hue = T.transforms._check_input(
hue, 'hue', center=0, bound=(-0.5, 0.5), clip_first_on_zero=False)
def _get_params(self, input):
"""Get a randomized transform to be applied on image.
@ -545,6 +543,7 @@ class MirrorVideoSequence:
Args:
keys (list[str]): The frame lists to be extended.
"""
def __init__(self, keys=None):
self.keys = keys

@ -20,6 +20,7 @@ from .builder import DATASETS
class SingleDataset(BaseDataset):
"""
"""
def __init__(self, dataroot, preprocess):
"""Initialize single dataset class.

@ -107,6 +107,7 @@ class ReferenceDataset(Dataset):
class StarGANv2Dataset(BaseDataset):
"""
"""
def __init__(self, dataroot, is_train, preprocess, test_count=0):
"""Initialize single dataset class.
@ -125,10 +126,10 @@ class StarGANv2Dataset(BaseDataset):
else:
files = os.listdir(self.dataroot)
if 'src' in files and 'ref' in files:
self.src_loader = ImageFolder(
os.path.join(self.dataroot, 'src'))
self.ref_loader = ImageFolder(
os.path.join(self.dataroot, 'ref'))
else:
self.src_loader = ImageFolder(self.dataroot)
self.ref_loader = ImageFolder(self.dataroot)

@ -23,6 +23,7 @@ from .builder import DATASETS
class UnpairedDataset(BaseDataset):
"""
"""
def __init__(self, dataroot_a, dataroot_b, max_size, is_train, preprocess):
"""Initialize unpaired dataset class.

@ -29,6 +29,7 @@ from ..utils.filesystem import makedirs, save, load
from ..utils.timer import TimeAverager
from ..utils.profiler import add_profiler_step
class IterLoader:
def __init__(self, dataloader):
self._dataloader = dataloader
@ -71,6 +72,7 @@ class Trainer:
# | ||
# save checkpoint (model.nets) \/
"""
def __init__(self, cfg):
# base config
self.logger = logging.getLogger(__name__)
@ -220,8 +222,8 @@ class Trainer:
def test(self):
if not hasattr(self, 'test_dataloader'):
self.test_dataloader = build_dataloader(
self.cfg.dataset.test, is_train=False)
iter_loader = IterLoader(self.test_dataloader)
if self.max_eval_steps is None:
self.max_eval_steps = len(self.test_dataloader)
@ -235,9 +237,8 @@ class Trainer:
for i in range(self.max_eval_steps):
if self.max_eval_steps < self.log_interval or i % self.log_interval == 0:
self.logger.info('Test iter: [%d/%d]' % (
i * self.world_size, self.max_eval_steps * self.world_size))
data = next(iter_loader)
self.model.setup_input(data)
@ -248,8 +249,8 @@ class Trainer:
current_paths = self.model.get_image_paths()
current_visuals = self.model.get_current_visuals()
if len(current_visuals) > 0 and list(current_visuals.values())[
0].shape == 4:
num_samples = list(current_visuals.values())[0].shape[0]
else:
num_samples = 1
@ -267,10 +268,11 @@ class Trainer:
else:
visual_results.update({name: img_tensor})
self.visual(
'visual_test',
visual_results=visual_results,
step=self.batch_id,
is_save_image=True)
if self.metrics:
for metric_name, metric in self.metrics.items():
@ -398,9 +400,9 @@ class Trainer:
try:
if self.by_epoch:
checkpoint_name_to_be_removed = os.path.join(
self.output_dir, 'epoch_%s_%s.pdparams' % (
(epoch - keep * self.weight_interval) //
self.iters_per_epoch, name))
else:
checkpoint_name_to_be_removed = os.path.join(
self.output_dir, 'iter_%s_%s.pdparams' %
@ -432,8 +434,8 @@ class Trainer:
for net_name, net in self.model.nets.items():
if net_name in state_dicts:
net.set_state_dict(state_dicts[net_name])
self.logger.info('Loaded pretrained weight for net {}'.format(
net_name))
else:
self.logger.warning(
'Can not find state dict of net {}. Skip load pretrained weight for net {}'

@ -56,7 +56,8 @@ def align(image, lms):
# rotation angle
left_eye_corner = lms[36]
right_eye_corner = lms[45]
radian = np.arctan((left_eye_corner[1] - right_eye_corner[1]) /
(left_eye_corner[0] - right_eye_corner[0]))
# image size after rotating
height, width, _ = image.shape
@ -73,7 +74,8 @@ def align(image, lms):
M = np.array([[cos, sin, (1 - cos) * width / 2. - sin * height / 2. + Tx],
[-sin, cos, sin * width / 2. + (1 - cos) * height / 2. + Ty]])
image_rotate = cv2.warpAffine(
image, M, (new_w, new_h), borderValue=(255, 255, 255))
landmarks = np.concatenate([lms, np.ones((lms.shape[0], 1))], axis=1)
landmarks_rotate = np.dot(M, landmarks.T).T
@ -99,7 +101,8 @@ def crop(image, lms):
top -= ((right - left) - (bottom - top)) // 2
bottom = top + (right - left)
image_crop = np.ones((bottom - top + 1, right - left + 1, 3),
np.uint8) * 255
h, w = image.shape[:2]
left_white = max(0, -left)
@ -111,5 +114,6 @@ def crop(image, lms):
bottom = min(bottom, h - 1)
bottom_white = top_white + (bottom - top)
image_crop[top_white:bottom_white + 1, left_white:right_white + 1] = image[
top:bottom + 1, left:right + 1].copy()
return image_crop

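A worked example of the rotation angle computed in align() above: the slope between the outer eye corners feeds np.arctan, and the resulting angle drives the affine matrix M (coordinates are illustrative):

import numpy as np

left_eye_corner = np.array([100., 120.])
right_eye_corner = np.array([180., 118.])
radian = np.arctan((left_eye_corner[1] - right_eye_corner[1]) /
(left_eye_corner[0] - right_eye_corner[0]))
print(np.degrees(radian))  # ~ -1.43 degrees, a slight tilt to correct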
@ -65,7 +65,8 @@ def batch_detect(net, img_batch):
ymax = pred[:, 2:3]
locs = np.concatenate((xmin, ymin, xmax, ymax), axis=1)
bboxlists.append(
np.concatenate(
(locs * orig_size + shift, scores), axis=1))
return bboxlists

@ -22,18 +22,19 @@ class BlazeBlock(nn.Layer):
padding = (kernel_size - 1) // 2
self.convs = nn.Sequential(
nn.Conv2D(
in_channels=in_channels,
out_channels=in_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
groups=in_channels),
nn.Conv2D(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=1,
stride=1,
padding=0), )
self.act = nn.ReLU()
@ -52,6 +53,7 @@ class BlazeBlock(nn.Layer):
class BlazeFace(nn.Layer):
"""The BlazeFace face detection model.
"""
def __init__(self):
super(BlazeFace, self).__init__()
@ -70,32 +72,34 @@ class BlazeFace(nn.Layer):
def _define_layers(self):
self.backbone1 = nn.Sequential(
nn.Conv2D(
in_channels=3,
out_channels=24,
kernel_size=5,
stride=2,
padding=0),
nn.ReLU(),
BlazeBlock(24, 24),
BlazeBlock(24, 28),
BlazeBlock(
28, 32, stride=2),
BlazeBlock(32, 36),
BlazeBlock(36, 42),
BlazeBlock(
42, 48, stride=2),
BlazeBlock(48, 56),
BlazeBlock(56, 64),
BlazeBlock(64, 72),
BlazeBlock(72, 80),
BlazeBlock(80, 88), )
self.backbone2 = nn.Sequential(
BlazeBlock(
88, 96, stride=2),
BlazeBlock(96, 96),
BlazeBlock(96, 96),
BlazeBlock(96, 96), )
self.classifier_8 = nn.Conv2D(88, 2, 1)
self.classifier_16 = nn.Conv2D(96, 6, 1)
@ -240,8 +244,8 @@ class BlazeFace(nn.Layer):
output_detections = []
for i in range(raw_box_tensor.shape[0]):
boxes = paddle.to_tensor(detection_boxes[i, mask[i]])
scores = paddle.to_tensor(detection_scores[i, mask[i]]).unsqueeze(
axis=-1)
output_detections.append(paddle.concat((boxes, scores), axis=-1))
return output_detections
@ -296,8 +300,8 @@ class BlazeFace(nn.Layer):
first_box = detection[:4]
other_boxes = detections[remaining, :4]
ious = overlap_similarity(
paddle.to_tensor(first_box), paddle.to_tensor(other_boxes))
mask = ious > self.min_suppression_threshold
mask = mask.numpy()

@ -95,8 +95,8 @@ def batch_detect(net, imgs):
box = batch_decode(paddle.to_tensor(loc), priors, variances)
box = box[:, 0] * 1.0
bboxlist.append(
paddle.concat([box, paddle.to_tensor(score).unsqueeze(1)], 1)
.numpy())
bboxlist = np.array(bboxlist)
if 0 == len(bboxlist):
bboxlist = np.zeros((1, BB, 5))

@ -23,8 +23,8 @@ class L2Norm(nn.Layer):
self.n_channels = n_channels
self.scale = scale
self.eps = 1e-10
self.weight = paddle.create_parameter(
shape=[self.n_channels], dtype='float32')
self.weight.set_value(paddle.zeros([self.n_channels]) + self.scale)
def forward(self, x):
@ -67,67 +67,31 @@ class s3fd(nn.Layer):
self.conv4_3_norm = L2Norm(512, scale=8)
self.conv5_3_norm = L2Norm(512, scale=5)
self.conv3_3_norm_mbox_conf = nn.Conv2D(
256, 4, kernel_size=3, stride=1, padding=1)
self.conv3_3_norm_mbox_loc = nn.Conv2D(
256, 4, kernel_size=3, stride=1, padding=1)
self.conv4_3_norm_mbox_conf = nn.Conv2D(
512, 2, kernel_size=3, stride=1, padding=1)
self.conv4_3_norm_mbox_loc = nn.Conv2D(
512, 4, kernel_size=3, stride=1, padding=1)
self.conv5_3_norm_mbox_conf = nn.Conv2D(
512, 2, kernel_size=3, stride=1, padding=1)
self.conv5_3_norm_mbox_loc = nn.Conv2D(
512, 4, kernel_size=3, stride=1, padding=1)
self.fc7_mbox_conf = nn.Conv2D(
1024, 2, kernel_size=3, stride=1, padding=1)
self.fc7_mbox_loc = nn.Conv2D(
1024, 4, kernel_size=3, stride=1, padding=1)
self.conv6_2_mbox_conf = nn.Conv2D(
512, 2, kernel_size=3, stride=1, padding=1)
self.conv6_2_mbox_loc = nn.Conv2D(
512, 4, kernel_size=3, stride=1, padding=1)
self.conv7_2_mbox_conf = nn.Conv2D(
256, 2, kernel_size=3, stride=1, padding=1)
self.conv7_2_mbox_loc = nn.Conv2D(
256, 4, kernel_size=3, stride=1, padding=1)
def forward(self, x):
h = F.relu(self.conv1_1(x))

@ -55,8 +55,8 @@ def crop(image, center, scale, resolution=256.0):
br = transform([resolution, resolution], center, scale, resolution, True)
br = br.numpy()
if image.ndim > 2:
newDim = np.array(
[br[1] - ul[1], br[0] - ul[0], image.shape[2]], dtype=np.int32)
newImg = np.zeros(newDim, dtype=np.uint8)
else:
newDim = np.array([br[1] - ul[1], br[0] - ul[0]], dtype=np.int)
@ -69,10 +69,10 @@ def crop(image, center, scale, resolution=256.0):
[max(1, -ul[1] + 1), min(br[1], ht) - ul[1]], dtype=np.int32)
oldX = np.array([max(1, ul[0] + 1), min(br[0], wd)], dtype=np.int32)
oldY = np.array([max(1, ul[1] + 1), min(br[1], ht)], dtype=np.int32)
newImg[newY[0] - 1:newY[1], newX[0] - 1:newX[1]] = image[oldY[0] - 1:oldY[
1], oldX[0] - 1:oldX[1], :]
newImg = cv2.resize(
newImg,
dsize=(int(resolution), int(resolution)),
interpolation=cv2.INTER_LINEAR)
return newImg

@ -25,11 +25,7 @@ GPEN_weights = 'https://paddlegan.bj.bcebos.com/models/GPEN-512.pdparams'
class FaceEnhancement(object):
def __init__(self, path_to_enhance=None, size=512, batch_size=1):
super(FaceEnhancement, self).__init__()
# Initialise the face detector
@ -38,17 +34,19 @@ class FaceEnhancement(object):
model_weights = paddle.load(model_weights_path)
else:
model_weights = paddle.load(path_to_enhance)
self.face_enhance = GPEN(size=512, style_dim=512, n_mlp=8)
self.face_enhance.load_dict(model_weights)
self.face_enhance.eval()
self.size = size
self.mask = np.zeros((512, 512), np.float32)
cv2.rectangle(self.mask, (26, 26), (486, 486), (1, 1, 1), -1,
cv2.LINE_AA)
self.mask = cv2.GaussianBlur(self.mask, (101, 101), 11)
self.mask = cv2.GaussianBlur(self.mask, (101, 101), 11)
self.mask = paddle.tile(
paddle.to_tensor(self.mask).unsqueeze(0).unsqueeze(-1),
repeat_times=[batch_size, 1, 1, 3]).numpy()
def enhance_from_image(self, img):
if isinstance(img, np.ndarray):
@ -65,14 +63,14 @@ class FaceEnhancement(object):
else:
assert img.shape[1:] == [3, 512, 512]
img_ori = img.transpose([0, 2, 3, 1]).numpy()
img_t = (img / 255. - 0.5) / 0.5
with paddle.no_grad():
out, __ = self.face_enhance(img_t)
image_tensor = out * 0.5 + 0.5
image_tensor = image_tensor.transpose([0, 2, 3, 1])  # RGB
image_numpy = paddle.clip(image_tensor, 0, 1) * 255.0
out = image_numpy.astype(np.uint8).cpu().numpy()
return out * self.mask + (1 - self.mask) * img_ori

@ -21,13 +21,13 @@ from paddle.utils.download import get_path_from_url
from .fcn import FCN
from .hrnet import HRNet_W18
BISENET_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/models/faceseg_FCN-HRNetW18.pdparams'
class FaceSeg:
def __init__(self):
save_pth = get_path_from_url(BISENET_WEIGHT_URL,
osp.split(osp.realpath(__file__))[0])
self.net = FCN(num_classes=2, backbone=HRNet_W18())
state_dict = paddle.load(save_pth)
@ -47,7 +47,8 @@ class FaceSeg:
return mask
def input_transform(self, image):
image_input = cv2.resize(
image, (384, 384), interpolation=cv2.INTER_AREA)
image_input = (image_input / 255.)[np.newaxis, :, :, :]
image_input = np.transpose(image_input, (0, 3, 1, 2)).astype(np.float32)
image_input = paddle.to_tensor(image_input)

@ -45,6 +45,7 @@ class LPIPSMetric(paddle.metric.Metric):
Returns:
float: lpips result.
"""
def __init__(self, net='vgg', version='0.1', mean=None, std=None):
self.net = net
self.version = version
@ -76,10 +77,10 @@ class LPIPSMetric(paddle.metric.Metric):
for pred, gt in zip(preds, gts):
pred, gt = pred.astype(np.float32) / 255., gt.astype(
np.float32) / 255.
pred = paddle.vision.transforms.normalize(
pred.transpose([2, 0, 1]), self.mean, self.std)
gt = paddle.vision.transforms.normalize(
gt.transpose([2, 0, 1]), self.mean, self.std)
with paddle.no_grad():
value = self.loss_fn(
@ -110,11 +111,13 @@ def spatial_average(in_tens, keepdim=True):
# assumes scale factor is same for H and W
def upsample(in_tens, out_HW=(64, 64)):
in_H, in_W = in_tens.shape[2], in_tens.shape[3]
scale_factor_H, scale_factor_W = 1. * out_HW[0] / in_H, 1. * out_HW[
1] / in_W
return nn.Upsample(
scale_factor=(scale_factor_H, scale_factor_W),
mode='bilinear',
align_corners=False)(in_tens)
def normalize_tensor(in_feat, eps=1e-10):
@ -143,8 +146,8 @@ class LPIPS(nn.Layer):
if (verbose):
print(
'Setting up [%s] perceptual loss: trunk [%s], v[%s], spatial [%s]'
% ('LPIPS' if lpips else 'baseline', net, version, 'on'
if spatial else 'off'))
self.pnet_type = net
self.pnet_tune = pnet_tune
@ -207,31 +210,35 @@ class LPIPS(nn.Layer):
feats0, feats1, diffs = {}, {}, {}
for kk in range(self.L):
feats0[kk], feats1[kk] = normalize_tensor(outs0[
kk]), normalize_tensor(outs1[kk])
diffs[kk] = (feats0[kk] - feats1[kk])**2
if (self.lpips):
if (self.spatial):
res = [
upsample(
self.lins[kk].model(diffs[kk]), out_HW=in0.shape[2:])
for kk in range(self.L)
]
else:
res = [
spatial_average(
self.lins[kk].model(diffs[kk]), keepdim=True)
for kk in range(self.L)
]
else:
if (self.spatial):
res = [
upsample(
diffs[kk].sum(dim=1, keepdim=True),
out_HW=in0.shape[2:]) for kk in range(self.L)
]
else:
res = [
spatial_average(
diffs[kk].sum(dim=1, keepdim=True), keepdim=True)
for kk in range(self.L)
]
val = res[0]
@ -251,8 +258,7 @@ class ScalingLayer(nn.Layer):
'shift',
paddle.to_tensor([-.030, -.088, -.188]).reshape([1, 3, 1, 1]))
self.register_buffer(
'scale', paddle.to_tensor([.458, .448, .450]).reshape([1, 3, 1, 1]))
def forward(self, inp):
return (inp - self.shift) / self.scale
@ -260,14 +266,14 @@ class ScalingLayer(nn.Layer):
class NetLinLayer(nn.Layer):
''' A single linear layer which does a 1x1 conv '''
def __init__(self, chn_in, chn_out=1, use_dropout=False):
super(NetLinLayer, self).__init__()
layers = [nn.Dropout(), ] if (use_dropout) else []
layers += [
nn.Conv2D(
chn_in, chn_out, 1, stride=1, padding=0, bias_attr=False),
]
self.model = nn.Sequential(*layers)

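The NetLinLayer above is a per-pixel linear map implemented as a 1x1 convolution; a quick shape check (the channel count is illustrative):

import paddle
import paddle.nn as nn

lin = nn.Conv2D(64, 1, 1, stride=1, padding=0, bias_attr=False)
print(lin(paddle.rand([1, 64, 8, 8])).shape)  # [1, 1, 8, 8]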
@ -170,9 +170,8 @@ def _ssim(img1, img2):
sigma2_sq = cv2.filter2D(img2**2, -1, window)[5:-5, 5:-5] - mu2_sq
sigma12 = cv2.filter2D(img1 * img2, -1, window)[5:-5, 5:-5] - mu1_mu2
ssim_map = ((2 * mu1_mu2 + C1) * (2 * sigma12 + C2)) / (
(mu1_sq + mu2_sq + C1) * (sigma1_sq + sigma2_sq + C2))
return ssim_map.mean()

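For reference, the expression reformatted above is the standard SSIM map,

SSIM(x, y) = ((2*mu1*mu2 + C1) * (2*sigma12 + C2))
/ ((mu1_sq + mu2_sq + C1) * (sigma1_sq + sigma2_sq + C2))

where, conventionally, C1 = (0.01*L)^2 and C2 = (0.03*L)^2 for dynamic range L (255 for 8-bit images).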
@ -29,6 +29,7 @@ from ..utils.filesystem import load
class AnimeGANV2Model(BaseModel):
""" This class implements the AnimeGANV2 model.
"""
def __init__(self,
generator,
discriminator=None,
@ -126,10 +127,11 @@ class AnimeGANV2Model(BaseModel):
@staticmethod
def rgb2yuv(rgb):
kernel = paddle.to_tensor(
[[0.299, -0.14714119, 0.61497538],
[0.587, -0.28886916, -0.51496512],
[0.114, 0.43601035, -0.10001026]],
dtype='float32')
rgb = paddle.transpose(rgb, (0, 2, 3, 1))
yuv = paddle.matmul(rgb, kernel)
return yuv

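A quick sanity check of the RGB-to-YUV kernel above: each column sums to (1, 0, 0), so a pure-white pixel maps to Y=1 with zero chroma:

import numpy as np

kernel = np.array([[0.299, -0.14714119, 0.61497538],
[0.587, -0.28886916, -0.51496512],
[0.114, 0.43601035, -0.10001026]])
print(np.ones(3) @ kernel)  # ~ [1. 0. 0.]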
@ -49,6 +49,7 @@ class BaseModel(ABC):
# save checkpoint (model.nets) \/
"""
def __init__(self, params=None):
"""Initialize the BaseModel class.
@ -126,8 +127,8 @@ class BaseModel(ABC):
parameters = []
for net_name in net_names:
parameters += self.nets[net_name].parameters()
self.optimizers[opt_name] = build_optimizer(cfg_, lr,
parameters)
return self.optimizers
@ -187,17 +188,15 @@ class BaseModel(ABC):
inputs_num = 0
for net in export_model:
input_spec = [
paddle.static.InputSpec(
shape=inputs_size[inputs_num + i], dtype="float32")
for i in range(net["inputs_num"])
]
inputs_num = inputs_num + net["inputs_num"]
static_model = paddle.jit.to_static(
self.nets[net["name"]], input_spec=input_spec)
if output_dir is None:
output_dir = 'inference_model'
paddle.jit.save(static_model,
os.path.join(output_dir, '{}_{}'.format(
self.__class__.__name__.lower(), net["name"])))

@ -29,6 +29,7 @@ class BasicVSRModel(BaseSRModel):
Paper: BasicVSR: The Search for Essential Components in Video Super-Resolution and Beyond, CVPR, 2021
"""
def __init__(self, generator, fix_iter, lr_mult, pixel_criterion=None):
"""Initialize the BasicVSR class.

@ -27,6 +27,7 @@ class GANLoss(nn.Layer):
The GANLoss class abstracts away the need to create the target label tensor
that has the same size as the input.
"""
def __init__(self,
gan_mode,
target_real_label=1.0,

@ -9,6 +9,7 @@ from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
from ppgan.utils.download import get_path_from_url
from .builder import CRITERIONS
class ConvBlock(nn.Layer):
def __init__(self, input_channels, output_channels, groups, name=None):
super(ConvBlock, self).__init__()
@ -63,6 +64,7 @@ class ConvBlock(nn.Layer):
x = self._pool(x)
return x
class VGG19(nn.Layer):
def __init__(self, layers=19, class_dim=1000):
super(VGG19, self).__init__()
@ -88,13 +90,13 @@ class VGG19(nn.Layer):
self._drop = Dropout(p=0.5, mode="downscale_in_infer")
self._fc1 = Linear(
7 * 7 * 512,
4096, )
self._fc2 = Linear(
4096,
4096, )
self._out = Linear(
4096,
class_dim, )
def forward(self, inputs):
features = []
@ -119,14 +121,16 @@ class VGG19(nn.Layer):
x = self._out(x)
return x, features
@CRITERIONS.register()
class PhotoPenPerceptualLoss(nn.Layer):
def __init__(
self,
crop_size,
lambda_vgg,
# pretrained='test/vgg19pretrain.pdparams',
pretrained='https://paddlegan.bj.bcebos.com/models/vgg19pretrain.pdparams',
):
super(PhotoPenPerceptualLoss, self).__init__()
self.model = VGG19()
weight_path = get_path_from_url(pretrained)
@ -136,7 +140,7 @@ class PhotoPenPerceptualLoss(nn.Layer):
self.rates = [1.0 / 32, 1.0 / 16, 1.0 / 8, 1.0 / 4, 1.0]
self.crop_size = crop_size
self.lambda_vgg = lambda_vgg
def forward(self, img_r, img_f):
img_r = F.interpolate(img_r, (self.crop_size, self.crop_size))
img_f = F.interpolate(img_f, (self.crop_size, self.crop_size))
@ -146,5 +150,5 @@ class PhotoPenPerceptualLoss(nn.Layer):
for i in range(len(feat_r)):
g_vggloss += self.rates[i] * nn.L1Loss()(feat_r[i], feat_f[i])
g_vggloss *= self.lambda_vgg
return g_vggloss

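The loss above reduces to a weighted sum of per-level L1 distances between VGG features; the same reduction in isolation, with stand-in feature tensors:

import paddle
import paddle.nn as nn

rates = [1.0 / 32, 1.0 / 16, 1.0 / 8, 1.0 / 4, 1.0]
feat_r = [paddle.rand([1, 8, 16, 16]) for _ in rates]
feat_f = [paddle.rand([1, 8, 16, 16]) for _ in rates]
g_vggloss = sum(r * nn.L1Loss()(a, b)
for r, a, b in zip(rates, feat_r, feat_f))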
@ -31,6 +31,7 @@ class L1Loss():
loss_weight (float): Loss weight for L1 loss. Default: 1.0.
"""
def __init__(self, reduction='mean', loss_weight=1.0):
# when loss weight less than zero return None
if loss_weight <= 0:
@ -59,6 +60,7 @@ class CharbonnierLoss():
eps (float): Default: 1e-12.
"""
def __init__(self, eps=1e-12, reduction='sum'):
self.eps = eps
self.reduction = reduction
@ -90,6 +92,7 @@ class MSELoss():
loss_weight (float): Loss weight for MSE loss. Default: 1.0.
"""
def __init__(self, reduction='mean', loss_weight=1.0):
# when loss weight less than zero return None
if loss_weight <= 0:
@ -119,6 +122,7 @@ class BCEWithLogitsLoss():
Supported choices are 'none' | 'mean' | 'sum'. Default: 'mean'.
loss_weight (float): Loss weight for MSE loss. Default: 1.0.
"""
def __init__(self, reduction='mean', loss_weight=1.0):
# when loss weight less than zero return None
if loss_weight <= 0:
@ -161,6 +165,7 @@ def calc_emd_loss(pred, target):
class CalcStyleEmdLoss():
"""Calc Style Emd Loss.
"""
def __init__(self):
super(CalcStyleEmdLoss, self).__init__()
@ -183,6 +188,7 @@ class CalcStyleEmdLoss():
class CalcContentReltLoss():
"""Calc Content Relt Loss.
"""
def __init__(self):
super(CalcContentReltLoss, self).__init__()
@ -207,6 +213,7 @@ class CalcContentReltLoss():
class CalcContentLoss():
"""Calc Content Loss.
"""
def __init__(self):
self.mse_loss = nn.MSELoss()
@ -221,14 +228,15 @@ class CalcContentLoss():
if (norm == False):
return self.mse_loss(pred, target)
else:
return self.mse_loss(
mean_variance_norm(pred), mean_variance_norm(target))
@CRITERIONS.register()
class CalcStyleLoss():
"""Calc Style Loss.
"""
def __init__(self):
self.mse_loss = nn.MSELoss()
@ -241,31 +249,31 @@ class CalcStyleLoss():
"""
pred_mean, pred_std = calc_mean_std(pred)
target_mean, target_std = calc_mean_std(target)
return self.mse_loss(pred_mean, target_mean) + self.mse_loss(pred_std,
target_std)
@CRITERIONS.register()
class EdgeLoss():
def __init__(self):
k = paddle.to_tensor([[.05, .25, .4, .25, .05]])
self.kernel = paddle.matmul(k.t(), k).unsqueeze(0).tile([3, 1, 1, 1])
self.loss = CharbonnierLoss()
def conv_gauss(self, img):
n_channels, _, kw, kh = self.kernel.shape
img = F.pad(img, [kw // 2, kh // 2, kw // 2, kh // 2], mode='replicate')
return F.conv2d(img, self.kernel, groups=n_channels)
def laplacian_kernel(self, current):
filtered = self.conv_gauss(current)  # filter
down = filtered[:, :, ::2, ::2]  # downsample
new_filter = paddle.zeros_like(filtered)
new_filter[:, :, ::2, ::2] = down * 4  # upsample
filtered = self.conv_gauss(new_filter)  # filter
diff = current - filtered
return diff
def __call__(self, x, y):
loss = self.loss(self.laplacian_kernel(x), self.laplacian_kernel(y))
return loss

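EdgeLoss above compares high-frequency residuals: each image is Gaussian-blurred, decimated, re-expanded, and subtracted from itself, and the two residuals are scored with the Charbonnier loss. Given the class as defined above, a usage sketch:

import paddle

x = paddle.rand([1, 3, 32, 32])
y = paddle.rand([1, 3, 32, 32])
loss = EdgeLoss()(x, y)  # Charbonnier distance between Laplacian residuals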
@ -31,6 +31,7 @@ class CycleGANModel(BaseModel):
CycleGAN paper: https://arxiv.org/pdf/1703.10593.pdf
"""
def __init__(self,
generator,
discriminator=None,

@ -28,6 +28,7 @@ class DCGANModel(BaseModel):
This class implements the DCGAN model, for learning a distribution from input images.
DCGAN paper: https://arxiv.org/pdf/1511.06434
"""
def __init__(self, generator, discriminator=None, gan_criterion=None):
"""Initialize the DCGAN class.
Args:

@ -11,34 +11,34 @@ from ...modules.utils import spectral_norm
@DISCRIMINATORS.register()
class AnimeDiscriminator(nn.Layer):
def __init__(self, channel: int=64, nblocks: int=3) -> None:
super().__init__()
channel = channel // 2
last_channel = channel
f = [
spectral_norm(
nn.Conv2D(
3, channel, 3, stride=1, padding=1, bias_attr=False)),
nn.LeakyReLU(0.2)
]
in_h = 256
for i in range(1, nblocks):
f.extend([
spectral_norm(
nn.Conv2D(
last_channel,
channel * 2,
3,
stride=2,
padding=1,
bias_attr=False)), nn.LeakyReLU(0.2), spectral_norm(
nn.Conv2D(
channel * 2,
channel * 4,
3,
stride=1,
padding=1,
bias_attr=False)), nn.GroupNorm(1, channel * 4),
nn.LeakyReLU(0.2)
])
last_channel = channel * 4
@ -49,15 +49,14 @@ class AnimeDiscriminator(nn.Layer):
self.head = nn.Sequential(*[
spectral_norm(
nn.Conv2D(
last_channel,
channel * 2,
3,
stride=1,
padding=1,
bias_attr=False)), nn.GroupNorm(1, channel * 2),
nn.LeakyReLU(0.2), spectral_norm(
nn.Conv2D(
channel * 2, 1, 3, stride=1, padding=1, bias_attr=False))
])

@ -31,6 +31,7 @@ class FirstOrderDiscriminator(nn.Layer):
loss_weights:
discriminator_gan (int): weight of discriminator loss
"""
def __init__(self, discriminator_cfg, common_params, train_params):
super(FirstOrderDiscriminator, self).__init__()
self.discriminator = MultiScaleDiscriminator(**discriminator_cfg,
@ -47,8 +48,8 @@ class FirstOrderDiscriminator(nn.Layer):
kp_driving = generated['kp_driving']
discriminator_maps_generated = self.discriminator(
pyramide_generated, kp=detach_kp(kp_driving))
discriminator_maps_real = self.discriminator(
pyramide_real, kp=detach_kp(kp_driving))
loss_values = {}
value_total = 0
@ -66,6 +67,7 @@ class DownBlock2d(nn.Layer):
"""
Simple block for processing video (encoder).
"""
def __init__(self,
in_features,
out_features,
@ -74,16 +76,15 @@ class DownBlock2d(nn.Layer):
pool=False,
sn=False):
super(DownBlock2d, self).__init__()
self.conv = nn.Conv2D(
in_features, out_features, kernel_size=kernel_size)
if sn:
self.conv = spectral_norm(self.conv)
else:
self.sn = None
if norm:
self.norm = nn.InstanceNorm2D(
num_features=out_features, epsilon=1e-05)
else:
self.norm = None
@ -117,19 +118,21 @@ class Discriminator(nn.Layer):
down_blocks = []
for i in range(num_blocks):
down_blocks.append(
DownBlock2d(num_channels + num_kp * use_kp if i == 0 else min(
max_features, block_expansion * (2**i)),
min(max_features, block_expansion * (2**(i + 1))),
norm=(i != 0),
kernel_size=4,
pool=(i != num_blocks - 1),
sn=sn))
DownBlock2d(
num_channels + num_kp * use_kp
if i == 0 else min(max_features, block_expansion * (2**i)),
min(max_features, block_expansion * (2**(i + 1))),
norm=(i != 0),
kernel_size=4,
pool=(i != num_blocks - 1),
sn=sn))
self.down_blocks = nn.LayerList(down_blocks)
self.conv = nn.Conv2D(self.down_blocks[len(self.down_blocks) -
1].conv.parameters()[0].shape[0],
1,
kernel_size=1)
self.conv = nn.Conv2D(
self.down_blocks[len(self.down_blocks) - 1].conv.parameters()[0]
.shape[0],
1,
kernel_size=1)
if sn:
self.conv = spectral_norm(self.conv)
else:
@@ -156,6 +159,7 @@ class MultiScaleDiscriminator(nn.Layer):
"""
Multi-scale (scale) discriminator
"""
def __init__(self, scales=(), **kwargs):
super(MultiScaleDiscriminator, self).__init__()
self.scales = scales

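In the `Discriminator` hunk above, the final conv's input width is read off the previous block's weight tensor. In Paddle a `Conv2D` weight has shape `[out_channels, in_channels, kH, kW]`, so `parameters()[0].shape[0]` is the preceding block's output channel count:

```python
import paddle.nn as nn

conv = nn.Conv2D(16, 32, kernel_size=4)
weight = conv.parameters()[0]  # parameters() lists the weight first, then the bias
print(weight.shape)            # [32, 16, 4, 4] -> shape[0] is out_channels
```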
@@ -25,27 +25,25 @@ class LapStyleDiscriminator(nn.Layer):
num_layer = 3
num_channel = 32
self.head = nn.Sequential(
('conv',
nn.Conv2D(3, num_channel, kernel_size=3, stride=1, padding=1)),
('norm', nn.BatchNorm2D(num_channel)),
('conv', nn.Conv2D(
3, num_channel, kernel_size=3, stride=1,
padding=1)), ('norm', nn.BatchNorm2D(num_channel)),
('LeakyRelu', nn.LeakyReLU(0.2)))
self.body = nn.Sequential()
for i in range(num_layer - 2):
self.body.add_sublayer(
'conv%d' % (i + 1),
nn.Conv2D(num_channel,
num_channel,
kernel_size=3,
stride=1,
padding=1))
nn.Conv2D(
num_channel,
num_channel,
kernel_size=3,
stride=1,
padding=1))
self.body.add_sublayer('norm%d' % (i + 1),
nn.BatchNorm2D(num_channel))
self.body.add_sublayer('LeakyRelu%d' % (i + 1), nn.LeakyReLU(0.2))
self.tail = nn.Conv2D(num_channel,
1,
kernel_size=3,
stride=1,
padding=1)
self.tail = nn.Conv2D(
num_channel, 1, kernel_size=3, stride=1, padding=1)
def forward(self, x):
x = self.head(x)

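`LapStyleDiscriminator` relies on two Paddle container idioms: `nn.Sequential` accepts `(name, layer)` tuples, and `add_sublayer` registers layers on an existing container. A minimal illustration (channel sizes are arbitrary):

```python
import paddle
import paddle.nn as nn

head = nn.Sequential(
    ('conv', nn.Conv2D(3, 32, kernel_size=3, stride=1, padding=1)),
    ('norm', nn.BatchNorm2D(32)),
    ('LeakyRelu', nn.LeakyReLU(0.2)))

body = nn.Sequential()
body.add_sublayer('conv1', nn.Conv2D(32, 32, kernel_size=3, stride=1, padding=1))
body.add_sublayer('LeakyRelu1', nn.LeakyReLU(0.2))

x = paddle.rand([1, 3, 64, 64])
print(body(head(x)).shape)  # [1, 32, 64, 64]
```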
@@ -25,44 +25,47 @@ from ppgan.utils.photopen import build_norm_layer, simam, Dict
from .builder import DISCRIMINATORS
class NLayersDiscriminator(nn.Layer):
def __init__(self, opt):
super(NLayersDiscriminator, self).__init__()
kw = 4
padw = int(np.ceil((kw - 1.0) / 2))
nf = opt.ndf
input_nc = self.compute_D_input_nc(opt)
layer_count = 0
layer = nn.Sequential(
nn.Conv2D(input_nc, nf, kw, 2, padw),
nn.GELU()
)
self.add_sublayer('block_'+str(layer_count), layer)
layer = nn.Sequential(nn.Conv2D(input_nc, nf, kw, 2, padw), nn.GELU())
self.add_sublayer('block_' + str(layer_count), layer)
layer_count += 1
feat_size_prev = np.floor((opt.crop_size + padw * 2 - (kw - 2)) / 2).astype('int64')
feat_size_prev = np.floor(
(opt.crop_size + padw * 2 - (kw - 2)) / 2).astype('int64')
InstanceNorm = build_norm_layer('instance')
for n in range(1, opt.n_layers_D):
nf_prev = nf
nf = min(nf * 2, 512)
stride = 1 if n == opt.n_layers_D - 1 else 2
feat_size = np.floor((feat_size_prev + padw * 2 - (kw - stride)) / stride).astype('int64')
feat_size = np.floor((feat_size_prev + padw * 2 - (kw - stride)) /
stride).astype('int64')
feat_size_prev = feat_size
layer = nn.Sequential(
spectral_norm(nn.Conv2D(nf_prev, nf, kw, stride, padw,
weight_attr=None,
bias_attr=None)),
spectral_norm(
nn.Conv2D(
nf_prev,
nf,
kw,
stride,
padw,
weight_attr=None,
bias_attr=None)),
InstanceNorm(nf),
nn.GELU()
)
self.add_sublayer('block_'+str(layer_count), layer)
nn.GELU())
self.add_sublayer('block_' + str(layer_count), layer)
layer_count += 1
layer = nn.Conv2D(nf, 1, kw, 1, padw)
self.add_sublayer('block_'+str(layer_count), layer)
self.add_sublayer('block_' + str(layer_count), layer)
layer_count += 1
def forward(self, input):
@@ -80,22 +83,22 @@ class NLayersDiscriminator(nn.Layer):
if not opt.no_instance:
input_nc += 1
return input_nc
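The `feat_size` bookkeeping in this file is the standard conv output-size formula, floor((n + 2p - k)/s) + 1, rewritten as floor((n + 2p - (k - s))/s). A quick sanity check against an actual layer:

```python
import numpy as np
import paddle
import paddle.nn as nn

n, kw, stride, padw = 256, 4, 2, 1
feat_size = np.floor((n + 2 * padw - (kw - stride)) / stride).astype('int64')

out = nn.Conv2D(3, 8, kw, stride, padw)(paddle.rand([1, 3, n, n]))
print(int(feat_size), out.shape[-1])  # 128 128
```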
@DISCRIMINATORS.register()
class MultiscaleDiscriminator(nn.Layer):
def __init__(self,
ndf,
num_D,
crop_size,
label_nc,
output_nc,
contain_dontcare_label,
no_instance,
n_layers_D,
):
def __init__(
self,
ndf,
num_D,
crop_size,
label_nc,
output_nc,
contain_dontcare_label,
no_instance,
n_layers_D, ):
super(MultiscaleDiscriminator, self).__init__()
opt = {
'ndf': ndf,
'num_D': num_D,
@@ -105,7 +108,6 @@ class MultiscaleDiscriminator(nn.Layer):
'contain_dontcare_label': contain_dontcare_label,
'no_instance': no_instance,
'n_layers_D': n_layers_D,
}
opt = Dict(opt)
@@ -115,16 +117,16 @@ class MultiscaleDiscriminator(nn.Layer):
feat_size = opt.crop_size
for j in range(i):
sequence += [nn.AvgPool2D(3, 2, 1)]
feat_size = np.floor((feat_size + 1 * 2 - (3 - 2)) / 2).astype('int64')
feat_size = np.floor(
(feat_size + 1 * 2 - (3 - 2)) / 2).astype('int64')
opt.crop_size = feat_size
sequence += [NLayersDiscriminator(opt)]
opt.crop_size = crop_size_bkp
sequence = nn.Sequential(*sequence)
self.add_sublayer('nld_'+str(i), sequence)
self.add_sublayer('nld_' + str(i), sequence)
def forward(self, input):
output = []
for layer in self._sub_layers.values():
output.append(layer(input))
return output
