diff --git a/.dev_scripts/github/update_model_index.py b/.dev_scripts/github/update_model_index.py index b7ff4a4da2..3e673bdd2b 100755 --- a/.dev_scripts/github/update_model_index.py +++ b/.dev_scripts/github/update_model_index.py @@ -164,9 +164,11 @@ def parse_md(md_file): i += 1 # parse table - elif lines[i][0] == '|' and i + 1 < len(lines) and \ - (lines[i + 1][:3] == '| :' or lines[i + 1][:2] == '|:' - or lines[i + 1][:2] == '|-'): + elif (lines[i][0] == '|') and (i + 1 < len(lines)) and ( + lines[i + 1][:3] == '| :' or lines[i + 1][:2] == '|:' + or lines[i + 1][:2] == '|-') and ( + 'SKIP THIS TABLE' not in lines[i - 2] # for aot-gan + ): cols = [col.strip() for col in lines[i].split('|')][1:-1] config_idx = cols.index('Method') checkpoint_idx = cols.index('Download') diff --git a/README.md b/README.md index 768cd6785a..5d001f6fcb 100644 --- a/README.md +++ b/README.md @@ -127,6 +127,7 @@ Supported algorithms: - [x] [DeepFillv1](configs/inpainting/deepfillv1/README.md) (CVPR'2018) - [x] [PConv](configs/inpainting/partial_conv/README.md) (ECCV'2018) - [x] [DeepFillv2](configs/inpainting/deepfillv2/README.md) (CVPR'2019) +- [x] [AOT-GAN](configs/inpainting/AOT-GAN/README.md) (TVCG'2021) diff --git a/README_zh-CN.md b/README_zh-CN.md index 6173e67777..a2057ee419 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -125,6 +125,7 @@ pip3 install -e . 
- [x] [DeepFillv1](configs/inpainting/deepfillv1/README.md) (CVPR'2018) - [x] [PConv](configs/inpainting/partial_conv/README.md) (ECCV'2018) - [x] [DeepFillv2](configs/inpainting/deepfillv2/README.md) (CVPR'2019) +- [x] [AOT-GAN](configs/inpainting/AOT-GAN/README.md) (TVCG'2021) diff --git a/configs/inpainting/AOT-GAN/AOT-GAN_512x512_4x12_places.py b/configs/inpainting/AOT-GAN/AOT-GAN_512x512_4x12_places.py new file mode 100644 index 0000000000..9bac8319bf --- /dev/null +++ b/configs/inpainting/AOT-GAN/AOT-GAN_512x512_4x12_places.py @@ -0,0 +1,187 @@ +model = dict( + type='AOTInpaintor', + encdec=dict( + type='AOTEncoderDecoder', + encoder=dict(type='AOTEncoder'), + decoder=dict(type='AOTDecoder'), + dilation_neck=dict( + type='AOTBlockNeck', dilation_rates=(1, 2, 4, 8), num_aotblock=8)), + disc=dict( + type='SoftMaskPatchDiscriminator', + in_channels=3, + base_channels=64, + num_conv=3, + with_spectral_norm=True, + ), + loss_gan=dict( + type='GANLoss', + gan_type='smgan', + loss_weight=0.01, + ), + loss_composed_percep=dict( + type='PerceptualLoss', + vgg_type='vgg19', + layer_weights={ + '1': 1., + '6': 1., + '11': 1., + '20': 1., + '29': 1., + }, + layer_weights_style={ + '8': 1., + '17': 1., + '26': 1., + '31': 1., + }, + perceptual_weight=0.1, + style_weight=250), + loss_out_percep=True, + loss_l1_valid=dict( + type='L1Loss', + loss_weight=1., + ), + pretrained=None) + +train_cfg = dict(disc_step=1) +test_cfg = dict(metrics=['l1', 'psnr', 'ssim']) + +dataset_type = 'ImgInpaintingDataset' +input_shape = (512, 512) + +mask_root = 'data/masks' + +train_pipeline = [ + dict(type='LoadImageFromFile', key='gt_img', channel_order='rgb'), + dict( + type='LoadMask', + mask_mode='set', + mask_config=dict( + mask_list_file=f'{mask_root}/train_places_mask_list.txt', + prefix=mask_root, + io_backend='disk', + flag='unchanged', + file_client_kwargs=dict())), + dict( + type='RandomResizedCrop', + keys=['gt_img'], + crop_size=input_shape, + ), + dict(type='Flip', 
keys=['gt_img', 'mask'], direction='horizontal'), + dict( + type='Resize', + keys=['mask'], + scale=input_shape, + keep_ratio=False, + interpolation='nearest'), + dict(type='RandomRotation', keys=['mask'], degrees=(0.0, 45.0)), + dict( + type='ColorJitter', + keys=['gt_img'], + brightness=0.5, + contrast=0.5, + saturation=0.5, + hue=0.5), + dict( + type='Normalize', + keys=['gt_img'], + mean=[127.5] * 3, + std=[127.5] * 3, + to_rgb=False), + dict(type='GetMaskedImage'), + dict( + type='Collect', + keys=['gt_img', 'masked_img', 'mask'], + meta_keys=['gt_img_path']), + dict(type='ImageToTensor', keys=['gt_img', 'masked_img', 'mask']) +] + +test_pipeline = [ + dict(type='LoadImageFromFile', key='gt_img', channel_order='rgb'), + dict( + type='LoadMask', + mask_mode='set', + mask_config=dict( + mask_list_file=f'{mask_root}/mask_0.5-0.6_list.txt', + prefix=mask_root + '/mask_512', + io_backend='disk', + flag='unchanged', + file_client_kwargs=dict())), + dict( + type='RandomResizedCrop', + keys=['gt_img'], + crop_size=(512, 512), + ), + dict( + type='Normalize', + keys=['gt_img'], + mean=[127.5] * 3, + std=[127.5] * 3, + to_rgb=True), + dict(type='GetMaskedImage'), + dict( + type='Collect', + keys=['gt_img', 'masked_img', 'mask'], + meta_keys=['gt_img_path']), + dict(type='ImageToTensor', keys=['gt_img', 'masked_img', 'mask']) +] + +data_root = 'data/places365' + +data = dict( + workers_per_gpu=4, + train_dataloader=dict(samples_per_gpu=12, drop_last=True), + val_dataloader=dict(samples_per_gpu=1), + test_dataloader=dict(samples_per_gpu=1), + train=dict( + type=dataset_type, + ann_file=f'{data_root}/train_places_img_list.txt', + data_prefix=data_root, + pipeline=train_pipeline, + test_mode=False), + val=dict( + type=dataset_type, + ann_file=f'{data_root}/val_places_img_list.txt', + data_prefix=data_root, + pipeline=test_pipeline, + test_mode=True), + test=dict( + type=dataset_type, + ann_file=(f'{data_root}/val_places_img_list.txt'), + data_prefix=data_root, + 
pipeline=test_pipeline, + test_mode=True)) + +optimizers = dict( + generator=dict(type='Adam', lr=0.0001, betas=(0.0, 0.9)), + disc=dict(type='Adam', lr=0.0001, betas=(0.0, 0.9))) + +lr_config = dict(policy='Fixed', by_epoch=False) + +checkpoint_config = dict(by_epoch=False, interval=10000) +log_config = dict( + interval=100, + hooks=[ + dict(type='TextLoggerHook', by_epoch=False), + dict(type='TensorboardLoggerHook'), + dict(type='PaviLoggerHook', init_kwargs=dict(project='mmedit')) + ]) + +visual_config = dict( + type='VisualizationHook', + output_dir='visual', + interval=1000, + res_name_list=['gt_img', 'masked_img', 'fake_res', 'fake_img'], +) + +evaluation = dict(interval=50000) + +total_iters = 500002 +dist_params = dict(backend='nccl') +log_level = 'INFO' +work_dir = './workdirs/aotgan_places' +load_from = None +resume_from = None +workflow = [('train', 10000)] +exp_name = 'AOT-GAN_512x512_4x12_places' +find_unused_parameters = False diff --git a/configs/inpainting/AOT-GAN/README.md b/configs/inpainting/AOT-GAN/README.md new file mode 100644 index 0000000000..88ca892694 --- /dev/null +++ b/configs/inpainting/AOT-GAN/README.md @@ -0,0 +1,62 @@ +# AOT-GAN (TVCG'2021) + +> [AOT-GAN: Aggregated Contextual Transformations for High-Resolution Image Inpainting](https://arxiv.org/pdf/2104.01431.pdf) + + + +## Abstract + + + +State-of-the-art image inpainting approaches can suffer from generating distorted structures and blurry textures in high-resolution images (e.g., 512x512). The challenges mainly drive from (1) image content reasoning from distant contexts, and (2) fine-grained texture synthesis for a large missing region. To overcome these two challenges, we propose an enhanced GAN-based model, named Aggregated COntextual-Transformation GAN (AOT-GAN), for high-resolution image inpainting. Specifically, to enhance context reasoning, we construct the generator of AOT-GAN by stacking multiple layers of a proposed AOT block. 
The AOT blocks aggregate contextual transformations from various receptive fields, which allows capturing both informative distant image contexts and rich patterns of interest for context reasoning. For improving texture synthesis, we enhance the discriminator of AOT-GAN by training it with a tailored mask-prediction task. Such a training objective forces the discriminator to distinguish the detailed appearances of real and synthesized patches, and in turn, helps the generator synthesize clear textures. Extensive comparisons on Places2, the most challenging benchmark with 1.8 million high-resolution images of 365 complex scenes, show that our model outperforms the state-of-the-art by a significant margin in terms of FID with 38.60% relative improvement. A user study including more than 30 subjects further validates the superiority of AOT-GAN. We further evaluate the proposed AOT-GAN in practical applications, e.g., logo removal, face editing, and object removal. Results show that our model achieves promising completions in the real world. We release code and models at [this https URL](https://github.com/researchmm/AOT-GAN-for-Inpainting). + + + +
+ +
+ +## Results and models + +**Places365-Challenge** + +| Method | Mask Type | Resolution | Train Iters | Test Set | l1 error | PSNR | SSIM | Download | +| :-------------------------------------------------------------------: | :----------------: | :--------: | :---------: | :-----------: | :------: | :---: | :---: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| [AOT-GAN](/configs/inpainting/AOT-GAN/AOT-GAN_512x512_4x12_places.py) | free-form (50-60%) | 512x512 | 500k | Places365-val | 7.07 | 19.01 | 0.682 | [model](https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/mmediting/inpainting/aot_gan/AOT-GAN_512x512_4x12_places_20220509-6641441b.pth) \| [log](https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/mmediting/inpainting/aot_gan/AOT-GAN_512x512_4x12_places_20220509-6641441b.json) | + +More results for different mask area: + + + +| Metric | Mask Area | Paper Results | Reimplemented Results | +| :-------------- | :-------- | :------------ | :-------------------- | +| L1 (10^-2) | 1 – 10% | 0.55 | 0.54 | +| (lower better) | 10 – 20% | 1.19 | 1.47 | +| | 20 – 30% | 2.11 | 2.79 | +| | 30 – 40% | 3.20 | 4.38 | +| | 40 – 50% | 4.51 | 6.28 | +| | 50 – 60% | 7.07 | 10.16 | +| PSNR | 1 – 10% | 34.79 | inf | +| (higher better) | 10 – 20% | 29.49 | 31.22 | +| | 20 – 30% | 26.03 | 27.65 | +| | 30 – 40% | 23.58 | 25.06 | +| | 40 – 50% | 21.65 | 23.01 | +| | 50 – 60% | 19.01 | 20.05 | +| SSIM | 1 – 10% | 0.976 | 0.982 | +| (higher better) | 10 – 20% | 0.940 | 0.951 | +| | 20 – 30% | 0.890 | 0.911 | +| | 30 – 40% | 0.835 | 0.866 | +| | 40 – 50% | 0.773 | 0.815 | +| | 50 – 60% | 0.682 | 0.739 | + +## Citation + +```bibtex +@inproceedings{yan2021agg, + author = {Zeng, Yanhong and Fu, Jianlong and Chao, Hongyang and Guo, Baining}, + title 
= {Aggregated Contextual Transformations for High-Resolution Image Inpainting}, + booktitle = {Arxiv}, + pages={-}, + year = {2020} +} +``` diff --git a/configs/inpainting/AOT-GAN/README_zh-CN.md b/configs/inpainting/AOT-GAN/README_zh-CN.md new file mode 100644 index 0000000000..57adb392c6 --- /dev/null +++ b/configs/inpainting/AOT-GAN/README_zh-CN.md @@ -0,0 +1,58 @@ +# AOT-GAN (TVCG'2021) + +> [AOT-GAN: Aggregated Contextual Transformations for High-Resolution Image Inpainting](https://arxiv.org/pdf/2104.01431.pdf) + + + +## 摘要 + + + + + +
+ +
+ +## 结果与模型 + +**Places365-Challenge** + +| 算法 | 掩膜类型 | 分辨率 | 训练集容量 | 测试集 | l1 损失 | PSNR | SSIM | 下载 | +| :-------------------------------------------------------------------: | :----------------: | :-----: | :---: | :-----------: | :---: | :---: | :---: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| [AOT-GAN](/configs/inpainting/AOT-GAN/AOT-GAN_512x512_4x12_places.py) | free-form (50-60%) | 512x512 | 500k | Places365-val | 7.07 | 19.01 | 0.682 | [模型](https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/mmediting/inpainting/aot_gan/AOT-GAN_512x512_4x12_places_20220509-6641441b.pth) \| [日志](https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/mmediting/inpainting/aot_gan/AOT-GAN_512x512_4x12_places_20220509-6641441b.json) | + + + +| 评估指标 | 掩膜缺损 | 论文结果 | 复现结果 | +| :-------------- | :------- | :---- | :---- | +| L1 (10^-2) | 1 – 10% | 0.55 | 0.54 | +| (lower better) | 10 – 20% | 1.19 | 1.47 | +| | 20 – 30% | 2.11 | 2.79 | +| | 30 – 40% | 3.20 | 4.38 | +| | 40 – 50% | 4.51 | 6.28 | +| | 50 – 60% | 7.07 | 10.16 | +| PSNR | 1 – 10% | 34.79 | inf | +| (higher better) | 10 – 20% | 29.49 | 31.22 | +| | 20 – 30% | 26.03 | 27.65 | +| | 30 – 40% | 23.58 | 25.06 | +| | 40 – 50% | 21.65 | 23.01 | +| | 50 – 60% | 19.01 | 20.05 | +| SSIM | 1 – 10% | 0.976 | 0.982 | +| (higher better) | 10 – 20% | 0.940 | 0.951 | +| | 20 – 30% | 0.890 | 0.911 | +| | 30 – 40% | 0.835 | 0.866 | +| | 40 – 50% | 0.773 | 0.815 | +| | 50 – 60% | 0.682 | 0.739 | + +## 引用 + +```bibtex +@inproceedings{yan2021agg, + author = {Zeng, Yanhong and Fu, Jianlong and Chao, Hongyang and Guo, Baining}, + title = {Aggregated Contextual Transformations for High-Resolution Image Inpainting}, + booktitle = {Arxiv}, + pages={-}, + year = {2020} +} +``` diff --git 
a/configs/inpainting/AOT-GAN/metafile.yml b/configs/inpainting/AOT-GAN/metafile.yml new file mode 100644 index 0000000000..a433d69818 --- /dev/null +++ b/configs/inpainting/AOT-GAN/metafile.yml @@ -0,0 +1,22 @@ +Collections: +- Metadata: + Architecture: + - AOT-GAN + Name: AOT-GAN + Paper: + - https://arxiv.org/pdf/2104.01431.pdf + README: configs/inpainting/AOT-GAN/README.md +Models: +- Config: configs/inpainting/AOT-GAN/AOT-GAN_512x512_4x12_places.py + In Collection: AOT-GAN + Metadata: + Training Data: PLACES + Name: AOT-GAN_512x512_4x12_places + Results: + - Dataset: PLACES + Metrics: + PSNR: 19.01 + SSIM: 0.682 + l1 error: 7.07 + Task: Inpainting + Weights: https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/mmediting/inpainting/aot_gan/AOT-GAN_512x512_4x12_places_20220509-6641441b.pth diff --git a/mmedit/models/inpaintors/aot_inpaintor.py b/mmedit/models/inpaintors/aot_inpaintor.py index 22a9f656fc..dda12aba31 100644 --- a/mmedit/models/inpaintors/aot_inpaintor.py +++ b/mmedit/models/inpaintors/aot_inpaintor.py @@ -144,7 +144,7 @@ def forward_test(self, else: eval_results[metric_name] = self._eval_metrics[ metric_name]()(data_dict).item() - output['eval_results'] = eval_results + output['eval_result'] = eval_results else: output['fake_res'] = fake_res output['fake_img'] = fake_img diff --git a/model-index.yml b/model-index.yml index 7b0d0ca2e6..396c61105b 100644 --- a/model-index.yml +++ b/model-index.yml @@ -1,4 +1,5 @@ Import: +- configs/inpainting/AOT-GAN/metafile.yml - configs/inpainting/deepfillv1/metafile.yml - configs/inpainting/deepfillv2/metafile.yml - configs/inpainting/global_local/metafile.yml diff --git a/tests/test_models/test_inpaintors/test_aot_inpaintor.py b/tests/test_models/test_inpaintors/test_aot_inpaintor.py index e7c23c3fd6..650e5ee864 100644 --- a/tests/test_models/test_inpaintors/test_aot_inpaintor.py +++ b/tests/test_models/test_inpaintors/test_aot_inpaintor.py @@ -39,10 +39,10 @@ def test_aot_inpaintor(): inpaintor data_batch = 
dict(gt_img=gt_img, mask=mask, masked_img=masked_img) output = inpaintor.forward_test(**data_batch) - assert 'eval_results' in output + assert 'eval_result' in output output = inpaintor.val_step(data_batch) - assert 'eval_results' in output + assert 'eval_result' in output optim_g = torch.optim.SGD(inpaintor.generator.parameters(), lr=0.1) optim_d = torch.optim.SGD(inpaintor.disc.parameters(), lr=0.1) @@ -60,10 +60,10 @@ def test_aot_inpaintor(): # test forward test w/o save image outputs = inpaintor.forward_test( masked_img[0:1], mask[0:1], gt_img=gt_img[0:1, ...]) - assert 'eval_results' in outputs - assert outputs['eval_results']['l1'] > 0 - assert outputs['eval_results']['psnr'] > 0 - assert outputs['eval_results']['ssim'] > 0 + assert 'eval_result' in outputs + assert outputs['eval_result']['l1'] > 0 + assert outputs['eval_result']['psnr'] > 0 + assert outputs['eval_result']['ssim'] > 0 # test forward test w/o eval metrics inpaintor.test_cfg = dict() @@ -131,10 +131,10 @@ def test_aot_inpaintor(): inpaintor.cuda() data_batch = dict(gt_img=gt_img, mask=mask, masked_img=masked_img) output = inpaintor.forward_test(**data_batch) - assert 'eval_results' in output + assert 'eval_result' in output output = inpaintor.val_step(data_batch) - assert 'eval_results' in output + assert 'eval_result' in output optim_g = torch.optim.SGD(inpaintor.generator.parameters(), lr=0.1) optim_d = torch.optim.SGD(inpaintor.disc.parameters(), lr=0.1) @@ -152,10 +152,10 @@ def test_aot_inpaintor(): # test forward test w/o save image outputs = inpaintor.forward_test( masked_img[0:1], mask[0:1], gt_img=gt_img[0:1, ...]) - assert 'eval_results' in outputs - assert outputs['eval_results']['l1'] > 0 - assert outputs['eval_results']['psnr'] > 0 - assert outputs['eval_results']['ssim'] > 0 + assert 'eval_result' in outputs + assert outputs['eval_result']['l1'] > 0 + assert outputs['eval_result']['psnr'] > 0 + assert outputs['eval_result']['ssim'] > 0 # test forward test w/o eval metrics 
inpaintor.test_cfg = dict()