From 05a2febe84370938cd6a46cf39d7166b51e5c6af Mon Sep 17 00:00:00 2001 From: jackroos Date: Mon, 7 Dec 2020 15:41:09 +0800 Subject: [PATCH 1/3] Fix a bug of offset normalization in MSDeformAttn module. --- README.md | 14 +++++++++----- models/ops/modules/ms_deform_attn.py | 3 ++- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index f3154fa..3b767a0 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,10 @@ By [Xizhou Zhu](https://scholar.google.com/citations?user=02RXI00AAAAJ), [Weiji This repository is an official implementation of the paper [Deformable DETR: Deformable Transformers for End-to-End Object Detection](https://arxiv.org/abs/2010.04159). +## Update + +**[2020.12.07]** Fix a bug of sampling offset normalization (see [this issue](https://github.com/fundamentalvision/Deformable-DETR/issues/6)) in the MSDeformAttn module. The final accuracy on COCO is slightly improved. Code and pre-trained models has been updated. This bug only occurs in this released version but not in the original implementation used in our paper. + ## Introduction @@ -40,11 +44,11 @@ If you find Deformable DETR useful in your research, please consider citing: | DETR-DC5 | 500 | 43.3 | 22.5 | 47.3 | 61.1 | 41 |187|7000|14.0|11.4|12.4| - | | DETR-DC5 | 50 | 35.3 | 15.2 | 37.5 | 53.6 | 41 |187|700|14.0|11.4|12.4| - | | DETR-DC5+ | 50 | 36.2 | 16.3 | 39.2 | 53.9 | 41 |187|700|14.0|11.4|12.4| - | -| **Deformable DETR
(single scale)
** | 50 | 39.1 | 20.2 | 43.1 | 55.6 | 34 |78|160|3.2|27.0|42.4| [config](./configs/r50_deformable_detr_single_scale.sh)
[log](https://drive.google.com/file/d/1AiXs9dHVl1jRjS2s6rfS9c5wuaXkAZk_/view?usp=sharing)
[model](https://drive.google.com/file/d/1635D3bcdC3mRQr-ET5l5yI8eNkuy1i_2/view?usp=sharing)
| -| **Deformable DETR
(single scale, DC5)
** | 50 | 41.0 | 23.1 | 45.2 | 55.9 | 34 |128|215|4.3|22.1|29.4| [config](./configs/r50_deformable_detr_single_scale_dc5.sh)
[log](https://drive.google.com/file/d/1rDFFXkWOVupuhP9IhP-hGwAAdlFshy1E/view?usp=sharing)
[model](https://drive.google.com/file/d/1bRgqYfQpMdSqe6U56HnW9ytF9VynDTCY/view?usp=sharing)
| -| **Deformable DETR** | 50 | 43.8 | 26.3 | 46.8 | 58.2 | 40 |173|325|6.5|15.0|19.4|[config](./configs/r50_deformable_detr.sh)
[log](https://drive.google.com/file/d/1VB4vWZrF2xcU8d8PfBgtzyS5q0HbQ3xB/view?usp=sharing)
[model](https://drive.google.com/file/d/1vw1PssvHBhLQI8s5wRWDjl9yDMfqElbg/view?usp=sharing)
| -| **+ iterative bounding box refinement** | 50 | 45.8 | 28.4 | 49.0 | 61.8 | 41 |173|325|6.5|15.0|19.4|[config](./configs/r50_deformable_detr_plus_iterative_bbox_refinement.sh)
[log](https://drive.google.com/file/d/1Bp3EVhsvdUeNu4ASlBkTgNWloIkL1oi8/view?usp=sharing)
[model](https://drive.google.com/file/d/1GhgOyWdSC-rPkDI1Gz2aDaj97pC_dozl/view?usp=sharing)
| -| **++ two-stage Deformable DETR** | 50 | 46.4 | 28.6 | 49.7 | 61.3 | 41 |173|340|6.8|14.5|18.8|[config](./configs/r50_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh)
[log](https://drive.google.com/file/d/1VTfDrBxEfpwXoC1aNkN70DI9WRI_JS3z/view?usp=sharing)
[model](https://drive.google.com/file/d/1Xfsg7-glLyJOtH-Q0qt6QVlz23J6t8xT/view?usp=sharing)
| +| **Deformable DETR
(single scale)
** | 50 | 39.4 | 20.6 | 43.0 | 55.5 | 34 |78|160|3.2|27.0|42.4| [config](./configs/r50_deformable_detr_single_scale.sh)
[log](https://drive.google.com/file/d/1n3ZnZ-UAqmTUR4AZoM4qQntIDn6qCZx4/view?usp=sharing)
[model](https://drive.google.com/file/d/1WEjQ9_FgfI5sw5OZZ4ix-OKk-IJ_-SDU/view?usp=sharing)
| +| **Deformable DETR
(single scale, DC5)
** | 50 | 41.5 | 24.1 | 45.3 | 56.0 | 34 |128|215|4.3|22.1|29.4| [config](./configs/r50_deformable_detr_single_scale_dc5.sh)
[log](https://drive.google.com/file/d/1-UfTp2q4GIkJjsaMRIkQxa5k5vn8_n-B/view?usp=sharing)
[model](https://drive.google.com/file/d/1m_TgMjzH7D44fbA-c_jiBZ-xf-odxGdk/view?usp=sharing)
| +| **Deformable DETR** | 50 | 44.5 | 27.1 | 47.6 | 59.6 | 40 |173|325|6.5|15.0|19.4|[config](./configs/r50_deformable_detr.sh)
[log](https://drive.google.com/file/d/18YSLshFjc_erOLfFC-hHu4MX4iyz1Dqr/view?usp=sharing)
[model](https://drive.google.com/file/d/1nDWZWHuRwtwGden77NLM9JoWe-YisJnA/view?usp=sharing)
| +| **+ iterative bounding box refinement** | 50 | 46.2 | 28.3 | 49.2 | 61.5 | 41 |173|325|6.5|15.0|19.4|[config](./configs/r50_deformable_detr_plus_iterative_bbox_refinement.sh)
[log](https://drive.google.com/file/d/1DFNloITi1SFBWjYzvVEAI75ndwmGM1Uj/view?usp=sharing)
[model](https://drive.google.com/file/d/1JYKyRYzUH7uo9eVfDaVCiaIGZb5YTCuI/view?usp=sharing)
| +| **++ two-stage Deformable DETR** | 50 | 46.9 | 29.6 | 50.1 | 61.6 | 41 |173|340|6.8|14.5|18.8|[config](./configs/r50_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage.sh)
[log](https://drive.google.com/file/d/1ozi0wbv5-Sc5TbWt1jAuXco72vEfEtbY/view?usp=sharing)
[model](https://drive.google.com/file/d/15I03A7hNTpwuLNdfuEmW9_taZMNVssEp/view?usp=sharing)
| *Note:* diff --git a/models/ops/modules/ms_deform_attn.py b/models/ops/modules/ms_deform_attn.py index 6500e6d..663d64a 100644 --- a/models/ops/modules/ms_deform_attn.py +++ b/models/ops/modules/ms_deform_attn.py @@ -100,8 +100,9 @@ def forward(self, query, reference_points, input_flatten, input_spatial_shapes, attention_weights = F.softmax(attention_weights, -1).view(N, Len_q, self.n_heads, self.n_levels, self.n_points) # N, Len_q, n_heads, n_levels, n_points, 2 if reference_points.shape[-1] == 2: + offset_normalizer = torch.stack([input_spatial_shapes[..., 1], input_spatial_shapes[..., 0]], -1) sampling_locations = reference_points[:, :, None, :, None, :] \ - + sampling_offsets / input_spatial_shapes[None, None, None, :, None, :] + + sampling_offsets / offset_normalizer[None, None, None, :, None, :] elif reference_points.shape[-1] == 4: sampling_locations = reference_points[:, :, None, :, None, :2] \ + sampling_offsets / self.n_points * reference_points[:, :, None, :, None, 2:] * 0.5 From ecd3210c07180d9cd36827c8be657516b50613cc Mon Sep 17 00:00:00 2001 From: jackroos Date: Mon, 7 Dec 2020 16:14:14 +0800 Subject: [PATCH 2/3] Update docs. --- README.md | 8 ++++---- docs/changelog.md | 3 +++ 2 files changed, 7 insertions(+), 4 deletions(-) create mode 100644 docs/changelog.md diff --git a/README.md b/README.md index 3b767a0..5d1eee9 100644 --- a/README.md +++ b/README.md @@ -4,10 +4,6 @@ By [Xizhou Zhu](https://scholar.google.com/citations?user=02RXI00AAAAJ), [Weiji This repository is an official implementation of the paper [Deformable DETR: Deformable Transformers for End-to-End Object Detection](https://arxiv.org/abs/2010.04159). -## Update - -**[2020.12.07]** Fix a bug of sampling offset normalization (see [this issue](https://github.com/fundamentalvision/Deformable-DETR/issues/6)) in the MSDeformAttn module. The final accuracy on COCO is slightly improved. Code and pre-trained models has been updated. This bug only occurs in this released version but not in the original implementation used in our paper. - ## Introduction @@ -23,6 +19,10 @@ This repository is an official implementation of the paper [Deformable DETR: Def This project is released under the [Apache 2.0 license](./LICENSE). +## Changelog + +See [changelog.md]() for detailed logs of major changes. + ## Citing Deformable DETR If you find Deformable DETR useful in your research, please consider citing: diff --git a/docs/changelog.md b/docs/changelog.md new file mode 100644 index 0000000..1ed5e79 --- /dev/null +++ b/docs/changelog.md @@ -0,0 +1,3 @@ +## Changelog + +**[2020.12.07]** Fix a bug of sampling offset normalization (see [this issue](https://github.com/fundamentalvision/Deformable-DETR/issues/6)) in the MSDeformAttn module. The final accuracy on COCO is slightly improved. Code and pre-trained models have been updated. This bug only occurs in this released version but not in the original implementation used in our paper. \ No newline at end of file From c968389a45f387be276f810455e8171d7cf39c2e Mon Sep 17 00:00:00 2001 From: jackroos Date: Mon, 7 Dec 2020 16:17:00 +0800 Subject: [PATCH 3/3] Update docs. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5d1eee9..c9db563 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ This project is released under the [Apache 2.0 license](./LICENSE). ## Changelog -See [changelog.md]() for detailed logs of major changes. +See [changelog.md](./docs/changelog.md) for detailed logs of major changes. ## Citing Deformable DETR