From ff98229a3d551dfa6fbac983c7f26a36e59fa905 Mon Sep 17 00:00:00 2001
From: Jerry Jiarui XU
Date: Mon, 7 Sep 2020 19:59:44 +0800
Subject: [PATCH] [Feature] Support PointRend (#109)

* [Feature] Support PointRend

* add previous test

* update modelzoo
---
 README.md                                     |   1 +
 configs/_base_/models/pointrend_r50.py        |  56 +++
 configs/point_rend/README.md                  |  27 ++
 .../pointrend_r101_512x1024_80k_cityscapes.py |   2 +
 .../pointrend_r101_512x512_160k_ade20k.py     |   2 +
 .../pointrend_r50_512x1024_80k_cityscapes.py  |   5 +
 .../pointrend_r50_512x512_160k_ade20k.py      |  32 ++
 docs/model_zoo.md                             |  16 +
 mmseg/models/decode_heads/__init__.py         |   4 +-
 mmseg/models/decode_heads/point_head.py       | 349 ++++++++++++++++++
 tests/test_models/test_forward.py             |   5 +
 tests/test_models/test_heads.py               |  59 ++-
 12 files changed, 554 insertions(+), 4 deletions(-)
 create mode 100644 configs/_base_/models/pointrend_r50.py
 create mode 100644 configs/point_rend/README.md
 create mode 100644 configs/point_rend/pointrend_r101_512x1024_80k_cityscapes.py
 create mode 100644 configs/point_rend/pointrend_r101_512x512_160k_ade20k.py
 create mode 100644 configs/point_rend/pointrend_r50_512x1024_80k_cityscapes.py
 create mode 100644 configs/point_rend/pointrend_r50_512x512_160k_ade20k.py
 create mode 100644 mmseg/models/decode_heads/point_head.py

diff --git a/README.md b/README.md
index d0205fa6b4..14484f93aa 100644
--- a/README.md
+++ b/README.md
@@ -73,6 +73,7 @@ Supported methods:
 - [x] [OCRNet](configs/ocrnet)
 - [x] [Fast-SCNN](configs/fastscnn)
 - [x] [Semantic FPN](configs/sem_fpn)
+- [x] [PointRend](configs/point_rend)
 - [x] [EMANet](configs/emanet)
 - [x] [DNLNet](configs/dnlnet)
 - [x] [Mixed Precision (FP16) Training](configs/fp16/README.md)
diff --git a/configs/_base_/models/pointrend_r50.py b/configs/_base_/models/pointrend_r50.py
new file mode 100644
index 0000000000..1a56af3a87
--- /dev/null
+++ b/configs/_base_/models/pointrend_r50.py
@@ -0,0 +1,56 @@
+# model settings
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+    type='CascadeEncoderDecoder',
+    num_stages=2,
+    pretrained='open-mmlab://resnet50_v1c',
+    backbone=dict(
+        type='ResNetV1c',
+        depth=50,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        dilations=(1, 1, 1, 1),
+        strides=(1, 2, 2, 2),
+        norm_cfg=norm_cfg,
+        norm_eval=False,
+        style='pytorch',
+        contract_dilation=True),
+    neck=dict(
+        type='FPN',
+        in_channels=[256, 512, 1024, 2048],
+        out_channels=256,
+        num_outs=4),
+    decode_head=[
+        dict(
+            type='FPNHead',
+            in_channels=[256, 256, 256, 256],
+            in_index=[0, 1, 2, 3],
+            feature_strides=[4, 8, 16, 32],
+            channels=128,
+            dropout_ratio=-1,
+            num_classes=19,
+            norm_cfg=norm_cfg,
+            align_corners=False,
+            loss_decode=dict(
+                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+        dict(
+            type='PointHead',
+            in_channels=[256],
+            in_index=[0],
+            channels=256,
+            num_fcs=3,
+            coarse_pred_each_layer=True,
+            dropout_ratio=-1,
+            num_classes=19,
+            align_corners=False,
+            loss_decode=dict(
+                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))
+    ])
+# model training and testing settings
+train_cfg = dict(
+    num_points=2048, oversample_ratio=3, importance_sample_ratio=0.75)
+test_cfg = dict(
+    mode='whole',
+    subdivision_steps=2,
+    subdivision_num_points=8196,
+    scale_factor=2)
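The config above wires PointRend as a two-stage `CascadeEncoderDecoder`: the `FPNHead` produces coarse logits and the `PointHead` refines them. A minimal usage sketch, assuming the mmseg v0.x `Config`/`build_segmentor` API (not part of this patch):

```python
from mmcv import Config

from mmseg.models import build_segmentor

# Load a config that inherits from configs/_base_/models/pointrend_r50.py.
cfg = Config.fromfile(
    'configs/point_rend/pointrend_r50_512x1024_80k_cityscapes.py')
# In this config era train_cfg/test_cfg are top-level keys and are passed
# to the segmentor separately from the model dict.
model = build_segmentor(
    cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
```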
diff --git a/configs/point_rend/README.md b/configs/point_rend/README.md
new file mode 100644
index 0000000000..11d1d423d5
--- /dev/null
+++ b/configs/point_rend/README.md
@@ -0,0 +1,27 @@
+# PointRend: Image Segmentation as Rendering
+
+## Introduction
+```
+@misc{alex2019pointrend,
+    title={PointRend: Image Segmentation as Rendering},
+    author={Alexander Kirillov and Yuxin Wu and Kaiming He and Ross Girshick},
+    year={2019},
+    eprint={1912.08193},
+    archivePrefix={arXiv},
+    primaryClass={cs.CV}
+}
+```
+
+## Results and models
+
+### Cityscapes
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
+|-----------|----------|-----------|--------:|---------:|----------------|------:|---------------|----------|
+| PointRend | R-50 | 512x1024 | 80000 | 3.1 | 8.48 | 76.47 | 78.13 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/point_rend/pointrend_r50_512x1024_80k_cityscapes/pointrend_r50_512x1024_80k_cityscapes_20200711_015821-bb1ff523.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/point_rend/pointrend_r50_512x1024_80k_cityscapes/pointrend_r50_512x1024_80k_cityscapes-20200715_214714.log.json) |
+| PointRend | R-101 | 512x1024 | 80000 | 4.2 | 7.00 | 78.30 | 79.97 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/point_rend/pointrend_r101_512x1024_80k_cityscapes/pointrend_r101_512x1024_80k_cityscapes_20200711_170850-d0ca84be.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/point_rend/pointrend_r101_512x1024_80k_cityscapes/pointrend_r101_512x1024_80k_cityscapes-20200715_214824.log.json) |
+
+### ADE20K
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
+|-----------|----------|-----------|--------:|---------:|----------------|------:|---------------|----------|
+| PointRend | R-50 | 512x512 | 160000 | 5.1 | 17.31 | 37.64 | 39.17 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/point_rend/pointrend_r50_512x512_160k_ade20k/pointrend_r50_512x512_160k_ade20k_20200807_232644-ac3febf2.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/point_rend/pointrend_r50_512x512_160k_ade20k/pointrend_r50_512x512_160k_ade20k-20200807_232644.log.json) |
+| PointRend | R-101 | 512x512 | 160000 | 6.1 | 15.50 | 40.02 | 41.60 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/point_rend/pointrend_r101_512x512_160k_ade20k/pointrend_r101_512x512_160k_ade20k_20200808_030852-8834902a.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/point_rend/pointrend_r101_512x512_160k_ade20k/pointrend_r101_512x512_160k_ade20k-20200808_030852.log.json) |
diff --git a/configs/point_rend/pointrend_r101_512x1024_80k_cityscapes.py b/configs/point_rend/pointrend_r101_512x1024_80k_cityscapes.py
new file mode 100644
index 0000000000..a8c14c8cf9
--- /dev/null
+++ b/configs/point_rend/pointrend_r101_512x1024_80k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './pointrend_r50_512x1024_80k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
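Thanks to `_base_` inheritance, the two-line R-101 config above only swaps the pretrained weights and the backbone depth; the neck and both decode heads come from the base file. A quick sanity-check sketch, assuming mmcv's config merging:

```python
from mmcv import Config

cfg = Config.fromfile(
    'configs/point_rend/pointrend_r101_512x1024_80k_cityscapes.py')
assert cfg.model.backbone.depth == 101  # overridden by this config
assert cfg.model.decode_head[1].type == 'PointHead'  # inherited from _base_
```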
diff --git a/configs/point_rend/pointrend_r101_512x512_160k_ade20k.py b/configs/point_rend/pointrend_r101_512x512_160k_ade20k.py
new file mode 100644
index 0000000000..4d1f8c8154
--- /dev/null
+++ b/configs/point_rend/pointrend_r101_512x512_160k_ade20k.py
@@ -0,0 +1,2 @@
+_base_ = './pointrend_r50_512x512_160k_ade20k.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/point_rend/pointrend_r50_512x1024_80k_cityscapes.py b/configs/point_rend/pointrend_r50_512x1024_80k_cityscapes.py
new file mode 100644
index 0000000000..96cbaa48d6
--- /dev/null
+++ b/configs/point_rend/pointrend_r50_512x1024_80k_cityscapes.py
@@ -0,0 +1,5 @@
+_base_ = [
+    '../_base_/models/pointrend_r50.py', '../_base_/datasets/cityscapes.py',
+    '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
+]
+lr_config = dict(warmup='linear', warmup_iters=200)
diff --git a/configs/point_rend/pointrend_r50_512x512_160k_ade20k.py b/configs/point_rend/pointrend_r50_512x512_160k_ade20k.py
new file mode 100644
index 0000000000..db8c634c0f
--- /dev/null
+++ b/configs/point_rend/pointrend_r50_512x512_160k_ade20k.py
@@ -0,0 +1,32 @@
+_base_ = [
+    '../_base_/models/pointrend_r50.py', '../_base_/datasets/ade20k.py',
+    '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
+]
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(decode_head=[
+    dict(
+        type='FPNHead',
+        in_channels=[256, 256, 256, 256],
+        in_index=[0, 1, 2, 3],
+        feature_strides=[4, 8, 16, 32],
+        channels=128,
+        dropout_ratio=-1,
+        num_classes=150,
+        norm_cfg=norm_cfg,
+        align_corners=False,
+        loss_decode=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+    dict(
+        type='PointHead',
+        in_channels=[256],
+        in_index=[0],
+        channels=256,
+        num_fcs=3,
+        coarse_pred_each_layer=True,
+        dropout_ratio=-1,
+        num_classes=150,
+        align_corners=False,
+        loss_decode=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))
+])
+lr_config = dict(warmup='linear', warmup_iters=200)
diff --git a/docs/model_zoo.md b/docs/model_zoo.md
index 39cb0be6f5..404eb44832 100644
--- a/docs/model_zoo.md
+++ b/docs/model_zoo.md
@@ -89,6 +89,22 @@ Please refer to [Fast-SCNN](https://github.com/open-mmlab/mmsegmentation/blob/ma
 
 Please refer to [ResNeSt](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/resnest) for details.
 
+### Semantic FPN
+
+Please refer to [Semantic FPN](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/semfpn) for details.
+
+### PointRend
+
+Please refer to [PointRend](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/point_rend) for details.
+
+### EMANet
+
+Please refer to [EMANet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/emanet) for details.
+
+### DNLNet
+
+Please refer to [DNLNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dnlnet) for details.
+
 ### Mixed Precision (FP16) Training
 
 Please refer [Mixed Precision (FP16) Training](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fp16/README.md) for details.
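With one of the checkpoints from the tables above downloaded locally, the model can be tried out with the usual high-level helpers. A minimal sketch, assuming the mmseg v0.x API and using `demo.png` as a stand-in for any test image:

```python
from mmseg.apis import inference_segmentor, init_segmentor

config_file = 'configs/point_rend/pointrend_r50_512x1024_80k_cityscapes.py'
# Checkpoint file name taken from the model link in the Cityscapes table.
checkpoint_file = 'pointrend_r50_512x1024_80k_cityscapes_20200711_015821-bb1ff523.pth'
model = init_segmentor(config_file, checkpoint_file, device='cuda:0')
result = inference_segmentor(model, 'demo.png')  # list of per-pixel class maps
```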
diff --git a/mmseg/models/decode_heads/__init__.py b/mmseg/models/decode_heads/__init__.py
index 0730827cab..6f3217ec03 100644
--- a/mmseg/models/decode_heads/__init__.py
+++ b/mmseg/models/decode_heads/__init__.py
@@ -10,6 +10,7 @@ from .gc_head import GCHead
 from .nl_head import NLHead
 from .ocr_head import OCRHead
+from .point_head import PointHead
 from .psa_head import PSAHead
 from .psp_head import PSPHead
 from .sep_aspp_head import DepthwiseSeparableASPPHead
@@ -19,5 +20,6 @@ __all__ = [
     'FCNHead', 'PSPHead', 'ASPPHead', 'PSAHead', 'NLHead', 'GCHead', 'CCHead',
     'UPerHead', 'DepthwiseSeparableASPPHead', 'ANNHead', 'DAHead', 'OCRHead',
-    'EncHead', 'DepthwiseSeparableFCNHead', 'FPNHead', 'EMAHead', 'DNLHead'
+    'EncHead', 'DepthwiseSeparableFCNHead', 'FPNHead', 'EMAHead', 'DNLHead',
+    'PointHead'
 ]
diff --git a/mmseg/models/decode_heads/point_head.py b/mmseg/models/decode_heads/point_head.py
new file mode 100644
index 0000000000..90a23635d9
--- /dev/null
+++ b/mmseg/models/decode_heads/point_head.py
@@ -0,0 +1,349 @@
+# Modified from https://github.com/facebookresearch/detectron2/tree/master/projects/PointRend/point_head/point_head.py  # noqa
+
+import torch
+import torch.nn as nn
+from mmcv.cnn import ConvModule, normal_init
+from mmcv.ops import point_sample
+
+from mmseg.models.builder import HEADS
+from mmseg.ops import resize
+from ..losses import accuracy
+from .cascade_decode_head import BaseCascadeDecodeHead
+
+
+def calculate_uncertainty(seg_logits):
+    """Estimate uncertainty based on seg logits.
+
+    For each location of the prediction ``seg_logits`` we estimate
+    uncertainty as the difference between the top first and top second
+    predicted logits.
+
+    Args:
+        seg_logits (Tensor): Semantic segmentation logits,
+            shape (batch_size, num_classes, height, width).
+
+    Returns:
+        scores (Tensor): Uncertainty scores, where the most uncertain
+            locations have the highest uncertainty score,
+            shape (batch_size, 1, height, width).
+    """
+    top2_scores = torch.topk(seg_logits, k=2, dim=1)[0]
+    return (top2_scores[:, 1] - top2_scores[:, 0]).unsqueeze(1)
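+
+# Example: for a pixel whose top-2 logits are (3.0, 1.0) the score is
+# 1.0 - 3.0 = -2.0, while an ambiguous pixel with top-2 logits (2.0, 1.9)
+# scores -0.1. Scores are always <= 0 and values closer to zero mark more
+# uncertain points, so taking topk of these scores selects the most
+# ambiguous locations in get_points_train/get_points_test below.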
+ """ + + def __init__(self, + num_fcs=3, + coarse_pred_each_layer=True, + conv_cfg=dict(type='Conv1d'), + norm_cfg=None, + act_cfg=dict(type='ReLU', inplace=False), + **kwargs): + super(PointHead, self).__init__( + input_transform='multiple_select', + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + **kwargs) + + self.num_fcs = num_fcs + self.coarse_pred_each_layer = coarse_pred_each_layer + + fc_in_channels = sum(self.in_channels) + self.num_classes + fc_channels = self.channels + self.fcs = nn.ModuleList() + for k in range(num_fcs): + fc = ConvModule( + fc_in_channels, + fc_channels, + kernel_size=1, + stride=1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + self.fcs.append(fc) + fc_in_channels = fc_channels + fc_in_channels += self.num_classes if self.coarse_pred_each_layer \ + else 0 + self.fc_seg = nn.Conv1d( + fc_in_channels, + self.num_classes, + kernel_size=1, + stride=1, + padding=0) + if self.dropout_ratio > 0: + self.dropout = nn.Dropout(self.dropout_ratio) + delattr(self, 'conv_seg') + + def init_weights(self): + """Initialize weights of classification layer.""" + normal_init(self.fc_seg, std=0.001) + + def cls_seg(self, feat): + """Classify each pixel with fc.""" + if self.dropout is not None: + feat = self.dropout(feat) + output = self.fc_seg(feat) + return output + + def forward(self, fine_grained_point_feats, coarse_point_feats): + x = torch.cat([fine_grained_point_feats, coarse_point_feats], dim=1) + for fc in self.fcs: + x = fc(x) + if self.coarse_pred_each_layer: + x = torch.cat((x, coarse_point_feats), dim=1) + return self.cls_seg(x) + + def _get_fine_grained_point_feats(self, x, points): + """Sample from fine grained features. + + Args: + x (list[Tensor]): Feature pyramid from by neck or backbone. + points (Tensor): Point coordinates, shape (batch_size, + num_points, 2). + + Returns: + fine_grained_feats (Tensor): Sampled fine grained feature, + shape (batch_size, sum(channels of x), num_points). + """ + + fine_grained_feats_list = [ + point_sample(_, points, align_corners=self.align_corners) + for _ in x + ] + if len(fine_grained_feats_list) > 1: + fine_grained_feats = torch.cat(fine_grained_feats_list, dim=1) + else: + fine_grained_feats = fine_grained_feats_list[0] + + return fine_grained_feats + + def _get_coarse_point_feats(self, prev_output, points): + """Sample from fine grained features. + + Args: + prev_output (list[Tensor]): Prediction of previous decode head. + points (Tensor): Point coordinates, shape (batch_size, + num_points, 2). + + Returns: + coarse_feats (Tensor): Sampled coarse feature, shape (batch_size, + num_classes, num_points). + """ + + coarse_feats = point_sample( + prev_output, points, align_corners=self.align_corners) + + return coarse_feats + + def forward_train(self, inputs, prev_output, img_metas, gt_semantic_seg, + train_cfg): + """Forward function for training. + Args: + inputs (list[Tensor]): List of multi-level img features. + prev_output (Tensor): The output of previous decode head. + img_metas (list[dict]): List of image info dict where each dict + has: 'img_shape', 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:Collect`. + gt_semantic_seg (Tensor): Semantic segmentation masks + used if the architecture supports semantic segmentation task. + train_cfg (dict): The training config. 
+
+    def _get_fine_grained_point_feats(self, x, points):
+        """Sample fine-grained features at the given points.
+
+        Args:
+            x (list[Tensor]): Feature pyramid from neck or backbone.
+            points (Tensor): Point coordinates, shape (batch_size,
+                num_points, 2).
+
+        Returns:
+            fine_grained_feats (Tensor): Sampled fine-grained features,
+                shape (batch_size, sum(channels of x), num_points).
+        """
+
+        fine_grained_feats_list = [
+            point_sample(_, points, align_corners=self.align_corners)
+            for _ in x
+        ]
+        if len(fine_grained_feats_list) > 1:
+            fine_grained_feats = torch.cat(fine_grained_feats_list, dim=1)
+        else:
+            fine_grained_feats = fine_grained_feats_list[0]
+
+        return fine_grained_feats
+
+    def _get_coarse_point_feats(self, prev_output, points):
+        """Sample the coarse prediction of the previous decode head at the
+        given points.
+
+        Args:
+            prev_output (Tensor): Prediction of the previous decode head.
+            points (Tensor): Point coordinates, shape (batch_size,
+                num_points, 2).
+
+        Returns:
+            coarse_feats (Tensor): Sampled coarse features, shape
+                (batch_size, num_classes, num_points).
+        """
+
+        coarse_feats = point_sample(
+            prev_output, points, align_corners=self.align_corners)
+
+        return coarse_feats
+
+    def forward_train(self, inputs, prev_output, img_metas, gt_semantic_seg,
+                      train_cfg):
+        """Forward function for training.
+
+        Args:
+            inputs (list[Tensor]): List of multi-level img features.
+            prev_output (Tensor): The output of previous decode head.
+            img_metas (list[dict]): List of image info dict where each dict
+                has: 'img_shape', 'scale_factor', 'flip', and may also contain
+                'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
+                For details on the values of these keys see
+                `mmseg/datasets/pipelines/formatting.py:Collect`.
+            gt_semantic_seg (Tensor): Semantic segmentation masks
+                used if the architecture supports semantic segmentation task.
+            train_cfg (dict): The training config.
+
+        Returns:
+            dict[str, Tensor]: a dictionary of loss components
+        """
+        x = self._transform_inputs(inputs)
+        with torch.no_grad():
+            points = self.get_points_train(
+                prev_output, calculate_uncertainty, cfg=train_cfg)
+        fine_grained_point_feats = self._get_fine_grained_point_feats(
+            x, points)
+        coarse_point_feats = self._get_coarse_point_feats(prev_output, points)
+        point_logits = self.forward(fine_grained_point_feats,
+                                    coarse_point_feats)
+        point_label = point_sample(
+            gt_semantic_seg.float(),
+            points,
+            mode='nearest',
+            align_corners=self.align_corners)
+        point_label = point_label.squeeze(1).long()
+
+        losses = self.losses(point_logits, point_label)
+
+        return losses
+
+    def forward_test(self, inputs, prev_output, img_metas, test_cfg):
+        """Forward function for testing.
+
+        Args:
+            inputs (list[Tensor]): List of multi-level img features.
+            prev_output (Tensor): The output of previous decode head.
+            img_metas (list[dict]): List of image info dict where each dict
+                has: 'img_shape', 'scale_factor', 'flip', and may also contain
+                'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
+                For details on the values of these keys see
+                `mmseg/datasets/pipelines/formatting.py:Collect`.
+            test_cfg (dict): The testing config.
+
+        Returns:
+            Tensor: Output segmentation map.
+        """
+
+        x = self._transform_inputs(inputs)
+        refined_seg_logits = prev_output.clone()
+        for _ in range(test_cfg.subdivision_steps):
+            refined_seg_logits = resize(
+                refined_seg_logits,
+                scale_factor=test_cfg.scale_factor,
+                mode='bilinear',
+                align_corners=self.align_corners)
+            batch_size, channels, height, width = refined_seg_logits.shape
+            point_indices, points = self.get_points_test(
+                refined_seg_logits, calculate_uncertainty, cfg=test_cfg)
+            fine_grained_point_feats = self._get_fine_grained_point_feats(
+                x, points)
+            coarse_point_feats = self._get_coarse_point_feats(
+                prev_output, points)
+            point_logits = self.forward(fine_grained_point_feats,
+                                        coarse_point_feats)
+
+            point_indices = point_indices.unsqueeze(1).expand(-1, channels, -1)
+            refined_seg_logits = refined_seg_logits.reshape(
+                batch_size, channels, height * width)
+            refined_seg_logits = refined_seg_logits.scatter_(
+                2, point_indices, point_logits)
+            refined_seg_logits = refined_seg_logits.view(
+                batch_size, channels, height, width)
+
+        return refined_seg_logits
+
+    def losses(self, point_logits, point_label):
+        """Compute segmentation loss."""
+        loss = dict()
+        loss['loss_point'] = self.loss_decode(
+            point_logits, point_label, ignore_index=self.ignore_index)
+        loss['acc_point'] = accuracy(point_logits, point_label)
+        return loss
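+
+    # Inference note: with the default test_cfg (subdivision_steps=2,
+    # scale_factor=2, subdivision_num_points=8196), forward_test above
+    # upsamples the coarse logits twice; at each step only the 8196 most
+    # uncertain grid points (or height * width points, if fewer) are
+    # re-predicted by the point head and scattered back into the upsampled
+    # logit map.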
+ """ + num_points = cfg.num_points + oversample_ratio = cfg.oversample_ratio + importance_sample_ratio = cfg.importance_sample_ratio + assert oversample_ratio >= 1 + assert 0 <= importance_sample_ratio <= 1 + batch_size = seg_logits.shape[0] + num_sampled = int(num_points * oversample_ratio) + point_coords = torch.rand( + batch_size, num_sampled, 2, device=seg_logits.device) + point_logits = point_sample(seg_logits, point_coords) + # It is crucial to calculate uncertainty based on the sampled + # prediction value for the points. Calculating uncertainties of the + # coarse predictions first and sampling them for points leads to + # incorrect results. To illustrate this: assume uncertainty func( + # logits)=-abs(logits), a sampled point between two coarse + # predictions with -1 and 1 logits has 0 logits, and therefore 0 + # uncertainty value. However, if we calculate uncertainties for the + # coarse predictions first, both will have -1 uncertainty, + # and sampled point will get -1 uncertainty. + point_uncertainties = uncertainty_func(point_logits) + num_uncertain_points = int(importance_sample_ratio * num_points) + num_random_points = num_points - num_uncertain_points + idx = torch.topk( + point_uncertainties[:, 0, :], k=num_uncertain_points, dim=1)[1] + shift = num_sampled * torch.arange( + batch_size, dtype=torch.long, device=seg_logits.device) + idx += shift[:, None] + point_coords = point_coords.view(-1, 2)[idx.view(-1), :].view( + batch_size, num_uncertain_points, 2) + if num_random_points > 0: + rand_point_coords = torch.rand( + batch_size, num_random_points, 2, device=seg_logits.device) + point_coords = torch.cat((point_coords, rand_point_coords), dim=1) + return point_coords + + def get_points_test(self, seg_logits, uncertainty_func, cfg): + """Sample points for testing. + + Find ``num_points`` most uncertain points from ``uncertainty_map``. + + Args: + seg_logits (Tensor): A tensor of shape (batch_size, num_classes, + height, width) for class-specific or class-agnostic prediction. + uncertainty_func (func): uncertainty calculation function. + cfg (dict): Testing config of point head. + + Returns: + point_indices (Tensor): A tensor of shape (batch_size, num_points) + that contains indices from [0, height x width) of the most + uncertain points. + point_coords (Tensor): A tensor of shape (batch_size, num_points, + 2) that contains [0, 1] x [0, 1] normalized coordinates of the + most uncertain points from the ``height x width`` grid . 
+ """ + + num_points = cfg.subdivision_num_points + uncertainty_map = uncertainty_func(seg_logits) + batch_size, _, height, width = uncertainty_map.shape + h_step = 1.0 / height + w_step = 1.0 / width + + uncertainty_map = uncertainty_map.view(batch_size, height * width) + num_points = min(height * width, num_points) + point_indices = uncertainty_map.topk(num_points, dim=1)[1] + point_coords = torch.zeros( + batch_size, + num_points, + 2, + dtype=torch.float, + device=seg_logits.device) + point_coords[:, :, 0] = w_step / 2.0 + (point_indices % + width).float() * w_step + point_coords[:, :, 1] = h_step / 2.0 + (point_indices // + width).float() * h_step + return point_indices, point_coords diff --git a/tests/test_models/test_forward.py b/tests/test_models/test_forward.py index dbe9035103..be797a74d0 100644 --- a/tests/test_models/test_forward.py +++ b/tests/test_models/test_forward.py @@ -157,6 +157,11 @@ def test_sem_fpn_forward(): _test_encoder_decoder_forward('sem_fpn/fpn_r50_512x1024_80k_cityscapes.py') +def test_point_rend_forward(): + _test_encoder_decoder_forward( + 'point_rend/pointrend_r50_512x1024_80k_cityscapes.py') + + def test_mobilenet_v2_forward(): _test_encoder_decoder_forward( 'mobilenet_v2/pspnet_m-v2-d8_512x1024_80k_cityscapes.py') diff --git a/tests/test_models/test_heads.py b/tests/test_models/test_heads.py index b9ba02a68d..02460cbc4e 100644 --- a/tests/test_models/test_heads.py +++ b/tests/test_models/test_heads.py @@ -3,13 +3,15 @@ import pytest import torch from mmcv.cnn import ConvModule +from mmcv.utils import ConfigDict from mmcv.utils.parrots_wrapper import SyncBatchNorm from mmseg.models.decode_heads import (ANNHead, ASPPHead, CCHead, DAHead, - DepthwiseSeparableASPPHead, DNLHead, + DepthwiseSeparableASPPHead, + DepthwiseSeparableFCNHead, DNLHead, EMAHead, EncHead, FCNHead, GCHead, - NLHead, OCRHead, PSAHead, PSPHead, - UPerHead) + NLHead, OCRHead, PointHead, PSAHead, + PSPHead, UPerHead) from mmseg.models.decode_heads.decode_head import BaseDecodeHead @@ -542,6 +544,40 @@ def test_dw_aspp_head(): assert outputs.shape == (1, head.num_classes, 45, 45) +def test_sep_fcn_head(): + # test sep_fcn_head with concat_input=False + head = DepthwiseSeparableFCNHead( + in_channels=128, + channels=128, + concat_input=False, + num_classes=19, + in_index=-1, + norm_cfg=dict(type='BN', requires_grad=True, momentum=0.01)) + x = [torch.rand(2, 128, 32, 32)] + output = head(x) + assert output.shape == (2, head.num_classes, 32, 32) + assert not head.concat_input + from mmseg.ops.separable_conv_module import DepthwiseSeparableConvModule + assert isinstance(head.convs[0], DepthwiseSeparableConvModule) + assert isinstance(head.convs[1], DepthwiseSeparableConvModule) + assert head.conv_seg.kernel_size == (1, 1) + + head = DepthwiseSeparableFCNHead( + in_channels=64, + channels=64, + concat_input=True, + num_classes=19, + in_index=-1, + norm_cfg=dict(type='BN', requires_grad=True, momentum=0.01)) + x = [torch.rand(3, 64, 32, 32)] + output = head(x) + assert output.shape == (3, head.num_classes, 32, 32) + assert head.concat_input + from mmseg.ops.separable_conv_module import DepthwiseSeparableConvModule + assert isinstance(head.convs[0], DepthwiseSeparableConvModule) + assert isinstance(head.convs[1], DepthwiseSeparableConvModule) + + def test_dnl_head(): # DNL with 'embedded_gaussian' mode head = DNLHead(in_channels=32, channels=16, num_classes=19) @@ -598,3 +634,20 @@ def test_emanet_head(): head, inputs = to_cuda(head, inputs) outputs = head(inputs) assert outputs.shape == (1, 
+def test_point_head():
+
+    inputs = [torch.randn(1, 32, 45, 45)]
+    point_head = PointHead(
+        in_channels=[32], in_index=[0], channels=16, num_classes=19)
+    assert len(point_head.fcs) == 3
+    fcn_head = FCNHead(in_channels=32, channels=16, num_classes=19)
+    if torch.cuda.is_available():
+        head, inputs = to_cuda(point_head, inputs)
+        head, inputs = to_cuda(fcn_head, inputs)
+    prev_output = fcn_head(inputs)
+    test_cfg = ConfigDict(
+        subdivision_steps=2, subdivision_num_points=8196, scale_factor=2)
+    output = point_head.forward_test(inputs, prev_output, None, test_cfg)
+    assert output.shape == (1, point_head.num_classes, 180, 180)
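The new tests can be run on their own; a sketch, assuming pytest and a development install of mmsegmentation:

```python
import pytest

# Run only the PointRend-related tests added by this patch.
pytest.main([
    'tests/test_models/test_heads.py::test_point_head',
    'tests/test_models/test_forward.py::test_point_rend_forward',
])
```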