Add MODEL.CLS_AGNOSTIC_BBOX_REG (default False).

When the option is enabled, the box predictors emit 2 * 4 regression outputs
instead of num_classes * 4, the box loss reads columns 4..7 for every positive
sample, and the post-processor decodes the last 4 regression columns and
repeats the decoded box once per class score column.

diff --git a/maskrcnn_benchmark/config/defaults.py b/maskrcnn_benchmark/config/defaults.py
index fce8fa161..7768d57c3 100644
--- a/maskrcnn_benchmark/config/defaults.py
+++ b/maskrcnn_benchmark/config/defaults.py
@@ -25,6 +25,7 @@
 _C.MODEL.MASK_ON = False
 _C.MODEL.DEVICE = "cuda"
 _C.MODEL.META_ARCHITECTURE = "GeneralizedRCNN"
+_C.MODEL.CLS_AGNOSTIC_BBOX_REG = False
 
 # If the WEIGHT starts with a catalog://, like :R-50, the code will look for
 # the path in paths_catalog. Else, it will use it as the specified absolute
diff --git a/maskrcnn_benchmark/modeling/roi_heads/box_head/inference.py b/maskrcnn_benchmark/modeling/roi_heads/box_head/inference.py
index 196892550..a04635673 100644
--- a/maskrcnn_benchmark/modeling/roi_heads/box_head/inference.py
+++ b/maskrcnn_benchmark/modeling/roi_heads/box_head/inference.py
@@ -17,7 +17,12 @@ class PostProcessor(nn.Module):
     """
 
     def __init__(
-        self, score_thresh=0.05, nms=0.5, detections_per_img=100, box_coder=None
+        self,
+        score_thresh=0.05,
+        nms=0.5,
+        detections_per_img=100,
+        box_coder=None,
+        cls_agnostic_bbox_reg=False
     ):
         """
         Arguments:
@@ -33,6 +38,7 @@ def __init__(
         if box_coder is None:
             box_coder = BoxCoder(weights=(10., 10., 5., 5.))
         self.box_coder = box_coder
+        self.cls_agnostic_bbox_reg = cls_agnostic_bbox_reg
 
     def forward(self, x, boxes):
         """
@@ -54,9 +60,13 @@ def forward(self, x, boxes):
         boxes_per_image = [len(box) for box in boxes]
         concat_boxes = torch.cat([a.bbox for a in boxes], dim=0)
 
+        if self.cls_agnostic_bbox_reg:
+            box_regression = box_regression[:, -4:]
         proposals = self.box_coder.decode(
             box_regression.view(sum(boxes_per_image), -1), concat_boxes
         )
+        if self.cls_agnostic_bbox_reg:
+            proposals = proposals.repeat(1, class_prob.shape[1])
 
         num_classes = class_prob.shape[1]
 
@@ -145,8 +155,13 @@ def make_roi_box_post_processor(cfg):
     score_thresh = cfg.MODEL.ROI_HEADS.SCORE_THRESH
     nms_thresh = cfg.MODEL.ROI_HEADS.NMS
     detections_per_img = cfg.MODEL.ROI_HEADS.DETECTIONS_PER_IMG
+    cls_agnostic_bbox_reg = cfg.MODEL.CLS_AGNOSTIC_BBOX_REG
 
     postprocessor = PostProcessor(
-        score_thresh, nms_thresh, detections_per_img, box_coder
+        score_thresh,
+        nms_thresh,
+        detections_per_img,
+        box_coder,
+        cls_agnostic_bbox_reg
     )
     return postprocessor
diff --git a/maskrcnn_benchmark/modeling/roi_heads/box_head/loss.py b/maskrcnn_benchmark/modeling/roi_heads/box_head/loss.py
index 2c21f6cdb..9f2771d02 100644
--- a/maskrcnn_benchmark/modeling/roi_heads/box_head/loss.py
+++ b/maskrcnn_benchmark/modeling/roi_heads/box_head/loss.py
@@ -18,7 +18,13 @@ class FastRCNNLossComputation(object):
     Also supports FPN
     """
 
-    def __init__(self, proposal_matcher, fg_bg_sampler, box_coder):
+    def __init__(
+        self,
+        proposal_matcher,
+        fg_bg_sampler,
+        box_coder,
+        cls_agnostic_bbox_reg=False
+    ):
         """
         Arguments:
             proposal_matcher (Matcher)
@@ -28,6 +34,7 @@ def __init__(self, proposal_matcher, fg_bg_sampler, box_coder):
         self.proposal_matcher = proposal_matcher
         self.fg_bg_sampler = fg_bg_sampler
         self.box_coder = box_coder
+        self.cls_agnostic_bbox_reg = cls_agnostic_bbox_reg
 
     def match_targets_to_proposals(self, proposal, target):
         match_quality_matrix = boxlist_iou(target, proposal)
@@ -143,7 +150,11 @@ def __call__(self, class_logits, box_regression):
         # advanced indexing
         sampled_pos_inds_subset = torch.nonzero(labels > 0).squeeze(1)
         labels_pos = labels[sampled_pos_inds_subset]
-        map_inds = 4 * labels_pos[:, None] + torch.tensor([0, 1, 2, 3], device=device)
+        if self.cls_agnostic_bbox_reg:
+            map_inds = torch.tensor([4, 5, 6, 7], device=device)
+        else:
+            map_inds = 4 * labels_pos[:, None] + torch.tensor(
+                [0, 1, 2, 3], device=device)
 
         box_loss = smooth_l1_loss(
             box_regression[sampled_pos_inds_subset[:, None], map_inds],
@@ -170,6 +181,13 @@ def make_roi_box_loss_evaluator(cfg):
         cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE, cfg.MODEL.ROI_HEADS.POSITIVE_FRACTION
     )
 
-    loss_evaluator = FastRCNNLossComputation(matcher, fg_bg_sampler, box_coder)
+    cls_agnostic_bbox_reg = cfg.MODEL.CLS_AGNOSTIC_BBOX_REG
+
+    loss_evaluator = FastRCNNLossComputation(
+        matcher,
+        fg_bg_sampler,
+        box_coder,
+        cls_agnostic_bbox_reg
+    )
 
     return loss_evaluator
diff --git a/maskrcnn_benchmark/modeling/roi_heads/box_head/roi_box_predictors.py b/maskrcnn_benchmark/modeling/roi_heads/box_head/roi_box_predictors.py
index e05fcbb1d..740767850 100644
--- a/maskrcnn_benchmark/modeling/roi_heads/box_head/roi_box_predictors.py
+++ b/maskrcnn_benchmark/modeling/roi_heads/box_head/roi_box_predictors.py
@@ -14,7 +14,8 @@ def __init__(self, config, pretrained=None):
         num_classes = config.MODEL.ROI_BOX_HEAD.NUM_CLASSES
         self.avgpool = nn.AvgPool2d(kernel_size=7, stride=7)
         self.cls_score = nn.Linear(num_inputs, num_classes)
-        self.bbox_pred = nn.Linear(num_inputs, num_classes * 4)
+        num_bbox_reg_classes = 2 if config.MODEL.CLS_AGNOSTIC_BBOX_REG else num_classes
+        self.bbox_pred = nn.Linear(num_inputs, num_bbox_reg_classes * 4)
 
         nn.init.normal_(self.cls_score.weight, mean=0, std=0.01)
         nn.init.constant_(self.cls_score.bias, 0)
@@ -37,7 +38,8 @@ def __init__(self, cfg):
         representation_size = cfg.MODEL.ROI_BOX_HEAD.MLP_HEAD_DIM
 
         self.cls_score = nn.Linear(representation_size, num_classes)
-        self.bbox_pred = nn.Linear(representation_size, num_classes * 4)
+        num_bbox_reg_classes = 2 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG else num_classes
+        self.bbox_pred = nn.Linear(representation_size, num_bbox_reg_classes * 4)
 
         nn.init.normal_(self.cls_score.weight, std=0.01)
         nn.init.normal_(self.bbox_pred.weight, std=0.001)