diff --git a/maskrcnn_benchmark/config/defaults.py b/maskrcnn_benchmark/config/defaults.py
index 65fbdaddd..6cb0b0ae6 100644
--- a/maskrcnn_benchmark/config/defaults.py
+++ b/maskrcnn_benchmark/config/defaults.py
@@ -62,6 +62,8 @@
 _C.INPUT.VERTICAL_FLIP_PROB_TRAIN = 0.0
 
+_C.INPUT.TRAIN_WITH_BACKGROUND = False
+
 # -----------------------------------------------------------------------------
 # Dataset
 # -----------------------------------------------------------------------------
 _C.DATASETS = CN()
diff --git a/maskrcnn_benchmark/data/build.py b/maskrcnn_benchmark/data/build.py
index 26239155d..1dde47fc4 100644
--- a/maskrcnn_benchmark/data/build.py
+++ b/maskrcnn_benchmark/data/build.py
@@ -15,7 +15,7 @@
 from .transforms import build_transforms
 
 
-def build_dataset(dataset_list, transforms, dataset_catalog, is_train=True):
+def build_dataset(dataset_list, transforms, dataset_catalog, is_train=True, train_with_background=False):
     """
     Arguments:
         dataset_list (list[str]): Contains the names of the datasets, i.e.,
@@ -37,7 +37,10 @@ def build_dataset(dataset_list, transforms, dataset_catalog, is_train=True):
         # for COCODataset, we want to remove images without annotations
         # during training
         if data["factory"] == "COCODataset":
-            args["remove_images_without_annotations"] = is_train
+            if is_train:
+                args["remove_images_without_annotations"] = not train_with_background
+            else:
+                args["remove_images_without_annotations"] = False
         if data["factory"] == "PascalVOCDataset":
             args["use_difficult"] = not is_train
         args["transforms"] = transforms
@@ -153,7 +156,7 @@ def make_data_loader(cfg, is_train=True, is_distributed=False, start_iter=0):
 
     # If bbox aug is enabled in testing, simply set transforms to None and we will apply transforms later
     transforms = None if not is_train and cfg.TEST.BBOX_AUG.ENABLED else build_transforms(cfg, is_train)
-    datasets = build_dataset(dataset_list, transforms, DatasetCatalog, is_train)
+    datasets = build_dataset(dataset_list, transforms, DatasetCatalog, is_train, cfg.INPUT.TRAIN_WITH_BACKGROUND)
 
     if is_train:
         # save category_id to label name mapping
diff --git a/maskrcnn_benchmark/data/datasets/coco.py b/maskrcnn_benchmark/data/datasets/coco.py
index cc10f29d1..78d1f45e6 100644
--- a/maskrcnn_benchmark/data/datasets/coco.py
+++ b/maskrcnn_benchmark/data/datasets/coco.py
@@ -85,6 +85,10 @@ def __getitem__(self, idx):
             masks = [obj["segmentation"] for obj in anno]
             masks = SegmentationMask(masks, img.size, mode='poly')
             target.add_field("masks", masks)
+        else:
+            # no annotations: still attach an (empty) masks field
+            masks = SegmentationMask([], img.size, mode='poly')
+            target.add_field("masks", masks)
 
         if anno and "keypoints" in anno[0]:
             keypoints = [obj["keypoints"] for obj in anno]
diff --git a/maskrcnn_benchmark/engine/trainer.py b/maskrcnn_benchmark/engine/trainer.py
index 560b63e1c..097bfb55b 100644
--- a/maskrcnn_benchmark/engine/trainer.py
+++ b/maskrcnn_benchmark/engine/trainer.py
@@ -57,8 +57,6 @@ def do_train(
     for iteration, (images, targets, _) in enumerate(data_loader, start_iter):
 
-        if any(len(target) < 1 for target in targets):
-            logger.error(f"Iteration={iteration + 1} || Image Ids used for training {_} || targets Length={[len(target) for target in targets]}" )
-            continue
+        # images with empty targets are no longer skipped: they train as pure negatives
         data_time = time.time() - end
         iteration = iteration + 1
         arguments["iteration"] = iteration
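
Note: taken together, the four hunks above make annotation-free (background-only) COCO images eligible for training when the new flag is switched on. A minimal sketch of enabling it from Python, assuming the stock yacs config object (`merge_from_list` mirrors what the training script does with command-line overrides):

```python
# Sketch: enable background-image training (the override list is illustrative).
from maskrcnn_benchmark.config import cfg

cfg.merge_from_list(["INPUT.TRAIN_WITH_BACKGROUND", True])
# build_dataset() now receives train_with_background=True, so COCODataset is
# built with remove_images_without_annotations=False and keeps images whose
# target BoxList is empty.
```
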
diff --git a/maskrcnn_benchmark/layers/smooth_l1_loss.py b/maskrcnn_benchmark/layers/smooth_l1_loss.py
index 9c4664bb4..ddbc074dc 100644
--- a/maskrcnn_benchmark/layers/smooth_l1_loss.py
+++ b/maskrcnn_benchmark/layers/smooth_l1_loss.py
@@ -8,6 +8,8 @@ def smooth_l1_loss(input, target, beta=1. / 9, size_average=True):
     very similar to the smooth_l1_loss from pytorch, but with
     the extra beta parameter
     """
+    if input.numel() == 0:
+        size_average = False  # mean() of an empty tensor is NaN; sum() yields 0
     n = torch.abs(input - target)
     cond = n < beta
     loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta)
diff --git a/maskrcnn_benchmark/modeling/matcher.py b/maskrcnn_benchmark/modeling/matcher.py
index 35ec5f1fe..92331b97a 100644
--- a/maskrcnn_benchmark/modeling/matcher.py
+++ b/maskrcnn_benchmark/modeling/matcher.py
@@ -53,9 +53,10 @@ def __call__(self, match_quality_matrix):
         if match_quality_matrix.numel() == 0:
-            # empty targets or proposals not supported during training
+            # empty proposals are still not supported during training
             if match_quality_matrix.shape[0] == 0:
-                raise ValueError(
-                    "No ground-truth boxes available for one of the images "
-                    "during training")
+                # no ground-truth boxes: match every prediction to background (-1)
+                length = match_quality_matrix.size(1)
+                device = match_quality_matrix.device
+                return torch.full((length,), -1, dtype=torch.int64, device=device)
             else:
                 raise ValueError(
                     "No proposal boxes available for one of the images "
diff --git a/maskrcnn_benchmark/modeling/roi_heads/box_head/loss.py b/maskrcnn_benchmark/modeling/roi_heads/box_head/loss.py
index a1fdd2308..f7aaf59f7 100644
--- a/maskrcnn_benchmark/modeling/roi_heads/box_head/loss.py
+++ b/maskrcnn_benchmark/modeling/roi_heads/box_head/loss.py
@@ -45,7 +45,15 @@ def match_targets_to_proposals(self, proposal, target):
         # NB: need to clamp the indices because we can have a single
         # GT in the image, and matched_idxs can be -2, which goes
         # out of bounds
-        matched_targets = target[matched_idxs.clamp(min=0)]
+        if len(target):
+            matched_targets = target[matched_idxs.clamp(min=0)]
+        else:
+            # empty target: label every proposal as background (0)
+            device = target.get_field('labels').device
+            dtype = target.get_field('labels').dtype
+            labels = torch.zeros_like(matched_idxs, dtype=dtype, device=device)
+            matched_targets = target
+            matched_targets.add_field('labels', labels)
         matched_targets.add_field("matched_idxs", matched_idxs)
         return matched_targets
@@ -70,9 +78,13 @@ def prepare_targets(self, proposals, targets):
             labels_per_image[ignore_inds] = -1  # -1 is ignored by sampler
 
             # compute regression targets
-            regression_targets_per_image = self.box_coder.encode(
-                matched_targets.bbox, proposals_per_image.bbox
-            )
+            if not matched_targets.bbox.shape[0]:  # no GT: zero regression targets
+                zeros = torch.zeros_like(labels_per_image, dtype=torch.float32)
+                regression_targets_per_image = torch.stack((zeros, zeros, zeros, zeros), dim=1)
+            else:
+                regression_targets_per_image = self.box_coder.encode(
+                    matched_targets.bbox, proposals_per_image.bbox
+                )
 
             labels.append(labels_per_image)
             regression_targets.append(regression_targets_per_image)
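
The matcher change is the pivot for everything below: with zero ground-truth rows it now returns all -1 (BELOW_LOW_THRESHOLD) instead of raising, so every downstream head sees pure background. A quick sanity check, assuming the patched Matcher (thresholds 0.7/0.3 are just the default RPN values):

```python
# Sketch: the empty-ground-truth path through Matcher.__call__.
import torch
from maskrcnn_benchmark.modeling.matcher import Matcher

matcher = Matcher(0.7, 0.3)
quality = torch.empty(0, 5)   # 0 ground-truth boxes x 5 predictions
print(matcher(quality))       # tensor([-1, -1, -1, -1, -1]) -> all background
```
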
diff --git a/maskrcnn_benchmark/modeling/roi_heads/mask_head/loss.py b/maskrcnn_benchmark/modeling/roi_heads/mask_head/loss.py
index d4c5e3621..2b72bf995 100644
--- a/maskrcnn_benchmark/modeling/roi_heads/mask_head/loss.py
+++ b/maskrcnn_benchmark/modeling/roi_heads/mask_head/loss.py
@@ -61,7 +61,14 @@ def match_targets_to_proposals(self, proposal, target):
         # NB: need to clamp the indices because we can have a single
         # GT in the image, and matched_idxs can be -2, which goes
         # out of bounds
-        matched_targets = target[matched_idxs.clamp(min=0)]
+        if len(target):
+            matched_targets = target[matched_idxs.clamp(min=0)]
+        else:  # empty target: label every proposal as background
+            matched_targets = target
+            device = target.get_field('labels').device
+            dtype = target.get_field('labels').dtype
+            labels = torch.zeros_like(matched_idxs, dtype=dtype, device=device)
+            matched_targets.add_field('labels', labels)
         matched_targets.add_field("matched_idxs", matched_idxs)
         return matched_targets
diff --git a/maskrcnn_benchmark/modeling/roi_heads/mask_head/mask_head.py b/maskrcnn_benchmark/modeling/roi_heads/mask_head/mask_head.py
index a9ce245b6..70fd4cb8f 100644
--- a/maskrcnn_benchmark/modeling/roi_heads/mask_head/mask_head.py
+++ b/maskrcnn_benchmark/modeling/roi_heads/mask_head/mask_head.py
@@ -63,6 +63,11 @@ def forward(self, features, proposals, targets=None):
             # during training, only focus on positive boxes
             all_proposals = proposals
             proposals, positive_inds = keep_only_positive_boxes(proposals)
+
+            if all(len(proposal) < 1 for proposal in proposals):  # no positives: inject a dummy box
+                proposal = proposals[0]
+                proposal.bbox = proposal.bbox.new([[0, 0, 10, 10]])
+                positive_inds[0][0] = 1
         if self.training and self.cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR:
             x = features
             x = x[torch.cat(positive_inds, dim=0)]
diff --git a/maskrcnn_benchmark/modeling/rpn/loss.py b/maskrcnn_benchmark/modeling/rpn/loss.py
index 840e35453..e6fdabaeb 100644
--- a/maskrcnn_benchmark/modeling/rpn/loss.py
+++ b/maskrcnn_benchmark/modeling/rpn/loss.py
@@ -49,7 +49,10 @@ def match_targets_to_anchors(self, anchor, target, copied_fields=[]):
         # NB: need to clamp the indices because we can have a single
         # GT in the image, and matched_idxs can be -2, which goes
         # out of bounds
-        matched_targets = target[matched_idxs.clamp(min=0)]
+        if len(target):
+            matched_targets = target[matched_idxs.clamp(min=0)]
+        else:  # empty target: there is nothing to index into
+            matched_targets = target
         matched_targets.add_field("matched_idxs", matched_idxs)
         return matched_targets
@@ -79,9 +82,13 @@ def prepare_targets(self, anchors, targets):
             labels_per_image[inds_to_discard] = -1
 
             # compute regression targets
-            regression_targets_per_image = self.box_coder.encode(
-                matched_targets.bbox, anchors_per_image.bbox
-            )
+            if not matched_targets.bbox.shape[0]:  # no GT: zero regression targets
+                zeros = torch.zeros_like(labels_per_image)
+                regression_targets_per_image = torch.stack((zeros, zeros, zeros, zeros), dim=1)
+            else:
+                regression_targets_per_image = self.box_coder.encode(
+                    matched_targets.bbox, anchors_per_image.bbox
+                )
 
             labels.append(labels_per_image)
             regression_targets.append(regression_targets_per_image)
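
Net effect for an image with no annotations: every anchor and proposal is labeled background, the regression targets are all-zero, and the box regression loss is taken over an empty set of positives. That is why the numel() guard in smooth_l1_loss matters: mean() over an empty tensor is NaN, while sum() is 0. A quick check against the patched layer:

```python
# Sketch: empty-input behaviour of the patched smooth_l1_loss.
import torch
from maskrcnn_benchmark.layers import smooth_l1_loss

empty = torch.empty(0, 4)
print(smooth_l1_loss(empty, empty))  # tensor(0.) after the patch; NaN before
```
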