train with background in maskrcnn #958

Open · wants to merge 3 commits into base: main

This repository was archived by the owner on Oct 31, 2023. It is now read-only.
2 changes: 2 additions & 0 deletions maskrcnn_benchmark/config/defaults.py
@@ -62,6 +62,8 @@

 _C.INPUT.VERTICAL_FLIP_PROB_TRAIN = 0.0

+_C.INPUT.TRAIN_WITH_BACKGROUND = False
+
 # -----------------------------------------------------------------------------
 # Dataset
 # -----------------------------------------------------------------------------
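Usage note (mine, not part of the diff): with this default in place, the flag can be flipped per run through the standard yacs override path; merge_from_list is stock yacs API and cfg is the config object maskrcnn_benchmark already exposes.

    from maskrcnn_benchmark.config import cfg

    # Flip the new flag for one experiment; equivalent to putting
    #   INPUT:
    #     TRAIN_WITH_BACKGROUND: True
    # in a YAML config file.
    cfg.merge_from_list(["INPUT.TRAIN_WITH_BACKGROUND", True])
    assert cfg.INPUT.TRAIN_WITH_BACKGROUND is True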
9 changes: 6 additions & 3 deletions maskrcnn_benchmark/data/build.py
@@ -15,7 +15,7 @@
 from .transforms import build_transforms


-def build_dataset(dataset_list, transforms, dataset_catalog, is_train=True):
+def build_dataset(dataset_list, transforms, dataset_catalog, is_train=True, train_with_background=False):
     """
     Arguments:
         dataset_list (list[str]): Contains the names of the datasets, i.e.,
@@ -37,7 +37,10 @@ def build_dataset(dataset_list, transforms, dataset_catalog, is_train=True):
         # for COCODataset, we want to remove images without annotations
         # during training
         if data["factory"] == "COCODataset":
-            args["remove_images_without_annotations"] = is_train
+            if is_train:
+                args["remove_images_without_annotations"] = not train_with_background
+            else:
+                args["remove_images_without_annotations"] = is_train
         if data["factory"] == "PascalVOCDataset":
             args["use_difficult"] = not is_train
         args["transforms"] = transforms
@@ -153,7 +156,7 @@ def make_data_loader(cfg, is_train=True, is_distributed=False, start_iter=0):

     # If bbox aug is enabled in testing, simply set transforms to None and we will apply transforms later
     transforms = None if not is_train and cfg.TEST.BBOX_AUG.ENABLED else build_transforms(cfg, is_train)
-    datasets = build_dataset(dataset_list, transforms, DatasetCatalog, is_train)
+    datasets = build_dataset(dataset_list, transforms, DatasetCatalog, is_train, cfg.INPUT.TRAIN_WITH_BACKGROUND)

     if is_train:
         # save category_id to label name mapping
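The effective behavior of the new argument, summarized (a sketch derived from the branch above, not code in the PR):

    # is_train | TRAIN_WITH_BACKGROUND | remove_images_without_annotations
    # ---------+-----------------------+----------------------------------
    # True     | False                 | True   (old behavior: drop unlabeled images)
    # True     | True                  | False  (keep background-only images)
    # False    | either                | False  (test keeps all images, as before)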
4 changes: 4 additions & 0 deletions maskrcnn_benchmark/data/datasets/coco.py
@@ -85,6 +85,10 @@ def __getitem__(self, idx):
             masks = [obj["segmentation"] for obj in anno]
             masks = SegmentationMask(masks, img.size, mode='poly')
             target.add_field("masks", masks)
+        else:
+            masks = []
+            masks = SegmentationMask(masks, img.size, mode='poly')
+            target.add_field("masks", masks)

         if anno and "keypoints" in anno[0]:
             keypoints = [obj["keypoints"] for obj in anno]
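Why the else branch is needed (my reading, not stated in the PR): a background image has anno == [], so the original code never attached a "masks" field, and a later target.get_field("masks") in the mask head would raise a KeyError.

    # Illustrative: a background image now yields a valid, empty target.
    # target.bbox has shape [0, 4], and target.get_field("masks") holds a
    # SegmentationMask built from zero polygons, so downstream field
    # lookups succeed instead of raising KeyError.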
5 changes: 3 additions & 2 deletions maskrcnn_benchmark/engine/trainer.py
@@ -57,8 +57,9 @@ def do_train(
     for iteration, (images, targets, _) in enumerate(data_loader, start_iter):

         if any(len(target) < 1 for target in targets):
-            logger.error(f"Iteration={iteration + 1} || Image Ids used for training {_} || targets Length={[len(target) for target in targets]}" )
-            continue
+            # logger.error(f"Iteration={iteration + 1} || Image Ids used for training {_} || targets Length={[len(target) for target in targets]}" )
+            # continue
+            pass
         data_time = time.time() - end
         iteration = iteration + 1
         arguments["iteration"] = iteration
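With the guard turned into a no-op, batches containing empty targets now reach the model. A sketch of what changes (assuming BoxList targets, as elsewhere in this codebase):

    # Previously: any image with len(target) == 0 caused the whole batch
    # to be logged as an error and skipped. Now a batch such as
    #   targets = [BoxList with 0 boxes, BoxList with 3 boxes]
    # proceeds into model(images, targets); the matcher and loss changes
    # below are what make this safe.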
2 changes: 2 additions & 0 deletions maskrcnn_benchmark/layers/smooth_l1_loss.py
@@ -8,6 +8,8 @@ def smooth_l1_loss(input, target, beta=1. / 9, size_average=True):
     very similar to the smooth_l1_loss from pytorch, but with
     the extra beta parameter
     """
+    if input.numel() == 0:
+        size_average = False
     n = torch.abs(input - target)
     cond = n < beta
     loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta)
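The reason for the guard (easy to verify in a REPL): reducing an empty tensor with mean() yields NaN, while sum() yields 0, so an empty regression slice must fall through to the sum branch to keep the total loss finite.

    import torch

    empty = torch.zeros(0)
    print(empty.mean())  # tensor(nan) -- a NaN here would poison the total loss
    print(empty.sum())   # tensor(0.)  -- contributes nothing, harmlessly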
9 changes: 6 additions & 3 deletions maskrcnn_benchmark/modeling/matcher.py
@@ -53,9 +53,12 @@ def __call__(self, match_quality_matrix):
         if match_quality_matrix.numel() == 0:
             # empty targets or proposals not supported during training
             if match_quality_matrix.shape[0] == 0:
-                raise ValueError(
-                    "No ground-truth boxes available for one of the images "
-                    "during training")
+                # raise ValueError(
+                #     "No ground-truth boxes available for one of the images "
+                #     "during training")
+                length = match_quality_matrix.size(1)
+                device = match_quality_matrix.device
+                return torch.ones(length, dtype=torch.int64, device=device) * -1
             else:
                 raise ValueError(
                     "No proposal boxes available for one of the images "
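In this codebase Matcher.BELOW_LOW_THRESHOLD is -1, so the early return marks every predicted box as background instead of erroring out:

    # With zero GT boxes, match_quality_matrix has shape [0, N]
    # (rows = ground-truth boxes, columns = proposals/anchors). The new
    # branch returns a length-N vector of -1s: every predicted box is
    # "below the low threshold" and will be labeled background downstream.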
20 changes: 16 additions & 4 deletions maskrcnn_benchmark/modeling/roi_heads/box_head/loss.py
@@ -45,7 +45,15 @@ def match_targets_to_proposals(self, proposal, target):
         # NB: need to clamp the indices because we can have a single
         # GT in the image, and matched_idxs can be -2, which goes
         # out of bounds
-        matched_targets = target[matched_idxs.clamp(min=0)]
+        if len(target):
+            matched_targets = target[matched_idxs.clamp(min=0)]
+        else:
+            device = target.get_field('labels').device
+            dtype = target.get_field('labels').dtype
+            labels = torch.zeros_like(matched_idxs, dtype=dtype, device=device)
+            matched_targets = target
+            matched_targets.add_field('labels', labels)
+
         matched_targets.add_field("matched_idxs", matched_idxs)
         return matched_targets

@@ -70,9 +78,13 @@ def prepare_targets(self, proposals, targets):
             labels_per_image[ignore_inds] = -1  # -1 is ignored by sampler

             # compute regression targets
-            regression_targets_per_image = self.box_coder.encode(
-                matched_targets.bbox, proposals_per_image.bbox
-            )
+            if not matched_targets.bbox.shape[0]:
+                zeros = torch.zeros_like(labels_per_image, dtype=torch.float32)
+                regression_targets_per_image = torch.stack((zeros, zeros, zeros, zeros), dim=1)
+            else:
+                regression_targets_per_image = self.box_coder.encode(
+                    matched_targets.bbox, proposals_per_image.bbox
+                )

             labels.append(labels_per_image)
             regression_targets.append(regression_targets_per_image)
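Taken together (my summary of the pieces above): for an image with no ground truth, every proposal receives label 0 and an all-zero regression target, so only the classification term is active. The mask-head loss below applies the same len(target) guard.

    # No-GT image, end to end (illustrative):
    #   matched_idxs       = [-1, -1, ..., -1]   # from the Matcher change
    #   labels_per_image   = [0, 0, ..., 0]      # all background
    #   regression targets = zeros of shape [N, 4]
    #   sampled positives  = none -> the box-regression loss reduces over
    #                        an empty tensor, handled by the smooth_l1 guard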
9 changes: 8 additions & 1 deletion maskrcnn_benchmark/modeling/roi_heads/mask_head/loss.py
@@ -61,7 +61,14 @@ def match_targets_to_proposals(self, proposal, target):
         # NB: need to clamp the indices because we can have a single
         # GT in the image, and matched_idxs can be -2, which goes
         # out of bounds
-        matched_targets = target[matched_idxs.clamp(min=0)]
+        if len(target):
+            matched_targets = target[matched_idxs.clamp(min=0)]
+        else:
+            matched_targets = target
+            device = target.get_field('labels').device
+            dtype = target.get_field('labels').dtype
+            labels = torch.zeros_like(matched_idxs, dtype=dtype, device=device)
+            matched_targets.add_field('labels', labels)
         matched_targets.add_field("matched_idxs", matched_idxs)
         return matched_targets
5 changes: 5 additions & 0 deletions maskrcnn_benchmark/modeling/roi_heads/mask_head/mask_head.py
@@ -63,6 +63,11 @@ def forward(self, features, proposals, targets=None):
             # during training, only focus on positive boxes
             all_proposals = proposals
             proposals, positive_inds = keep_only_positive_boxes(proposals)
+
+            if all(len(proposal) < 1 for proposal in proposals):
+                proposal = proposals[0]
+                proposal.bbox = proposal.bbox.new([[0, 0, 10, 10]])
+                positive_inds[0][0] = 1
         if self.training and self.cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR:
             x = features
             x = x[torch.cat(positive_inds, dim=0)]
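The dummy box looks like a workaround rather than a principled fix (my reading): when no image in the batch has a positive proposal, the mask branch would otherwise pool zero RoIs, so one fixed 10x10 box at the origin is injected and flagged positive, presumably to keep the mask head's forward and backward passes non-empty.

    # Batch with no positives anywhere (illustrative):
    #   proposals = [BoxList(0 boxes), BoxList(0 boxes)]
    #   -> proposals[0].bbox becomes [[0., 0., 10., 10.]]
    #   -> positive_inds[0][0] = 1 marks that slot as "positive"
    # so the RoI pooler receives exactly one (fabricated) box, and the
    # mask loss for this batch is computed against that fake box.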
15 changes: 11 additions & 4 deletions maskrcnn_benchmark/modeling/rpn/loss.py
@@ -49,7 +49,10 @@ def match_targets_to_anchors(self, anchor, target, copied_fields=[]):
         # NB: need to clamp the indices because we can have a single
         # GT in the image, and matched_idxs can be -2, which goes
         # out of bounds
-        matched_targets = target[matched_idxs.clamp(min=0)]
+        if len(target):
+            matched_targets = target[matched_idxs.clamp(min=0)]
+        else:
+            matched_targets = target
         matched_targets.add_field("matched_idxs", matched_idxs)
         return matched_targets

@@ -79,9 +82,13 @@ def prepare_targets(self, anchors, targets):
             labels_per_image[inds_to_discard] = -1

             # compute regression targets
-            regression_targets_per_image = self.box_coder.encode(
-                matched_targets.bbox, anchors_per_image.bbox
-            )
+            if not matched_targets.bbox.shape[0]:
+                zeros = torch.zeros_like(labels_per_image)
+                regression_targets_per_image = torch.stack((zeros, zeros, zeros, zeros), dim=1)
+            else:
+                regression_targets_per_image = self.box_coder.encode(
+                    matched_targets.bbox, anchors_per_image.bbox
+                )

             labels.append(labels_per_image)
             regression_targets.append(regression_targets_per_image)
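Putting it all together, building a loader that keeps background-only images would look something like this (a sketch: cfg and make_data_loader are the standard maskrcnn_benchmark entry points; the config file name is a placeholder for whichever config you train with):

    from maskrcnn_benchmark.config import cfg
    from maskrcnn_benchmark.data import make_data_loader

    cfg.merge_from_file("configs/e2e_mask_rcnn_R_50_FPN_1x.yaml")  # placeholder
    cfg.merge_from_list(["INPUT.TRAIN_WITH_BACKGROUND", True])

    # With the flag on, COCODataset is built with
    # remove_images_without_annotations=False, so unlabeled
    # (background-only) images stay in the training set.
    data_loader = make_data_loader(cfg, is_train=True)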