train with background in maskrcnn #958

Open · wants to merge 3 commits into base: main

This repository was archived by the owner on Oct 31, 2023. It is now read-only.
2 changes: 2 additions & 0 deletions maskrcnn_benchmark/config/defaults.py
@@ -62,6 +62,8 @@

 _C.INPUT.VERTICAL_FLIP_PROB_TRAIN = 0.0

+_C.INPUT.TRAIN_WITH_BACKGROUND = False
+
 # -----------------------------------------------------------------------------
 # Dataset
 # -----------------------------------------------------------------------------
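Usage note (mine, not part of the diff): with this default in place, the flag can be flipped per run through the standard yacs override path; merge_from_list is stock yacs API and cfg is the config object maskrcnn_benchmark already exposes.

    from maskrcnn_benchmark.config import cfg

    # Flip the new flag for one experiment; equivalent to putting
    #   INPUT:
    #     TRAIN_WITH_BACKGROUND: True
    # in a YAML config file.
    cfg.merge_from_list(["INPUT.TRAIN_WITH_BACKGROUND", True])
    assert cfg.INPUT.TRAIN_WITH_BACKGROUND is True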
9 changes: 6 additions & 3 deletions maskrcnn_benchmark/data/build.py
@@ -15,7 +15,7 @@
 from .transforms import build_transforms


-def build_dataset(dataset_list, transforms, dataset_catalog, is_train=True):
+def build_dataset(dataset_list, transforms, dataset_catalog, is_train=True, train_with_background=False):
     """
     Arguments:
         dataset_list (list[str]): Contains the names of the datasets, i.e.,
@@ -37,7 +37,10 @@ def build_dataset(dataset_list, transforms, dataset_catalog, is_train=True):
         # for COCODataset, we want to remove images without annotations
         # during training
         if data["factory"] == "COCODataset":
-            args["remove_images_without_annotations"] = is_train
+            if is_train:
+                args["remove_images_without_annotations"] = not train_with_background
+            else:
+                args["remove_images_without_annotations"] = is_train
         if data["factory"] == "PascalVOCDataset":
             args["use_difficult"] = not is_train
         args["transforms"] = transforms
@@ -153,7 +156,7 @@ def make_data_loader(cfg, is_train=True, is_distributed=False, start_iter=0):

     # If bbox aug is enabled in testing, simply set transforms to None and we will apply transforms later
     transforms = None if not is_train and cfg.TEST.BBOX_AUG.ENABLED else build_transforms(cfg, is_train)
-    datasets = build_dataset(dataset_list, transforms, DatasetCatalog, is_train)
+    datasets = build_dataset(dataset_list, transforms, DatasetCatalog, is_train, cfg.INPUT.TRAIN_WITH_BACKGROUND)

     if is_train:
         # save category_id to label name mapping
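The effective behavior of the new argument, summarized (a sketch derived from the branch above, not code in the PR):

    # is_train | TRAIN_WITH_BACKGROUND | remove_images_without_annotations
    # ---------+-----------------------+----------------------------------
    # True     | False                 | True   (old behavior: drop unlabeled images)
    # True     | True                  | False  (keep background-only images)
    # False    | either                | False  (test keeps all images, as before)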
4 changes: 4 additions & 0 deletions maskrcnn_benchmark/data/datasets/coco.py
@@ -85,6 +85,10 @@ def __getitem__(self, idx):
             masks = [obj["segmentation"] for obj in anno]
             masks = SegmentationMask(masks, img.size, mode='poly')
             target.add_field("masks", masks)
+        else:
+            masks = []
+            masks = SegmentationMask(masks, img.size, mode='poly')
+            target.add_field("masks", masks)

         if anno and "keypoints" in anno[0]:
             keypoints = [obj["keypoints"] for obj in anno]
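Why the else branch is needed (my reading, not stated in the PR): a background image has anno == [], so the original code never attached a "masks" field, and a later target.get_field("masks") in the mask head would raise a KeyError.

    # Illustrative: a background image now yields a valid, empty target.
    # target.bbox has shape [0, 4], and target.get_field("masks") holds a
    # SegmentationMask built from zero polygons, so downstream field
    # lookups succeed instead of raising KeyError.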
5 changes: 3 additions & 2 deletions maskrcnn_benchmark/engine/trainer.py
@@ -57,8 +57,9 @@ def do_train(
     for iteration, (images, targets, _) in enumerate(data_loader, start_iter):

         if any(len(target) < 1 for target in targets):
-            logger.error(f"Iteration={iteration + 1} || Image Ids used for training {_} || targets Length={[len(target) for target in targets]}" )
-            continue
+            # logger.error(f"Iteration={iteration + 1} || Image Ids used for training {_} || targets Length={[len(target) for target in targets]}" )
+            # continue
+            pass
         data_time = time.time() - end
         iteration = iteration + 1
         arguments["iteration"] = iteration
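With the guard turned into a no-op, batches containing empty targets now reach the model. A sketch of what changes (assuming BoxList targets, as elsewhere in this codebase):

    # Previously: any image with len(target) == 0 caused the whole batch
    # to be logged as an error and skipped. Now a batch such as
    #   targets = [BoxList with 0 boxes, BoxList with 3 boxes]
    # proceeds into model(images, targets); the matcher and loss changes
    # below are what make this safe.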
2 changes: 2 additions & 0 deletions maskrcnn_benchmark/layers/smooth_l1_loss.py
@@ -8,6 +8,8 @@ def smooth_l1_loss(input, target, beta=1. / 9, size_average=True):
     very similar to the smooth_l1_loss from pytorch, but with
     the extra beta parameter
     """
+    if input.numel() == 0:
+        size_average = False
     n = torch.abs(input - target)
     cond = n < beta
     loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta)
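The reason for the guard (easy to verify in a REPL): reducing an empty tensor with mean() yields NaN, while sum() yields 0, so an empty regression slice must fall through to the sum branch to keep the total loss finite.

    import torch

    empty = torch.zeros(0)
    print(empty.mean())  # tensor(nan) -- a NaN here would poison the total loss
    print(empty.sum())   # tensor(0.)  -- contributes nothing, harmlessly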
9 changes: 6 additions & 3 deletions maskrcnn_benchmark/modeling/matcher.py
@@ -53,9 +53,12 @@ def __call__(self, match_quality_matrix):
         if match_quality_matrix.numel() == 0:
             # empty targets or proposals not supported during training
             if match_quality_matrix.shape[0] == 0:
-                raise ValueError(
-                    "No ground-truth boxes available for one of the images "
-                    "during training")
+                # raise ValueError(
+                #     "No ground-truth boxes available for one of the images "
+                #     "during training")
+                length = match_quality_matrix.size(1)
+                device = match_quality_matrix.device
+                return torch.ones(length, dtype=torch.int64, device=device) * -1
             else:
                 raise ValueError(
                     "No proposal boxes available for one of the images "
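In this codebase Matcher.BELOW_LOW_THRESHOLD is -1, so the early return marks every predicted box as background instead of erroring out:

    # With zero GT boxes, match_quality_matrix has shape [0, N]
    # (rows = ground-truth boxes, columns = proposals/anchors). The new
    # branch returns a length-N vector of -1s: every predicted box is
    # "below the low threshold" and will be labeled background downstream.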
20 changes: 16 additions & 4 deletions maskrcnn_benchmark/modeling/roi_heads/box_head/loss.py
@@ -45,7 +45,15 @@ def match_targets_to_proposals(self, proposal, target):
         # NB: need to clamp the indices because we can have a single
         # GT in the image, and matched_idxs can be -2, which goes
         # out of bounds
-        matched_targets = target[matched_idxs.clamp(min=0)]
+        if len(target):
+            matched_targets = target[matched_idxs.clamp(min=0)]
+        else:
+            device = target.get_field('labels').device
+            dtype = target.get_field('labels').dtype
+            labels = torch.zeros_like(matched_idxs, dtype=dtype, device=device)
+            matched_targets = target
+            matched_targets.add_field('labels', labels)
+
         matched_targets.add_field("matched_idxs", matched_idxs)
         return matched_targets

@@ -70,9 +78,13 @@ def prepare_targets(self, proposals, targets):
             labels_per_image[ignore_inds] = -1  # -1 is ignored by sampler

             # compute regression targets
-            regression_targets_per_image = self.box_coder.encode(
-                matched_targets.bbox, proposals_per_image.bbox
-            )
+            if not matched_targets.bbox.shape[0]:
+                zeros = torch.zeros_like(labels_per_image, dtype=torch.float32)
+                regression_targets_per_image = torch.stack((zeros, zeros, zeros, zeros), dim=1)
+            else:
+                regression_targets_per_image = self.box_coder.encode(
+                    matched_targets.bbox, proposals_per_image.bbox
+                )

             labels.append(labels_per_image)
             regression_targets.append(regression_targets_per_image)
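Taken together (my summary of the pieces above): for an image with no ground truth, every proposal receives label 0 and an all-zero regression target, so only the classification term is active. The mask-head loss below applies the same len(target) guard.

    # No-GT image, end to end (illustrative):
    #   matched_idxs       = [-1, -1, ..., -1]   # from the Matcher change
    #   labels_per_image   = [0, 0, ..., 0]      # all background
    #   regression targets = zeros of shape [N, 4]
    #   sampled positives  = none -> the box-regression loss reduces over
    #                        an empty tensor, handled by the smooth_l1 guard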
9 changes: 8 additions & 1 deletion maskrcnn_benchmark/modeling/roi_heads/mask_head/loss.py
@@ -61,7 +61,14 @@ def match_targets_to_proposals(self, proposal, target):
         # NB: need to clamp the indices because we can have a single
         # GT in the image, and matched_idxs can be -2, which goes
         # out of bounds
-        matched_targets = target[matched_idxs.clamp(min=0)]
+        if len(target):
+            matched_targets = target[matched_idxs.clamp(min=0)]
+        else:
+            matched_targets = target
+            device = target.get_field('labels').device
+            dtype = target.get_field('labels').dtype
+            labels = torch.zeros_like(matched_idxs, dtype=dtype, device=device)
+            matched_targets.add_field('labels', labels)
         matched_targets.add_field("matched_idxs", matched_idxs)
         return matched_targets
5 changes: 5 additions & 0 deletions maskrcnn_benchmark/modeling/roi_heads/mask_head/mask_head.py
@@ -63,6 +63,11 @@ def forward(self, features, proposals, targets=None):
             # during training, only focus on positive boxes
             all_proposals = proposals
             proposals, positive_inds = keep_only_positive_boxes(proposals)
+
+            if all(len(proposal) < 1 for proposal in proposals):
+                proposal = proposals[0]
+                proposal.bbox = proposal.bbox.new([[0, 0, 10, 10]])
+                positive_inds[0][0] = 1
         if self.training and self.cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR:
             x = features
             x = x[torch.cat(positive_inds, dim=0)]
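The dummy box looks like a workaround rather than a principled fix (my reading): when no image in the batch has a positive proposal, the mask branch would otherwise pool zero RoIs, so one fixed 10x10 box at the origin is injected and flagged positive, presumably to keep the mask head's forward and backward passes non-empty.

    # Batch with no positives anywhere (illustrative):
    #   proposals = [BoxList(0 boxes), BoxList(0 boxes)]
    #   -> proposals[0].bbox becomes [[0., 0., 10., 10.]]
    #   -> positive_inds[0][0] = 1 marks that slot as "positive"
    # so the RoI pooler receives exactly one (fabricated) box, and the
    # mask loss for this batch is computed against that fake box.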
15 changes: 11 additions & 4 deletions maskrcnn_benchmark/modeling/rpn/loss.py
@@ -49,7 +49,10 @@ def match_targets_to_anchors(self, anchor, target, copied_fields=[]):
         # NB: need to clamp the indices because we can have a single
         # GT in the image, and matched_idxs can be -2, which goes
         # out of bounds
-        matched_targets = target[matched_idxs.clamp(min=0)]
+        if len(target):
+            matched_targets = target[matched_idxs.clamp(min=0)]
+        else:
+            matched_targets = target
         matched_targets.add_field("matched_idxs", matched_idxs)
         return matched_targets

@@ -79,9 +82,13 @@ def prepare_targets(self, anchors, targets):
             labels_per_image[inds_to_discard] = -1

             # compute regression targets
-            regression_targets_per_image = self.box_coder.encode(
-                matched_targets.bbox, anchors_per_image.bbox
-            )
+            if not matched_targets.bbox.shape[0]:
+                zeros = torch.zeros_like(labels_per_image)
+                regression_targets_per_image = torch.stack((zeros, zeros, zeros, zeros), dim=1)
+            else:
+                regression_targets_per_image = self.box_coder.encode(
+                    matched_targets.bbox, anchors_per_image.bbox
+                )

             labels.append(labels_per_image)
             regression_targets.append(regression_targets_per_image)
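Putting it all together, building a loader that keeps background-only images would look something like this (a sketch: cfg and make_data_loader are the standard maskrcnn_benchmark entry points; the config file name is a placeholder for whichever config you train with):

    from maskrcnn_benchmark.config import cfg
    from maskrcnn_benchmark.data import make_data_loader

    cfg.merge_from_file("configs/e2e_mask_rcnn_R_50_FPN_1x.yaml")  # placeholder
    cfg.merge_from_list(["INPUT.TRAIN_WITH_BACKGROUND", True])

    # With the flag on, COCODataset is built with
    # remove_images_without_annotations=False, so unlabeled
    # (background-only) images stay in the training set.
    data_loader = make_data_loader(cfg, is_train=True)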