Add support for Pascal Context 59 classes (#459)

* Create PascalContextDataset59 class in mmseg/datasets/pascal_context.py;
* Set reduce_zero_label=True for train_pipeline and PascalContextDataset59;
* Add some configs for Pascal-Context 59 classes training and testing;
open-mmlab · Apr 8, 2021 · b081822 · b081822
1 parent 9524f37
commit b081822
Show file tree

Hide file tree

Showing 15 changed files with 232 additions and 5 deletions.
diff --git a/configs/_base_/datasets/pascal_context_59.py b/configs/_base_/datasets/pascal_context_59.py
@@ -0,0 +1,60 @@
+# dataset settings: Pascal-Context, 59-class variant, VOC2010 image root
+dataset_type = 'PascalContextDataset59'
+data_root = 'data/VOCdevkit/VOC2010/'
+img_norm_cfg = dict(
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+
+img_scale = (520, 520)
+crop_size = (480, 480)  # shared by RandomCrop and Pad in train_pipeline
+
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations', reduce_zero_label=True),  # as in dataset
+    dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
+    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+    dict(type='RandomFlip', prob=0.5),
+    dict(type='PhotoMetricDistortion'),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
+    dict(type='DefaultFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=img_scale,
+        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='RandomFlip'),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img']),
+        ])
+]
+data = dict(
+    samples_per_gpu=4,
+    workers_per_gpu=4,
+    train=dict(
+        type=dataset_type,
+        data_root=data_root,
+        img_dir='JPEGImages',
+        ann_dir='SegmentationClassContext',
+        split='ImageSets/SegmentationContext/train.txt',
+        pipeline=train_pipeline),
+    val=dict(
+        type=dataset_type,
+        data_root=data_root,
+        img_dir='JPEGImages',
+        ann_dir='SegmentationClassContext',
+        split='ImageSets/SegmentationContext/val.txt',
+        pipeline=test_pipeline),
+    test=dict(  # NOTE: uses the same val.txt split as ``val`` above
+        type=dataset_type,
+        data_root=data_root,
+        img_dir='JPEGImages',
+        ann_dir='SegmentationClassContext',
+        split='ImageSets/SegmentationContext/val.txt',
+        pipeline=test_pipeline))
diff --git a/configs/_base_/schedules/schedule_160k_epochwise.py b/configs/_base_/schedules/schedule_160k_epochwise.py
@@ -0,0 +1,9 @@
+# optimizer: SGD with momentum and weight decay
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
+optimizer_config = dict()
+# learning policy. NOTE(review): by_epoch=False here but True in 20k/40k
+lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
+# runtime settings: 160000 iterations, checkpoint/eval interval 16000
+runner = dict(type='IterBasedRunner', max_iters=160000)
+checkpoint_config = dict(by_epoch=True, interval=16000)  # TODO confirm flag
+evaluation = dict(interval=16000, metric='mIoU')
diff --git a/configs/_base_/schedules/schedule_20k_epochwise.py b/configs/_base_/schedules/schedule_20k_epochwise.py
@@ -0,0 +1,9 @@
+# optimizer: SGD with momentum and weight decay
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
+optimizer_config = dict()
+# learning policy. NOTE(review): by_epoch=True here but False in 80k/160k
+lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=True)
+# runtime settings: 20000 iterations, checkpoint/eval interval 2000
+runner = dict(type='IterBasedRunner', max_iters=20000)
+checkpoint_config = dict(by_epoch=True, interval=2000)  # TODO confirm flag
+evaluation = dict(interval=2000, metric='mIoU')
diff --git a/configs/_base_/schedules/schedule_40k_epochwise.py b/configs/_base_/schedules/schedule_40k_epochwise.py
@@ -0,0 +1,9 @@
+# optimizer: SGD with momentum and weight decay
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
+optimizer_config = dict()
+# learning policy. NOTE(review): by_epoch=True here but False in 80k/160k
+lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=True)
+# runtime settings: 40000 iterations, checkpoint/eval interval 4000
+runner = dict(type='IterBasedRunner', max_iters=40000)
+checkpoint_config = dict(by_epoch=True, interval=4000)  # TODO confirm flag
+evaluation = dict(interval=4000, metric='mIoU')
diff --git a/configs/_base_/schedules/schedule_80k_epochwisse.py b/configs/_base_/schedules/schedule_80k_epochwisse.py
@@ -0,0 +1,9 @@
+# optimizer. NOTE(review): file name says 'epochwisse' -- likely a typo
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
+optimizer_config = dict()
+# learning policy. NOTE(review): by_epoch=False here but True in 20k/40k
+lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
+# runtime settings: 80000 iterations, checkpoint/eval interval 8000
+runner = dict(type='IterBasedRunner', max_iters=80000)
+checkpoint_config = dict(by_epoch=True, interval=8000)  # TODO confirm flag
+evaluation = dict(interval=8000, metric='mIoU')
diff --git a/configs/fcn/fcn_r50-d8_480x480_40k_pascal_context_59.py b/configs/fcn/fcn_r50-d8_480x480_40k_pascal_context_59.py
@@ -0,0 +1,9 @@
+_base_ = [  # FCN R50-D8 model + 59-class Pascal-Context data + 40k schedule
+    '../_base_/models/fcn_r50-d8.py',
+    '../_base_/datasets/pascal_context_59.py', '../_base_/default_runtime.py',
+    '../_base_/schedules/schedule_40k.py'
+]
+model = dict(
+    decode_head=dict(num_classes=59),  # matches the 59-class dataset
+    test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320)))
+optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001)
diff --git a/configs/hrnet/fcn_hr18_480x480_40k_pascal_context_59.py b/configs/hrnet/fcn_hr18_480x480_40k_pascal_context_59.py
@@ -0,0 +1,8 @@
+_base_ = [  # FCN HR18 model + 59-class Pascal-Context data + 40k schedule
+    '../_base_/models/fcn_hr18.py', '../_base_/datasets/pascal_context_59.py',
+    '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py'
+]
+model = dict(
+    decode_head=dict(num_classes=59),  # matches the 59-class dataset
+    test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320)))
+optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001)
diff --git a/configs/hrnet/fcn_hr18_480x480_80k_pascal_context_59.py b/configs/hrnet/fcn_hr18_480x480_80k_pascal_context_59.py
@@ -0,0 +1,8 @@
+_base_ = [  # FCN HR18 model + 59-class Pascal-Context data + 80k schedule
+    '../_base_/models/fcn_hr18.py', '../_base_/datasets/pascal_context_59.py',
+    '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
+]
+model = dict(
+    decode_head=dict(num_classes=59),  # matches the 59-class dataset
+    test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320)))
+optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001)
diff --git a/configs/hrnet/fcn_hr18s_480x480_40k_pascal_context_59.py b/configs/hrnet/fcn_hr18s_480x480_40k_pascal_context_59.py
@@ -0,0 +1,9 @@
+_base_ = './fcn_hr18_480x480_40k_pascal_context_59.py'
+model = dict(  # HR18 40k base with w18_small weights and block counts
+    pretrained='open-mmlab://msra/hrnetv2_w18_small',
+    backbone=dict(
+        extra=dict(
+            stage1=dict(num_blocks=(2, )),
+            stage2=dict(num_blocks=(2, 2)),
+            stage3=dict(num_modules=3, num_blocks=(2, 2, 2)),
+            stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2)))))
diff --git a/configs/hrnet/fcn_hr18s_480x480_80k_pascal_context_59.py b/configs/hrnet/fcn_hr18s_480x480_80k_pascal_context_59.py
@@ -0,0 +1,9 @@
+_base_ = './fcn_hr18_480x480_80k_pascal_context_59.py'
+model = dict(  # HR18 80k base with w18_small weights and block counts
+    pretrained='open-mmlab://msra/hrnetv2_w18_small',
+    backbone=dict(
+        extra=dict(
+            stage1=dict(num_blocks=(2, )),
+            stage2=dict(num_blocks=(2, 2)),
+            stage3=dict(num_modules=3, num_blocks=(2, 2, 2)),
+            stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2)))))
diff --git a/configs/hrnet/fcn_hr48_480x480_40k_pascal_context_59.py b/configs/hrnet/fcn_hr48_480x480_40k_pascal_context_59.py
@@ -0,0 +1,10 @@
+_base_ = './fcn_hr18_480x480_40k_pascal_context_59.py'
+model = dict(  # HR18 40k base with w48 weights and per-stage num_channels
+    pretrained='open-mmlab://msra/hrnetv2_w48',
+    backbone=dict(
+        extra=dict(
+            stage2=dict(num_channels=(48, 96)),
+            stage3=dict(num_channels=(48, 96, 192)),
+            stage4=dict(num_channels=(48, 96, 192, 384)))),
+    decode_head=dict(  # in_channels mirror the stage4 num_channels above
+        in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384])))
diff --git a/configs/hrnet/fcn_hr48_480x480_80k_pascal_context_59.py b/configs/hrnet/fcn_hr48_480x480_80k_pascal_context_59.py
@@ -0,0 +1,10 @@
+_base_ = './fcn_hr18_480x480_80k_pascal_context_59.py'
+model = dict(  # HR18 80k base with w48 weights and per-stage num_channels
+    pretrained='open-mmlab://msra/hrnetv2_w48',
+    backbone=dict(
+        extra=dict(
+            stage2=dict(num_channels=(48, 96)),
+            stage3=dict(num_channels=(48, 96, 192)),
+            stage4=dict(num_channels=(48, 96, 192, 384)))),
+    decode_head=dict(  # in_channels mirror the stage4 num_channels above
+        in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384])))
diff --git a/demo/MMSegmentation_Tutorial.ipynb b/demo/MMSegmentation_Tutorial.ipynb
@@ -1411,6 +1411,13 @@
      }
     }
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ]
 }
diff --git a/mmseg/datasets/__init__.py b/mmseg/datasets/__init__.py
@@ -6,13 +6,25 @@
 from .dataset_wrappers import ConcatDataset, RepeatDataset
 from .drive import DRIVEDataset
 from .hrf import HRFDataset
-from .pascal_context import PascalContextDataset
+from .pascal_context import PascalContextDataset, PascalContextDataset59
 from .stare import STAREDataset
 from .voc import PascalVOCDataset
 
 __all__ = [
-    'CustomDataset', 'build_dataloader', 'ConcatDataset', 'RepeatDataset',
-    'DATASETS', 'build_dataset', 'PIPELINES', 'CityscapesDataset',
-    'PascalVOCDataset', 'ADE20KDataset', 'PascalContextDataset',
-    'ChaseDB1Dataset', 'DRIVEDataset', 'HRFDataset', 'STAREDataset'
+    'CustomDataset',
+    'build_dataloader',
+    'ConcatDataset',
+    'RepeatDataset',
+    'DATASETS',
+    'build_dataset',
+    'PIPELINES',
+    'CityscapesDataset',
+    'PascalVOCDataset',
+    'ADE20KDataset',
+    'PascalContextDataset',
+    'PascalContextDataset59',
+    'ChaseDB1Dataset',
+    'DRIVEDataset',
+    'HRFDataset',
+    'STAREDataset',
 ]
diff --git a/mmseg/datasets/pascal_context.py b/mmseg/datasets/pascal_context.py
@@ -52,3 +52,52 @@ def __init__(self, split, **kwargs):
             reduce_zero_label=False,
             **kwargs)
         assert osp.exists(self.img_dir) and self.split is not None
+
+
+@DATASETS.register_module()
+class PascalContextDataset59(CustomDataset):
+    """PascalContext dataset with 59 classes (no 'background' entry).
+
+    ``CLASSES`` and ``PALETTE`` each hold 59 entries. ``reduce_zero_label``
+    is fixed to True (presumably so label 0, background, is not a class --
+    confirm in ``CustomDataset``). The ``img_suffix`` is fixed to '.jpg' and
+    ``seg_map_suffix`` is fixed to '.png'.
+
+    Args:
+        split (str): Split txt file for PascalContext.
+    """
+
+    CLASSES = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car',
+               'cat', 'chair', 'cow', 'table', 'dog', 'horse', 'motorbike',
+               'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor',
+               'bag', 'bed', 'bench', 'book', 'building', 'cabinet', 'ceiling',
+               'cloth', 'computer', 'cup', 'door', 'fence', 'floor', 'flower',
+               'food', 'grass', 'ground', 'keyboard', 'light', 'mountain',
+               'mouse', 'curtain', 'platform', 'sign', 'plate', 'road', 'rock',
+               'shelves', 'sidewalk', 'sky', 'snow', 'bedclothes', 'track',
+               'tree', 'truck', 'wall', 'water', 'window', 'wood')
+
+    PALETTE = [[180, 120, 120], [6, 230, 230], [80, 50, 50], [4, 200, 3],
+               [120, 120, 80], [140, 140, 140], [204, 5, 255], [230, 230, 230],
+               [4, 250, 7], [224, 5, 255], [235, 255, 7], [150, 5, 61],
+               [120, 120, 70], [8, 255, 51], [255, 6, 82], [143, 255, 140],
+               [204, 255, 4], [255, 51, 7], [204, 70, 3], [0, 102, 200],
+               [61, 230, 250], [255, 6, 51], [11, 102, 255], [255, 7, 71],
+               [255, 9, 224], [9, 7, 230], [220, 220, 220], [255, 9, 92],
+               [112, 9, 255], [8, 255, 214], [7, 255, 224], [255, 184, 6],
+               [10, 255, 71], [255, 41, 10], [7, 255, 255], [224, 255, 8],
+               [102, 8, 255], [255, 61, 6], [255, 194, 7], [255, 122, 8],
+               [0, 255, 20], [255, 8, 41], [255, 5, 153], [6, 51, 255],
+               [235, 12, 255], [160, 150, 20], [0, 163, 255], [140, 140, 140],
+               [250, 10, 15], [20, 255, 0], [31, 255, 0], [255, 31, 0],
+               [255, 224, 0], [153, 255, 0], [0, 0, 255], [255, 71, 0],
+               [0, 235, 255], [0, 173, 255], [31, 0, 255]]
+
+    def __init__(self, split, **kwargs):
+        super(PascalContextDataset59, self).__init__(
+            img_suffix='.jpg',
+            seg_map_suffix='.png',
+            split=split,
+            reduce_zero_label=True,
+            **kwargs)
+        assert osp.exists(self.img_dir) and self.split is not None