Commit cb2e0d3

Add blood vessel dataset processing script (#184)

* Add blood vessel dataset processing script

* Fix syntax error

* Fix syntax error

* Fix syntax error

* Fix bugs

* Fix bugs

* Fix bugs

* Use safe functions and expand more apis

* Use safe functions and expand more apis

* Fix hard code and verify dataset integrity
yamengxi authored Oct 18, 2020
1 parent 5a76a71 commit cb2e0d3
Showing 6 changed files with 544 additions and 1 deletion.
76 changes: 76 additions & 0 deletions docs/getting_started.md
@@ -46,6 +46,34 @@ mmsegmentation
│ │ │ ├── images
│ │ │ │ ├── training
│ │ │ │ ├── validation
│ ├── CHASE_DB1
│ │ ├── images
│ │ │ ├── training
│ │ │ ├── validation
│ │ ├── annotations
│ │ │ ├── training
│ │ │ ├── validation
│ ├── DRIVE
│ │ ├── images
│ │ │ ├── training
│ │ │ ├── validation
│ │ ├── annotations
│ │ │ ├── training
│ │ │ ├── validation
│ ├── HRF
│ │ ├── images
│ │ │ ├── training
│ │ │ ├── validation
│ │ ├── annotations
│ │ │ ├── training
│ │ │ ├── validation
│ ├── STARE
│ │ ├── images
│ │ │ ├── training
│ │ │ ├── validation
│ │ ├── annotations
│ │ │ ├── training
│ │ │ ├── validation
```

@@ -93,6 +121,54 @@ If you would like to use Pascal Context dataset, please install [Detail](https:/
python tools/convert_datasets/pascal_context.py data/VOCdevkit data/VOCdevkit/VOC2010/trainval_merged.json
```

### CHASE DB1

The training and validation sets of CHASE DB1 can be downloaded from [here](https://staffnet.kingston.ac.uk/~ku15565/CHASE_DB1/assets/CHASEDB1.zip).

To convert the CHASE DB1 dataset to MMSegmentation format, run the following command:

```shell
python tools/convert_datasets/chase_db1.py /path/to/CHASEDB1.zip
```

The script will create the directory structure automatically.
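
The annotations are stored as single-channel label maps: the script binarizes the raw annotation images by integer-dividing their pixel values by 128, so background ends up as 0 and vessel pixels as 1. A minimal illustration of that thresholding (the pixel values below are only for demonstration):

```python
import numpy as np

# For uint8 pixels, integer division by 128 maps 0-127 to 0 and 128-255 to 1,
# i.e. it thresholds at 128 (see the comment in chase_db1.py further below).
pixels = np.array([0, 64, 127, 128, 200, 255], dtype=np.uint8)
print(pixels // 128)  # [0 0 0 1 1 1]
```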

### DRIVE

The training and validation sets of DRIVE can be downloaded from [here](https://drive.grand-challenge.org/). You need to register an account before downloading. Currently, '1st_manual' is not provided officially.

To convert the DRIVE dataset to MMSegmentation format, run the following command:

```shell
python tools/convert_datasets/drive.py /path/to/training.zip /path/to/test.zip
```

The script will create the directory structure automatically.
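
As a quick sanity check after conversion (a minimal sketch, assuming the default `data/DRIVE` output path), you can count the files written to each split:

```python
import os
import os.path as osp

data_root = 'data/DRIVE'  # default output directory of the script above
for sub in ('images', 'annotations'):
    for split in ('training', 'validation'):
        folder = osp.join(data_root, sub, split)
        num_files = len(os.listdir(folder)) if osp.isdir(folder) else 0
        print(f'{folder}: {num_files} files')
```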

### HRF

First, download [healthy.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/healthy.zip), [glaucoma.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/glaucoma.zip), [diabetic_retinopathy.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/diabetic_retinopathy.zip), [healthy_manualsegm.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/healthy_manualsegm.zip), [glaucoma_manualsegm.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/glaucoma_manualsegm.zip) and [diabetic_retinopathy_manualsegm.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/diabetic_retinopathy_manualsegm.zip).

To convert the HRF dataset to MMSegmentation format, run the following command:

```shell
python tools/convert_datasets/hrf.py /path/to/healthy.zip /path/to/healthy_manualsegm.zip /path/to/glaucoma.zip /path/to/glaucoma_manualsegm.zip /path/to/diabetic_retinopathy.zip /path/to/diabetic_retinopathy_manualsegm.zip
```

The script will create the directory structure automatically.

### STARE

First, download [stare-images.tar](http://cecas.clemson.edu/~ahoover/stare/probing/stare-images.tar), [labels-ah.tar](http://cecas.clemson.edu/~ahoover/stare/probing/labels-ah.tar) and [labels-vk.tar](http://cecas.clemson.edu/~ahoover/stare/probing/labels-vk.tar).

To convert the STARE dataset to MMSegmentation format, run the following command:

```shell
python tools/convert_datasets/stare.py /path/to/stare-images.tar /path/to/labels-ah.tar /path/to/labels-vk.tar
```

The script will create the directory structure automatically.
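
After conversion, a dataset can be referenced from a dataset config in the usual MMSegmentation way. The sketch below is schematic and not part of this commit: it assumes the corresponding dataset class (`ChaseDB1Dataset` here) is registered in your MMSegmentation version, and it omits the `pipeline` entries a real config needs; see the dataset configs shipped under `configs/_base_/datasets/` for authoritative examples.

```python
# Schematic dataset config (assumptions for illustration, not part of this commit).
dataset_type = 'ChaseDB1Dataset'  # assumed registered dataset class
data_root = 'data/CHASE_DB1'      # output of chase_db1.py above

data = dict(
    train=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/training',
        ann_dir='annotations/training'),    # pipeline=... omitted
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation'))  # pipeline=... omitted
```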

## Inference with pretrained models

We provide testing scripts to evaluate a whole dataset (Cityscapes, PASCAL VOC, ADE20k, etc.),
2 changes: 1 addition & 1 deletion setup.cfg
@@ -8,6 +8,6 @@ line_length = 79
multi_line_output = 0
known_standard_library = setuptools
known_first_party = mmseg
known_third_party = PIL,cityscapesscripts,detail,matplotlib,mmcv,numpy,onnxruntime,oss2,pytest,scipy,torch
known_third_party = PIL,cityscapesscripts,cv2,detail,matplotlib,mmcv,numpy,onnxruntime,oss2,pytest,scipy,torch
no_lines_before = STDLIB,LOCALFOLDER
default_section = THIRDPARTY
83 changes: 83 additions & 0 deletions tools/convert_datasets/chase_db1.py
@@ -0,0 +1,83 @@
import argparse
import os
import os.path as osp
import tempfile
import zipfile

import mmcv

CHASE_DB1_LEN = 28 * 3
TRAINING_LEN = 60
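# The 84 (28 * 3) files are the 28 fundus images plus two manual annotations
# each; the first 60 files in sorted order (20 images with their annotations)
# go to the training split, the remaining 8 images to validation.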


def parse_args():
    parser = argparse.ArgumentParser(
        description='Convert CHASE_DB1 dataset to mmsegmentation format')
    parser.add_argument('dataset_path', help='path of CHASEDB1.zip')
    parser.add_argument('--tmp_dir', help='path of the temporary directory')
    parser.add_argument('-o', '--out_dir', help='output path')
    args = parser.parse_args()
    return args


def main():
    args = parse_args()
    dataset_path = args.dataset_path
    if args.out_dir is None:
        out_dir = osp.join('data', 'CHASE_DB1')
    else:
        out_dir = args.out_dir

    print('Making directories...')
    mmcv.mkdir_or_exist(out_dir)
    mmcv.mkdir_or_exist(osp.join(out_dir, 'images'))
    mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'training'))
    mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'validation'))
    mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations'))
    mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'training'))
    mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation'))

    with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
        print('Extracting CHASEDB1.zip...')
        zip_file = zipfile.ZipFile(dataset_path)
        zip_file.extractall(tmp_dir)

        print('Generating training dataset...')

        assert len(os.listdir(tmp_dir)) == CHASE_DB1_LEN, \
            'len(os.listdir(tmp_dir)) != {}'.format(CHASE_DB1_LEN)

        for img_name in sorted(os.listdir(tmp_dir))[:TRAINING_LEN]:
            img = mmcv.imread(osp.join(tmp_dir, img_name))
            if osp.splitext(img_name)[1] == '.jpg':
                mmcv.imwrite(
                    img, osp.join(out_dir, 'images', 'training', img_name))
            else:
                # The annotation img should be divided by 128, because some of
                # the annotation imgs are not standard. We should set a
                # threshold to convert the nonstandard annotation imgs. The
                # value divided by 128 is equivalent to '1 if value >= 128
                # else 0'
                mmcv.imwrite(
                    img[:, :, 0] // 128,
                    osp.join(out_dir, 'annotations', 'training',
                             osp.splitext(img_name)[0] + '.jpg'))

        for img_name in sorted(os.listdir(tmp_dir))[TRAINING_LEN:]:
            img = mmcv.imread(osp.join(tmp_dir, img_name))
            if osp.splitext(img_name)[1] == '.jpg':
                mmcv.imwrite(
                    img, osp.join(out_dir, 'images', 'validation', img_name))
            else:
                mmcv.imwrite(
                    img[:, :, 0] // 128,
                    osp.join(out_dir, 'annotations', 'validation',
                             osp.splitext(img_name)[0] + '.jpg'))

        print('Removing the temporary files...')

    print('Done!')


if __name__ == '__main__':
    main()
109 changes: 109 additions & 0 deletions tools/convert_datasets/drive.py
@@ -0,0 +1,109 @@
import argparse
import os
import os.path as osp
import tempfile
import zipfile

import cv2
import mmcv


def parse_args():
    parser = argparse.ArgumentParser(
        description='Convert DRIVE dataset to mmsegmentation format')
    parser.add_argument(
        'training_path', help='the training part of DRIVE dataset')
    parser.add_argument(
        'testing_path', help='the testing part of DRIVE dataset')
    parser.add_argument('--tmp_dir', help='path of the temporary directory')
    parser.add_argument('-o', '--out_dir', help='output path')
    args = parser.parse_args()
    return args


def main():
    args = parse_args()
    training_path = args.training_path
    testing_path = args.testing_path
    if args.out_dir is None:
        out_dir = osp.join('data', 'DRIVE')
    else:
        out_dir = args.out_dir

    print('Making directories...')
    mmcv.mkdir_or_exist(out_dir)
    mmcv.mkdir_or_exist(osp.join(out_dir, 'images'))
    mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'training'))
    mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'validation'))
    mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations'))
    mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'training'))
    mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation'))

    with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
        print('Extracting training.zip...')
        zip_file = zipfile.ZipFile(training_path)
        zip_file.extractall(tmp_dir)

        print('Generating training dataset...')
        now_dir = osp.join(tmp_dir, 'training', 'images')
        for img_name in os.listdir(now_dir):
            img = mmcv.imread(osp.join(now_dir, img_name))
            mmcv.imwrite(
                img,
                osp.join(out_dir, 'images', 'training',
                         osp.splitext(img_name)[0] + '.jpg'))

        now_dir = osp.join(tmp_dir, 'training', '1st_manual')
        for img_name in os.listdir(now_dir):
            # The manual annotations are distributed as GIF files, which
            # mmcv.imread cannot decode, so the first frame is read with
            # cv2.VideoCapture instead.
            cap = cv2.VideoCapture(osp.join(now_dir, img_name))
            ret, img = cap.read()
            mmcv.imwrite(
                img[:, :, 0] // 128,
                osp.join(out_dir, 'annotations', 'training',
                         osp.splitext(img_name)[0] + '.jpg'))

        print('Extracting test.zip...')
        zip_file = zipfile.ZipFile(testing_path)
        zip_file.extractall(tmp_dir)

        print('Generating validation dataset...')
        now_dir = osp.join(tmp_dir, 'test', 'images')
        for img_name in os.listdir(now_dir):
            img = mmcv.imread(osp.join(now_dir, img_name))
            mmcv.imwrite(
                img,
                osp.join(out_dir, 'images', 'validation',
                         osp.splitext(img_name)[0] + '.jpg'))

        now_dir = osp.join(tmp_dir, 'test', '1st_manual')
        if osp.exists(now_dir):
            for img_name in os.listdir(now_dir):
                cap = cv2.VideoCapture(osp.join(now_dir, img_name))
                ret, img = cap.read()
                # The annotation img should be divided by 128, because some of
                # the annotation imgs are not standard. We should set a
                # threshold to convert the nonstandard annotation imgs. The
                # value divided by 128 is equivalent to '1 if value >= 128
                # else 0'
                mmcv.imwrite(
                    img[:, :, 0] // 128,
                    osp.join(out_dir, 'annotations', 'validation',
                             osp.splitext(img_name)[0] + '.jpg'))

        now_dir = osp.join(tmp_dir, 'test', '2nd_manual')
        if osp.exists(now_dir):
            for img_name in os.listdir(now_dir):
                cap = cv2.VideoCapture(osp.join(now_dir, img_name))
                ret, img = cap.read()
                mmcv.imwrite(
                    img[:, :, 0] // 128,
                    osp.join(out_dir, 'annotations', 'validation',
                             osp.splitext(img_name)[0] + '.jpg'))

        print('Removing the temporary files...')

    print('Done!')


if __name__ == '__main__':
    main()
