From cb2e0d3cb1affe4e77b25493cafe4aca255fddbc Mon Sep 17 00:00:00 2001 From: yamengxi <49829199+yamengxi@users.noreply.github.com> Date: Sun, 18 Oct 2020 16:22:42 +0800 Subject: [PATCH] Add blood vessel dataset processing script (#184) * Add blood vessel dataset processing script * Fix syntax error * Fix syntax error * Fix syntax error * Fix bugs * Fix bugs * Fix bugs * Use safe functions and expand more apis * Use safe functions and expand more apis * Fix hard code and verify dataset integrity --- docs/getting_started.md | 76 +++++++++++++ setup.cfg | 2 +- tools/convert_datasets/chase_db1.py | 83 ++++++++++++++ tools/convert_datasets/drive.py | 109 ++++++++++++++++++ tools/convert_datasets/hrf.py | 110 +++++++++++++++++++ tools/convert_datasets/stare.py | 165 ++++++++++++++++++++++++++++ 6 files changed, 544 insertions(+), 1 deletion(-) create mode 100644 tools/convert_datasets/chase_db1.py create mode 100644 tools/convert_datasets/drive.py create mode 100644 tools/convert_datasets/hrf.py create mode 100644 tools/convert_datasets/stare.py diff --git a/docs/getting_started.md b/docs/getting_started.md index 23b3021511..15fd155b5d 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -46,6 +46,34 @@ mmsegmentation │ │ │ ├── images │ │ │ │ ├── training │ │ │ │ ├── validation +│ ├── CHASE_DB1 +│ │ ├── images +│ │ │ ├── training +│ │ │ ├── validation +│ │ ├── annotations +│ │ │ ├── training +│ │ │ ├── validation +│ ├── DRIVE +│ │ ├── images +│ │ │ ├── training +│ │ │ ├── validation +│ │ ├── annotations +│ │ │ ├── training +│ │ │ ├── validation +│ ├── HRF +│ │ ├── images +│ │ │ ├── training +│ │ │ ├── validation +│ │ ├── annotations +│ │ │ ├── training +│ │ │ ├── validation +│ ├── STARE +│ │ ├── images +│ │ │ ├── training +│ │ │ ├── validation +│ │ ├── annotations +│ │ │ ├── training +│ │ │ ├── validation ``` @@ -93,6 +121,54 @@ If you would like to use Pascal Context dataset, please install [Detail](https:/ python tools/convert_datasets/pascal_context.py data/VOCdevkit data/VOCdevkit/VOC2010/trainval_merged.json ``` +### CHASE DB1 + +The training and validation set of CHASE DB1 could be download from [here](https://staffnet.kingston.ac.uk/~ku15565/CHASE_DB1/assets/CHASEDB1.zip). + +To convert CHASE DB1 dataset to MMSegmentation format, you should run the following command: + +```shell +python tools/convert_datasets/chase_db1.py /path/to/CHASEDB1.zip +``` + +The script will make directory structure automatically. + +### DRIVE + +The training and validation set of DRIVE could be download from [here](https://drive.grand-challenge.org/). Before that, you should register an account. Currently '1st_manual' is not provided officially. + +To convert DRIVE dataset to MMSegmentation format, you should run the following command: + +```shell +python tools/convert_datasets/drive.py /path/to/training.zip /path/to/test.zip +``` + +The script will make directory structure automatically. + +### HRF + +First, download [healthy.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/healthy.zip), [glaucoma.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/glaucoma.zip), [diabetic_retinopathy.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/diabetic_retinopathy.zip), [healthy_manualsegm.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/healthy_manualsegm.zip), [glaucoma_manualsegm.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/glaucoma_manualsegm.zip) and [diabetic_retinopathy_manualsegm.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/diabetic_retinopathy_manualsegm.zip). + +To convert HRF dataset to MMSegmentation format, you should run the following command: + +```shell +python tools/convert_datasets/hrf.py /path/to/healthy.zip /path/to/healthy_manualsegm.zip /path/to/glaucoma.zip /path/to/glaucoma_manualsegm.zip /path/to/diabetic_retinopathy.zip /path/to/diabetic_retinopathy_manualsegm.zip +``` + +The script will make directory structure automatically. + +### STARE + +First, download [stare-images.tar](http://cecas.clemson.edu/~ahoover/stare/probing/stare-images.tar), [labels-ah.tar](http://cecas.clemson.edu/~ahoover/stare/probing/labels-ah.tar) and [labels-vk.tar](http://cecas.clemson.edu/~ahoover/stare/probing/labels-vk.tar). + +To convert STARE dataset to MMSegmentation format, you should run the following command: + +```shell +python tools/convert_datasets/stare.py /path/to/stare-images.tar /path/to/labels-ah.tar /path/to/labels-vk.tar +``` + +The script will make directory structure automatically. + ## Inference with pretrained models We provide testing scripts to evaluate a whole dataset (Cityscapes, PASCAL VOC, ADE20k, etc.), diff --git a/setup.cfg b/setup.cfg index cb533f4b59..a5fb07d401 100644 --- a/setup.cfg +++ b/setup.cfg @@ -8,6 +8,6 @@ line_length = 79 multi_line_output = 0 known_standard_library = setuptools known_first_party = mmseg -known_third_party = PIL,cityscapesscripts,detail,matplotlib,mmcv,numpy,onnxruntime,oss2,pytest,scipy,torch +known_third_party = PIL,cityscapesscripts,cv2,detail,matplotlib,mmcv,numpy,onnxruntime,oss2,pytest,scipy,torch no_lines_before = STDLIB,LOCALFOLDER default_section = THIRDPARTY diff --git a/tools/convert_datasets/chase_db1.py b/tools/convert_datasets/chase_db1.py new file mode 100644 index 0000000000..e127a04f74 --- /dev/null +++ b/tools/convert_datasets/chase_db1.py @@ -0,0 +1,83 @@ +import argparse +import os +import os.path as osp +import tempfile +import zipfile + +import mmcv + +CHASE_DB1_LEN = 28 * 3 +TRAINING_LEN = 60 + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert CHASE_DB1 dataset to mmsegmentation format') + parser.add_argument('dataset_path', help='path of CHASEDB1.zip') + parser.add_argument('--tmp_dir', help='path of the temporary directory') + parser.add_argument('-o', '--out_dir', help='output path') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + dataset_path = args.dataset_path + if args.out_dir is None: + out_dir = osp.join('data', 'CHASE_DB1') + else: + out_dir = args.out_dir + + print('Making directories...') + mmcv.mkdir_or_exist(out_dir) + mmcv.mkdir_or_exist(osp.join(out_dir, 'images')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'training')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'validation')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'training')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation')) + + with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: + print('Extracting CHASEDB1.zip...') + zip_file = zipfile.ZipFile(dataset_path) + zip_file.extractall(tmp_dir) + + print('Generating training dataset...') + + assert len(os.listdir(tmp_dir)) == CHASE_DB1_LEN, \ + 'len(os.listdir(tmp_dir)) != {}'.format(CHASE_DB1_LEN) + + for img_name in sorted(os.listdir(tmp_dir))[:TRAINING_LEN]: + img = mmcv.imread(osp.join(tmp_dir, img_name)) + if osp.splitext(img_name)[1] == '.jpg': + mmcv.imwrite(img, + osp.join(out_dir, 'images', 'training', img_name)) + else: + # The annotation img should be divided by 128, because some of + # the annotation imgs are not standard. We should set a + # threshold to convert the nonstandard annotation imgs. The + # value divided by 128 is equivalent to '1 if value >= 128 + # else 0' + mmcv.imwrite( + img[:, :, 0] // 128, + osp.join(out_dir, 'annotations', 'training', + osp.splitext(img_name)[0] + '.jpg')) + + for img_name in sorted(os.listdir(tmp_dir))[TRAINING_LEN:]: + img = mmcv.imread(osp.join(tmp_dir, img_name)) + if osp.splitext(img_name)[1] == '.jpg': + mmcv.imwrite( + img, osp.join(out_dir, 'images', 'validation', img_name)) + else: + mmcv.imwrite( + img[:, :, 0] // 128, + osp.join(out_dir, 'annotations', 'validation', + osp.splitext(img_name)[0] + '.jpg')) + + print('Removing the temporary files...') + + print('Done!') + + +if __name__ == '__main__': + main() diff --git a/tools/convert_datasets/drive.py b/tools/convert_datasets/drive.py new file mode 100644 index 0000000000..9da30fb1b4 --- /dev/null +++ b/tools/convert_datasets/drive.py @@ -0,0 +1,109 @@ +import argparse +import os +import os.path as osp +import tempfile +import zipfile + +import cv2 +import mmcv + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert DRIVE dataset to mmsegmentation format') + parser.add_argument( + 'training_path', help='the training part of DRIVE dataset') + parser.add_argument( + 'testing_path', help='the testing part of DRIVE dataset') + parser.add_argument('--tmp_dir', help='path of the temporary directory') + parser.add_argument('-o', '--out_dir', help='output path') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + training_path = args.training_path + testing_path = args.testing_path + if args.out_dir is None: + out_dir = osp.join('data', 'DRIVE') + else: + out_dir = args.out_dir + + print('Making directories...') + mmcv.mkdir_or_exist(out_dir) + mmcv.mkdir_or_exist(osp.join(out_dir, 'images')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'training')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'validation')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'training')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation')) + + with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: + print('Extracting training.zip...') + zip_file = zipfile.ZipFile(training_path) + zip_file.extractall(tmp_dir) + + print('Generating training dataset...') + now_dir = osp.join(tmp_dir, 'training', 'images') + for img_name in os.listdir(now_dir): + img = mmcv.imread(osp.join(now_dir, img_name)) + mmcv.imwrite( + img, + osp.join(out_dir, 'images', 'training', + osp.splitext(img_name)[0] + '.jpg')) + + now_dir = osp.join(tmp_dir, 'training', '1st_manual') + for img_name in os.listdir(now_dir): + cap = cv2.VideoCapture(osp.join(now_dir, img_name)) + ret, img = cap.read() + mmcv.imwrite( + img[:, :, 0] // 128, + osp.join(out_dir, 'annotations', 'training', + osp.splitext(img_name)[0] + '.jpg')) + + print('Extracting test.zip...') + zip_file = zipfile.ZipFile(testing_path) + zip_file.extractall(tmp_dir) + + print('Generating validation dataset...') + now_dir = osp.join(tmp_dir, 'test', 'images') + for img_name in os.listdir(now_dir): + img = mmcv.imread(osp.join(now_dir, img_name)) + mmcv.imwrite( + img, + osp.join(out_dir, 'images', 'validation', + osp.splitext(img_name)[0] + '.jpg')) + + now_dir = osp.join(tmp_dir, 'test', '1st_manual') + if osp.exists(now_dir): + for img_name in os.listdir(now_dir): + cap = cv2.VideoCapture(osp.join(now_dir, img_name)) + ret, img = cap.read() + # The annotation img should be divided by 128, because some of + # the annotation imgs are not standard. We should set a + # threshold to convert the nonstandard annotation imgs. The + # value divided by 128 is equivalent to '1 if value >= 128 + # else 0' + mmcv.imwrite( + img[:, :, 0] // 128, + osp.join(out_dir, 'annotations', 'validation', + osp.splitext(img_name)[0] + '.jpg')) + + now_dir = osp.join(tmp_dir, 'test', '2nd_manual') + if osp.exists(now_dir): + for img_name in os.listdir(now_dir): + cap = cv2.VideoCapture(osp.join(now_dir, img_name)) + ret, img = cap.read() + mmcv.imwrite( + img[:, :, 0] // 128, + osp.join(out_dir, 'annotations', 'validation', + osp.splitext(img_name)[0] + '.jpg')) + + print('Removing the temporary files...') + + print('Done!') + + +if __name__ == '__main__': + main() diff --git a/tools/convert_datasets/hrf.py b/tools/convert_datasets/hrf.py new file mode 100644 index 0000000000..3f00b9bcd9 --- /dev/null +++ b/tools/convert_datasets/hrf.py @@ -0,0 +1,110 @@ +import argparse +import os +import os.path as osp +import tempfile +import zipfile + +import mmcv + +HRF_LEN = 15 +TRAINING_LEN = 5 + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert HRF dataset to mmsegmentation format') + parser.add_argument('healthy_path', help='the path of healthy.zip') + parser.add_argument( + 'healthy_manualsegm_path', help='the path of healthy_manualsegm.zip') + parser.add_argument('glaucoma_path', help='the path of glaucoma.zip') + parser.add_argument( + 'glaucoma_manualsegm_path', help='the path of glaucoma_manualsegm.zip') + parser.add_argument( + 'diabetic_retinopathy_path', + help='the path of diabetic_retinopathy.zip') + parser.add_argument( + 'diabetic_retinopathy_manualsegm_path', + help='the path of diabetic_retinopathy_manualsegm.zip') + parser.add_argument('--tmp_dir', help='path of the temporary directory') + parser.add_argument('-o', '--out_dir', help='output path') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + images_path = [ + args.healthy_path, args.glaucoma_path, args.diabetic_retinopathy_path + ] + annotations_path = [ + args.healthy_manualsegm_path, args.glaucoma_manualsegm_path, + args.diabetic_retinopathy_manualsegm_path + ] + if args.out_dir is None: + out_dir = osp.join('data', 'HRF') + else: + out_dir = args.out_dir + + print('Making directories...') + mmcv.mkdir_or_exist(out_dir) + mmcv.mkdir_or_exist(osp.join(out_dir, 'images')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'training')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'validation')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'training')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation')) + + print('Generating images...') + for now_path in images_path: + with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: + zip_file = zipfile.ZipFile(now_path) + zip_file.extractall(tmp_dir) + + assert len(os.listdir(tmp_dir)) == HRF_LEN, \ + 'len(os.listdir(tmp_dir)) != {}'.format(HRF_LEN) + + for filename in sorted(os.listdir(tmp_dir))[:TRAINING_LEN]: + img = mmcv.imread(osp.join(tmp_dir, filename)) + mmcv.imwrite( + img, + osp.join(out_dir, 'images', 'training', + osp.splitext(filename)[0] + '.jpg')) + for filename in sorted(os.listdir(tmp_dir))[TRAINING_LEN:]: + img = mmcv.imread(osp.join(tmp_dir, filename)) + mmcv.imwrite( + img, + osp.join(out_dir, 'images', 'validation', + osp.splitext(filename)[0] + '.jpg')) + + print('Generating annotations...') + for now_path in annotations_path: + with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: + zip_file = zipfile.ZipFile(now_path) + zip_file.extractall(tmp_dir) + + assert len(os.listdir(tmp_dir)) == HRF_LEN, \ + 'len(os.listdir(tmp_dir)) != {}'.format(HRF_LEN) + + for filename in sorted(os.listdir(tmp_dir))[:TRAINING_LEN]: + img = mmcv.imread(osp.join(tmp_dir, filename)) + # The annotation img should be divided by 128, because some of + # the annotation imgs are not standard. We should set a + # threshold to convert the nonstandard annotation imgs. The + # value divided by 128 is equivalent to '1 if value >= 128 + # else 0' + mmcv.imwrite( + img[:, :, 0] // 128, + osp.join(out_dir, 'annotations', 'training', + osp.splitext(filename)[0] + '.jpg')) + for filename in sorted(os.listdir(tmp_dir))[TRAINING_LEN:]: + img = mmcv.imread(osp.join(tmp_dir, filename)) + mmcv.imwrite( + img[:, :, 0] // 128, + osp.join(out_dir, 'annotations', 'validation', + osp.splitext(filename)[0] + '.jpg')) + + print('Done!') + + +if __name__ == '__main__': + main() diff --git a/tools/convert_datasets/stare.py b/tools/convert_datasets/stare.py new file mode 100644 index 0000000000..28129da08b --- /dev/null +++ b/tools/convert_datasets/stare.py @@ -0,0 +1,165 @@ +import argparse +import gzip +import os +import os.path as osp +import tarfile +import tempfile + +import mmcv + +STARE_LEN = 20 +TRAINING_LEN = 10 + + +def un_gz(src, dst): + g_file = gzip.GzipFile(src) + with open(dst, 'wb+') as f: + f.write(g_file.read()) + g_file.close() + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert STARE dataset to mmsegmentation format') + parser.add_argument('image_path', help='the path of stare-images.tar') + parser.add_argument('labels_ah', help='the path of labels-ah.tar') + parser.add_argument('labels_vk', help='the path of labels-vk.tar') + parser.add_argument('--tmp_dir', help='path of the temporary directory') + parser.add_argument('-o', '--out_dir', help='output path') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + image_path = args.image_path + labels_ah = args.labels_ah + labels_vk = args.labels_vk + if args.out_dir is None: + out_dir = osp.join('data', 'STARE') + else: + out_dir = args.out_dir + + print('Making directories...') + mmcv.mkdir_or_exist(out_dir) + mmcv.mkdir_or_exist(osp.join(out_dir, 'images')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'training')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'validation')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'training')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation')) + + with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: + mmcv.mkdir_or_exist(osp.join(tmp_dir, 'gz')) + mmcv.mkdir_or_exist(osp.join(tmp_dir, 'files')) + + print('Extracting stare-images.tar...') + with tarfile.open(image_path) as f: + f.extractall(osp.join(tmp_dir, 'gz')) + + for filename in os.listdir(osp.join(tmp_dir, 'gz')): + un_gz( + osp.join(tmp_dir, 'gz', filename), + osp.join(tmp_dir, 'files', + osp.splitext(filename)[0])) + + now_dir = osp.join(tmp_dir, 'files') + + assert len(os.listdir(now_dir)) == STARE_LEN, \ + 'len(os.listdir(now_dir)) != {}'.format(STARE_LEN) + + for filename in sorted(os.listdir(now_dir))[:TRAINING_LEN]: + img = mmcv.imread(osp.join(now_dir, filename)) + mmcv.imwrite( + img, + osp.join(out_dir, 'images', 'training', + osp.splitext(filename)[0] + '.jpg')) + + for filename in sorted(os.listdir(now_dir))[TRAINING_LEN:]: + img = mmcv.imread(osp.join(now_dir, filename)) + mmcv.imwrite( + img, + osp.join(out_dir, 'images', 'validation', + osp.splitext(filename)[0] + '.jpg')) + + print('Removing the temporary files...') + + with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: + mmcv.mkdir_or_exist(osp.join(tmp_dir, 'gz')) + mmcv.mkdir_or_exist(osp.join(tmp_dir, 'files')) + + print('Extracting labels-ah.tar...') + with tarfile.open(labels_ah) as f: + f.extractall(osp.join(tmp_dir, 'gz')) + + for filename in os.listdir(osp.join(tmp_dir, 'gz')): + un_gz( + osp.join(tmp_dir, 'gz', filename), + osp.join(tmp_dir, 'files', + osp.splitext(filename)[0])) + + now_dir = osp.join(tmp_dir, 'files') + + assert len(os.listdir(now_dir)) == STARE_LEN, \ + 'len(os.listdir(now_dir)) != {}'.format(STARE_LEN) + + for filename in sorted(os.listdir(now_dir))[:TRAINING_LEN]: + img = mmcv.imread(osp.join(now_dir, filename)) + # The annotation img should be divided by 128, because some of + # the annotation imgs are not standard. We should set a threshold + # to convert the nonstandard annotation imgs. The value divided by + # 128 equivalent to '1 if value >= 128 else 0' + mmcv.imwrite( + img[:, :, 0] // 128, + osp.join(out_dir, 'annotations', 'training', + osp.splitext(filename)[0] + '.jpg')) + + for filename in sorted(os.listdir(now_dir))[TRAINING_LEN:]: + img = mmcv.imread(osp.join(now_dir, filename)) + mmcv.imwrite( + img[:, :, 0] // 128, + osp.join(out_dir, 'annotations', 'validation', + osp.splitext(filename)[0] + '.jpg')) + + print('Removing the temporary files...') + + with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: + mmcv.mkdir_or_exist(osp.join(tmp_dir, 'gz')) + mmcv.mkdir_or_exist(osp.join(tmp_dir, 'files')) + + print('Extracting labels-vk.tar...') + with tarfile.open(labels_vk) as f: + f.extractall(osp.join(tmp_dir, 'gz')) + + for filename in os.listdir(osp.join(tmp_dir, 'gz')): + un_gz( + osp.join(tmp_dir, 'gz', filename), + osp.join(tmp_dir, 'files', + osp.splitext(filename)[0])) + + now_dir = osp.join(tmp_dir, 'files') + + assert len(os.listdir(now_dir)) == STARE_LEN, \ + 'len(os.listdir(now_dir)) != {}'.format(STARE_LEN) + + for filename in sorted(os.listdir(now_dir))[:TRAINING_LEN]: + img = mmcv.imread(osp.join(now_dir, filename)) + mmcv.imwrite( + img[:, :, 0] // 128, + osp.join(out_dir, 'annotations', 'training', + osp.splitext(filename)[0] + '.jpg')) + + for filename in sorted(os.listdir(now_dir))[TRAINING_LEN:]: + img = mmcv.imread(osp.join(now_dir, filename)) + mmcv.imwrite( + img[:, :, 0] // 128, + osp.join(out_dir, 'annotations', 'validation', + osp.splitext(filename)[0] + '.jpg')) + + print('Removing the temporary files...') + + print('Done!') + + +if __name__ == '__main__': + main()