Commit cb2e0d3

Add blood vessel dataset processing script (#184)

* Add blood vessel dataset processing script

* Fix syntax error

* Fix syntax error

* Fix syntax error

* Fix bugs

* Fix bugs

* Fix bugs

* Use safe functions and expand more apis

* Use safe functions and expand more apis

* Fix hard code and verify dataset integrity
yamengxi authored Oct 18, 2020
1 parent 5a76a71 commit cb2e0d3
Showing 6 changed files with 544 additions and 1 deletion.
76 changes: 76 additions & 0 deletions docs/getting_started.md
@@ -46,6 +46,34 @@ mmsegmentation
│ │ │ ├── images
│ │ │ │ ├── training
│ │ │ │ ├── validation
│ ├── CHASE_DB1
│ │ ├── images
│ │ │ ├── training
│ │ │ ├── validation
│ │ ├── annotations
│ │ │ ├── training
│ │ │ ├── validation
│ ├── DRIVE
│ │ ├── images
│ │ │ ├── training
│ │ │ ├── validation
│ │ ├── annotations
│ │ │ ├── training
│ │ │ ├── validation
│ ├── HRF
│ │ ├── images
│ │ │ ├── training
│ │ │ ├── validation
│ │ ├── annotations
│ │ │ ├── training
│ │ │ ├── validation
│ ├── STARE
│ │ ├── images
│ │ │ ├── training
│ │ │ ├── validation
│ │ ├── annotations
│ │ │ ├── training
│ │ │ ├── validation
```

@@ -93,6 +121,54 @@ If you would like to use Pascal Context dataset, please install [Detail](https:/
python tools/convert_datasets/pascal_context.py data/VOCdevkit data/VOCdevkit/VOC2010/trainval_merged.json
```

### CHASE DB1

The training and validation sets of CHASE DB1 can be downloaded from [here](https://staffnet.kingston.ac.uk/~ku15565/CHASE_DB1/assets/CHASEDB1.zip).

To convert the CHASE DB1 dataset to MMSegmentation format, run the following command:

```shell
python tools/convert_datasets/chase_db1.py /path/to/CHASEDB1.zip
```

The script will create the directory structure automatically.
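
The annotations are stored as single-channel label maps: the script binarizes the raw annotation images by integer-dividing their pixel values by 128, so background ends up as 0 and vessel pixels as 1. A minimal illustration of that thresholding (the pixel values below are only for demonstration):

```python
import numpy as np

# For uint8 pixels, integer division by 128 maps 0-127 to 0 and 128-255 to 1,
# i.e. it thresholds at 128 (see the comment in chase_db1.py further below).
pixels = np.array([0, 64, 127, 128, 200, 255], dtype=np.uint8)
print(pixels // 128)  # [0 0 0 1 1 1]
```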

### DRIVE

The training and validation sets of DRIVE can be downloaded from [here](https://drive.grand-challenge.org/). You need to register an account before downloading. Currently, '1st_manual' is not provided officially.

To convert the DRIVE dataset to MMSegmentation format, run the following command:

```shell
python tools/convert_datasets/drive.py /path/to/training.zip /path/to/test.zip
```

The script will create the directory structure automatically.
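
As a quick sanity check after conversion (a minimal sketch, assuming the default `data/DRIVE` output path), you can count the files written to each split:

```python
import os
import os.path as osp

data_root = 'data/DRIVE'  # default output directory of the script above
for sub in ('images', 'annotations'):
    for split in ('training', 'validation'):
        folder = osp.join(data_root, sub, split)
        num_files = len(os.listdir(folder)) if osp.isdir(folder) else 0
        print(f'{folder}: {num_files} files')
```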

### HRF

First, download [healthy.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/healthy.zip), [glaucoma.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/glaucoma.zip), [diabetic_retinopathy.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/diabetic_retinopathy.zip), [healthy_manualsegm.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/healthy_manualsegm.zip), [glaucoma_manualsegm.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/glaucoma_manualsegm.zip) and [diabetic_retinopathy_manualsegm.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/diabetic_retinopathy_manualsegm.zip).

To convert the HRF dataset to MMSegmentation format, run the following command:

```shell
python tools/convert_datasets/hrf.py /path/to/healthy.zip /path/to/healthy_manualsegm.zip /path/to/glaucoma.zip /path/to/glaucoma_manualsegm.zip /path/to/diabetic_retinopathy.zip /path/to/diabetic_retinopathy_manualsegm.zip
```

The script will create the directory structure automatically.

### STARE

First, download [stare-images.tar](http://cecas.clemson.edu/~ahoover/stare/probing/stare-images.tar), [labels-ah.tar](http://cecas.clemson.edu/~ahoover/stare/probing/labels-ah.tar) and [labels-vk.tar](http://cecas.clemson.edu/~ahoover/stare/probing/labels-vk.tar).

To convert the STARE dataset to MMSegmentation format, run the following command:

```shell
python tools/convert_datasets/stare.py /path/to/stare-images.tar /path/to/labels-ah.tar /path/to/labels-vk.tar
```

The script will create the directory structure automatically.
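
After conversion, a dataset can be referenced from a dataset config in the usual MMSegmentation way. The sketch below is schematic and not part of this commit: it assumes the corresponding dataset class (`ChaseDB1Dataset` here) is registered in your MMSegmentation version, and it omits the `pipeline` entries a real config needs; see the dataset configs shipped under `configs/_base_/datasets/` for authoritative examples.

```python
# Schematic dataset config (assumptions for illustration, not part of this commit).
dataset_type = 'ChaseDB1Dataset'  # assumed registered dataset class
data_root = 'data/CHASE_DB1'      # output of chase_db1.py above

data = dict(
    train=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/training',
        ann_dir='annotations/training'),    # pipeline=... omitted
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation'))  # pipeline=... omitted
```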

## Inference with pretrained models

We provide testing scripts to evaluate a whole dataset (Cityscapes, PASCAL VOC, ADE20k, etc.),
2 changes: 1 addition & 1 deletion setup.cfg
@@ -8,6 +8,6 @@ line_length = 79
multi_line_output = 0
known_standard_library = setuptools
known_first_party = mmseg
known_third_party = PIL,cityscapesscripts,detail,matplotlib,mmcv,numpy,onnxruntime,oss2,pytest,scipy,torch
known_third_party = PIL,cityscapesscripts,cv2,detail,matplotlib,mmcv,numpy,onnxruntime,oss2,pytest,scipy,torch
no_lines_before = STDLIB,LOCALFOLDER
default_section = THIRDPARTY
83 changes: 83 additions & 0 deletions tools/convert_datasets/chase_db1.py
@@ -0,0 +1,83 @@
import argparse
import os
import os.path as osp
import tempfile
import zipfile

import mmcv

CHASE_DB1_LEN = 28 * 3
TRAINING_LEN = 60
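# The 84 (28 * 3) files are the 28 fundus images plus two manual annotations
# each; the first 60 files in sorted order (20 images with their annotations)
# go to the training split, the remaining 8 images to validation.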


def parse_args():
    parser = argparse.ArgumentParser(
        description='Convert CHASE_DB1 dataset to mmsegmentation format')
    parser.add_argument('dataset_path', help='path of CHASEDB1.zip')
    parser.add_argument('--tmp_dir', help='path of the temporary directory')
    parser.add_argument('-o', '--out_dir', help='output path')
    args = parser.parse_args()
    return args


def main():
    args = parse_args()
    dataset_path = args.dataset_path
    if args.out_dir is None:
        out_dir = osp.join('data', 'CHASE_DB1')
    else:
        out_dir = args.out_dir

    print('Making directories...')
    mmcv.mkdir_or_exist(out_dir)
    mmcv.mkdir_or_exist(osp.join(out_dir, 'images'))
    mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'training'))
    mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'validation'))
    mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations'))
    mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'training'))
    mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation'))

    with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
        print('Extracting CHASEDB1.zip...')
        zip_file = zipfile.ZipFile(dataset_path)
        zip_file.extractall(tmp_dir)

        print('Generating training dataset...')

        assert len(os.listdir(tmp_dir)) == CHASE_DB1_LEN, \
            'len(os.listdir(tmp_dir)) != {}'.format(CHASE_DB1_LEN)

        for img_name in sorted(os.listdir(tmp_dir))[:TRAINING_LEN]:
            img = mmcv.imread(osp.join(tmp_dir, img_name))
            if osp.splitext(img_name)[1] == '.jpg':
                mmcv.imwrite(
                    img, osp.join(out_dir, 'images', 'training', img_name))
            else:
                # The annotation img should be divided by 128, because some of
                # the annotation imgs are not standard. We should set a
                # threshold to convert the nonstandard annotation imgs. The
                # value divided by 128 is equivalent to '1 if value >= 128
                # else 0'
                mmcv.imwrite(
                    img[:, :, 0] // 128,
                    osp.join(out_dir, 'annotations', 'training',
                             osp.splitext(img_name)[0] + '.jpg'))

        for img_name in sorted(os.listdir(tmp_dir))[TRAINING_LEN:]:
            img = mmcv.imread(osp.join(tmp_dir, img_name))
            if osp.splitext(img_name)[1] == '.jpg':
                mmcv.imwrite(
                    img, osp.join(out_dir, 'images', 'validation', img_name))
            else:
                mmcv.imwrite(
                    img[:, :, 0] // 128,
                    osp.join(out_dir, 'annotations', 'validation',
                             osp.splitext(img_name)[0] + '.jpg'))

        print('Removing the temporary files...')

    print('Done!')


if __name__ == '__main__':
    main()
109 changes: 109 additions & 0 deletions tools/convert_datasets/drive.py
@@ -0,0 +1,109 @@
import argparse
import os
import os.path as osp
import tempfile
import zipfile

import cv2
import mmcv


def parse_args():
    parser = argparse.ArgumentParser(
        description='Convert DRIVE dataset to mmsegmentation format')
    parser.add_argument(
        'training_path', help='the training part of DRIVE dataset')
    parser.add_argument(
        'testing_path', help='the testing part of DRIVE dataset')
    parser.add_argument('--tmp_dir', help='path of the temporary directory')
    parser.add_argument('-o', '--out_dir', help='output path')
    args = parser.parse_args()
    return args


def main():
    args = parse_args()
    training_path = args.training_path
    testing_path = args.testing_path
    if args.out_dir is None:
        out_dir = osp.join('data', 'DRIVE')
    else:
        out_dir = args.out_dir

    print('Making directories...')
    mmcv.mkdir_or_exist(out_dir)
    mmcv.mkdir_or_exist(osp.join(out_dir, 'images'))
    mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'training'))
    mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'validation'))
    mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations'))
    mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'training'))
    mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation'))

    with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
        print('Extracting training.zip...')
        zip_file = zipfile.ZipFile(training_path)
        zip_file.extractall(tmp_dir)

        print('Generating training dataset...')
        now_dir = osp.join(tmp_dir, 'training', 'images')
        for img_name in os.listdir(now_dir):
            img = mmcv.imread(osp.join(now_dir, img_name))
            mmcv.imwrite(
                img,
                osp.join(out_dir, 'images', 'training',
                         osp.splitext(img_name)[0] + '.jpg'))

        now_dir = osp.join(tmp_dir, 'training', '1st_manual')
        for img_name in os.listdir(now_dir):
            # The manual annotations are distributed as GIF files, which
            # mmcv.imread cannot decode, so the first frame is read with
            # cv2.VideoCapture instead.
            cap = cv2.VideoCapture(osp.join(now_dir, img_name))
            ret, img = cap.read()
            mmcv.imwrite(
                img[:, :, 0] // 128,
                osp.join(out_dir, 'annotations', 'training',
                         osp.splitext(img_name)[0] + '.jpg'))

        print('Extracting test.zip...')
        zip_file = zipfile.ZipFile(testing_path)
        zip_file.extractall(tmp_dir)

        print('Generating validation dataset...')
        now_dir = osp.join(tmp_dir, 'test', 'images')
        for img_name in os.listdir(now_dir):
            img = mmcv.imread(osp.join(now_dir, img_name))
            mmcv.imwrite(
                img,
                osp.join(out_dir, 'images', 'validation',
                         osp.splitext(img_name)[0] + '.jpg'))

        now_dir = osp.join(tmp_dir, 'test', '1st_manual')
        if osp.exists(now_dir):
            for img_name in os.listdir(now_dir):
                cap = cv2.VideoCapture(osp.join(now_dir, img_name))
                ret, img = cap.read()
                # The annotation img should be divided by 128, because some of
                # the annotation imgs are not standard. We should set a
                # threshold to convert the nonstandard annotation imgs. The
                # value divided by 128 is equivalent to '1 if value >= 128
                # else 0'
                mmcv.imwrite(
                    img[:, :, 0] // 128,
                    osp.join(out_dir, 'annotations', 'validation',
                             osp.splitext(img_name)[0] + '.jpg'))

        now_dir = osp.join(tmp_dir, 'test', '2nd_manual')
        if osp.exists(now_dir):
            for img_name in os.listdir(now_dir):
                cap = cv2.VideoCapture(osp.join(now_dir, img_name))
                ret, img = cap.read()
                mmcv.imwrite(
                    img[:, :, 0] // 128,
                    osp.join(out_dir, 'annotations', 'validation',
                             osp.splitext(img_name)[0] + '.jpg'))

        print('Removing the temporary files...')

    print('Done!')


if __name__ == '__main__':
    main()
