-
Notifications
You must be signed in to change notification settings - Fork 8
/
verify_dataset.py
41 lines (30 loc) · 1.09 KB
/
verify_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import pathlib
import itertools
import PIL.Image
import tqdm
def test_size(root):
    """Report every image whose dimensions differ from 1000x1000.

    Files are looked up exactly one directory level below ``root`` using
    the pattern ``*/*.*`` and visited in sorted path order. Nothing is
    returned; each offending file is reported with ``print``.

    Args:
        root: dataset root
    """
    expected = (1000, 1000)
    candidates = sorted(pathlib.Path(root).glob('*/*.*'))
    for path in tqdm.tqdm(candidates):
        # Read the size while the file is open; report after it is closed.
        with PIL.Image.open(path) as image:
            actual = image.size
        if actual == expected:
            continue
        print('{} has wrong size of {}'.format(path, actual))
def test_triplets(root):
    """Report coordinate groups with an unexpected number of files.

    Every entry directly under ``root`` is scanned for files matching
    ``*.*``. Files are grouped by the first two underscore-separated fields
    of their stem, parsed as integers. A group is accepted when it holds
    3 or 4 files; any other count is reported with ``print``.

    The original docstring names the expected members as rgb, dem and seg.
    NOTE(review): the meaning of the accepted fourth file is not visible
    here -- confirm against the dataset layout.

    Args:
        root: dataset root

    Raises:
        ValueError: if one of the first two stem fields of a file name is
            not an integer.
    """
    def get_coords(img_path):
        # Grouping key: first two '_'-separated stem fields as ints.
        return tuple(int(x) for x in img_path.stem.split('_')[:2])

    root = pathlib.Path(root)
    for city in tqdm.tqdm(root.iterdir()):
        # groupby only merges adjacent items, so sort by the same key first.
        img_paths = sorted(city.glob('*.*'), key=get_coords)
        for _, group in itertools.groupby(img_paths, key=get_coords):
            group = list(group)
            # The previous test `!= 3 or != 4` was always true and flagged
            # every group; only counts other than 3 or 4 are a problem.
            if len(group) not in (3, 4):
                print('missing a file in {}: {}'.format(city.name, [g.name for g in group]))
# Run each dataset check in order against the default dataset directory,
# announcing it with its banner first.
root = 'dataset'
for banner, check in (
    ('Checking image dimensions', test_size),
    ('\nChecking missing files', test_triplets),
):
    print(banner)
    check(root)