-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathprocess_wv.py
executable file
·261 lines (206 loc) · 9.85 KB
/
process_wv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
"""
Copyright 2018 Defense Innovation Unit Experimental
All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from PIL import Image
import tensorflow as tf
import io
import glob
from tqdm import tqdm
import numpy as np
import logging
import argparse
import os
import json
import wv_util as wv
import tfr_util as tfr
import aug_util as aug
import csv
import rasterio
"""
A script that processes xView imagery.
Args:
image_folder: A folder path to the directory storing xView .tif files
ie ("xView_data/")
json_filepath: A file path to the GEOJSON ground truth file
ie ("xView_gt.geojson")
test_percent (-t): The percentage of input images to use for test set
suffix (-s): The suffix for output TFRecord files. Default suffix 't1' will output
xview_train_t1.record and xview_test_t1.record
augment (-a): A boolean value of whether or not to use augmentation
Outputs:
Writes two files to the current directory containing training and test data in
TFRecord format ('xview_train_SUFFIX.record' and 'xview_test_SUFFIX.record')
"""
def get_images_from_filename_array(coords,chips,classes,folder_names,res=(250,250)):
"""
Gathers and chips all images within a given folder at a given resolution.
Args:
coords: an array of bounding box coordinates
chips: an array of filenames that each coord/class belongs to.
classes: an array of classes for each bounding box
folder_names: a list of folder names containing images
res: an (X,Y) tuple where (X,Y) are (width,height) of each chip respectively
Output:
images, boxes, classes arrays containing chipped images, bounding boxes, and classes, respectively.
"""
images =[]
boxes = []
clses = []
k = 0
bi = 0
for folder in folder_names:
fnames = glob.glob(folder + "*.tif")
fnames.sort()
for fname in tqdm(fnames):
#Needs to be "X.tif" ie ("5.tif")
name = fname.split("\\")[-1]
arr = wv.get_image(fname)
print(arr.shape)
img,box,cls = wv.chip_image(arr,coords[chips==name],classes[chips==name],res)
for im in img:
images.append(im)
for b in box:
boxes.append(b)
for c in cls:
clses.append(cls)
k = k + 1
return images, boxes, clses
def shuffle_images_and_boxes_classes(im,box,cls):
"""
Shuffles images, boxes, and classes, while keeping relative matching indices
Args:
im: an array of images
box: an array of bounding box coordinates ([xmin,ymin,xmax,ymax])
cls: an array of classes
Output:
Shuffle image, boxes, and classes arrays, respectively
"""
assert len(im) == len(box)
assert len(box) == len(cls)
perm = np.random.permutation(len(im))
out_b = {}
out_c = {}
k = 0
for ind in perm:
out_b[k] = box[ind]
out_c[k] = cls[ind]
k = k + 1
return im[perm], out_b, out_c
'''
Datasets
_multires: multiple resolutions. Currently [(500,500),(400,400),(300,300),(200,200)]
_aug: Augmented dataset
'''
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("image_folder", help="Path to folder containing image chips (ie 'Image_Chips/' ")
parser.add_argument("json_filepath", help="Filepath to GEOJSON coordinate file")
parser.add_argument("-t", "--test_percent", type=float, default=0.333,
help="Percent to split into test (ie .25 = test set is 25% total)")
parser.add_argument("-s", "--suffix", type=str, default='t1',
help="Output TFRecord suffix. Default suffix 't1' will output 'xview_train_t1.record' and 'xview_test_t1.record'")
parser.add_argument("-a","--augment", type=bool, default=False,
help="A boolean value whether or not to use augmentation")
args = parser.parse_args()
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#resolutions should be largest -> smallest. We take the number of chips in the largest resolution and make
#sure all future resolutions have less than 1.5times that number of images to prevent chip size imbalance.
#res = [(500,500),(400,400),(300,300),(200,200)]
res = [(300,300)]
AUGMENT = args.augment
SAVE_IMAGES = False
images = {}
boxes = {}
train_chips = 0
test_chips = 0
#Parameters
max_chips_per_res = 100000
train_writer = tf.python_io.TFRecordWriter("xview_train_%s.record" % args.suffix)
test_writer = tf.python_io.TFRecordWriter("xview_test_%s.record" % args.suffix)
coords,chips,classes,uids = wv.get_labels_w_id(args.json_filepath)
for res_ind, it in enumerate(res):
tot_box = 0
logging.info("Res: %s" % str(it))
ind_chips = 0
fnames = glob.glob(args.image_folder + "*.tif")
fnames.sort()
for fname in tqdm(fnames):
#Needs to be "X.tif", ie ("5.tif")
#Be careful!! Depending on OS you may need to change from '/' to '\\'. Use '/' for UNIX and '\\' for windows
name = fname.split("/")[-1]
#arr = wv.get_image(fname)
with rasterio.open(fname, "r") as r:
arr = r.read()
arr = np.rollaxis(arr,0,3)
im,box,classes_final = wv.chip_image(arr,coords[chips==name],classes[chips==name],it)
#Shuffle images & boxes all at once. Comment out the line below if you don't want to shuffle images
im,box,classes_final = shuffle_images_and_boxes_classes(im,box,classes_final)
split_ind = int(im.shape[0] * args.test_percent)
for idx, image in enumerate(im):
tf_example = tfr.to_tf_example(image,box[idx],classes_final[idx])
#Check to make sure that the TF_Example has valid bounding boxes.
#If there are no valid bounding boxes, then don't save the image to the TFRecord.
float_list_value = tf_example.features.feature['image/object/bbox/xmin'].float_list.value
if (ind_chips < max_chips_per_res and np.array(float_list_value).any()):
tot_box+=np.array(float_list_value).shape[0]
if idx < split_ind:
test_writer.write(tf_example.SerializeToString())
test_chips+=1
else:
train_writer.write(tf_example.SerializeToString())
train_chips += 1
ind_chips +=1
#Make augmentation probability proportional to chip size. Lower chip size = less chance.
#This makes the chip-size imbalance less severe.
prob = np.random.randint(0,np.max(res))
#for 200x200: p(augment) = 200/500 ; for 300x300: p(augment) = 300/500 ...
if AUGMENT and prob < it[0]:
for extra in range(3):
center = np.array([int(image.shape[0]/2),int(image.shape[1]/2)])
deg = np.random.randint(-10,10)
#deg = np.random.normal()*30
newimg = aug.salt_and_pepper(aug.gaussian_blur(image))
#.3 probability for each of shifting vs rotating vs shift(rotate(image))
p = np.random.randint(0,3)
if p == 0:
newimg,nb = aug.shift_image(newimg,box[idx])
elif p == 1:
newimg,nb = aug.rotate_image_and_boxes(newimg,deg,center,box[idx])
elif p == 2:
newimg,nb = aug.rotate_image_and_boxes(newimg,deg,center,box[idx])
newimg,nb = aug.shift_image(newimg,nb)
newimg = (newimg).astype(np.uint8)
if idx%1000 == 0 and SAVE_IMAGES:
Image.fromarray(newimg).save('process/img_%s_%s_%s.png'%(name,extra,it[0]))
if len(nb) > 0:
tf_example = tfr.to_tf_example(newimg,nb,classes_final[idx])
#Don't count augmented chips for chip indices
if idx < split_ind:
test_writer.write(tf_example.SerializeToString())
test_chips += 1
else:
train_writer.write(tf_example.SerializeToString())
train_chips+=1
else:
if SAVE_IMAGES:
aug.draw_bboxes(newimg,nb).save('process/img_nobox_%s_%s_%s.png'%(name,extra,it[0]))
if res_ind == 0:
max_chips_per_res = int(ind_chips * 1.5)
logging.info("Max chips per resolution: %s " % max_chips_per_res)
logging.info("Tot Box: %d" % tot_box)
logging.info("Chips: %d" % ind_chips)
logging.info("saved: %d train chips" % train_chips)
logging.info("saved: %d test chips" % test_chips)
train_writer.close()
test_writer.close()