-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathpredict.py
235 lines (209 loc) · 9.06 KB
/
predict.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter, ArgumentError
import os
from os.path import join
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import traceback
import sys
import numpy as np
import pandas as pd
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)
from keras.backend import set_image_data_format
from keras.models import model_from_json
import skimage
from PIL import Image
sys.path.append('./methods')
from trainCNN import get_preprocess
from preprocess import get_class_labels
def get_binary_pred(model_pred_df):
"""
Get probability of Urgent vs Non-Urgent diagnosis from predicted class label probabilities
:param model_pred_df: Dataframe containing multi-class probabilities for each sample
:return: Dataframe containing binary probabilities for each sample
"""
urgent_labels = ['CNV', 'DME']
urgent_cols = []
pred_urgent_df = pd.DataFrame(index=model_pred_df.index)
for col in model_pred_df.columns:
if (urgent_labels[0] in col) or (urgent_labels[1] in col):
urgent_cols.append(col)
assert (len(urgent_cols) == 2)
pred_urgent_df['urgent_proba'] = model_pred_df[urgent_cols[0]] + model_pred_df[urgent_cols[1]]
return pred_urgent_df
def mean_prediction(model_pred_df, y_vals=[0, 1, 2, 3]):
"""
output the mean probability predicted for each class
:param model_pred_df: dataframe containing the predictions
from all models for each class (pandas Dataframe)
:param y_vals: possible output classes (list like object)
:return: mean probability for each class (pandas dataframe)
"""
img_type_dict = get_class_labels(intKey=True)
mean_pred = pd.DataFrame(index=model_pred_df.index)
for yi in np.unique(y_vals):
mean = model_pred_df.filter(regex='_{}'.format(yi)).mean(axis=1)
mean_pred['proba_{}'.format(img_type_dict[yi])] = mean
mean_pred = mean_pred.div(mean_pred.sum(axis=1), axis=0)
return mean_pred
def load_model(model_name, ensemble_path):
"""
load keras model from json and model weights
:param model_name: name of model (str)
:param ensemble_path: path of directory containing
all models of the ensemble classifier (str)
:return: keras model, directory containing model files
"""
model_dirs = os.listdir(ensemble_path)
model_dirs = [d for d in model_dirs if os.path.isdir(join(ensemble_path, d))]
# find path to individual model
model_dir = None
for dir in model_dirs:
if model_name in dir:
model_dir = dir
model_dir_path = join(ensemble_path,
model_dir)
model_weights_path = join(model_dir_path,
model_name + ".hdf5")
# load json and create model
print('loading {} json'.format(model_name))
json_path = join(os.path.dirname(model_weights_path),
'{}_architecture.json'.format(model_name))
json_file = open(json_path, 'r')
loaded_model_json = json_file.read()
json_file.close()
model = model_from_json(loaded_model_json)
# load weights into new model
model.load_weights(model_weights_path)
print("Loaded {} json from disk".format(model_name))
return model, model_dir_path
def preprocess_imgs(x_path, new_size):
"""
preprocess images in the directory and return data
:param x_path: path to image directory containing .jpeg files (str)
:param new_size: desired shape of each image array (tuple - [x_res, y_res, channels])
:return: preprocessed image array (np array) [nImages, x_res, y_res, channels],
list of image names (list)
"""
img_files = os.listdir(x_path)
img_files = [f for f in img_files if f.endswith('.jpeg')]
img_stack = []
for img_fname in img_files:
img_path = os.path.join(x_path, img_fname)
img_arr = np.array(Image.open(img_path))
img_arr = skimage.transform.resize(img_arr, new_size)
img_arr = (img_arr - img_arr.min()) / img_arr.max()
img_stack.append(img_arr)
img_stack = np.stack(img_stack, axis=0)
img_names = [n.split('.')[0] for n in img_files] # include text before .jpeg file ending
return img_stack, img_names
def save_model_results(model_name, y_pred, img_names, model_dir_path, img_type_dict):
"""
save predictions of individual models
:param model_name: name of model (e.g. VGG16) (str)
:param y_pred: model predictions (npy array)
:param img_names: list of image filenames (list)
:param model_dir_path: path to the directory containing the model (str)
:param img_type_dict: mapping from integer labels to image labels (dict)
:return: dataframe containing model predictions (pandas DF)
"""
cols = ["{}_{}_{}".format(model_name, i, l)
for (l, i)
in img_type_dict.items()]
y_pred_df = pd.DataFrame(y_pred,
columns=cols,
index=img_names)
y_pred_df.to_csv(join(model_dir_path,
"{}_predictions.csv".format(model_name)))
return y_pred_df
def save_predictions(model_pred_dict, models, img_names, out_path):
"""
save the predictions of the ensemble classifier
:param model_pred_dict: dictionary containing the predictions of each model (dict)
:param models: list of model names (list)
:param img_names: list of image filenames (list)
:param out_path: directory where predictions are saved (str)
:return:
"""
# merge predictions into a single dataframe
model_pred_df = pd.DataFrame(index=img_names)
for modelName in models:
model_pred_df = pd.merge(model_pred_df,
model_pred_dict[modelName],
left_index=True,
right_index=True)
# calculate average probability for each class
mean_pred_df = mean_prediction(model_pred_df)
binary_pred_df = get_binary_pred(mean_pred_df)
# save dataframes to csv
model_pred_df.to_csv(join(out_path,
"individualModelPredictions.csv"))
mean_pred_df.to_csv(join(out_path,
"ensembleClfMeanProba.csv"))
binary_pred_df.to_csv(join(out_path,
"urgentProba.csv"))
def get_parser():
"""defines the parser for this script"""
module_parser = ArgumentParser(
formatter_class=ArgumentDefaultsHelpFormatter)
module_parser.add_argument("-i", dest="x_path", type=str,
help="input image directory")
module_parser.add_argument("-o", dest="out_path", type=str,
help="output dir path")
module_parser.add_argument("-m", dest="ensemble_path",
type=str,
help="path to model directory")
return module_parser
def main(x_path, out_path, ensemble_path):
"""
takes a directory of new images and runs inference with the classifier
:param x_path: path to directory of new images (str)
:param out_path: path to directory for predictions (str)
:param ensemble_path: path to metaClf directory (str)
:return: None
"""
"""############################################################################
0. Preprocess Data
############################################################################"""
set_image_data_format('channels_last')
img_type_dict = get_class_labels(intKey=True)
model_pred_dict = {}
# generate predictions for each individual model
models = ['InceptionV3', 'VGG16', 'ResNet50', 'Xception']
for model_name in models:
# chose which preprocessing method to use by detecting the model
preprocess_input = get_preprocess(model_name)
if model_name == "VGG16":
res = 224
else:
res = 299
new_size = (res, res, 3)
img_data, img_names = preprocess_imgs(x_path, new_size)
img_data = preprocess_input(img_data)
"""############################################################################
0. load model & predict
############################################################################"""
# load model
model, model_dir_path = load_model(model_name, ensemble_path)
# run inference
y_pred = model.predict(img_data,
batch_size=1,
verbose=1)
# save intermediate predictions to csv
y_pred_df = save_model_results(model_name, y_pred,
img_names, model_dir_path,
img_type_dict)
model_pred_dict[model_name] = y_pred_df
save_predictions(model_pred_dict, models, img_names, out_path)
if __name__ == "__main__":
parser = get_parser()
try:
args = parser.parse_args()
main(args.x_path,
args.out_path,
args.ensemble_path)
print('predict.py ... done!')
except ArgumentError as arg_exception:
traceback.print_exc()
except Exception as exception:
traceback.print_exc()
sys.exit()