main_classify_feats.py
import os
import time
from random import shuffle

import numpy as np
from scipy.io import loadmat
from sklearn.svm import LinearSVC
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
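# Input assumptions (inferred from the loading code in main(), not verified against
# the original data): a file 'feats_<sampling_method>.mat' is expected in the working
# directory and, for every combination of settings in main(), must contain a
# '<name>_feats' array of shape (no_examples, no_features) and a '<name>_labels'
# array of shape (no_examples, 1) holding binary labels. The examples are assumed to
# be ordered as 2000 of one class followed by 4000 of the other, matching the
# defaults of generate_fold_indices() below.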
def generate_fold_indices(no_examples_per_class=np.asarray([2000, 4000]), no_class=2, no_folds=10,
                          train_ratio=0.7, val_ratio=0.2, test_ratio=0.1):
    """
    Generates per-fold train/validation/test indices for k-fold cross validation.

    The test folds are disjoint and together cover every example exactly once,
    so the test ratio must equal 1 / no_folds.
    """
    assert np.isclose(test_ratio * no_folds, 1.0)
    no_train = np.round(no_examples_per_class * train_ratio).astype(int)
    no_val = np.round(no_examples_per_class * val_ratio).astype(int)
    no_test = (no_examples_per_class - no_train - no_val).astype(int)
    indices = {'train': np.empty((no_folds, sum(no_train)), dtype=int),
               'val': np.empty((no_folds, sum(no_val)), dtype=int),
               'test': np.empty((no_folds, sum(no_test)), dtype=int)}
    # Examples of class 1 occupy rows [0, N1) of the feature matrix, class 2 occupies rows [N1, N1 + N2).
    shuffled_inds = [list(range(no_examples_per_class[0])),
                     list(range(no_examples_per_class[0], no_examples_per_class[0] + no_examples_per_class[1]))]
    shuffle(shuffled_inds[0])
    shuffle(shuffled_inds[1])
    for ind_fold in range(no_folds):
        # class #1: take this fold's slice as the test set, then shuffle the remainder
        # and split it into train and validation.
        copied_shuffled_inds = list(shuffled_inds[0])
        indices['test'][ind_fold, :no_test[0]] = copied_shuffled_inds[ind_fold * no_test[0]:(ind_fold + 1) * no_test[0]]
        del copied_shuffled_inds[ind_fold * no_test[0]:(ind_fold + 1) * no_test[0]]
        shuffle(copied_shuffled_inds)
        indices['train'][ind_fold, :no_train[0]] = copied_shuffled_inds[:no_train[0]]
        indices['val'][ind_fold, :no_val[0]] = copied_shuffled_inds[no_train[0]:]
        # class #2: same procedure, written into the second half of each row.
        copied_shuffled_inds = list(shuffled_inds[1])
        indices['test'][ind_fold, no_test[0]:] = copied_shuffled_inds[ind_fold * no_test[1]:(ind_fold + 1) * no_test[1]]
        del copied_shuffled_inds[ind_fold * no_test[1]:(ind_fold + 1) * no_test[1]]
        shuffle(copied_shuffled_inds)
        indices['train'][ind_fold, no_train[0]:] = copied_shuffled_inds[:no_train[1]]
        indices['val'][ind_fold, no_val[0]:] = copied_shuffled_inds[no_train[1]:]
    return indices
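# Illustrative check (with the default arguments above): the 6000 examples are split
# per fold into 4200 train, 1200 validation and 600 test indices, and the ten test
# folds are disjoint.
#
#   indices = generate_fold_indices()
#   indices['train'].shape  # (10, 4200)
#   indices['val'].shape    # (10, 1200)
#   indices['test'].shape   # (10, 600)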
def main():
    domains = ['IR', 'VL']
    preprocess_methods = ['mean_subtraction', 'scaling']
    #init_methods = ['random', 'ImageNet']
    init_methods = ['ImageNet']
    model_names = ['ResNet50', 'VGG19']
    stages = ['stage_1', 'stage_2', 'stage_3', 'stage_4', 'stage_5']
    #sampling_methods = ['pca', 'uniform']
    sampling_methods = ['uniform']
    classifiers = ['SVM', 'NB', 'RF']
    no_folds = 10
    indices = generate_fold_indices(no_folds=no_folds)
    log_path = os.path.join('log', 'feat_class')
    if not os.path.isdir(log_path):
        os.makedirs(log_path)
    f = open(os.path.join(log_path, 'log.txt'), 'w')
    for sampling_method in sampling_methods:
        out_dict = loadmat('feats_' + sampling_method + '.mat')
        print(sampling_method)
        f.write(sampling_method + '\n')
        for domain in domains:
            for preprocess_method in preprocess_methods:
                for init_method in init_methods:
                    for model_name in model_names:
                        for stage in stages:
                            name = '_'.join([domain, preprocess_method, init_method, model_name, stage])
                            print(name)
                            f.write(name + '\n')
                            feats = out_dict[name + '_feats']
                            labels = out_dict[name + '_labels']
                            for classifier in classifiers:
                                conf_mat = [0, 0, 0, 0]  # TN, FP, FN, TP
                                for ind_fold in range(no_folds):
                                    # Train on the train + validation split, evaluate on the held-out test split.
                                    inds_train = np.concatenate((indices['train'][ind_fold], indices['val'][ind_fold]))
                                    inds_test = indices['test'][ind_fold]
                                    feats_train = feats[inds_train]
                                    labels_train = labels[inds_train, 0]
                                    feats_test = feats[inds_test]
                                    labels_test = labels[inds_test, 0]
                                    if classifier == 'SVM':
                                        clf = LinearSVC()
                                    elif classifier == 'NB':
                                        clf = GaussianNB()
                                    elif classifier == 'RF':
                                        clf = RandomForestClassifier()
                                    clf.fit(feats_train, labels_train)
                                    preds = clf.predict(feats_test)
                                    # Time a single-sample prediction to gauge per-example inference cost.
                                    single_sample = feats_train[0, :][np.newaxis, :]
                                    start = time.time()
                                    clf.predict(single_sample)
                                    print(classifier, 'single-sample prediction time:', time.time() - start)
                                    # Accumulate the confusion matrix over all folds.
                                    preds = preds.astype(bool)
                                    labels_test = labels_test.astype(bool)
                                    conf_mat[0] += np.sum(np.invert(preds) & np.invert(labels_test))
                                    conf_mat[1] += np.sum(preds & np.invert(labels_test))
                                    conf_mat[2] += np.sum(np.invert(preds) & labels_test)
                                    conf_mat[3] += np.sum(preds & labels_test)
                                percent_error = 100.0 - 100.0 * (conf_mat[0] + conf_mat[3]) / sum(conf_mat)
                                print(classifier, percent_error)
                                #print(conf_mat)
                                f.write(classifier + ' ' + str(percent_error) + '\n')
                                #f.write(str(conf_mat) + '\n')
    f.close()
if __name__ == "__main__":
    main()
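# Note: the resulting log/feat_class/log.txt lists, per sampling method and per
# feature-set name, one '<classifier> <percent_error>' line for each classifier,
# with the error aggregated over all ten test folds.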