"""Model validation module: generates insights from the model performance results."""

import pickle
from os.path import join

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sn
import sklearn.svm

from classes.StatisticalClassifier import StatisticalClassifier
from util.plotter_util import plot_confusion_matrix, plot_bar_chart

# Letters
J_LETTER = "J"
Z_LETTER = "Z"
# IO
WRITE_BINARY = "wb"
READ_BINARY = "rb"
# Files and directories
FEATURES_SOURCE_FILE = "7500_features_dump.pkl"
MODEL_SOURCE_FILE = "trained_model.pkl"
FEATURES_DIRECTORY_ROUTE = "./features/"
MODEL_DIRECTORY_ROUTE = "./model"
# SVM
SVM_KERNEL = "linear"
# Labels and messages
OVERALL_SCORE = "Overall score: "
CONFUSION_TITLE = "Confusion matrix for ASL alphabet classification"
CONFUSION_X_LABEL = "Predictions"
CONFUSION_Y_LABEL = "Target"


def load_object(load_path):
    """
    Function that loads the pickled list of features from disk
    :param load_path: directory in which the features file is saved
    :return: list of tuples containing observations and labels
    """
    with open(join(load_path, FEATURES_SOURCE_FILE), READ_BINARY) as reader:
        return pickle.load(reader)
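
# Usage sketch: with the constants above, load_object(FEATURES_DIRECTORY_ROUTE) reads
# ./features/7500_features_dump.pkl and returns the unpickled feature collection.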


def load_model(load_path):
    """
    Function that loads a pretrained model from disk
    :param load_path: path to the file in which the model was saved
    :return: classification model
    """
    with open(load_path, READ_BINARY) as reader:
        return pickle.load(reader)


def save_model(model, save_path):
    """
    Function that saves a trained model to a pickle file
    :param model: model object to save
    :param save_path: directory in which the model will be saved
    """
    with open(join(save_path, MODEL_SOURCE_FILE), WRITE_BINARY) as writer:
        pickle.dump(model, writer)
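
# Usage sketch (hypothetical `classifier` object; assumes the ./model directory exists):
#   save_model(classifier, MODEL_DIRECTORY_ROUTE)          # writes ./model/trained_model.pkl
#   classifier = load_model("./model/trained_model.pkl")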


def show_confusion_matrix(confusion_matrix):
    """
    Procedure that renders the confusion matrix as an annotated heatmap
    :param confusion_matrix: matrix of target classes vs. predicted classes
    """
    sn.heatmap(confusion_matrix, annot=True, fmt='g')
    plt.title(CONFUSION_TITLE)
    plt.xlabel(CONFUSION_X_LABEL)
    plt.ylabel(CONFUSION_Y_LABEL)
    plt.show()


def get_support_vector_machine_model(vectors):
    """
    Function that instantiates and trains the SVM model
    :param vectors: dictionary mapping labels to lists of feature vectors
    :return: trained SVM model
    """
    # Instantiate the model
    svm = sklearn.svm.SVC(kernel=SVM_KERNEL, C=1.0)
    # Flatten the per-class dictionary into parallel feature and label lists
    x = []
    y = []
    for _class in vectors.keys():
        x.extend(vectors[_class])
        y.extend([_class] * len(vectors[_class]))
    # Fit the model to the training data
    svm.fit(x, y)
    return svm
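
# Usage sketch with hypothetical data, assuming each feature vector is a fixed-length
# numeric sequence:
#   vectors = {"A": [[0.1, 0.2], [0.3, 0.4]], "B": [[0.9, 0.8], [0.7, 0.6]]}
#   svm = get_support_vector_machine_model(vectors)
#   svm.predict([[0.2, 0.3]])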


def get_classifier(trained_model_path=""):
    """
    Function that instantiates the classifier, trains it and returns it
    :param trained_model_path: if not empty, a trained model is loaded from this path
    :return: trained classifier and test dataset, or only the loaded classifier when
             trained_model_path is given
    """
    if trained_model_path == "":
        statisticalClassifier = StatisticalClassifier()
        # Load the default features
        vectors = load_object(FEATURES_DIRECTORY_ROUTE)
        # Split the dataset into training and testing subsets
        train_dataset, test_dataset = statisticalClassifier.split(vectors)
        # Train the classifier
        statisticalClassifier.fit(train_dataset)
        # Return the trained classifier and the test dataset
        return statisticalClassifier, test_dataset
    else:
        return load_model(trained_model_path)
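
# Usage sketch, assuming a model was previously saved with save_model():
#   classifier, test_dataset = get_classifier()               # train from the feature dump
#   classifier = get_classifier("./model/trained_model.pkl")  # reuse a saved model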


def main_statistical():
    """
    Procedure that retrieves features, trains the statistical classifier and tests it
    """
    statisticalClassifier, test_dataset = get_classifier()
    score, confusion_matrix = statisticalClassifier.score(test_dataset)
    show_confusion_matrix(confusion_matrix)
    # Report the per-class accuracy (diagonal entry over the row total)
    for i in range(confusion_matrix.shape[0]):
        print(f"Class {i}: accuracy: {confusion_matrix[i, i] / np.sum(confusion_matrix[i])}")
    print(OVERALL_SCORE, score)
    # Persist the trained classifier for later reuse
    save_model(statisticalClassifier, MODEL_DIRECTORY_ROUTE)


def main_SVM():
    """
    Procedure that retrieves features, trains the SVM classifier and tests it
    """
    statisticalClassifier = StatisticalClassifier()
    vectors = load_object(FEATURES_DIRECTORY_ROUTE)
    train, test = statisticalClassifier.split(vectors)
    svm = get_support_vector_machine_model(train)
    # Flatten the test data into parallel feature and label lists
    x = []
    y = []
    for _class in test.keys():
        x.extend(test[_class])
        y.extend([_class] * len(test[_class]))
    prediction = svm.predict(x)
    # Build the confusion matrix: rows are target classes, columns are predictions
    classes_count = len(test.keys())
    confusion_matrix = np.zeros((classes_count, classes_count))
    test_keys = sorted(test.keys())
    class_map = {_class: index for index, _class in enumerate(test_keys)}
    for i in range(len(prediction)):
        confusion_matrix[class_map[y[i]], class_map[prediction[i]]] += 1
    # Label the matrix with the alphabet, excluding J and Z (they involve motion in ASL fingerspelling)
    letters = list(filter(lambda c: c != J_LETTER and c != Z_LETTER, map(chr, range(65, 91))))
    confusion_matrix_data_frame = pd.DataFrame(confusion_matrix, index=letters, columns=letters)
    plot_confusion_matrix(confusion_matrix_data_frame)
    # Report and plot the per-class accuracy (diagonal entry over the row total)
    accuracy_level_letter = []
    for i in range(confusion_matrix.shape[0]):
        current_accuracy = confusion_matrix[i, i] / np.sum(confusion_matrix[i])
        accuracy_level_letter.append(current_accuracy)
        print(f"Class {i}: accuracy: {current_accuracy}")
    print(OVERALL_SCORE, svm.score(x, y))
    plot_bar_chart(letters, accuracy_level_letter)


# Example validation run: python ./validator.py
if __name__ == '__main__':
    # This block only runs when the file is executed directly
    main_SVM()