-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathbag_of_words.py
72 lines (50 loc) · 1.96 KB
/
bag_of_words.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.metrics import confusion_matrix
import read_files
import vocabulary_helpers
import get_data
import plot_data
import pickle
# categories which have more than 100 images: 80 - train 20 -test.
# 1. airplanes.
# 2. chandelier.
# 3. motorbikes.
# categories which have more than 90 images: 70 - train 20 - test - butterfly.
# 1. butterfly.
# Bag-of-visual-words pipeline: read the train/test images for the chosen
# categories, turn every image into a feature vector using a pre-fitted
# clustering model, train an SVM on the train vectors and report the
# predictions on the test vectors together with a confusion matrix.
categories = ['airplanes', 'chandelier', 'motorbikes', 'butterfly', 'revolver',
              'spoon']

print("Training Images: ")
print("Readin image files: ")
train_dict = read_files.get_image_dict(categories, "train")
test_dict = read_files.get_image_dict(categories, "test")

print("Parsing image files into categories: ")
[train_imgs, train_labels] = get_data.get_images_and_labels(train_dict)
[test_imgs, test_labels] = get_data.get_images_and_labels(test_dict)

print("Generating vocabulary: ")
# generate_vocabulary returns a pair; only the second element of each pair is
# consumed below (by generate_features). The first element of the train pair
# is what the commented-out clustering call would take as input.
(f_vocabulary, i_vocab) = vocabulary_helpers.generate_vocabulary(train_imgs)
(t_f_vocabulary, t_i_vocab) = vocabulary_helpers.generate_vocabulary(test_imgs)

n_clusters = 1000
# Clustering is not re-run here; a previously saved model is loaded instead.
# Presumably the pickle was produced by the call below with the same
# n_clusters -- TODO confirm before changing n_clusters.
# kmeans = vocabulary_helpers.generate_clusters(f_vocabulary, n_clusters)
# NOTE: unpickling executes arbitrary code embedded in the file; only load
# "bov_pickle_1000.sav" when it comes from a trusted source.
# The context manager guarantees the file handle is closed after loading.
with open("bov_pickle_1000.sav", "rb") as model_file:
    kmeans = pickle.load(model_file)

print("generating features: ")
print("Creating feature vectors for test and train images from vocabulary set.")
train_data = vocabulary_helpers.generate_features(i_vocab, kmeans, n_clusters)
test_data = vocabulary_helpers.generate_features(t_i_vocab, kmeans, n_clusters)

print("Applying SVM classifier.")
# SVM Classifier.
clf = svm.SVC()
fitted = clf.fit(train_data, train_labels)
predict = clf.predict(test_data)

print("Actual:")
print(test_labels)
print("predicted:")
print(predict)

# Confusion matrix.
test_labels = np.asarray(test_labels)
# sklearn's signature is confusion_matrix(y_true, y_pred): rows are the true
# classes and columns the predicted ones. Passing the predictions first
# would yield the transposed matrix and mislabel the plot axes.
# NOTE(review): rows/columns follow the sorted order of the label values;
# confirm that this matches the order of `categories` used for tick labels.
cnf_matrix = confusion_matrix(test_labels, predict)
np.set_printoptions(precision=2)

plot_data.plot_confusion_matrix(cnf_matrix, classes=categories,
                                title='Confusion matrix')
plt.show()