This repository has been archived by the owner on May 4, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathknn_classify.m
executable file
·118 lines (103 loc) · 3.16 KB
/
knn_classify.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
function [new_accu, train_accu] = knn_classify(train_data, train_label, new_data, new_label, K)
% KNN_CLASSIFY  k-nearest-neighbor classifier with tie-inclusive voting.
%   [new_accu, train_accu] = knn_classify(train_data, train_label, new_data, new_label, K)
%
%   Inputs:
%     train_data  - N*D matrix, one sample per row
%     train_label - N*1 vector of labels for train_data
%     new_data    - M*D matrix, one sample per row
%     new_label   - M*1 vector of labels for new_data
%     K           - number of nearest neighbors
%   Outputs:
%     new_accu    - fraction of new_data classified correctly
%     train_accu  - fraction of train_data classified correctly
%                   (leave-one-out: each training sample is excluded
%                   from its own neighbor set)
%
%   Features are standardised (zero mean, unit variance) using the
%   TRAINING set statistics only, then Euclidean distance is used.
%   If several training samples are tied with the K-th smallest
%   distance, all of them participate in the majority vote.

new_data_size = length(new_label);
train_data_size = length(train_label);

% Standardise with training-set mean/std; bsxfun broadcasts row vectors.
mean_train_data = mean(train_data);
std_train_data = std(train_data);
standardise_new_data = bsxfun(@rdivide, ...
    bsxfun(@minus, new_data, mean_train_data), std_train_data);
standardise_train_data = bsxfun(@rdivide, ...
    bsxfun(@minus, train_data, mean_train_data), std_train_data);

% ---- Classify new_data against the training set ----
predicted = zeros(new_data_size, 1);
for i = 1:new_data_size
    distances = zeros(1, train_data_size);
    for j = 1:train_data_size
        distances(j) = norm(standardise_new_data(i,:) - standardise_train_data(j,:));
    end
    predicted(i) = knn_vote(distances, train_label, K);
end
new_accu = sum(predicted(:) == new_label(:)) / new_data_size;

% ---- Leave-one-out classification of train_data ----
% Keep the self-distance slot but set it to Inf so it is never chosen;
% this preserves the index alignment between 'distances' and
% 'train_label' (removing the slot would shift indices j >= i by one).
loo_predicted = zeros(train_data_size, 1);
for i = 1:train_data_size
    distances = zeros(1, train_data_size);
    for j = 1:train_data_size
        distances(j) = norm(standardise_train_data(i,:) - standardise_train_data(j,:));
    end
    distances(i) = Inf;  % exclude the sample itself
    loo_predicted(i) = knn_vote(distances, train_label, K);
end
train_accu = sum(loo_predicted(:) == train_label(:)) / train_data_size;
end

function label = knn_vote(distances, train_label, K)
% KNN_VOTE  Majority label among the K nearest neighbors.
%   Samples tied with the K-th smallest distance are all included in
%   the vote (so the effective neighbor count may exceed K).
[sorted_d, idx] = sort(distances, 'ascend');
k = K;
% Extend past K while further samples are exactly tied with the K-th
% distance; guard k < length(sorted_d) so K == numel(distances) is safe.
while k < length(sorted_d) && sorted_d(k+1) == sorted_d(K)
    k = k + 1;
end
label = mode(train_label(idx(1:k)));
end
% k-nearest neighbor classifier
% Input:
% train_data: N*D matrix, each row as a sample and each column as a
% feature
% train_label: N*1 vector, each row as a label
% new_data: M*D matrix, each row as a sample and each column as a
% feature
% new_label: M*1 vector, each row as a label
% K: number of nearest neighbors
%
% Output:
% new_accu: accuracy of classifying new_data
% train_accu: accuracy of classifying train_data (using leave-one-out
% strategy)
%
% CSCI 567: Machine Learning, Fall 2015, Homework 1