-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPurchase.m
135 lines (106 loc) · 4.44 KB
/
Purchase.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
% Purchase.m
% Logistic-regression classifier predicting whether a user purchases,
% from (scaled) age and salary. Trains on even-aged users, tests on
% odd-aged users, plots both sets against the learned decision boundary,
% and prints test-set accuracy. Requires Cost_Function.m (reads the
% globals X_Data / Y_Data) and Social_Network_Ads.csv.

% Initialization
clear
clc
close all

% Import data set and convert the table to a numeric matrix.
Data = readtable('Social_Network_Ads.csv');
Data = Data{:,:};
% Column layout assumed from the indexing below (TODO confirm vs. CSV):
% Data col 3 = age; after dropping col 1, the working sets are
% [?, age, salary, purchased] so col 2 = age, col 3 = salary, col 4 = label.

% Split by age parity: even ages -> training set, odd ages -> test set.
Train_Data = Data(mod(Data(:,3),2) == 0, 2:end);
Test_Data  = Data(mod(Data(:,3),2) == 1, 2:end);

% Initial parameter vector [bias; age weight; salary weight].
Theta = zeros(3,1);

% Feature scaling: mean normalization by range, statistics taken from the
% TRAINING set only. The statistics are kept so the test set can be scaled
% with the same transform the model was trained on (fix: the original
% scaled the test set with its own statistics, putting the test points in
% a different feature space than the learned boundary).
Age_Mean  = mean(Train_Data(:,2));
Age_Range = max(Train_Data(:,2)) - min(Train_Data(:,2));
Sal_Mean  = mean(Train_Data(:,3));
Sal_Range = max(Train_Data(:,3)) - min(Train_Data(:,3));

% Number of training examples. size(.,1) instead of length(): length()
% returns the LARGEST dimension, which is the column count for a wide
% matrix and would silently corrupt the design matrix / loop bounds.
m = size(Train_Data, 1);

% Design matrix (3 x m): bias row, scaled age, scaled salary.
% Global because Cost_Function reads it.
global X_Data
X_Data = [ones(1, m);
          (Train_Data(:,2)' - Age_Mean) ./ Age_Range;
          (Train_Data(:,3)' - Sal_Mean) ./ Sal_Range];

% Labels (global because Cost_Function reads it).
global Y_Data
Y_Data = Train_Data(:,end);

% Let fminunc minimize Cost_Function (which supplies its own gradient).
Iter_Times = 100;
options = optimset('GradObj', 'on', 'MaxIter', Iter_Times);
[FinalThetas, CostVal, Flagg] = fminunc(@Cost_Function, Theta, options);

% Decision boundary in scaled feature space:
% theta1 + theta2*x + theta3*y = 0  ->  y = -(theta1 + theta2*x)/theta3
figure(1)
X_Plot = -1: 0.05: 1;
Y_Plot = -(FinalThetas(1) + X_Plot * FinalThetas(2)) / (FinalThetas(3));
plot(X_Plot, Y_Plot, '-k');
set(gcf,'name','Training set with output classifier')
hold on

% Rescale both stored sets in place with the TRAINING statistics so the
% scatter plots line up with the boundary learned on scaled features.
Train_Data(:, 2) = (Train_Data(:, 2) - Age_Mean) ./ Age_Range;
Train_Data(:, 3) = (Train_Data(:, 3) - Sal_Mean) ./ Sal_Range;
Test_Data(:, 2)  = (Test_Data(:, 2)  - Age_Mean) ./ Age_Range;
Test_Data(:, 3)  = (Test_Data(:, 3)  - Sal_Mean) ./ Sal_Range;

% Split training points by label (logical indexing replaces the original
% sentinel-row accumulation loop) and scatter them under the boundary.
Purchasing_Coordinates     = Train_Data(Train_Data(:,4) == 1, 2:3);
Not_Purchasing_Coordinates = Train_Data(Train_Data(:,4) ~= 1, 2:3);
scatter(Purchasing_Coordinates(:, 1), Purchasing_Coordinates(:, 2), 'r')
scatter(Not_Purchasing_Coordinates(:, 1), Not_Purchasing_Coordinates(:, 2), 'g')
title('Training set with result of Logistic Regression')
xlabel('Age (After scaling)')
ylabel('Salary (After scaling)')
legend('Classifier Edge','Will purchase','Will not Purchase')

% Plot test data with the same classifier boundary.
figure(2)
plot(X_Plot, Y_Plot, '-k');
set(gcf,'name','Test set with output classifier')
hold on

% Predictions: h = sigmoid(theta' * x); h >= 0.5 -> predict "will purchase".
n_test = size(Test_Data, 1);
Acc_Test_Data = [ones(n_test, 1) Test_Data(:,2) Test_Data(:,3)]';
H_Theta   = 1 ./ (1 + exp(-(FinalThetas' * Acc_Test_Data)));  % 1 x n_test
Predicted = (H_Theta >= 0.5)';
Actual    = (Test_Data(:,4) == 1);
Right_Classification = sum(Predicted == Actual);
Wrong_Classification = n_test - Right_Classification;

% Scatter test points by their TRUE label (as in the original).
Purchasing_Coordinates     = Test_Data(Actual,  2:3);
Not_Purchasing_Coordinates = Test_Data(~Actual, 2:3);
scatter(Purchasing_Coordinates(:, 1), Purchasing_Coordinates(:, 2), 'y')
scatter(Not_Purchasing_Coordinates(:, 1), Not_Purchasing_Coordinates(:, 2), 'c')
title('Test set with result of Logistic Regression')
xlabel('Age (After scaling)')
ylabel('Salary (After scaling)')
legend('Classifier Edge','Will purchase','Will not Purchase')

% Report accuracy over the whole test set.
Accuracy = Right_Classification / (Right_Classification + Wrong_Classification);
fprintf("For the chosen test set the accuracy = %0.2f%c \n", Accuracy*100, '%');