# multilayer_perceptron.py
# Forked from eriklindernoren/ML-From-Scratch.
#from sklearn import datasets
import sys
import os
import math
#import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Import helper functions
dir_path = os.path.dirname(os.path.realpath(__file__))
sys.path.insert(0, dir_path + "/../utils")
from data_manipulation import train_test_split, categorical_to_binary, normalize, load_iris_dataset
from data_operation import accuracy_score
sys.path.insert(0, dir_path + "/../unsupervised_learning/")
from principal_component_analysis import PCA
# Activation function
def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + exp(-x)) of x, element-wise.

    Parameters:
    -----------
    x: scalar or numpy array
        Input value(s); anything supporting unary minus and np.exp.

    Returns:
    --------
    Value(s) in [0, 1] with the same shape as x.
    """
    # For large negative x, exp(-x) overflows to inf. 1 / (1 + inf) is exactly
    # 0.0, which is the correct limit, so the overflow is harmless here and
    # the floating-point warning/error for it is silenced locally.
    with np.errstate(over='ignore'):
        return 1 / (1 + np.exp(-x))
# Gradient of activation function
def sigmoid_gradient(x):
    """Return the derivative of the sigmoid evaluated at x, element-wise.

    Uses the identity sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)).
    """
    # Evaluate the activation once and reuse it for both factors.
    activation = sigmoid(x)
    return activation * (1 - activation)
class MultilayerPerceptron(object):
    """Classifier built from one sigmoid hidden layer and a sigmoid output layer.

    Training is full-batch gradient descent on the squared error between the
    network outputs and the binarized targets.

    Parameters:
    -----------
    n_hidden: int
        Number of neurons in the hidden layer.
    """

    def __init__(self, n_hidden):
        self.n_hidden = n_hidden    # Number of hidden neurons
        self.W = None               # Hidden layer weights, set by fit()
        self.V = None               # Output layer weights, set by fit()
        self.biasW = None           # Hidden layer bias, set by fit()
        self.biasV = None           # Output layer bias, set by fit()

    def fit(self, X, y, n_iterations=3000,
            learning_rate=0.01, plot_errors=False):
        """Train the network on X and y, overwriting any previous weights.

        Parameters:
        -----------
        X: array-like of shape (n_samples, n_features)
            Training samples.
        y: array-like
            Class labels; passed through categorical_to_binary, which is
            expected to return a 2-D array with one column per output unit
            (TODO confirm against the helper).
        n_iterations: int
            Number of full-batch gradient descent steps.
        learning_rate: float
            Step size applied to the gradients summed over all samples.
        plot_errors: bool
            If True, plot the mean squared training error per iteration.
        """
        # Accept lists and other array-likes, not only ndarrays.
        X = np.asarray(X)
        # Convert the nominal y values to binary
        y = categorical_to_binary(y)

        n_samples, n_features = np.shape(X)
        n_outputs = np.shape(y)[1]

        # Initial weights drawn uniformly from [-1/sqrt(N), 1/sqrt(N)),
        # where N is the number of input features.
        a = -1 / math.sqrt(n_features)
        b = -a

        def random_init(shape):
            return (b - a) * np.random.random(shape) + a

        # Draw order is kept (W, biasW, V, biasV) so that seeded runs
        # reproduce the same initial weights as before.
        self.W = random_init((n_features, self.n_hidden))
        self.biasW = random_init((1, self.n_hidden))
        self.V = random_init((self.n_hidden, n_outputs))
        self.biasV = random_init((1, n_outputs))

        # Row of ones used to sum the per-sample bias gradients; it does not
        # change between iterations, so build it once.
        sample_sum = np.ones((1, n_samples))

        errors = []
        for _ in range(n_iterations):
            # Forward pass: hidden layer, then output layer
            hidden_input = X.dot(self.W) + self.biasW
            hidden_output = sigmoid(hidden_input)
            output_layer_input = hidden_output.dot(self.V) + self.biasV
            output_layer_pred = sigmoid(output_layer_input)

            # Training error of this iteration
            error = y - output_layer_pred
            errors.append(np.mean(np.power(error, 2)))

            # Backward pass. The sigmoid derivative at a layer's input equals
            # a * (1 - a) for that layer's activation a, so the activations
            # from the forward pass are reused instead of recomputing them.
            # Output layer weights V
            v_gradient = -2 * error * \
                (output_layer_pred * (1 - output_layer_pred))
            # Hidden layer weights W (computed before V is modified)
            w_gradient = v_gradient.dot(self.V.T) * \
                (hidden_output * (1 - hidden_output))

            # Update weights with the gradients summed over all samples
            self.V -= learning_rate * hidden_output.T.dot(v_gradient)
            self.biasV -= learning_rate * sample_sum.dot(v_gradient)
            self.W -= learning_rate * X.T.dot(w_gradient)
            self.biasW -= learning_rate * sample_sum.dot(w_gradient)

        # Plot the training error
        if plot_errors:
            plt.plot(range(n_iterations), errors)
            plt.ylabel('Training Error')
            plt.xlabel('Iterations')
            plt.show()

    # Use the trained model to predict labels of X
    def predict(self, X):
        """Return, for each row of X, the index of the strongest output unit.

        Must be called after fit(). The returned values are column indices of
        the binarized targets; this assumes the class labels are 0..k-1 in
        column order (TODO confirm against categorical_to_binary).
        """
        # Calculate the output of the hidden neurons
        hidden_output = sigmoid(np.dot(X, self.W) + self.biasW)
        # Set the class labels to the highest valued outputs
        output = sigmoid(np.dot(hidden_output, self.V) + self.biasV)
        return np.argmax(output, axis=1)
def main():
    """Demo: train the MLP on the iris CSV, print test accuracy, plot predictions."""
    # Load features and class labels from the data file next to this package.
    data = load_iris_dataset(dir_path + r"/../data/iris.csv")
    X = data['X']
    y = data['target']
    X = normalize(X)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    # MLP
    clf = MultilayerPerceptron(n_hidden=10)
    clf.fit(
        X_train,
        y_train,
        n_iterations=4000,
        learning_rate=0.01,
        plot_errors=True)
    y_pred = clf.predict(X_test)

    # A single pre-formatted string prints the same text whether print is
    # the Python 2 statement or the Python 3 function.
    print("Accuracy: %s" % accuracy_score(y_test, y_pred))

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()