-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path3_LR_NN_L2.py
133 lines (118 loc) · 5.3 KB
/
3_LR_NN_L2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Jun 14 11:33:25 2017
@author: dhingratul
"""
from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
from six.moves import range
def unPickle(pickle_file):
    """
    Load the pickled dataset dictionary stored at `pickle_file`.

    Returns a 6-tuple in this order: test data, test labels, train data,
    train labels, validation data, validation labels.
    """
    split_keys = ('test_dataset', 'test_labels',
                  'train_dataset', 'train_labels',
                  'valid_dataset', 'valid_labels')
    with open(pickle_file, 'rb') as handle:
        contents = pickle.load(handle)
    # A missing key raises KeyError, just as a direct lookup would
    return tuple(contents[key] for key in split_keys)
pickle_file = "/home/dhingratul/Documents/Dataset/notMNIST.pickle"
# unPickle hands back the splits in test, train, validation order
(test_dataset, test_labels,
 train_dataset, train_labels,
 valid_dataset, valid_labels) = unPickle(pickle_file)
"""
Reformat the data to match what the model expects: each image is flattened
into one row of a 2-D matrix, and each label becomes a one-hot encoded vector.
"""
image_size = 28
num_labels = 10


def reformat(data, labels):
    """
    Flatten every image into a single row and one-hot encode the labels.

    Both returned arrays are float32: the data has image_size * image_size
    columns and the labels have num_labels columns.
    """
    pixels_per_image = image_size * image_size
    # -1 lets numpy infer the number of rows from the total element count
    flat = data.reshape((-1, pixels_per_image)).astype(np.float32)
    # Comparing a column of class ids against 0..num_labels-1 broadcasts to
    # a (rows, num_labels) boolean grid that marks each row's class column
    class_ids = np.arange(num_labels)
    one_hot = (class_ids == labels[:, None]).astype(np.float32)
    return flat, one_hot
# Flatten each split and one-hot encode its labels, replacing the raw arrays
train_dataset, train_labels = reformat(data=train_dataset,
                                       labels=train_labels)
valid_dataset, valid_labels = reformat(data=valid_dataset,
                                       labels=valid_labels)
test_dataset, test_labels = reformat(data=test_dataset,
                                     labels=test_labels)
# Training with tf
batch_size = 128
num_hidden_units = 1024
graph = tf.Graph()
with graph.as_default():
    num_pixels = image_size * image_size

    # Mini-batches are supplied at run time through these placeholders
    tf_train_data = tf.placeholder(tf.float32,
                                   shape=(batch_size, num_pixels))
    tf_train_labels = tf.placeholder(tf.float32,
                                     shape=(batch_size, num_labels))
    # Validation and test sets are embedded in the graph as constants
    tf_valid_data = tf.constant(valid_dataset)
    tf_test_data = tf.constant(test_dataset)

    # Trainable parameters: weights start from a truncated normal
    # distribution, biases start at zero
    weights1 = tf.Variable(
        tf.truncated_normal([num_pixels, num_hidden_units]))
    biases1 = tf.Variable(tf.zeros([num_hidden_units]))
    weights2 = tf.Variable(
        tf.truncated_normal([num_hidden_units, num_labels]))
    biases2 = tf.Variable(tf.zeros([num_labels]))

    def _forward(inputs):
        """Return output-layer logits for `inputs` using the shared weights.

        One ReLU hidden layer followed by a linear output layer.
        """
        hidden = tf.nn.relu(tf.matmul(inputs, weights1) + biases1)
        return tf.matmul(hidden, weights2) + biases2

    # Training computation
    logits_2 = _forward(tf_train_data)
    # Per-example softmax cross-entropy
    loss_intermediate = tf.nn.softmax_cross_entropy_with_logits(
        labels=tf_train_labels, logits=logits_2)
    # L2 Regularization: L' = L + \beta * L_2
    beta = 0.01
    regularizer = tf.nn.l2_loss(weights1) + tf.nn.l2_loss(weights2)
    # The penalty is a scalar added to every example's loss before averaging
    avg_loss = tf.reduce_mean(loss_intermediate + beta * regularizer)

    # Gradient Descent Optimizer
    lr = 0.5  # Learning rate
    optimizer = tf.train.GradientDescentOptimizer(lr).minimize(avg_loss)

    # Predictions: class probabilities for the current mini-batch and for
    # the full validation and test sets, all through the same weights
    train_pred = tf.nn.softmax(logits_2)
    valid_pred = tf.nn.softmax(_forward(tf_valid_data))
    test_pred = tf.nn.softmax(_forward(tf_test_data))
def accuracy(predictions, labels):
    """
    Return the percentage of rows whose predicted class matches the label.

    predictions: array of per-class scores, one row per example
    labels: array with one row per example; the true class is the column
        holding the row maximum (e.g. a one-hot encoding)

    The class of each row is taken as the argmax along axis 1 for both
    arguments, and the result is 100 * matches / number of label rows.
    """
    hits = np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
    # 100.0 (not 100) forces float arithmetic: this file imports only
    # print_function from __future__, so under Python 2 the integer form
    # would floor the percentage to a whole number
    return 100.0 * hits / labels.shape[0]
# Run the graph defined above
step_size = 3001  # number of mini-batch updates to perform

num_train = train_labels.shape[0]
if num_train < batch_size:
    # The placeholders have a fixed batch dimension, so a short slice could
    # never be fed; fail early with a clear message instead
    raise ValueError(
        "training set has %d examples, fewer than batch_size=%d"
        % (num_train, batch_size))
# Loop-invariant modulus for the batch offset. max(..., 1) keeps the modulo
# defined when the training set holds exactly one batch (offset is then 0).
offset_range = max(num_train - batch_size, 1)

with tf.Session(graph=graph) as session:
    # Initialize weights
    tf.global_variables_initializer().run()
    print("Initialized")
    for step in range(step_size):
        # Step through the training data in order, wrapping around before
        # a slice would run past the end (deterministic, not random)
        offset = (step * batch_size) % offset_range
        # Generate a mini-batch
        mb_data = train_dataset[offset:(offset + batch_size), :]
        mb_labels = train_labels[offset:(offset + batch_size), :]
        # Map each placeholder to the mini-batch array that fills it
        feed_dict = {tf_train_data: mb_data, tf_train_labels: mb_labels}
        _, batch_loss, pred = session.run(
            [optimizer, avg_loss, train_pred], feed_dict=feed_dict)
        if step % 500 == 0:
            print("MB Loss at step %d: %f" % (step, batch_loss))
            print("MB Accuracy: %0.1f%%"
                  % accuracy(pred, mb_labels))
            # Calling .eval() on valid_pred is basically like calling
            # run(), but just to get that one numpy array. Note that it
            # recomputes all its graph dependencies.
            print('Validation accuracy: %.1f%%' % accuracy(valid_pred.eval(),
                                                           valid_labels))
    # Evaluated once after training, still inside the session so that
    # .eval() has a default session to run in
    print('Test accuracy: %.1f%%' % accuracy(test_pred.eval(),
                                             test_labels))