hidden_rnn.py
'''
This code explains the hidden states and outputs of an RNN.
It is the code reference for the blog post: https://graviraja.github.io/unwraprnn/
'''
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch import LongTensor
# create a dummy batch of data with batch_size = 3
data = ['long_str', 'tiny', 'medium']
# create the vocabulary
vocab = ['<pad>'] + sorted(set([char for seq in data for char in seq]))
# vocab = ['<pad>', '_', 'd', 'e', 'g', 'i', 'l', 'm', 'n', 'o', 'r', 's', 't', 'u', 'y']
# convert into numerical form
vectorized_data = [[vocab.index(tok) for tok in seq] for seq in data]
# vectorized_data = [[6, 9, 8, 4, 1, 11, 12, 10], [12, 5, 8, 14], [7, 3, 2, 5, 13, 7]]
# prepare the batch: pad with 0 (the <pad> token) so that all sequences have equal length
seq_lengths = LongTensor([len(seq) for seq in vectorized_data])
sequence_tensor = Variable(torch.zeros(len(vectorized_data), seq_lengths.max().item(), dtype=torch.long))
for idx, (seq, seq_len) in enumerate(zip(vectorized_data, seq_lengths)):
    sequence_tensor[idx, :seq_len] = LongTensor(seq)
# sequence_tensor = ([[ 6, 9, 8, 4, 1, 11, 12, 10],
# [12, 5, 8, 14, 0, 0, 0, 0],
# [ 7, 3, 2, 5, 13, 7, 0, 0]])
# convert the input into time-major format (the default expected by nn.RNN, i.e. batch_first=False)
sequence_tensor = sequence_tensor.t()
# sequence_tensor shape => [max_len, batch_size]
input_dim = len(vocab)
print(f"Length of vocab : {input_dim}")
# hidden dimension in the RNN
hidden_dim = 5
# embedding dimension
embedding_dim = 5

class Single_Layer_Uni_Directional_RNN(nn.Module):
    def __init__(self, input_dim, embedding_dim, hidden_dim, n_layers, bidirectional):
        super().__init__()
        self.embedding = nn.Embedding(input_dim, embedding_dim)
        self.rnn = nn.RNN(embedding_dim, hidden_dim, num_layers=n_layers, bidirectional=bidirectional)

    def forward(self, input):
        # input shape => [max_len, batch_size]
        embed = self.embedding(input)
        # embed shape => [max_len, batch_size, embedding_dim]
        output, hidden = self.rnn(embed)
        # output shape => [max_len, batch_size, hidden_size]
        # hidden shape => [1, batch_size, hidden_size]
        return output, hidden

n_layers = 1
bidirectional = False
model = Single_Layer_Uni_Directional_RNN(input_dim, embedding_dim, hidden_dim, n_layers, bidirectional)
output, hidden = model(sequence_tensor)
print(f"Input shape is : {sequence_tensor.shape}")
print(f"Output shape is : {output.shape}")
print(f"Hidden shape is : {hidden.shape}")
assert (output[-1, :, :] == hidden[0]).all(), "Final output must be the same as the hidden state for a single-layer uni-directional RNN"

class Multi_Layer_Uni_Directional_RNN(nn.Module):
    def __init__(self, input_dim, embedding_dim, hidden_dim, n_layers, bidirectional):
        super().__init__()
        self.embedding = nn.Embedding(input_dim, embedding_dim)
        self.rnn = nn.RNN(embedding_dim, hidden_dim, num_layers=n_layers, bidirectional=bidirectional)

    def forward(self, input):
        # input shape => [max_len, batch_size]
        embed = self.embedding(input)
        # embed shape => [max_len, batch_size, embedding_dim]
        output, hidden = self.rnn(embed)
        # output shape => [max_len, batch_size, hidden_size]
        # hidden shape => [num_layers, batch_size, hidden_size]
        return output, hidden

n_layers = 2
bidirectional = False
model = Multi_Layer_Uni_Directional_RNN(input_dim, embedding_dim, hidden_dim, n_layers, bidirectional)
output, hidden = model(sequence_tensor)
print(f"Input shape is : {sequence_tensor.shape}")
print(f"Output shape is : {output.shape}")
print(f"Hidden shape is : {hidden.shape}")
assert (output[-1, :, :] == hidden[-1]).all(), "Final output must be the same as the final hidden state of the last layer for a multi-layer uni-directional RNN"

class Single_Layer_Bi_Directional_RNN(nn.Module):
    def __init__(self, input_dim, embedding_dim, hidden_dim, n_layers, bidirectional):
        super().__init__()
        self.embedding = nn.Embedding(input_dim, embedding_dim)
        self.rnn = nn.RNN(embedding_dim, hidden_dim, num_layers=n_layers, bidirectional=bidirectional)

    def forward(self, input):
        # input shape => [max_len, batch_size]
        embed = self.embedding(input)
        # embed shape => [max_len, batch_size, embedding_dim]
        output, hidden = self.rnn(embed)
        # output shape => [max_len, batch_size, hidden_size * 2] => since forward and backward outputs are concatenated
        # hidden shape => [2, batch_size, hidden_size]
        return output, hidden

n_layers = 1
bidirectional = True
model = Single_Layer_Bi_Directional_RNN(input_dim, embedding_dim, hidden_dim, n_layers, bidirectional)
output, hidden = model(sequence_tensor)
print(f"Input shape is : {sequence_tensor.shape}")
print(f"Output shape is : {output.shape}")
print(f"Hidden shape is : {hidden.shape}")
assert (output[-1, :, :hidden_dim] == hidden[0]).all(), "First hidden_dim of the output at the last time step must be the same as the final forward hidden state for a single-layer bi-directional RNN"
assert (output[0, :, hidden_dim:] == hidden[-1]).all(), "Last hidden_dim of the output at the initial time step must be the same as the final backward hidden state for a single-layer bi-directional RNN"

class Multi_Layer_Bi_Directional_RNN(nn.Module):
    def __init__(self, input_dim, embedding_dim, hidden_dim, n_layers, bidirectional):
        super().__init__()
        self.embedding = nn.Embedding(input_dim, embedding_dim)
        self.rnn = nn.RNN(embedding_dim, hidden_dim, num_layers=n_layers, bidirectional=bidirectional)

    def forward(self, input):
        # input shape => [max_len, batch_size]
        embed = self.embedding(input)
        # embed shape => [max_len, batch_size, embedding_dim]
        output, hidden = self.rnn(embed)
        # output shape => [max_len, batch_size, hidden_size * 2] => since forward and backward outputs are concatenated
        # hidden shape => [num_layers * 2, batch_size, hidden_size]
        return output, hidden

n_layers = 2
bidirectional = True
model = Multi_Layer_Bi_Directional_RNN(input_dim, embedding_dim, hidden_dim, n_layers, bidirectional)
output, hidden = model(sequence_tensor)
print(f"Input shape is : {sequence_tensor.shape}")
print(f"Output shape is : {output.shape}")
print(f"Hidden shape is : {hidden.shape}")
batch_size = sequence_tensor.shape[1]
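# hidden is ordered as (layer 0 forward, layer 0 backward, layer 1 forward, layer 1 backward),
# so it can be viewed as [num_layers, num_directions, batch_size, hidden_dim]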
hidden = hidden.view(n_layers, 2, batch_size, hidden_dim)
print(f"Reshaped hidden shape is : {hidden.shape}")
assert (output[-1, :, :hidden_dim] == hidden[n_layers - 1][0]).all(), "First hidden_dim of the output at the last time step must be the same as the final forward hidden state of the last layer for a multi-layer bi-directional RNN"
assert (output[0, :, hidden_dim:] == hidden[n_layers - 1][1]).all(), "Last hidden_dim of the output at the initial time step must be the same as the final backward hidden state of the last layer for a multi-layer bi-directional RNN"
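
# A minimal usage sketch (not part of the original script): a common way to get one summary
# vector per sequence from a bi-directional RNN is to concatenate the final forward and
# backward hidden states of the top layer.
final_forward = hidden[n_layers - 1][0]    # [batch_size, hidden_dim]
final_backward = hidden[n_layers - 1][1]   # [batch_size, hidden_dim]
summary = torch.cat((final_forward, final_backward), dim=1)
# summary shape => [batch_size, hidden_dim * 2]
print(f"Summary shape is : {summary.shape}")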