----------------------------------------------------------------------
--
-- Deep time series learning: Analysis of Torch
--
-- modelRNN.lua : Recurrent Neural Network (RNN) model
-- Main functions for classification and layer-wise pretraining
--
----------------------------------------------------------------------
----------------------------------------------------------------------
-- Imports
require 'rnn'
require 'unsup'
require 'optim'
require 'torch'
require 'nngraph'   -- provides nn.gModule, used by defineRNNLayer
local nninit = require 'nninit'
modelRNN = {};
----------------------------------------------------------------------
-- Handmade RNN
-- (From the Oxford course)
----------------------------------------------------------------------
function defineRNNLayer(input_size, rnn_size, params)
  local n = params.n or 1
  local dropout = params.dropout or 0
  -- There are n+1 inputs (the hidden state of each layer, plus x)
  local inputs = {}
  table.insert(inputs, nn.Identity()()) -- x
  for L = 1,n do
    table.insert(inputs, nn.Identity()()) -- prev_h[L]
  end
  local x, input_size_L
  local outputs = {}
  for L = 1,n do
    local prev_h = inputs[L+1]
    if L == 1 then
      --x = OneHot(input_size)(inputs[1])
      x = inputs[1];
      input_size_L = input_size
    else
      x = outputs[(L-1)]
      if dropout > 0 then x = nn.Dropout(dropout)(x) end -- apply dropout, if any
      input_size_L = rnn_size
    end
    -- RNN tick
    local i2h = nn.Linear(input_size_L, rnn_size)(x)
    local h2h = nn.Linear(rnn_size, rnn_size)(prev_h)
    local next_h = nn.Tanh()(nn.CAddTable()({i2h, h2h}))
    table.insert(outputs, next_h)
  end
  -- Set up the decoder
  local top_h = outputs[#outputs]
  if dropout > 0 then top_h = nn.Dropout(dropout)(top_h) end
  local proj = nn.Linear(rnn_size, input_size)(top_h)
  local logsoft = nn.LogSoftMax()(proj)
  table.insert(outputs, logsoft)
  -- Create the final module
  return nn.gModule(inputs, outputs)
end
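--[[
Usage sketch (not part of the original pipeline; input_size = 64 and
rnn_size = 128 are illustrative values only): builds a single handmade RNN
layer and runs one tick on a random input with a zero hidden state.

  local rnnTick = defineRNNLayer(64, 128, {n = 1, dropout = 0})
  local x  = torch.rand(1, 64)     -- one input frame
  local h0 = torch.zeros(1, 128)   -- previous hidden state
  -- Returns {next_h, logProbs}, following the outputs table built above
  local out = rnnTick:forward({x, h0})
--]]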
local modelRNN, parent = torch.class('modelRNN', 'modelClass')

function modelRNN:defineModel(structure, options)
  -- Container
  local model = nn.Sequential();
  -- Hidden layers
  for i = 1,structure.nLayers do
    local nIn;
    if i == 1 then nIn = self.windowSize; else nIn = structure.layers[i - 1]; end
    -- Prepare one layer of recurrent computation
    local r = nn.Recurrent(
      structure.layers[i],
      nn.Linear(nIn, structure.layers[i]),
      nn.Linear(structure.layers[i], structure.layers[i]),
      self.nonLinearity(),
      self.rho
    );
    model:add(r);
    -- Layer-wise linear transform
    if self.layerwiseLinear then model:add(nn.Linear(structure.layers[i], structure.layers[i])) end
    -- Batch normalization
    if self.batchNormalize then model:add(nn.BatchNormalization(structure.layers[i])); end
    -- Non-linearity
    if self.addNonLinearity then model:add(self.nonLinearity()); end
    -- Dropout
    if self.dropout then model:add(nn.Dropout(self.dropout)); end
  end
  -- Final regression layer for classification
  if self.sequencer then
    -- Sequencer case simply needs a linear transform to the number of classes
    model:add(nn.Linear(structure.layers[structure.nLayers], structure.nOutputs))
    local rnnModel = nn.Sequencer(model);
    model = nn.Sequential();
    -- Number of windows we will consider
    local nWins = torch.ceil((structure.nInputs - self.windowSize + 1) / self.windowStep)
    -- Here we add the subsequencing trick
    model:add(nn.SlidingWindow(2, self.windowSize, self.windowStep));
    model:add(rnnModel);
    model:add(nn.JoinTable(2));
    model:add(nn.Linear(nWins * structure.nOutputs, structure.nOutputs));
  else
    -- Recursor case
    local rnnLayers = nn.Recursor(model, self.rho);
    model = nn.Sequential()
    -- Add the recurrent layers
    model:add(rnnLayers);
    -- Reshape the data from all outputs
    model:add(nn.Reshape(structure.layers[structure.nLayers]));
    -- And then add a linear transform to the number of classes
    model:add(nn.Linear(structure.layers[structure.nLayers], structure.nOutputs))
  end
  return model;
end
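--[[
Usage sketch (illustrative sizes only; assumes the repository's modelClass
base class and nn.SlidingWindow module are already loaded): builds a
two-layer RNN classifier over series of length 128 with 10 classes and runs
a forward pass on a random mini-batch.

  local structure = {nLayers = 2, layers = {64, 64}, nInputs = 128, nOutputs = 10}
  local net = modelRNN()
  net:parametersDefault()
  local model = net:defineModel(structure, {})
  local batch = torch.rand(16, structure.nInputs)  -- 16 series of 128 points
  local scores = model:forward(batch)              -- expected size: 16 x 10
--]]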
function modelRNN:definePretraining(structure, l, options)
  --[[ Encoder part ]]--
  local finalEncoder = nn.Sequential()
  local encoder = nn.Sequential();
  local nIn;
  if l == 1 then
    if (self.sequencer) then nIn = self.windowSize; else nIn = structure.nInputs end
  else
    nIn = structure.layers[l-1];
  end
  -- Prepare one layer of recurrent computation
  local r = nn.Recurrent(
    structure.layers[l],
    nn.Linear(nIn, structure.layers[l]),
    nn.Identity(),
    self.nonLinearity(),
    1e9 --self.rho
  );
  -- Add the RNN modules to the network
  encoder:add(r);
  -- Layer-wise linear transform
  if self.layerwiseLinear then encoder:add(nn.Linear(structure.layers[l], structure.layers[l])) end
  -- Batch normalization
  if self.batchNormalize then encoder:add(nn.BatchNormalization(structure.layers[l])); end
  -- Non-linearity
  if self.addNonLinearity then encoder:add(self.nonLinearity()); end
  -- Dropout
  if self.dropout then encoder:add(nn.Dropout(self.dropout)); end
  -- Perform recurrent encoding
  encoder = nn.Sequencer(encoder);
  -- In the first layer we have to perform a sliding window
  if l == 1 then
    -- Here we add the subsequencing trick
    finalEncoder:add(nn.SlidingWindow(2, self.windowSize, self.windowStep));
  end
  finalEncoder:add(encoder);
  --[[ Decoder part ]]--
  local decoder = nn.Sequential()
  local finalDecoder = nn.Sequential()
  -- Prepare the decoding layer of recurrent computation
  local rDec = nn.Recurrent(
    structure.layers[l],
    nn.Linear(structure.layers[l], structure.layers[l]),
    nn.Identity(),
    self.nonLinearity(),
    1e9 --self.rho
  );
  decoder:add(nn.Sequencer(rDec));
  decoder:add(nn.Sequencer(nn.Linear(structure.layers[l], nIn)))
  finalDecoder:add(decoder);
  -- In the first layer we have to join the windows back together
  if l == 1 then
    -- Number of windows we will consider
    local nWins = torch.ceil((structure.nInputs - self.windowSize + 1) / self.windowStep)
    -- Join the windows and project back to the original input size
    finalDecoder:add(nn.JoinTable(2));
    finalDecoder:add(nn.Linear(nWins * self.windowSize, structure.nInputs));
  end
  -- Construct an autoencoder
  local model = unsup.AutoEncoder(finalEncoder, finalDecoder, options.beta);
  -- We will need a sequencer criterion for deeper layers
  if (l > 1) then model.loss = nn.SequencerCriterion(model.loss); end
  -- Return the complete model
  return model;
end
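--[[
Pretraining sketch (illustrative sizes only): builds the greedy layer-wise
autoencoders returned by definePretraining. Layer 1 reads sliding windows of
the raw series; deeper layers reconstruct the previous layer's sequence of
activations, which is why their loss is wrapped in a SequencerCriterion.
The unsupervised training loop itself is not part of this file.

  local structure = {nLayers = 2, layers = {64, 64}, nInputs = 128, nOutputs = 10}
  local net = modelRNN()
  net:parametersDefault()
  local trainedLayers = {}
  for l = 1, structure.nLayers do
    trainedLayers[l] = net:definePretraining(structure, l, {beta = 1})
  end
--]]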
function modelRNN:retrieveEncodingLayer(model)
  -- Retrieve only the encoding part of the autoencoder
  local encoder = model.encoder
  return encoder
end
function modelRNN:weightsInitialize(model)
  -- Find only the linear modules
  local linearNodes = model:findModules('nn.Linear')
  for l = 1,#linearNodes do
    local module = linearNodes[l];
    module:init('weight', self.initialize);
    module:init('bias', self.initialize);
  end
  -- Initialize the batch normalization layers
  for k,v in pairs(model:findModules('nn.BatchNormalization')) do
    v.weight:fill(1)
    v.bias:zero()
  end
  return model;
end
function modelRNN:weightsTransfer(model, trainedLayers)
  -- Find the linear modules of the final model
  local linearNodes = model:findModules('nn.Linear');
  -- Current linear layer
  local curLayer = 1;
  for l = 1,#trainedLayers do
    -- Find the equivalent modules in the pre-trained encoder
    local preTrainedNodes = trainedLayers[l].encoder:findModules('nn.Linear');
    for k = 1,#preTrainedNodes do
      linearNodes[curLayer].weight = preTrainedNodes[k].weight;
      linearNodes[curLayer].bias = preTrainedNodes[k].bias;
      curLayer = curLayer + 1;
    end
  end
  return model;
end
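--[[
Transfer sketch, continuing the pretraining sketch above (same net, structure
and trainedLayers): once the autoencoders have been trained, build the
classifier, initialize its weights, then copy the pre-trained encoder weights
into its linear modules before supervised training.

  local model = net:defineModel(structure, {})
  model = net:weightsInitialize(model)
  model = net:weightsTransfer(model, trainedLayers)
--]]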
function modelRNN:parametersDefault()
  self.initialize = nninit.xavier;
  self.addNonLinearity = true;
  self.layerwiseLinear = true;
  self.nonLinearity = nn.ReLU;
  self.batchNormalize = true;
  self.sequencer = true;
  self.pretrain = true;
  self.windowSize = 16;
  self.windowStep = 1;
  self.dropout = 0.5;
  self.rho = 4;
end
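--[[
The defaults above can be overridden per experiment before building the model,
e.g. (illustrative values):

  local net = modelRNN()
  net:parametersDefault()
  net.rho = 8          -- longer back-propagation-through-time horizon
  net.dropout = false  -- disables the nn.Dropout modules entirely
--]]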
function modelRNN:parametersRandom()
  -- All possible non-linearities
  self.distributions.nonLinearity = {nn.HardTanh, nn.HardShrink, nn.SoftShrink, nn.SoftMax, nn.SoftMin, nn.SoftPlus, nn.SoftSign, nn.LogSigmoid, nn.LogSoftMax, nn.Sigmoid, nn.Tanh, nn.ReLU, nn.PReLU, nn.RReLU, nn.ELU, nn.LeakyReLU};
  self.distributions.initialize = {nninit.normal, nninit.uniform, nninit.xavier, nninit.kaiming, nninit.orthogonal, nninit.sparse};
end