# comparison_keras_leila.py
# Forked from ArrasL/LRP_for_LSTM.
import codecs
import pickle as pkl

import numpy as np
from keras.layers import LSTM, Dense, Bidirectional
from keras.models import Sequential

from code.LSTM.LSTM_bidi import *

def get_test_sentence(sent_idx):
    """Return a test set sentence and its label; sent_idx must be an integer in [1, 2210]."""
    idx = 1
    with codecs.open("./data/sequence_test.txt", 'r', encoding='utf8') as f:
        for line in f:
            line = line.rstrip('\n')
            line = line.split('\t')
            label = int(line[0]) - 1      # true sentence class
            words = line[1].split(' | ')  # sentence words
            if idx == sent_idx:
                return words, label
            idx += 1

# The input, forget, cell, and output gate weights are all stored in one matrix.
# The gate order differs between Leila's implementation and Keras: Keras stores
# (i, f, g, o) whereas Leila's code expects (i, g, f, o), so the second and third
# quarters of the matrix have to be swapped and the matrix transposed
# (a toy self-check follows the function below).
def keras_matrix_to_leila_matrix(keras_weights):
    no_units = int(keras_weights.shape[1] / 4)
    sorted_weight_matrix = np.zeros(shape=keras_weights.shape)
    # input gate
    sorted_weight_matrix[:, :no_units] = keras_weights[:, :no_units]
    # cell gate / g gate
    sorted_weight_matrix[:, no_units:2*no_units] = keras_weights[:, 2*no_units:3*no_units]
    # forget gate
    sorted_weight_matrix[:, 2*no_units:3*no_units] = keras_weights[:, no_units:2*no_units]
    # output gate
    sorted_weight_matrix[:, 3*no_units:] = keras_weights[:, 3*no_units:]
    return np.transpose(sorted_weight_matrix)
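
# Illustrative self-check (added sketch, not part of the original script): on a
# toy 2x8 matrix with two units per gate, the converted matrix must be the
# transpose of the input with its second and third quarters swapped.
_toy_w = np.arange(16, dtype=float).reshape(2, 8)
assert keras_matrix_to_leila_matrix(_toy_w).shape == (8, 2)
assert np.array_equal(keras_matrix_to_leila_matrix(_toy_w)[2:4, :], _toy_w[:, 4:6].T)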
# The bias vector is in the wrong order as well, so we swap its 2nd and 3rd
# quarters (again, a toy self-check follows the function).
def keras_bias_to_leila_bias(keras_bias):
    no_units = int(len(keras_bias) / 4)
    sorted_bias = np.zeros(shape=keras_bias.shape)
    # input gate
    sorted_bias[:no_units] = keras_bias[:no_units]
    # cell gate / g gate
    sorted_bias[no_units:2*no_units] = keras_bias[2*no_units:3*no_units]
    # forget gate
    sorted_bias[2*no_units:3*no_units] = keras_bias[no_units:2*no_units]
    # output gate
    sorted_bias[3*no_units:] = keras_bias[3*no_units:]
    return sorted_bias
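
# Analogous self-check for the bias conversion (added sketch): the 2nd and 3rd
# quarters of a toy 8-vector trade places.
_toy_b = np.arange(8, dtype=float)
assert np.array_equal(keras_bias_to_leila_bias(_toy_b),
                      np.concatenate([_toy_b[:2], _toy_b[4:6], _toy_b[2:4], _toy_b[6:]]))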
# Convert a Keras model to a dict matching Leila's LSTM model representation.
def keras_model_to_leila_dict(model):
    leila_dict = {}
    keras_weights = model.get_weights()
    # reorder the Keras weight matrices of the recurrent units into Leila's format
    leila_dict['Wxh_Left'] = keras_matrix_to_leila_matrix(keras_weights[0])
    leila_dict['Whh_Left'] = keras_matrix_to_leila_matrix(keras_weights[1])
    leila_dict['Wxh_Right'] = keras_matrix_to_leila_matrix(keras_weights[3])
    leila_dict['Whh_Right'] = keras_matrix_to_leila_matrix(keras_weights[4])
    # Keras uses a single bias per LSTM, while Leila's model has separate input
    # and recurrent biases, so each Keras bias is split evenly between the two
    leila_dict['bxh_Left'] = 0.5 * keras_bias_to_leila_bias(keras_weights[2])
    leila_dict['bhh_Left'] = 0.5 * keras_bias_to_leila_bias(keras_weights[2])
    leila_dict['bxh_Right'] = 0.5 * keras_bias_to_leila_bias(keras_weights[5])
    leila_dict['bhh_Right'] = 0.5 * keras_bias_to_leila_bias(keras_weights[5])
    # finally, the output layer matrix is split into its left/right halves and transposed
    no_features = keras_weights[6].shape[0]
    leila_dict['Why_Left'] = np.transpose(keras_weights[6][:int(no_features / 2), :])
    leila_dict['Why_Right'] = np.transpose(keras_weights[6][int(no_features / 2):, :])
    return leila_dict
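
# Added helper (sketch): quick shape check for the converted dict, based on the
# shapes Leila's LSTM_bidi expects (transposed Keras kernels). Intended to be run
# together with the commented-out conversion further below, e.g.
#   _check_leila_dict_shapes(keras_model_to_leila_dict(model), units, feature_size, 5)
def _check_leila_dict_shapes(d, n_units, n_features, n_classes):
    assert d['Wxh_Left'].shape == (4 * n_units, n_features)
    assert d['Whh_Left'].shape == (4 * n_units, n_units)
    assert d['bxh_Left'].shape == (4 * n_units,)
    assert d['Why_Left'].shape == (n_classes, n_units)
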
batch_size = 32
seq_len = 40
feature_size = 60
units = 60
words, target_class = get_test_sentence(291)
data = np.random.randn(batch_size, seq_len, feature_size)
# print(words)
# print(target_class)
model = Sequential()
model.add(Bidirectional(LSTM(units=units, activation='tanh', recurrent_activation='sigmoid'),
                        input_shape=(None, feature_size), merge_mode='concat'))
model.add(Dense(5, use_bias=False))
model.load_weights('model_comparison/random_weights')
net = LSTM_bidi(model_path='model_comparison/')
w_indices = [net.voc.index(w) for w in words]
# print(w_indices)
T = len(w_indices) # input word sequence length
e = net.E.shape[1] # word embedding dimension
x = np.zeros((T, e))
x[:, :] = net.E[w_indices, :]
net.set_input(w_indices)
leila_output = net.forward()
print('input_shape:', x.shape)
print('leila output shape:', leila_output.shape)
# print('leila hidden state vector left', net.h_Left)
# print('leila hidden state vector right', net.h_Right)
y = model.predict(x.reshape((1, T, e)))
# model.save_weights('model_comparison/random_weights')
# y = model.predict(data)
for layer in model.layers:
    for param, weight in zip(layer.trainable_weights, layer.get_weights()):
        print('Param %s:\n%s' % (param, weight.shape))
model.summary()
# converted_weights = keras_model_to_leila_dict(model)
# pkl.dump(converted_weights, open('model_comparison/model', 'wb'))
print('keras_model_output', y)
print('leila model output', leila_output)
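
# Added sanity check (sketch): if the weight conversion is correct, the Keras and
# Leila forward passes should agree up to floating-point tolerance (atol is an
# assumption, tune as needed).
print('outputs match:', np.allclose(y[0], leila_output, atol=1e-4))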
eps = 0.001
bias_factor = 0.0 # recommended value
Rx, Rx_rev, R_rest = net.lrp(w_indices, target_class, eps, bias_factor) # LRP through the net
R_words = np.sum(Rx + Rx_rev, axis=1) # word relevances
scores = net.s.copy() # classification
print("prediction scores:", scores)
print("\nLRP target class: ", target_class)
print("\nLRP relevances:")
for idx, w in enumerate(words):
    print("\t\t\t" + "{:8.2f}".format(R_words[idx]) + "\t" + w)