-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathseq_to_seq_addition.py
122 lines (103 loc) · 2.95 KB
/
seq_to_seq_addition.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
from __future__ import print_function
from keras.models import Sequential
from keras.layers import Activation,LSTM,TimeDistributed,RepeatVector,Dense
import numpy as np
from six.moves import range
class CharacterTable(object):
    """Two-way mapping between a character set and one-hot encoded rows.

    The alphabet is the sorted, de-duplicated set of the given characters.
    ``chars`` holds that ordered alphabet, ``char_indices`` maps a character
    to its column and ``indices_char`` maps a column back to its character.
    """

    def __init__(self, chars):
        """Build both lookup tables from an iterable of characters."""
        self.chars = sorted(set(chars))
        self.char_indices = {}
        self.indices_char = {}
        for index, char in enumerate(self.chars):
            self.char_indices[char] = index
            self.indices_char[index] = char

    def encode(self, s, num_rows):
        """Return a ``(num_rows, len(self.chars))`` one-hot matrix for ``s``.

        Row ``k`` gets a single 1 in the column of ``s[k]``; rows past the
        end of ``s`` are left all-zero.
        """
        one_hot = np.zeros((num_rows, len(self.chars)))
        for row, char in enumerate(s):
            column = self.char_indices[char]
            one_hot[row, column] = 1
        return one_hot

    def decode(self, x, calc_argmax=True):
        """Turn class indices (or per-row scores) back into a string.

        With ``calc_argmax`` true, ``x`` is first reduced by ``argmax`` over
        its last axis; otherwise each element of ``x`` is used directly as
        an index into the alphabet.
        """
        indices = x.argmax(axis=-1) if calc_argmax else x
        return "".join([self.indices_char[index] for index in indices])
class colors:
    """ANSI escape sequences used to colour the per-sample verdict output."""

    # Prefix for a correct guess (bright green foreground).
    ok = "\033[92m"

    # Prefix for a wrong guess (bright red foreground).
    fail = "\033[91m"

    # Suffix that resets the terminal attributes again.
    close = "\033[0m"
# Number of distinct addition problems to generate.
TRAINING_SIZE = 50000
# Maximum number of decimal digits per operand.
DIGITS = 3
# When true, each padded query string is fed to the model reversed.
REVERSE = True
# Longest possible query: "<DIGITS digits>+<DIGITS digits>".
MAXLEN = DIGITS + 1 + DIGITS
chars = "0123456789+ "
ctable = CharacterTable(chars)


def _random_operand():
    """Return a random non-negative integer written with 1..DIGITS digits.

    Leading zeros are possible in the sampled digit string, so every value
    in ``0 .. 10**DIGITS - 1`` can occur.
    """
    num_digits = np.random.randint(1, DIGITS + 1)
    digits = (np.random.choice(list("0123456789")) for _ in range(num_digits))
    return int("".join(digits))


# The loop below only accepts each unordered pair (a, b) once, so it can
# never finish if more samples are requested than such pairs exist.
_operand_count = 10 ** DIGITS
_max_unique_pairs = _operand_count * (_operand_count + 1) // 2
if TRAINING_SIZE > _max_unique_pairs:
    raise ValueError(
        "TRAINING_SIZE={} exceeds the {} distinct problems available "
        "with DIGITS={}".format(TRAINING_SIZE, _max_unique_pairs, DIGITS)
    )

questions = []
expected = []
seen = set()
print("Begin data generating...")
while len(questions) < TRAINING_SIZE:
    a, b = _random_operand(), _random_operand()
    # Treat a+b and b+a as the same problem.
    key = tuple(sorted((a, b)))
    if key in seen:
        continue
    seen.add(key)
    # Right-pad the query with spaces to the fixed length MAXLEN.
    q = "{}+{}".format(a, b)
    query = q + ' ' * (MAXLEN - len(q))
    # The sum has at most DIGITS + 1 digits; pad it to that width.
    ans = str(a + b)
    ans += ' ' * (DIGITS + 1 - len(ans))
    if REVERSE:
        query = query[::-1]
    questions.append(query)
    expected.append(ans)
print("Total addition questions:", len(questions))
print("Vectorizing...")
# One-hot tensors: (sample, position, character).  The builtin ``bool`` is
# used as dtype because the ``np.bool`` alias was removed in NumPy 1.24.
x = np.zeros((len(questions), MAXLEN, len(chars)), dtype=bool)
y = np.zeros((len(expected), DIGITS + 1, len(chars)), dtype=bool)
for i, (question, answer) in enumerate(zip(questions, expected)):
    x[i] = ctable.encode(question, MAXLEN)
    y[i] = ctable.encode(answer, DIGITS + 1)

# Shuffle x and y with the same random permutation so pairs stay aligned.
indices = np.arange(len(questions))
np.random.shuffle(indices)
x = x[indices]
y = y[indices]

# First 80% of the shuffled samples train the model, the rest validate it.
split_at = int(0.8 * len(x))
x_train, x_val = x[:split_at], x[split_at:]
y_train, y_val = y[:split_at], y[split_at:]
# Width of every LSTM layer, mini-batch size, and number of decoder LSTMs.
HIDDEN_SIZE = 128
BATCH_SIZE = 128
LAYERS = 1

# Encoder: read the whole query into one vector, then repeat that vector
# once per output position.
_encoder_layers = [
    LSTM(HIDDEN_SIZE, input_shape=(MAXLEN, len(chars))),
    RepeatVector(DIGITS + 1),
]
# Decoder: LAYERS stacked LSTMs that each emit a full sequence.
_decoder_layers = [
    LSTM(HIDDEN_SIZE, return_sequences=True) for _ in range(LAYERS)
]
# Output head: a per-timestep dense layer over the alphabet plus softmax.
_output_layers = [
    TimeDistributed(Dense(len(chars))),
    Activation("softmax"),
]

model = Sequential(_encoder_layers + _decoder_layers + _output_layers)
model.compile(
    loss="categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)
model.summary()
# Train one epoch at a time and, after each epoch, print the model's answer
# for ten random validation problems.
for i in range(1, 200):
    print("Iteration ", i)
    model.fit(x_train, y_train,
              batch_size=BATCH_SIZE,
              epochs=1,
              validation_data=(x_val, y_val))
    for j in range(10):
        ind = np.random.randint(0, len(x_val))
        q_, a_ = x_val[ind], y_val[ind]
        # ``Sequential.predict_classes`` was removed from Keras; taking the
        # argmax of ``predict`` over the class axis yields the same indices.
        pred = model.predict(np.expand_dims(q_, axis=0), verbose=0).argmax(axis=-1)
        q = ctable.decode(q_)
        true = ctable.decode(a_)
        # Undo the input reversal so the question reads naturally.
        print(q[::-1] if REVERSE else q, end=' ')
        print('= ?', end=' ')
        print(true, end=' ')
        # ``pred`` already holds class indices, so skip the argmax in decode.
        guess = ctable.decode(pred[0], calc_argmax=False)
        if guess == true:
            print(colors.ok + 'yes' + colors.close, end=' ')
        else:
            print(colors.fail + 'no' + colors.close, end=' ')
        print(guess)