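"""classify_intent.py

Trains a small Transformer encoder for intent classification over 338 classes,
using SGD with linear learning-rate warmup, and reports validation (or, with
--final, test) accuracy after every epoch.
"""
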
import numpy as np
import torch
import torch.nn.functional as F
import tqdm

from argparse import ArgumentParser
from sklearn.metrics import accuracy_score
from torch import nn
from torch.optim import Adam, SGD
from torch.utils.tensorboard import SummaryWriter

from dataset.dataloader_classifier import Dataset
from dataset.config import Config
from models.transformer import Transformer


def go(arg):
    config = Config()

    train_file = 'sample-data/200410_train_stratshuf_english.csv'
    test_file = 'sample-data/200410_test_stratshuf_chinese_200410_english.csv'

    dataset = Dataset(config)
    dataset.load_data(train_file, test_file)

    NUM_CLS = 338  # number of intent classes, taken from the bert code
    vocab_size = len(dataset.vocab)

    # create the model
    model = Transformer(k=arg.dim_model, heads=arg.num_heads, depth=arg.depth,
                        num_tokens=vocab_size, num_classes=NUM_CLS)
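    # the model takes a (batch, seq_len) tensor of token ids and, as it is
    # consumed by F.nll_loss and argmax(dim=1) below, is expected to return
    # (batch, num_classes) log-probabilities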

    use_cuda = torch.cuda.is_available() and not arg.cpu
    device = torch.device("cuda:0" if use_cuda else "cpu")
    model = model.to(device)

    # opt = Adam(params=model.parameters(), lr=arg.lr, betas=(0.9, 0.999), eps=1e-8,
    #            weight_decay=0, amsgrad=False)
    opt = SGD(params=model.parameters(), lr=arg.lr)

    # linear learning-rate warmup, stepped once per batch below
    sch = torch.optim.lr_scheduler.LambdaLR(opt, lambda i: min(i / (arg.lr_warmup / config.batch_size), 1.0))
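    # with the default --lr-warmup of 10_000, the multiplier above grows linearly
    # from 0 and reaches 1.0 after lr_warmup / batch_size batches, i.e. once
    # roughly lr_warmup training instances have been seen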

    # training loop
    tbw = SummaryWriter(log_dir=arg.tb_dir)  # Tensorboard logging; default log_dir is "./runs"

    for e in range(arg.num_epochs):
        print(f'\n epoch {e}')
        model.train(True)

        total_loss = 0.0
        seen = 0
        for batch in tqdm.tqdm(dataset.train_iterator):
            opt.zero_grad()

            x = batch.text.to(device)
            y = batch.label.to(device).long()

            # the iterator yields (seq_len, batch); the model expects (batch, seq_len)
            out = model(x.permute(1, 0))
            loss = F.nll_loss(out, y)
            total_loss += loss.item()
            loss.backward()

            # clip gradients
            # - if the total gradient vector has a length > 1, we clip it back down to 1
            if arg.gradient_clipping > 0.0:
                nn.utils.clip_grad_norm_(model.parameters(), arg.gradient_clipping)

            opt.step()
            sch.step()  # advance the warmup schedule once per batch, not once per epoch

            seen += x.size(1)  # size(1) is the batch dimension

        tbw.add_scalar('classification/train-loss', total_loss / seen, e)  # add_scalar(tag, value, step)
        print('classification/train-loss:', total_loss / seen)

        with torch.no_grad():
            model.train(False)
            all_preds = []
            all_y = []

            # with --final, evaluate on the real test set; otherwise on the validation set
            iterator = dataset.test_iterator if arg.final else dataset.val_iterator
            for batch in iterator:
                x = batch.text.to(device)

                out = model(x.permute(1, 0))
                predicted = out.argmax(dim=1)

                all_preds.extend(predicted.cpu().numpy())
                all_y.extend(batch.label.numpy())

            score = accuracy_score(all_y, np.array(all_preds).flatten())
            print(f'-- {"test" if arg.final else "validation"} accuracy {score}')
            # tbw.add_scalar('classification/test-loss', float(loss.item()), e)

            # debug pass: print the raw test inputs
            for batch in dataset.test_iterator:
                text = batch.text[0]
                label = batch.label - 1  # labels appear to be 1-indexed; shift down to 0-indexed
                print(text)


if __name__ == "__main__":
    parser = ArgumentParser()

    parser.add_argument("-e", "--num-epochs",
                        dest="num_epochs",
                        help="Number of epochs.",
                        default=80, type=int)

    parser.add_argument("-b", "--batch-size",
                        dest="batch_size",
                        help="The batch size.",
                        default=4, type=int)

    parser.add_argument("-l", "--learn-rate",
                        dest="lr",
                        help="Learning rate.",
                        default=0.01, type=float)

    parser.add_argument("-T", "--tb_dir", dest="tb_dir",
                        help="Tensorboard logging directory.",
                        default='./runs')

    parser.add_argument("-f", "--final", dest="final",
                        help="Whether to run on the real test set (if not included, the validation set is used).",
                        action="store_true")

    parser.add_argument("--cpu", dest="cpu",
                        help="Use CPU computation.",
                        action="store_true")

    parser.add_argument("--max-pool", dest="max_pool",
                        help="Use max pooling in the final classification layer.",
                        action="store_true")

    parser.add_argument("-E", "--dim_model", dest="dim_model",
                        help="Size of the character embeddings.",
                        default=128, type=int)

    parser.add_argument("-V", "--vocab-size", dest="vocab_size",
                        help="Number of words in the vocabulary.",
                        default=50_000, type=int)

    parser.add_argument("-M", "--max", dest="max_length",
                        help="Max sequence length. Longer sequences are clipped (-1 for no limit).",
                        default=512, type=int)

    parser.add_argument("-H", "--heads", dest="num_heads",
                        help="Number of attention heads.",
                        default=8, type=int)

    parser.add_argument("-d", "--depth", dest="depth",
                        help="Depth of the network (nr. of self-attention layers).",
                        default=6, type=int)

    parser.add_argument("-r", "--random-seed",
                        dest="seed",
                        help="RNG seed. Negative for random.",
                        default=1, type=int)

    parser.add_argument("--lr-warmup",
                        dest="lr_warmup",
                        help="Learning rate warmup.",
                        default=10_000, type=int)

    parser.add_argument("--gradient-clipping",
                        dest="gradient_clipping",
                        help="Gradient clipping.",
                        default=1.0, type=float)

    options = parser.parse_args()
    print('OPTIONS ', options)

    go(options)
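
# example invocation, keeping the remaining flags at their defaults:
#   python classify_intent.py --num-epochs 20 --learn-rate 0.001 --cpu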