forked from s3prl/s3prl
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathexample_extract_finetune.py
68 lines (59 loc) · 3.11 KB
/
example_extract_finetune.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# -*- coding: utf-8 -*- #
"""*********************************************************************************************"""
# FileName [ example_extract_finetune.py ]
# Synopsis [ an example code of using the wrapper class for downstream feature extraction or finetune ]
# Author [ Andy T. Liu (Andi611) ]
# Copyright [ Copyleft(c), Speech Lab, NTU, Taiwan ]
"""*********************************************************************************************"""
###############
# IMPORTATION #
###############
import torch
from transformer.nn_transformer import TRANSFORMER
from downstream.model import example_classifier
from downstream.solver import get_optimizer
################
# EXAMPLE CODE #
################
# setup the transformer model
"""
`options`: a python dictionary containing the following keys:
ckpt_file: str, a path specifying the pre-trained ckpt file
load_pretrain: str, ['True', 'False'], whether to load pre-trained weights
no_grad: str, ['True', 'False'], whether to have gradient flow over this class
dropout: float/str, use float to modify dropout value during downstream finetune, or use the str `default` for pre-train default values
spec_aug: str, ['True', 'False'], whether to apply SpecAugment on inputs (used for ASR training)
spec_aug_prev: str, ['True', 'False'], apply spec augment on input acoustic features if True, else apply on output representations (used for ASR training)
weighted_sum: str, ['True', 'False'], whether to use a learnable weighted sum to integrate hidden representations from all layers, if False then use the single layer chosen by `select_layer`
select_layer: int, select from all hidden representations, set to -1 to select the last (will only be used when weighted_sum is False)
permute_input: str, ['True', 'False'], this attribute is for the forward method. If True then the input and output are in the shape of (T, B, D), if False then in (B, T, D)
"""
# Option table for the upstream wrapper, spelled out keyword-by-keyword.
# Boolean-like switches are passed as the strings 'True' / 'False', and
# `select_layer` is the only integer-valued entry.
options = dict(
    ckpt_file='./result/result_transformer/tera/fmllrBase960-F-N-K-libri/states-1000000.ckpt',
    load_pretrain='True',
    no_grad='True',
    dropout='default',
    spec_aug='False',
    spec_aug_prev='True',
    weighted_sum='False',
    select_layer=-1,
    permute_input='False',
)
# Build the upstream model from the option table; `inp_dim=40` equals the
# feature size of the example batch fed through it further down.
transformer = TRANSFORMER(inp_dim=40, options=options)
# choose the compute device once so the example also runs on machines without
# CUDA (the hard-coded `.cuda()` calls raised there); on a CUDA machine this
# resolves to the same default GPU that `.cuda()` would pick
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# setup your downstream class model
classifier = example_classifier(input_dim=768, hidden_dim=128, class_num=2).to(device)
# construct the optimizer over the named parameters of both models
params = list(transformer.named_parameters()) + list(classifier.named_parameters())
optimizer = get_optimizer(params=params, lr=4e-3, warmup_proportion=0.7, training_steps=50000)
# forward
example_inputs = torch.zeros(3, 1200, 40)  # A batch of spectrograms: (batch_size, time_step, feature_size)
# returns: (batch_size, time_step, feature_size)
# `.to(device)` guarantees the representations sit on the same device as the
# classifier and labels; it is a no-op when they are already there
reps = transformer(example_inputs).to(device)
# one integer class label per example in the batch, created directly on `device`
labels = torch.tensor([0, 1, 0], dtype=torch.long, device=device)
loss = classifier(reps, labels)
# update
loss.backward()
optimizer.step()
# save
PATH_TO_SAVE_YOUR_MODEL = 'example.ckpt'
states = {'Classifier': classifier.state_dict(), 'Transformer': transformer.state_dict()}
# NOTE: writing the checkpoint is left disabled in this example; uncomment to save
# torch.save(states, PATH_TO_SAVE_YOUR_MODEL)