-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path: hparams.yaml
executable file
·112 lines (112 loc) · 2.31 KB
/
hparams.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# Seed value recorded for this run (presumably the global RNG seed — confirm
# against the training script).
seed: 42
# Trainer settings (presumably forwarded to a PyTorch Lightning Trainer —
# confirm against the training script).
# The keys below are nested under `trainer`; without the indentation the
# section key parses as null and its settings leak to the top level.
trainer:
  gpus: 0
  num_nodes: 1
  max_epochs: 1
  max_steps: null
  accumulate_grad_batches: 4
  gradient_clip_val: 0
  # Letter "O" followed by zero; quoted so it is unmistakably a string.
  amp_level: 'O0'
  precision: 32
  checkpoint_callback: false
  logger: false
  log_every_n_steps: 1
  val_check_interval: 1.0
  reload_dataloaders_every_epoch: true
  resume_from_checkpoint: null
# Experiment-manager settings: output directory, experiment name, and which
# logger/checkpoint helpers to create.
# Nested under `exp_manager` so that `name` here does not collide with the
# other `name` keys in this file (optimizer and scheduler).
exp_manager:
  exp_dir: /home/nxingyu/project/
  name: Punctuation_with_Domain_discriminator
  create_tensorboard_logger: true
  create_checkpoint_callback: true
# Absolute, machine-specific data locations (both under /home/nxingyu/data).
# NOTE(review): assumed to be top-level keys rather than children of the
# preceding section — confirm against the consuming script.
base_path: /home/nxingyu/data
tmp_path: /home/nxingyu/data/tmp
# Model hyperparameters.
# The hierarchy below restores the nesting that a flat layout loses: without
# it, keys such as `name`, `loss`, `gamma`, `fc_dropout`, `shuffle` and
# `batch_size` appear several times at one level, which YAML treats as
# duplicate keys (most parsers silently keep only the last value).
# NOTE(review): placing dice_loss / focal_loss / optim under `model`, and
# train_ds / validation_ds under `dataset`, is inferred — confirm against the
# code that reads this file.
model:
  nemo_path: null
  transformer_path: google/electra-base-discriminator
  unfrozen: 0
  maximum_unfrozen: 1
  unfreeze_step: 1
  # Punctuation label inventory (10 entries; the first is the empty label).
  # Every entry is quoted so all labels are visibly strings.
  punct_label_ids:
    - ''
    - '!'
    - ','
    - '-'
    - '.'
    - ':'
    - ';'
    - '?'
    - '—'
    - '…'
  punct_class_weights: false

  dataset:
    data_dir: /home/nxingyu/data
    labelled:
      - /home/nxingyu/data/ted_talks_processed
    unlabelled: null
    max_seq_length: 128
    # Same value as the first entry of punct_label_ids.
    pad_label: ''
    ignore_extra_tokens: false
    ignore_start_end: false
    use_cache: false
    num_workers: 6
    pin_memory: true
    drop_last: true
    # Equals the number of entries in punct_label_ids.
    num_labels: 10
    num_domains: 1
    test_unlabelled: true
    # NOTE(review): this is the string 'none', not YAML null — confirm the
    # consumer expects the string.
    attach_label_to_end: 'none'
    train_ds:
      shuffle: true
      num_samples: -1
      batch_size: 4
    validation_ds:
      shuffle: true
      num_samples: -1
      batch_size: 4

  tokenizer:
    tokenizer_name: google/electra-base-discriminator
    vocab_file: null
    tokenizer_model: null
    special_tokens: null

  language_model:
    pretrained_model_name: google/electra-base-discriminator
    lm_checkpoint: null
    config_file: null
    config: null

  # Punctuation classification head.
  punct_head:
    punct_num_fc_layers: 1
    fc_dropout: 0.1
    activation: relu
    log_softmax: false
    use_transformer_init: true
    loss: crf
    bilstm: true

  # Domain classification head.
  domain_head:
    domain_num_fc_layers: 1
    fc_dropout: 0.1
    activation: relu
    log_softmax: false
    use_transformer_init: true
    loss: cel
    gamma: 0.01
    pooling: mean_max
    idx_conditioned_on: 0

  dice_loss:
    epsilon: 0.01
    alpha: 3
    macro_average: true

  focal_loss:
    gamma: 1

  optim:
    name: adamw
    lr: 0.01
    weight_decay: 0.0
    # Learning-rate schedule, nested under the optimizer it applies to.
    sched:
      name: CosineAnnealing
      warmup_steps: null
      warmup_ratio: 0.1
      min_lr: 1.0e-10
      last_epoch: -1
      monitor: val_loss
      reduce_on_plateau: false