runnerFineTuneQATfind-1.py
import comet_ml
from transformers import EarlyStoppingCallback
from enmt import RobustCallback, CometOneExperimentCallback, CometContinueExperimentCallback, \
    TestRobustCallback
from enmt.datasets import EuroParl, OpenSubtitles
from enmt.model_wrapper import ModelWrapper, _test_translation
from enmt.results import Pipeline, Scenario
from copy import deepcopy
import os
from glob import glob
import re
"""
Searching for ideal point to continue QAT finetuning
Based on previous experiments - LR = 2e-4, train epochs = 2
Using checkpoints of FP_marian_6 model. Trained on EuParl,
this model https://www.comet.ml/marekninja/huggingface/2499fb4d213b4d2fa9e00be3469af540
model was trained for 100k steps - cca 10.6 epochs
For every checkpoint (every 5k steps) we do QA fine-tuning for 2 epochs.
By setting gpu_index and max_gpu_cards it is possible to run this distributed
Questions:
When to stop FP training and continue QA fine-tuning?
How does the robustness of the model change? (validation on out-of-domain dataset)
Can the QAT model achieve FP BLEU?
QAT Fine-Tune pre-trained Marian model on EuParl
1. Evaluate on Validation set
1.1 EuroParl
1.2 OpenSubs
2. FineTune on EuroParl - LEARNING RATE IS 2e-4, THOUGH IT IS MORE LIKE THE CONTINUATION OF TRAINING...
- FINETUNING FOR 2 EPOCHS
2.1 validate on EuroParl, OpenSubs
- validation every 200 steps on 400 validation examples (small validation set to allow for frequent validation)
- observe drop in validation BLEU on OpenSubs
Training dataset: Euro Parlament en-sk
Evaluation Euro Parl, Open Subs
metric_key_prefix format:
trainEuParlFP_EuParl_test_cpu
model specification:
scenario of model - train
dataset of model - Euparl
train mode - FP
current run specification:
current dataset - EuParl
current scenario - test
device - cpu
"""
"""
Running on LINUX
nvidia-smi -L
lists available cuda devices on system
use the number (might be different indexing)
CUDA_VISIBLE_DEVICES=5 python runnerAll.py
Profiling:
import torch
from torch.profiler import profile, record_function, ProfilerActivity
with profile(activities=[ProfilerActivity.CPU], record_shapes=True) as prof:
with record_function("model_inference"):
pipeEval.run()
"""
# smaller validation set - to allow for frequent metrics evaluation
test_size = 40000
valid_size = 400
batch_size = 8
valid_batch_size = batch_size
eval_batch_size_gpu = batch_size
eval_batch_size_cpu = batch_size // 2
grad_acc_steps = 8
train_epochs = 2  # overridden by max_steps (only when max_steps > 0)
warmup_steps = 0
eval_steps = 1000
save_steps = 1000
# max_steps = 125000  # 250k update steps maximum, overrides train_epochs...
max_steps = -1  # negative => not used; otherwise overrides train_epochs
save_total_limit = 1
bn_freeze = int(
    round((639158 / 64) * (3 / 8)))  # 3/8 of all global steps; per the PyTorch QAT tutorial it should be bigger than qpar_freeze
qpar_freeze = int(round((639158 / 64) * 0.25))  # 1/4 of all global steps
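# Note (assumption, not stated in the script): 639158 appears to be the EuroParl training-set size
# and 64 the effective batch size (per_device_train_batch_size * grad_acc_steps = 8 * 8), so
# 639158 / 64 approximates the number of optimizer steps per epoch.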
# checkpoints_dir = "./FP_marian_3/"
checkpoints_dir = "/mnt/local/disk1/klasifikace_reflexe/MT_petrovic/in_progress/FP_marian_6_a2680f17a7954ecf94d72c045a83bbf0_TRAIN_EVAL/"
experiment_name_template = "trainedFP-{step} QAfineTune EuParl"
max_gpu_cards = 3
gpu_index = 0  # should be in the interval [0, max_gpu_cards - 1]
# test_size = 0.99995
# test_size = 0.999
# valid_size = 40
# batch_size = 2
# valid_batch_size = 2 * batch_size
# eval_batch_size_gpu = 2 * batch_size
# eval_batch_size_cpu = batch_size // 2
# grad_acc_steps = 1
# train_epochs = 2
# steps = (8000 * train_epochs) // (batch_size * grad_acc_steps)
# bn_freeze = int(round(steps*0.5)) # 1/2 of all global steps
# qpar_freeze = int(round(steps*(2/3))) # 2/3 of all global steps
# train = OpenSubtitles(test_size=test_size, valid_size=valid_size, seed=42)
dirs = sorted(glob(os.path.join(checkpoints_dir, "checkpoint-*")),
              key=lambda x: int(re.findall(r".*checkpoint-(\d+)", x)[0]))
dirs = dirs[gpu_index::max_gpu_cards]
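# Round-robin sharding of checkpoints across workers: with max_gpu_cards = 3 and checkpoints every
# 5k steps, worker 0 (gpu_index = 0) would handle checkpoint-5000, checkpoint-20000, ...,
# worker 1 checkpoint-10000, checkpoint-25000, ..., and so on. Each worker is launched separately
# with its own gpu_index (and CUDA_VISIBLE_DEVICES, see the note above).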
# checkpoints = [c for d in dirs if os.path.isfile(c := os.path.join(d,"pytorch_model.bin"))]
print(dirs)
# print(checkpoints)
# exit()
for dir in dirs:
    start_step = re.findall(r".*checkpoint-(\d+)", dir)[0]
    experiment_name = re.sub("{step}", start_step, experiment_name_template)

    training_args = {"save_strategy": "no",
                     'per_device_eval_batch_size': valid_batch_size, 'predict_with_generate': True,
                     'generation_num_beams': 1,
                     'no_cuda': False,
                     'fp16': False, 'push_to_hub': False,
                     'disable_tqdm': True,
                     'report_to': "none"
                     }

    training_args_q = {"output_dir": '/mnt/local/disk1/klasifikace_reflexe/MT_petrovic/in_progress/qat_find_checkpoints/' + start_step,
                       'evaluation_strategy': 'steps', "eval_steps": eval_steps, "save_steps": save_steps, 'logging_first_step': True,
                       'metric_for_best_model': "eval_bleu", 'greater_is_better': True, "load_best_model_at_end": True,
                       "save_strategy": "steps",
                       # 'evaluation_strategy': 'steps', "save_steps": 500, "eval_steps": 500, 'logging_first_step': True,
                       'learning_rate': 2e-4, 'per_device_train_batch_size': batch_size, 'warmup_steps': warmup_steps,
                       # 'learning_rate': 2e-5, 'per_device_train_batch_size': batch_size, 'warmup_steps': 0,
                       'gradient_accumulation_steps': grad_acc_steps,
                       'per_device_eval_batch_size': valid_batch_size, 'weight_decay': 0.01,
                       'save_total_limit': save_total_limit,
                       'num_train_epochs': train_epochs, "max_steps": max_steps, 'predict_with_generate': True,
                       'generation_num_beams': 1,
                       'bn_freeze': bn_freeze, 'qpar_freeze': qpar_freeze,
                       'no_cuda': False,
                       'fp16': False, 'push_to_hub': False,
                       'disable_tqdm': True,
                       # 'resume_from_checkpoint': '',
                       'report_to': "none"
                       }

    # modelQAT = ModelWrapper(pretrained_model_name_or_path="Helsinki-NLP/opus-mt-en-sk")
    modelQAT = ModelWrapper(pretrained_model_name_or_path=dir)
    # _test_translation(modelQAT)

    # 1. Evaluate on the validation set to know the model's performance before fine-tuning
    # 1.1 Eval EuroParl
    train = EuroParl(test_size=test_size, valid_size=valid_size, seed=42)
    pipePreFTeval = Pipeline(Scenario.FT_EVAL, modelQAT, train, training_args,
                             metric_key_prefix="trainEuParlFP_EuParl_eval")
    pipePreFTeval.trainer.add_callback(CometOneExperimentCallback())
    pipePreFTeval.run()
    comet_ml.get_global_experiment().set_name(experiment_name)

    # 1.2 Eval OpenSubs
    validation = OpenSubtitles(test_size=test_size, valid_size=valid_size, seed=42)
    pipePreFTeval = Pipeline(Scenario.FT_EVAL, modelQAT, validation, training_args,
                             metric_key_prefix="trainEuParlFP_OpenSubs_eval")
    pipePreFTeval.trainer.add_callback(CometContinueExperimentCallback())
    pipePreFTeval.run()

    # 2. Fine-tune on EuroParl - the metric for this pipeline is eval_bleu
    # 2.1 validate on EuroParl
    modelQAT.quantizeQATStart(test_tr=True)
    train = EuroParl(test_size=test_size, valid_size=valid_size, seed=42)
    pipe = Pipeline(Scenario.QUANT_AWARE_TUNE, model=modelQAT, dataset=train,
                    training_args=training_args_q)

    # 2.1 validate on OpenSubs
    validation = OpenSubtitles(test_size=test_size, valid_size=valid_size, seed=42)
    validation.preprocess(tokenizer=modelQAT.tokenizer)
    callback1 = RobustCallback(pipe.trainer, validation['val'], "trainEuParlFP_fineTuneEuParlQAT_OpenSubs_eval")
    callback2 = TestRobustCallback(pipe.trainer, train['test'], "trainEuParlFP_fineTuneEuParlQAT_EuParl_test")
    callback3 = TestRobustCallback(pipe.trainer, validation['test'], "trainEuParlFP_fineTuneEuParlQAT_OpenSubs_test")
    callback5 = CometContinueExperimentCallback()
    pipe.trainer.add_callback(callback1)
    pipe.trainer.add_callback(callback2)
    pipe.trainer.add_callback(callback3)
    pipe.trainer.add_callback(callback5)

    print("FineTuning QAT on EuroParl (model previously pre-trained FP):")
    pipe.run()

    pipe.trainer.save_model('/mnt/local/disk1/klasifikace_reflexe/MT_petrovic/in_progress/FP_marian_6_QAT_fine-tuned/'
                            + start_step + "_FP_marian_6_QAT_find")
    _test_translation(modelQAT)

    # modelQAT.model.save_pretrained('./saved_models/trainedQAT/' + start_step + '_marianmt_v2_en-sk_openSubs-euparl_model',
    #                                push_to_hub=False)
    # modelQAT.tokenizer.save_pretrained('./saved_models/trainedQAT/' + start_step + '_marianmt_v2_en-sk_openSubs-euparl_tokenizer',
    #                                    push_to_hub=False)

    # train = OpenSubtitles(test_size=test_size, valid_size=valid_size, seed=42)
    # validation = EuroParl(test_size=test_size, valid_size=valid_size, seed=42)
    # modelQAT.quantizeQATConvert()
    #
    # training_argsEval = {'no_cuda': True, 'fp16': False, 'per_device_eval_batch_size': eval_batch_size_cpu,
    #                      'predict_with_generate': True,
    #                      "report_to": "none"
    #                      }
    # pipeEval = Pipeline(Scenario.EVAL, model=modelQAT, dataset=train,
    #                     training_args=training_argsEval, metric_key_prefix="trainEuParlQAT_EuParl_test_cpu")
    # pipeEval.trainer.add_callback(CometContinueExperimentCallback())
    # print("BLEU in-domain (EuParl) on QAT cpu")
    # pipeEval.run()
    #
    # pipeEval = Pipeline(Scenario.EVAL, model=modelQAT, dataset=validation,
    #                     training_args=training_argsEval, metric_key_prefix="trainEuParlQAT_OpenSubs_test_cpu")
    # pipeEval.trainer.add_callback(CometContinueExperimentCallback())
    # print("BLEU out-of-domain (OpenSubs) on QAT cpu")
    # pipeEval.run()

    comet_ml.get_global_experiment().end()