Commit

fix CL methodologies:
Ubuntu committed Mar 10, 2022
1 parent 347bc45 commit e00420f
Showing 11 changed files with 114 additions and 70 deletions.
46 changes: 23 additions & 23 deletions Datasets.py
@@ -18,7 +18,7 @@ def __init__(self, tokenizer, type_path, input_length, output_length, args, leng
        # dataset for continual training
        if self.type_path=='train':
            if self.args.mode == 'finetune':
-                self.dataset = pd.read_csv('data/TWiki_Probes/evaluation/lighttuning/'+self.args.dataset+'.csv')
+                self.dataset = pd.read_csv('data/TWiki_Probes/lighttuning/'+self.args.dataset+'.csv')
            elif self.args.dataset=='wikipedia_0809':
                self.dataset = pd.read_csv('data/TWiki_Diffsets/wikipedia_0809_subset.csv')
            elif self.args.dataset=='wikipedia_0809_gpt2':
@@ -36,56 +36,56 @@ def __init__(self, tokenizer, type_path, input_length, output_length, args, leng
            else:
                raise Exception('The given dataset does not exist in data directory.')
        elif type_path =='pretrain':
-            total_line = 8021155
+            total_line = 4000000
            skip = sorted(random.sample(range(1,total_line+1),total_line-length))
-            self.dataset = pd.read_csv('data/wikipedia_pretrain_full.csv', usecols=['text'], skiprows=skip)
+            self.dataset = pd.read_csv('data/Wikipedia_Full/wikipedia_08_gpt2/part1.csv', usecols=['text'], skiprows=skip)
        else:
            # evaluation dataset
            if self.args.check_validation_only:
                if self.args.mode == 'evaluate_ppl_corpus':
                    self.dataset = pd.read_csv('data/perplexity/'+self.args.dataset+'.csv')
                else:
                    if self.args.dataset == 'IL':
-                        self.dataset = pd.read_csv('data/evaluation/IL.csv')
+                        self.dataset = pd.read_csv('data/IL.csv')
                    else:
-                        self.dataset = pd.read_csv('data/evaluation/aligned/'+ self.args.dataset + '.csv')
+                        self.dataset = pd.read_csv('data/aligned/'+ self.args.dataset + '.csv')
            # validation dataset
            elif self.args.dataset=='IL':
-                self.dataset = pd.read_csv('data/TWiki_Probes/evaluation/IL.csv')
+                self.dataset = pd.read_csv('data/TWiki_Probes/IL.csv')
            elif self.args.dataset=='data/wikipedia_09' or self.args.dataset=='wikipedia_0809' or self.args.dataset=='data/wikipedia_09_gpt2' or self.args.dataset=='wikipedia_0809_gpt2':
-                df1 = pd.read_csv('data/TWiki_Probes/evaluation/aligned/0801-0901_unchanged.csv')
-                df2 = pd.read_csv('data/TWiki_Probes/evaluation/aligned/0801-0901_updated.csv')
-                df3 = pd.read_csv('data/TWiki_Probes/evaluation/aligned/0801-0901_new.csv')
-                df4 = pd.read_csv('data/TWiki_Probes/evaluation/IL.csv')
+                df1 = pd.read_csv('data/TWiki_Probes/aligned/0801-0901_unchanged.csv')
+                df2 = pd.read_csv('data/TWiki_Probes/aligned/0801-0901_updated.csv')
+                df3 = pd.read_csv('data/TWiki_Probes/aligned/0801-0901_new.csv')
+                df4 = pd.read_csv('data/TWiki_Probes/IL.csv')
                df1 = pd.concat([df1, df2])
                df1 = pd.concat([df1, df3])
                self.dataset = pd.concat([df1, df4])
            elif self.args.dataset=='data/wikipedia_10_gpt2' or self.args.dataset=='data/wikipedia_10' or self.args.dataset=='wikipedia_0910' or self.args.dataset=='wikipedia_0910_gpt2':
-                df1 = pd.read_csv('data/TWiki_Probes/evaluation/aligned/0901-1001_unchanged.csv')
-                df2 = pd.read_csv('data/TWiki_Probes/evaluation/aligned/0901-1001_updated.csv')
-                df3 = pd.read_csv('data/TWiki_Probes/evaluation/aligned/0901-1001_new.csv')
-                df4 = pd.read_csv('data/TWiki_Probes/evaluation/IL.csv')
+                df1 = pd.read_csv('data/TWiki_Probes/aligned/0901-1001_unchanged.csv')
+                df2 = pd.read_csv('data/TWiki_Probes/aligned/0901-1001_updated.csv')
+                df3 = pd.read_csv('data/TWiki_Probes/aligned/0901-1001_new.csv')
+                df4 = pd.read_csv('data/TWiki_Probes/IL.csv')
                df1 = pd.concat([df1, df2])
                df1 = pd.concat([df1, df3])
                self.dataset = pd.concat([df1, df4])
            elif self.args.dataset=='data/wikipedia_11_gpt2' or self.args.dataset=='data/wikipedia_11' or self.args.dataset=='wikipedia_1011' or self.args.dataset=='wikipedia_1011_gpt2':
-                df1 = pd.read_csv('data/TWiki_Probes/evaluation/aligned/1001-1101_unchanged.csv')
-                df2 = pd.read_csv('data/TWiki_Probes/evaluation/aligned/1001-1101_updated.csv')
-                df3 = pd.read_csv('data/TWiki_Probes/evaluation/aligned/1001-1101_new.csv')
-                df4 = pd.read_csv('data/TWiki_Probes/evaluation/IL.csv')
+                df1 = pd.read_csv('data/TWiki_Probes/aligned/1001-1101_unchanged.csv')
+                df2 = pd.read_csv('data/TWiki_Probes/aligned/1001-1101_updated.csv')
+                df3 = pd.read_csv('data/TWiki_Probes/aligned/1001-1101_new.csv')
+                df4 = pd.read_csv('data/TWiki_Probes/IL.csv')
                df1 = pd.concat([df1, df2])
                df1 = pd.concat([df1, df3])
                self.dataset = pd.concat([df1, df4])
            elif self.args.dataset=='data/wikipedia_12_gpt2' or self.args.dataset=='data/wikipedia_12' or self.args.dataset=='wikipedia_1011' or self.args.dataset=='wikipedia_1011_gpt2':
-                df1 = pd.read_csv('data/TWiki_Probes/evaluation/aligned/1101-1201_unchanged.csv')
-                df2 = pd.read_csv('data/TWiki_Probes/evaluation/aligned/1101-1201_updated.csv')
-                df3 = pd.read_csv('data/TWiki_Probes/evaluation/aligned/1101-1201_new.csv')
-                df4 = pd.read_csv('data/TWiki_Probes/evaluation/IL.csv')
+                df1 = pd.read_csv('data/TWiki_Probes/aligned/1101-1201_unchanged.csv')
+                df2 = pd.read_csv('data/TWiki_Probes/aligned/1101-1201_updated.csv')
+                df3 = pd.read_csv('data/TWiki_Probes/aligned/1101-1201_new.csv')
+                df4 = pd.read_csv('data/TWiki_Probes/IL.csv')
                df1 = pd.concat([df1, df2])
                df1 = pd.concat([df1, df3])
                self.dataset = pd.concat([df1, df4])
            else:
-                self.dataset = pd.read_csv('data/TWiki_Probes/evaluation/IL.csv')
+                self.dataset = pd.read_csv('data/TWiki_Probes/IL.csv')

        print(f'Length of dataset retrieving is.. {len(self.dataset)}')
        self.input_length = input_length
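The pretrain branch above subsamples a large corpus CSV without reading it all into memory: it draws the row indices to drop and hands them to pandas as skiprows. A minimal, self-contained sketch of the same trick (the path and row counts are placeholders, not the repository's exact values):

import random
import pandas as pd

def sample_csv_rows(path, total_rows, n_keep, seed=None):
    # Read n_keep randomly chosen data rows from a CSV that has total_rows
    # data rows. Row 0 is the header, so data rows are indexed 1..total_rows.
    rng = random.Random(seed)
    # pick the rows to SKIP; pandas drops them while parsing, so only
    # n_keep rows are ever materialised
    skip = sorted(rng.sample(range(1, total_rows + 1), total_rows - n_keep))
    return pd.read_csv(path, usecols=['text'], skiprows=skip)

# e.g. mirroring the pretrain branch with illustrative numbers:
# subset = sample_csv_rows('data/Wikipedia_Full/wikipedia_08_gpt2/part1.csv',
#                          total_rows=4_000_000, n_keep=500_000)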
34 changes: 0 additions & 34 deletions configs/baseline_t5_s.json

This file was deleted.

18 changes: 8 additions & 10 deletions configs/baseline_t5.json → configs/training/diff.json
@@ -1,15 +1,13 @@
{
"input_length" : 512,
-"output_length" : 400,
+"output_length" : 512,
"num_train_epochs" : 1,
-"num_files" : 2,
-"output_dir" : "outputs/T5_large_08_lr1e-5_full",
-"dataset" : "data/wikipedia_08",
+"output_dir" : "outputs/GPT2_large_12_1e-4_diff",
+"dataset" : "wikipedia_1112_gpt2",
"dataset_version" : "full",
-"len_data" : 4000000,
-"train_batch_size" : 6,
-"learning_rate" : 1e-5,
-"model" : "google/t5-large-ssm",
+"train_batch_size" : 8,
+"learning_rate" : 1e-4,
+"model" : "gpt2-large",
"method": "baseline",
"gradient_accumulation_steps" : 1,
"ngpu" : 8,
@@ -20,8 +18,8 @@
"CUDA_VISIBLE_DEVICES" : "0,1,2,3,4,5,6,7",
"wandb_log": true,
"wandb_project": "ever_changing",
-"wandb_run_name" : "T5_large_08_lr1e-5_full",
-"mode" : "pretrain_brute",
+"wandb_run_name" : "GPT2_large_12_1e-4_diffcl_v3",
+"mode" : "pretrain",
"use_lr_scheduling" : true,
"check_validation" : false,
"checkpoint_path" : ""
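The renamed config above, like the new ones below, is plain JSON; the training entry point presumably loads one and exposes its keys as hyperparameters. The loader below is only an illustrative sketch under that assumption, not the repository's actual code:

import argparse
import json
import os

def load_config(path):
    # Load a config such as configs/training/diff.json and expose its keys
    # as attributes (args.model, args.learning_rate, ...).
    with open(path) as f:
        cfg = json.load(f)
    # the configs pin GPUs via a device string like "0,1,2,3,4,5,6,7"
    if 'CUDA_VISIBLE_DEVICES' in cfg:
        os.environ['CUDA_VISIBLE_DEVICES'] = cfg['CUDA_VISIBLE_DEVICES']
    return argparse.Namespace(**cfg)

# args = load_config('configs/training/diff.json')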
File renamed without changes.
26 changes: 26 additions & 0 deletions configs/training/diff_lora.json
@@ -0,0 +1,26 @@
{
"input_length" : 512,
"output_length" : 512,
"num_train_epochs" : 1,
"output_dir" : "outputs/GPT2_large_10_1e-4_diff_lora",
"dataset" : "wikipedia_0910_gpt2",
"dataset_version" : "full",
"train_batch_size" : 8,
"learning_rate" : 1e-4,
"model" : "gpt2-large",
"method": "lora",
"gradient_accumulation_steps" : 1,
"ngpu" : 8,
"num_workers" : 40,
"resume_from_checkpoint" : null,
"accelerator" : "deepspeed_stage_2",
"fp16" : true,
"CUDA_VISIBLE_DEVICES" : "0,1,2,3,4,5,6,7",
"wandb_log": true,
"wandb_project": "ever_changing",
"wandb_run_name" : "GPT2_large_10_1e-4_diff_lora",
"mode" : "pretrain",
"use_lr_scheduling" : true,
"check_validation" : false,
"checkpoint_path" : "outputs/GPT2_large_09_1e-4_diff_lora_/epoch=0"
}
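In diff_lora.json, checkpoint_path points at the fp32 checkpoint converted from the previous month's run (see convert_to_fp32.py below), so each phase of LoRA continual pretraining starts from the last one. How the training script consumes that path is not part of this commit; one plausible sketch, assuming the converted checkpoint is a single torch-loadable file:

import torch

def warm_start(model, checkpoint_path):
    # Hypothetical helper: load the previous phase's converted weights
    # before continuing pretraining on the next month's diff set.
    state = torch.load(checkpoint_path, map_location='cpu')
    state = state.get('state_dict', state)  # Lightning nests weights under 'state_dict'
    model.load_state_dict(state, strict=False)
    return model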
26 changes: 26 additions & 0 deletions configs/training/diff_mixreview.json
@@ -0,0 +1,26 @@
{
"input_length" : 512,
"output_length" : 512,
"num_train_epochs" : 1,
"output_dir" : "outputs/GPT2_large_09_1e-4_diff_lora",
"dataset" : "wikipedia_0809_gpt2",
"dataset_version" : "full",
"train_batch_size" : 8,
"learning_rate" : 1e-4,
"model" : "gpt2-large",
"method": "mixreview",
"gradient_accumulation_steps" : 1,
"ngpu" : 8,
"num_workers" : 40,
"resume_from_checkpoint" : null,
"accelerator" : "deepspeed_stage_2",
"fp16" : true,
"CUDA_VISIBLE_DEVICES" : "0,1,2,3,4,5,6,7",
"wandb_log": true,
"wandb_project": "ever_changing",
"wandb_run_name" : "GPT2_large_09_1e-4_diff_lora",
"mode" : "pretrain",
"use_lr_scheduling" : true,
"check_validation" : false,
"checkpoint_path" : "outputs/GPT2_large_08_full.ckpt"
}
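The mixreview method named in this config (mix-review) rehearses the original pretraining corpus while learning the new month's data, by mixing a fraction of old pretraining examples into the update stream. The sketch below only illustrates the idea; the ratio and names are assumptions, not the repository's implementation:

import random

def mix_review(new_examples, pretrain_examples, mix_ratio=1.0, seed=0):
    # Draw len(new_examples) * mix_ratio old pretraining examples and
    # shuffle them together with the new data, so every batch rehearses
    # previously seen text alongside the updated articles.
    rng = random.Random(seed)
    n_review = min(int(len(new_examples) * mix_ratio), len(pretrain_examples))
    mixed = list(new_examples) + rng.sample(pretrain_examples, n_review)
    rng.shuffle(mixed)
    return mixed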
26 changes: 26 additions & 0 deletions configs/training/diff_recadam.json
@@ -0,0 +1,26 @@
{
"input_length" : 512,
"output_length" : 512,
"num_train_epochs" : 1,
"output_dir" : "outputs/GPT2_large_09_1e-4_diff_lora",
"dataset" : "wikipedia_0809_gpt2",
"dataset_version" : "full",
"train_batch_size" : 8,
"learning_rate" : 1e-4,
"model" : "gpt2-large",
"method": "recadam",
"gradient_accumulation_steps" : 1,
"ngpu" : 8,
"num_workers" : 40,
"resume_from_checkpoint" : null,
"accelerator" : "deepspeed_stage_2",
"fp16" : true,
"CUDA_VISIBLE_DEVICES" : "0,1,2,3,4,5,6,7",
"wandb_log": true,
"wandb_project": "ever_changing",
"wandb_run_name" : "GPT2_large_09_1e-4_diff_lora",
"mode" : "pretrain",
"use_lr_scheduling" : true,
"check_validation" : false,
"checkpoint_path" : "outputs/GPT2_large_08_full.ckpt"
}
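RecAdam counters forgetting with a quadratic penalty that pulls parameters back toward their pretrained values, its weight annealed down as training proceeds. The optimizer itself is not part of this diff; the fragment below is only a schematic of the recall term, with gamma, t0, and coef as illustrative hyperparameter names:

import math
import torch

def recall_grad(param, pretrained_param, step, gamma=1e-4, t0=1000, coef=5000.0):
    # Gradient of the recall penalty 0.5 * coef * ||theta - theta_pretrained||^2,
    # scaled by an annealing weight that decays as the step grows, so the pull
    # toward the pretrained weights is strongest early in training.
    anneal = 1.0 / (1.0 + math.exp(-gamma * (step - t0)))  # rises toward 1
    return (1.0 - anneal) * coef * (param - pretrained_param.detach())

# sketch of use inside a training step:
# for p, p0 in zip(model.parameters(), pretrained_snapshot):
#     p.grad.add_(recall_grad(p.data, p0, global_step))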
File renamed without changes.
4 changes: 2 additions & 2 deletions convert_to_fp32.py
@@ -3,8 +3,8 @@
# lightning deepspeed has saved a directory instead of a file

##### Please modify this part #####
-checkpoint_dir = 'outputs/GPT2_large_11_1e-4_diffcl_v3'
-output_path = 'outputs/GPT2_large_11_1e-4_diffcl_v3_/'
+checkpoint_dir = 'outputs/GPT2_large_09_1e-4_diff_lora'
+output_path = 'outputs/GPT2_large_09_1e-4_diff_lora_/'

if not os.path.isdir(output_path):
    os.mkdir(output_path)
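The part of convert_to_fp32.py not shown in this diff presumably collapses the sharded DeepSpeed ZeRO checkpoint directory written by Lightning into a single fp32 state dict. A sketch of that step using Lightning's DeepSpeed utility, with the output file name as an assumption:

from pytorch_lightning.utilities.deepspeed import convert_zero_checkpoint_to_fp32_state_dict

# checkpoint_dir and output_path are the variables set at the top of the script above
convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, output_path + 'pytorch_model.bin')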
2 changes: 1 addition & 1 deletion models/GPT2_Model.py
@@ -43,7 +43,7 @@ def __init__(self, hparams):

        self.model = GPT2LMHeadModel.from_pretrained(hparams.model_name_or_path)
        self.save_hyperparameters(hparams)
-        if hparams.method=='baseline':
+        if hparams.method=='baseline' or hparams.method=='mixreview':
            self.model = GPT2LMHeadModel.from_pretrained(hparams.model_name_or_path)
        elif hparams.method=='kadapter':
            self.model = GPT2_Kadapter.from_pretrained(hparams.model_name_or_path)
2 changes: 2 additions & 0 deletions models/GPT2_Model_LoRA.py
@@ -165,6 +165,8 @@ def __init__(self, config, is_cross_attention=False, layer_idx=None):
        self.num_heads = config.num_attention_heads
        self.head_dim = self.embed_dim // self.num_heads
        self.split_size = self.embed_dim
+        self.lora_attn_dim = 4
+        self.lora_attn_alpha = 16
        if self.head_dim * self.num_heads != self.embed_dim:
            raise ValueError(
                f"`embed_dim` must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`: {self.num_heads})."
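The two added attributes set the LoRA rank (lora_attn_dim = 4) and scaling factor (lora_attn_alpha = 16) for the attention projections. In LoRA, a frozen weight W is augmented with a trainable low-rank update scaled by alpha / r. A minimal, self-contained sketch of that pattern (not the exact code in GPT2_Model_LoRA.py):

import torch
import torch.nn as nn

class LoRALinear(nn.Module):
    # y = x W^T + b + (alpha / r) * x A^T B^T, with W and b frozen and only
    # the low-rank factors A (r x in) and B (out x r) trained.
    def __init__(self, in_features, out_features, r=4, alpha=16):
        super().__init__()
        self.base = nn.Linear(in_features, out_features)
        self.base.weight.requires_grad_(False)
        self.base.bias.requires_grad_(False)
        self.lora_A = nn.Parameter(torch.randn(r, in_features) * 0.01)
        self.lora_B = nn.Parameter(torch.zeros(out_features, r))  # zero init: no change at start
        self.scaling = alpha / r

    def forward(self, x):
        return self.base(x) + self.scaling * (x @ self.lora_A.T @ self.lora_B.T)

# e.g. a stand-in for GPT-2 large's c_attn projection (1280 -> 3*1280), illustrative only:
# layer = LoRALinear(1280, 3840, r=4, alpha=16)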
