Commit

add ckl methods + modify files
seonghyeonye committed Apr 24, 2022
1 parent b63241f commit 3f59221
Showing 52 changed files with 780 additions and 1,006 deletions.
24 changes: 12 additions & 12 deletions Datasets.py
@@ -59,33 +59,33 @@ def __init__(self, tokenizer, type_path, input_length, output_length, args, leng
         elif self.args.dataset=='IL':
             self.dataset = pd.read_csv('data/TWiki_Probes/IL.csv')
         elif self.args.dataset=='data/wikipedia_09' or self.args.dataset=='wikipedia_0809' or self.args.dataset=='data/wikipedia_09_gpt2' or self.args.dataset=='wikipedia_0809_gpt2':
-            df1 = pd.read_csv('data/TWiki_Probes/aligned/0801-0901_unchanged.csv')
-            df2 = pd.read_csv('data/TWiki_Probes/aligned/0801-0901_updated.csv')
-            df3 = pd.read_csv('data/TWiki_Probes/aligned/0801-0901_new.csv')
+            df1 = pd.read_csv('data/twiki_probes/0801-0901_unchanged.csv')
+            df2 = pd.read_csv('data/twiki_probes/0801-0901_updated.csv')
+            df3 = pd.read_csv('data/twiki_probes/0801-0901_new.csv')
             df4 = pd.read_csv('data/TWiki_Probes/IL.csv')
             df1 = pd.concat([df1, df2])
             df1 = pd.concat([df1, df3])
             self.dataset = pd.concat([df1, df4])
         elif self.args.dataset=='data/wikipedia_10_gpt2' or self.args.dataset=='data/wikipedia_10' or self.args.dataset=='wikipedia_0910' or self.args.dataset=='wikipedia_0910_gpt2':
-            df1 = pd.read_csv('data/TWiki_Probes/aligned/0901-1001_unchanged.csv')
-            df2 = pd.read_csv('data/TWiki_Probes/aligned/0901-1001_updated.csv')
-            df3 = pd.read_csv('data/TWiki_Probes/aligned/0901-1001_new.csv')
+            df1 = pd.read_csv('data/twiki_probes/0901-1001_unchanged.csv')
+            df2 = pd.read_csv('data/twiki_probes/0901-1001_updated.csv')
+            df3 = pd.read_csv('data/twiki_probes/0901-1001_new.csv')
             df4 = pd.read_csv('data/TWiki_Probes/IL.csv')
             df1 = pd.concat([df1, df2])
             df1 = pd.concat([df1, df3])
             self.dataset = pd.concat([df1, df4])
         elif self.args.dataset=='data/wikipedia_11_gpt2' or self.args.dataset=='data/wikipedia_11' or self.args.dataset=='wikipedia_1011' or self.args.dataset=='wikipedia_1011_gpt2':
-            df1 = pd.read_csv('data/TWiki_Probes/aligned/1001-1101_unchanged.csv')
-            df2 = pd.read_csv('data/TWiki_Probes/aligned/1001-1101_updated.csv')
-            df3 = pd.read_csv('data/TWiki_Probes/aligned/1001-1101_new.csv')
+            df1 = pd.read_csv('data/twiki_probes/1001-1101_unchanged.csv')
+            df2 = pd.read_csv('data/twiki_probes/1001-1101_updated.csv')
+            df3 = pd.read_csv('data/twiki_probes/1001-1101_new.csv')
             df4 = pd.read_csv('data/TWiki_Probes/IL.csv')
             df1 = pd.concat([df1, df2])
             df1 = pd.concat([df1, df3])
             self.dataset = pd.concat([df1, df4])
         elif self.args.dataset=='data/wikipedia_12_gpt2' or self.args.dataset=='data/wikipedia_12' or self.args.dataset=='wikipedia_1011' or self.args.dataset=='wikipedia_1011_gpt2':
-            df1 = pd.read_csv('data/TWiki_Probes/aligned/1101-1201_unchanged.csv')
-            df2 = pd.read_csv('data/TWiki_Probes/aligned/1101-1201_updated.csv')
-            df3 = pd.read_csv('data/TWiki_Probes/aligned/1101-1201_new.csv')
+            df1 = pd.read_csv('data/twiki_probes/1101-1201_unchanged.csv')
+            df2 = pd.read_csv('data/twiki_probes/1101-1201_updated.csv')
+            df3 = pd.read_csv('data/twiki_probes/1101-1201_new.csv')
             df4 = pd.read_csv('data/TWiki_Probes/IL.csv')
             df1 = pd.concat([df1, df2])
             df1 = pd.concat([df1, df3])
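
The four elif branches above load the same three probe splits plus data/TWiki_Probes/IL.csv, differing only in the period prefix. A minimal sketch of an equivalent lookup-based loader, assuming the same CSV layout (the mapping and the helper name are hypothetical and not part of this commit):

import pandas as pd

# Hypothetical helper (not in this commit): the four elif branches differ only
# in the probe period, so the same loading pattern can be written as a lookup.
PROBE_PREFIX = {
    'wikipedia_09': '0801-0901',
    'wikipedia_10': '0901-1001',
    'wikipedia_11': '1001-1101',
    'wikipedia_12': '1101-1201',
}

def load_probe_dataset(period_prefix):
    # Concatenate the unchanged/updated/new probe splits for one period,
    # plus the shared IL probes, mirroring the pattern in Datasets.py.
    frames = [
        pd.read_csv(f'data/twiki_probes/{period_prefix}_{split}.csv')
        for split in ('unchanged', 'updated', 'new')
    ]
    frames.append(pd.read_csv('data/TWiki_Probes/IL.csv'))
    return pd.concat(frames)
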
113 changes: 0 additions & 113 deletions Datasets_.py

This file was deleted.

16 changes: 16 additions & 0 deletions configs/evaluation/GPT2/lora/0801-0901_changed.json
@@ -0,0 +1,16 @@
{
"input_length" : 25,
"output_length" : 25,
"dataset" : "0801-0901_changed",
"dataset_version" : "full",
"train_batch_size" : 32,
"model" : "gpt2-large",
"method": "lora",
"ngpu" : 1,
"num_workers" : 40,
"CUDA_VISIBLE_DEVICES" : "2",
"check_validation" : true,
"mode" : "evaluate_ppl",
"checkpoint_path" : "outputs/09/GPT2_lora",
"output_log": "log/GPT2/lora/0801-0901_changed_ppl.csv"
}
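
All of the evaluation configs added below share this shape. A minimal sketch of how such a file could be read into an argparse-style namespace, assuming the JSON keys map one-to-one onto args attributes (the loader name is hypothetical; the repository's actual entry point may differ):

import json
from argparse import Namespace

def load_eval_config(path):
    # Read one configs/evaluation/*.json file and expose its keys as
    # attributes, e.g. args.dataset, args.mode, args.checkpoint_path.
    with open(path) as f:
        return Namespace(**json.load(f))

# Hypothetical usage:
# args = load_eval_config('configs/evaluation/GPT2/lora/0801-0901_changed.json')
# args.mode              -> 'evaluate_ppl'
# args.train_batch_size  -> 32
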
16 changes: 16 additions & 0 deletions configs/evaluation/GPT2/lora/0801-0901_ppl_full.json
@@ -0,0 +1,16 @@
{
"input_length" : 512,
"output_length" : 512,
"dataset" : "wikipedia_09_gpt2",
"dataset_version" : "full",
"train_batch_size" : 4,
"model" : "gpt2-large",
"method": "lora",
"ngpu" : 1,
"num_workers" : 40,
"CUDA_VISIBLE_DEVICES" : "2",
"check_validation" : true,
"mode" : "evaluate_ppl_corpus",
"checkpoint_path" : "outputs/09/GPT2_lora",
"output_log": "log/GPT2/lora/0801-0901_ppl_full.csv"
}
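
The "evaluate_ppl_corpus" mode reports perplexity over a full corpus slice. For reference, a minimal sketch of the standard computation (the exponential of the mean token negative log-likelihood); this is a standalone helper, not the repository's evaluation loop:

import math

def corpus_perplexity(total_nll, total_tokens):
    # Standard definition: PPL = exp(sum of token NLLs / number of tokens).
    return math.exp(total_nll / total_tokens)

# Example: a summed NLL of 69315.0 over 20000 tokens gives
# exp(3.46575) ≈ 32.0.
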
16 changes: 16 additions & 0 deletions configs/evaluation/GPT2/lora/0801-0901_ppl_subset.json
@@ -0,0 +1,16 @@
{
"input_length" : 512,
"output_length" : 512,
"dataset" : "wikipedia_0809_gpt2",
"dataset_version" : "full",
"train_batch_size" : 4,
"model" : "gpt2-large",
"method": "lora",
"ngpu" : 1,
"num_workers" : 40,
"CUDA_VISIBLE_DEVICES" : "2",
"check_validation" : true,
"mode" : "evaluate_ppl_corpus",
"checkpoint_path" : "outputs/09/GPT2_lora",
"output_log": "log/GPT2/lora/0801-0901_ppl_subset.csv"
}
16 changes: 16 additions & 0 deletions configs/evaluation/GPT2/lora/0801-0901_unchanged.json
@@ -0,0 +1,16 @@
{
"input_length" : 25,
"output_length" : 25,
"dataset" : "0801-0901_unchanged",
"dataset_version" : "full",
"train_batch_size" : 32,
"model" : "gpt2-large",
"method": "lora",
"ngpu" : 1,
"num_workers" : 40,
"CUDA_VISIBLE_DEVICES" : "2",
"check_validation" : true,
"mode" : "evaluate_ppl",
"checkpoint_path" : "outputs/09/GPT2_lora",
"output_log": "log/GPT2/lora/0801-0901_unchanged_ppl.csv"
}
16 changes: 16 additions & 0 deletions configs/evaluation/GPT2/lora/0901-1001_changed.json
@@ -0,0 +1,16 @@
{
"input_length" : 25,
"output_length" : 25,
"dataset" : "0901-1001_changed",
"dataset_version" : "full",
"train_batch_size" : 32,
"model" : "gpt2-large",
"method": "lora",
"ngpu" : 1,
"num_workers" : 40,
"CUDA_VISIBLE_DEVICES" : "2",
"check_validation" : true,
"mode" : "evaluate_ppl",
"checkpoint_path" : "outputs/10/GPT2_lora",
"output_log": "log/GPT2/lora/0901-1001_changed_ppl.csv"
}
16 changes: 16 additions & 0 deletions configs/evaluation/GPT2/lora/0901-1001_ppl_full.json
@@ -0,0 +1,16 @@
{
"input_length" : 512,
"output_length" : 512,
"dataset" : "wikipedia_10_gpt2",
"dataset_version" : "full",
"train_batch_size" : 4,
"model" : "gpt2-large",
"method": "lora",
"ngpu" : 1,
"num_workers" : 40,
"CUDA_VISIBLE_DEVICES" : "2",
"check_validation" : true,
"mode" : "evaluate_ppl_corpus",
"checkpoint_path" : "outputs/10/GPT2_lora",
"output_log": "log/GPT2/lora/0901-1001_ppl_full.csv"
}
16 changes: 16 additions & 0 deletions configs/evaluation/GPT2/lora/0901-1001_ppl_subset.json
@@ -0,0 +1,16 @@
{
"input_length" : 512,
"output_length" : 512,
"dataset" : "wikipedia_0910_gpt2",
"dataset_version" : "full",
"train_batch_size" : 4,
"model" : "gpt2-large",
"method": "lora",
"ngpu" : 1,
"num_workers" : 40,
"CUDA_VISIBLE_DEVICES" : "2",
"check_validation" : true,
"mode" : "evaluate_ppl_corpus",
"checkpoint_path" : "outputs/10/GPT2_lora",
"output_log": "log/GPT2/lora/0901-1001_ppl_subset.csv"
}
16 changes: 16 additions & 0 deletions configs/evaluation/GPT2/lora/0901-1001_unchanged.json
@@ -0,0 +1,16 @@
{
"input_length" : 25,
"output_length" : 25,
"dataset" : "0901-1001_unchanged",
"dataset_version" : "full",
"train_batch_size" : 32,
"model" : "gpt2-large",
"method": "lora",
"ngpu" : 1,
"num_workers" : 40,
"CUDA_VISIBLE_DEVICES" : "2",
"check_validation" : true,
"mode" : "evaluate_ppl",
"checkpoint_path" : "outputs/10/GPT2_lora",
"output_log": "log/GPT2/lora/0901-1001_unchanged_ppl.csv"
}
16 changes: 16 additions & 0 deletions configs/evaluation/GPT2/lora/1001-1101_changed.json
@@ -0,0 +1,16 @@
{
"input_length" : 25,
"output_length" : 25,
"dataset" : "1001-1101_changed",
"dataset_version" : "full",
"train_batch_size" : 32,
"model" : "gpt2-large",
"method": "lora",
"ngpu" : 1,
"num_workers" : 40,
"CUDA_VISIBLE_DEVICES" : "2",
"check_validation" : true,
"mode" : "evaluate_ppl",
"checkpoint_path" : "outputs/11/GPT2_lora",
"output_log": "log/GPT2/lora/1001-1101_changed_ppl.csv"
}
16 changes: 16 additions & 0 deletions configs/evaluation/GPT2/lora/1001-1101_ppl_full.json
@@ -0,0 +1,16 @@
{
"input_length" : 512,
"output_length" : 512,
"dataset" : "wikipedia_11_gpt2",
"dataset_version" : "full",
"train_batch_size" : 4,
"model" : "gpt2-large",
"method": "lora",
"ngpu" : 1,
"num_workers" : 40,
"CUDA_VISIBLE_DEVICES" : "2",
"check_validation" : true,
"mode" : "evaluate_ppl_corpus",
"checkpoint_path" : "outputs/11/GPT2_lora",
"output_log": "log/GPT2/lora/1001-1101_ppl_full.csv"
}
16 changes: 16 additions & 0 deletions configs/evaluation/GPT2/lora/1001-1101_ppl_subset.json
@@ -0,0 +1,16 @@
{
"input_length" : 512,
"output_length" : 512,
"dataset" : "wikipedia_1011_gpt2",
"dataset_version" : "full",
"train_batch_size" : 4,
"model" : "gpt2-large",
"method": "lora",
"ngpu" : 1,
"num_workers" : 40,
"CUDA_VISIBLE_DEVICES" : "2",
"check_validation" : true,
"mode" : "evaluate_ppl_corpus",
"checkpoint_path" : "outputs/11/GPT2_lora",
"output_log": "log/GPT2/lora/1001-1101_ppl_subset.csv"
}
16 changes: 16 additions & 0 deletions configs/evaluation/GPT2/lora/1001-1101_unchanged.json
@@ -0,0 +1,16 @@
{
"input_length" : 25,
"output_length" : 25,
"dataset" : "1001-1101_unchanged",
"dataset_version" : "full",
"train_batch_size" : 32,
"model" : "gpt2-large",
"method": "lora",
"ngpu" : 1,
"num_workers" : 40,
"CUDA_VISIBLE_DEVICES" : "2",
"check_validation" : true,
"mode" : "evaluate_ppl",
"checkpoint_path" : "outputs/11/GPT2_lora",
"output_log": "log/GPT2/lora/1001-1101_unchanged_ppl.csv"
}
16 changes: 16 additions & 0 deletions configs/evaluation/GPT2/lora/1101-1201_changed.json
@@ -0,0 +1,16 @@
{
"input_length" : 25,
"output_length" : 25,
"dataset" : "1101-1201_changed",
"dataset_version" : "full",
"train_batch_size" : 32,
"model" : "gpt2-large",
"method": "lora",
"ngpu" : 1,
"num_workers" : 40,
"CUDA_VISIBLE_DEVICES" : "2",
"check_validation" : true,
"mode" : "evaluate_ppl",
"checkpoint_path" : "outputs/12/GPT2_lora",
"output_log": "log/GPT2/lora/1101-1201_changed_ppl.csv"
}
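
Every config in this commit sets "method": "lora". As a reference for the method name only, a minimal sketch of a LoRA-style linear layer (a frozen base weight plus a trainable low-rank update scaled by alpha/r); this is an illustration, not the adapter implementation used in this repository:

import torch
import torch.nn as nn

class LoRALinear(nn.Module):
    # y = base(x) + (x @ A^T @ B^T) * (alpha / r), with the base layer frozen.
    def __init__(self, in_features, out_features, r=4, alpha=32):
        super().__init__()
        self.base = nn.Linear(in_features, out_features)
        for p in self.base.parameters():
            p.requires_grad_(False)  # pretrained weights stay frozen
        self.lora_A = nn.Parameter(torch.randn(r, in_features) * 0.01)
        self.lora_B = nn.Parameter(torch.zeros(out_features, r))  # zero-init: update starts at zero
        self.scaling = alpha / r

    def forward(self, x):
        return self.base(x) + (x @ self.lora_A.T @ self.lora_B.T) * self.scaling
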