Skip to content

Commit

Permalink
dataset & config update
Browse files Browse the repository at this point in the history
  • Loading branch information
seonghyeonye committed Mar 14, 2022
1 parent 965b66a commit ea5dc4e
Show file tree
Hide file tree
Showing 74 changed files with 92 additions and 1,064 deletions.
38 changes: 14 additions & 24 deletions Datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,11 @@ def __init__(self, tokenizer, type_path, input_length, output_length, args, leng
# dataset for continual training
if self.type_path=='train':
if self.args.mode == 'finetune':
self.dataset = pd.read_csv('data/TWiki_Probes/lighttuning/'+self.args.dataset+'.csv')
if 'unchanged' in self.args.dataset:
self.dataset = pd.read_csv('data/evaluation/lighttuning/lighttuning_unchanged_500.csv')
else:
self.dataset = pd.read_csv('data/evaluation/lighttuning/lighttuning_changed_500.csv')
# self.dataset = pd.read_csv('data/TWiki_Probes/lighttuning/'+self.args.dataset+'.csv')
elif self.args.dataset=='wikipedia_0809':
self.dataset = pd.read_csv('data/TWiki_Diffsets/wikipedia_0809_subset.csv')
elif self.args.dataset=='wikipedia_0809_gpt2':
Expand Down Expand Up @@ -48,8 +52,10 @@ def __init__(self, tokenizer, type_path, input_length, output_length, args, leng
if self.args.dataset == 'IL':
self.dataset = pd.read_csv('data/IL.csv')
else:
self.dataset = pd.read_csv('data/aligned/'+ self.args.dataset + '.csv')
self.dataset = pd.read_csv('data/evaluation/final/'+ self.args.dataset + '.csv')
# validation dataset
elif self.args.mode == 'finetune':
self.dataset = pd.read_csv('data/evaluation/final/'+ self.args.dataset + '.csv')
elif self.args.dataset=='IL':
self.dataset = pd.read_csv('data/TWiki_Probes/IL.csv')
elif self.args.dataset=='data/wikipedia_09' or self.args.dataset=='wikipedia_0809' or self.args.dataset=='data/wikipedia_09_gpt2' or self.args.dataset=='wikipedia_0809_gpt2':
Expand Down Expand Up @@ -98,7 +104,7 @@ def convert_to_features(self, example_batch, index=None):
# continual pretraining
input_nonprompt = None
label_ = None
if self.type_path=='validation' and ('gpt2' in self.args.model_name_or_path):
if self.type_path=='validation':
if self.args.mode == 'evaluate_ppl_corpus':
input_ = example_batch['text']
target_ = example_batch['text']
Expand All @@ -113,20 +119,15 @@ def convert_to_features(self, example_batch, index=None):
elif self.args.mode == 'evaluate':
input_ = s + ' ' + r
target_ = o
elif self.args.mode == 'finetune':
label_ = s + ' ' + r + ' ' + o
input_ = s + ' ' + r
target_ = o
else:
target_ = s + ' ' + r + ' ' + o
input_ = s + ' ' + r + ' ' + o
input_nonprompt = ' ' + o
elif self.type_path=='validation' and ('t5' in self.args.model_name_or_path):
if self.args.mode == 'evaluate_ppl_corpus':
input_ = example_batch['input']
target_ = example_batch['output']
else:
s = example_batch['subject']
r = example_batch['relation']
input_ = s + ' ' + r + ' <extra_id_0> .'
target_ = example_batch['objective']
elif 'gpt2' in self.args.model_name_or_path:
else:
if self.args.mode == 'finetune':
s = example_batch['subject']
r = example_batch['relation']
Expand All @@ -137,17 +138,6 @@ def convert_to_features(self, example_batch, index=None):
else:
input_ = example_batch['text']
target_ = example_batch['text']
elif 't5' in self.args.model_name_or_path:
if self.args.mode == 'finetune':
s = example_batch['subject']
r = example_batch['relation']
input_ = s + ' ' + r + ' <extra_id_0> .'
target_ = example_batch['objective']
else:
input_ = example_batch['input']
target_ = example_batch['output']
else:
raise Exception('Model should either T5 or GPT2.')
source = self.tokenizer.batch_encode_plus([str(input_)], max_length=self.input_length,
padding='max_length', truncation=True, return_tensors="pt")
targets = self.tokenizer.batch_encode_plus([str(target_)], max_length=self.output_length,
Expand Down
75 changes: 0 additions & 75 deletions analyze_predictions.py

This file was deleted.

16 changes: 0 additions & 16 deletions configs/evaluation/GPT2/brute_force/0801-0901_IL.json

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"input_length" : 25,
"output_length" : 25,
"dataset" : "0801-0901_new",
"dataset" : "0801-0901_changed",
"dataset_version" : "full",
"train_batch_size" : 32,
"model" : "gpt2-large",
Expand Down
16 changes: 0 additions & 16 deletions configs/evaluation/GPT2/brute_force/0801-0901_updated.json

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"input_length" : 25,
"output_length" : 25,
"dataset" : "IL",
"dataset" : "0901-1001_changed",
"dataset_version" : "full",
"train_batch_size" : 32,
"model" : "gpt2-large",
Expand All @@ -12,5 +12,5 @@
"check_validation" : true,
"mode" : "evaluate_ppl",
"checkpoint_path" : "outputs/10/GPT2_full",
"output_log": "log/GPT2/brute_force/0901-1001_IL_ppl.csv"
"output_log": "log/GPT2/brute_force/0901-1001_changed_ppl.csv"
}
16 changes: 0 additions & 16 deletions configs/evaluation/GPT2/brute_force/0901-1001_new.json

This file was deleted.

16 changes: 0 additions & 16 deletions configs/evaluation/GPT2/brute_force/0901-1001_updated.json

This file was deleted.

16 changes: 0 additions & 16 deletions configs/evaluation/GPT2/brute_force/1001-1101_IL.json

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"input_length" : 25,
"output_length" : 25,
"dataset" : "1001-1101_updated",
"dataset" : "1001-1101_changed",
"dataset_version" : "full",
"train_batch_size" : 32,
"model" : "gpt2-large",
Expand All @@ -12,5 +12,5 @@
"check_validation" : true,
"mode" : "evaluate_ppl",
"checkpoint_path" : "outputs/11/GPT2_full",
"output_log": "log/GPT2/brute_force/1001-1101_updated_ppl.csv"
"output_log": "log/GPT2/brute_force/1001-1101_changed_ppl.csv"
}
16 changes: 0 additions & 16 deletions configs/evaluation/GPT2/brute_force/1001-1101_new.json

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"input_length" : 25,
"output_length" : 25,
"dataset" : "1101-1201_updated",
"dataset" : "1101-1201_changed",
"dataset_version" : "full",
"train_batch_size" : 32,
"model" : "gpt2-large",
Expand All @@ -12,5 +12,5 @@
"check_validation" : true,
"mode" : "evaluate_ppl",
"checkpoint_path" : "outputs/12/GPT2_full",
"output_log": "log/GPT2/brute_force/1101-1201_updated_ppl.csv"
"output_log": "log/GPT2/brute_force/1101-1201_changed_ppl.csv"
}
16 changes: 0 additions & 16 deletions configs/evaluation/GPT2/brute_force/1101-1201_new.json

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"input_length" : 25,
"output_length" : 25,
"dataset" : "IL",
"dataset" : "0801-0901_changed",
"dataset_version" : "full",
"train_batch_size" : 32,
"model" : "gpt2-large",
Expand All @@ -12,5 +12,5 @@
"check_validation" : true,
"mode" : "evaluate_ppl",
"checkpoint_path" : "outputs/09/GPT2_diffcl",
"output_log": "log/GPT2/ckl/0801-0901_IL_ppl.csv"
"output_log": "log/GPT2/ckl/0801-0901_changed_ppl.csv"
}
16 changes: 0 additions & 16 deletions configs/evaluation/GPT2/ckl/0801-0901_new.json

This file was deleted.

16 changes: 0 additions & 16 deletions configs/evaluation/GPT2/ckl/0801-0901_updated.json

This file was deleted.

Loading

0 comments on commit ea5dc4e

Please sign in to comment.