-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconvert_data_into_split_sentences.py
27 lines (20 loc) · 1.21 KB
/
convert_data_into_split_sentences.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import pandas as pd
import sys
# Run pretrained T5 sentence-splitting checkpoint(s) over the "input_text"
# column of a test CSV and write each input followed by its prediction to a
# text file.
#
# Usage: convert_data_into_split_sentences.py <dataset> <eval_task> <sentence|phrase> <run>
#
# NOTE(review): T5ForConditionalGeneration, T5Tokenizer, device, model_params,
# get_data_loaders and test are used below but are not imported or defined in
# the visible part of this file; they must be brought into scope before this
# script can run.
dataset = sys.argv[1]
eval_task = sys.argv[2]
# Any third argument other than 'sentence' selects the "_phrase" test file.
phr_sen = "" if sys.argv[3] == 'sentence' else '_phrase'
# Read so the script keeps requiring four arguments; nothing below uses it.
run = sys.argv[4]
# NOTE(review): the output filename interpolates `test_dataset`, which was
# never defined (NameError at write time). Aliasing it to `dataset` is the
# closest match by name -- confirm this is the intended value.
test_dataset = dataset

df = pd.read_csv(f'data/{dataset}/test_{eval_task}{phr_sen}.csv')
test_sents = df["input_text"].values.tolist()

user_inp = input("pretrained model dataset: Enter '0' for MinWiki, '1' for DeSSE, or any number for both\n").strip()
# input() returns a str, so the choice must be compared against "1", not the
# int 1 (the int comparison made the DeSSE-only option unreachable).
if user_inp == "0":
    model_datasets = ["MinWiki"]
elif user_inp == "1":
    model_datasets = ["DeSSE"]
else:
    model_datasets = ["MinWiki", "DeSSE"]

# The targets are unknown at inference time, so the target column is blank.
test_df = pd.DataFrame({'Text': test_sents, 'Split_Sentences': [''] * len(test_sents)})
# Presumably a tiny train fraction so that nearly every row ends up in the
# test loader -- confirm against get_data_loaders.
model_params["TRAIN_SIZE"] = 0.001

# "Both" previously formatted a Python list into the checkpoint path; run each
# selected checkpoint separately and write one output file per checkpoint.
# NOTE(review): assumes "both" means "run each model in turn" -- confirm.
for model_dataset in model_datasets:
    model_dir = f"../ABSA_Datasets/Split_Sentences/model_files/{model_dataset}"
    model = T5ForConditionalGeneration.from_pretrained(model_dir)
    model = model.to(device)
    tokenizer = T5Tokenizer.from_pretrained(model_dir)

    _, test_loader = get_data_loaders(test_df, "Text", "Split_Sentences", tokenizer, model_params)
    predictions, actuals, input_texts = test(0, tokenizer, model, device, test_loader)

    # One record per example: the input line, the prediction line, a blank line.
    out = "\n".join([f"{inp}\n{pr}\n" for inp, pr in zip(input_texts, predictions)])
    with open(f"Split_Sentences/outputs/T5_pretrained_{model_dataset}_test_MAMS_{test_dataset}_output.txt", "w+") as f:
        f.write(out)