-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path predict.py
69 lines (50 loc) · 2.25 KB
/
predict.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import argparse
import os
from transformers import *
import torch
from model import LangModelWithDense
from loader import load_data
if __name__ == "__main__":
    # Run every trained model found under `ensemble_path` over the input file
    # and write one prediction column per model plus a majority-vote
    # "ensemble" column to `output_path`.
    parser = argparse.ArgumentParser(
        description="Predict labels with an ensemble of fine-tuned models "
                    "and combine them by majority vote."
    )
    parser.add_argument("input_path",
                        help="Path to the input data file to predict on.")
    parser.add_argument("ensemble_path",
                        help="Directory with one sub-directory per trained model "
                             "(each containing a model.pt checkpoint).")
    parser.add_argument("--output_path", default="predictions.txt",
                        help="File the prediction table is written to.")
    args = parser.parse_args()

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print("Using device: {}\n".format(device))

    # Load each ensemble member together with its tokenizer.
    models = []
    for model_path in os.listdir(args.ensemble_path):
        print("Loading model from: {}".format(os.path.join(args.ensemble_path, model_path)))
        model_name_path = os.path.join(args.ensemble_path, model_path)
        # scibert checkpoints here do not ship tokenizer files, so fall back to
        # the bert-base-uncased tokenizer for them.
        # NOTE(review): confirm this matches the tokenizer used at training time.
        tokenizer = AutoTokenizer.from_pretrained(
            model_name_path if "scibert" not in model_name_path else "bert-base-uncased"
        )
        model = torch.load(os.path.join(model_name_path, "model.pt"), map_location=device)
        model.fine_tune = False  # freeze the language-model backbone for inference
        model.eval()
        models.append((model, tokenizer, model_path))

    # Collect binary predictions from each model independently.
    predictions = {}
    for model, tokenizer, model_path in models:
        print("Predicting labels for model: {}...".format(model_path))
        test_loader = load_data(args.input_path, tokenizer, device)
        y_pred = []
        # Inference only: disable autograd so no gradient graph is built
        # (the original code tracked gradients needlessly).
        with torch.no_grad():
            for test_x, mask in test_loader:
                # Call the module (not .forward) so nn.Module hooks still run.
                outputs = torch.sigmoid(model(test_x, mask).reshape(-1))
                # Threshold the sigmoid probabilities at 0.5.
                y_pred.extend(0 if output < 0.5 else 1 for output in outputs)
        predictions[model_path] = y_pred

    # Write the table: one column per model, then the majority-vote column.
    num_examples = len(next(iter(predictions.values())))
    model_paths = list(predictions.keys())
    with open(args.output_path, "w") as out_file:
        # Header row.
        for model_path in model_paths:
            out_file.write("{:<15} ".format(model_path))
        out_file.write("{:<15} \n".format("ensemble"))
        # One row per example.
        for i in range(num_examples):
            label_sum = 0
            for model_path in model_paths:
                out_file.write("{:<15} ".format(predictions[model_path][i]))
                label_sum += predictions[model_path][i]
            # Majority vote: label 1 only when strictly more than half of the
            # models predicted 1.
            if label_sum > len(models) / 2:
                out_file.write("{:<15} ".format(1))
            else:
                out_file.write("{:<15} ".format(0))
            out_file.write("\n")