inference.py
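"""Run inference with a fine-tuned TransformerClassifier checkpoint.

Loads ./config.yaml, restores the checkpoint named in config["model"]["ckpt"],
evaluates the split selected by config["inference"]["mode"], prints a
classification report (OFF vs. NOT), and writes per-example probabilities,
predictions, and gold labels to <model_dir>/_<mode>.csv.
"""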
import os
import sys
import yaml
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.metrics import classification_report
import torch
import torch.nn as nn
from iteration import step
from utils.infereloader import get_testloader
from utils.load_checkpoint import load_checkpoint
from transformer.model import TransformerClassifier

# Load the run configuration.
with open("./config.yaml") as file:
    config = yaml.safe_load(file)

# A trained checkpoint is required for inference.
if not config["model"]["ckpt"]:
    print("Checkpoint is needed!!!")
    sys.exit(1)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model_dir = os.path.join(
    config["model"]["model_loc"],
    config["dataset"]["file_name"],
    config["model"]["model"].split("/")[-1],
)

_model = TransformerClassifier(
    config["model"]["model"],
    hidden_states=config["hyperparameters"]["hidden_layers"],
    dropout=config["hyperparameters"]["dropout"],
).to(device)
print(config["model"]["ckpt"])
_model, _, _, best_weighted_f1, _ = load_checkpoint(
    config["model"]["ckpt"],
    config["model"]["model"],
    model_dir,
    device,
    _model,
    None,
    None,
)
criterion = nn.CrossEntropyLoss()
print(f"Model with weighted F1 = {best_weighted_f1} is loaded!!!")

test_loader = get_testloader(
    config["dataset"]["data_dir"],
    config["dataset"]["file_name"],
    config["model"]["model"],
    config["inference"]["mode"],
    config["inference"]["batch_size"],
    config["hyperparameters"]["max_len"],
)

_ids, _probs, _labels, _true = [], [], [], []
test_loss, test_acc = [], []
_model.eval()
y_preds, y_test = np.array([]), np.array([])

with torch.set_grad_enabled(False):
    with tqdm(test_loader, desc="") as vepoch:
        vepoch.set_postfix(loss=0.0, acc=0.0)
        for batch_idx, batch in enumerate(vepoch):
            # One evaluation step: loss/accuracy details, predictions,
            # gold labels, and class probabilities for the batch.
            details, ypred, ytrue, probabilities, _ = step(
                _model, batch, criterion, device
            )
            ytrue = ytrue.cpu().numpy()
            ypred = ypred.cpu().numpy()
            # Keep per-example ids, probabilities, predictions, and gold labels.
            for i, j, k, l in zip(
                batch["id"], probabilities.cpu().numpy(), ypred, ytrue
            ):
                _ids.append(i)
                _probs.append(j)
                _labels.append(k)
                _true.append(l)
            y_preds = np.hstack((y_preds, ypred))
            y_test = np.hstack((y_test, ytrue))
            test_loss.append(details["loss"].item())
            test_acc.append(details["accuracy"].item())
            vepoch.set_postfix(
                loss=details["loss"].item(), acc=np.array(test_acc).mean()
            )

print(
    classification_report(
        y_test,
        y_preds,
        target_names=["OFF", "NOT"],
    )
)
print(f"{config['inference']['mode']} data processed")

# Persist per-example predictions next to the checkpoint directory.
df = pd.DataFrame({"ids": _ids, "probability": _probs, "pred": _labels, "true": _true})
df.to_csv(f"{model_dir}/_{config['inference']['mode']}.csv", index=False)
print(f"Results stored in {model_dir}/_{config['inference']['mode']}.csv")