-
Notifications
You must be signed in to change notification settings - Fork 0
/
eval_clm.py
80 lines (67 loc) · 3.03 KB
/
eval_clm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import argparse
import json
import time
from collections import defaultdict
from pathlib import Path
from metrics import avgGF, gender_polarity, honest, regard, toxicity
def evaluate_generations(dataset, outputpath, glove_model=None):
    """Score a set of model completions on a battery of bias/toxicity metrics
    and append a grouped, human-readable report to *outputpath*.

    Args:
        dataset: Mapping with keys "completions", "completions_split",
            "sensitives", and "category" (schemas defined by the project's
            ``metrics`` module — assumed, confirm against the data producer).
        outputpath: Path of the log file; results are appended, not overwritten.
        glove_model: Optional pre-loaded GloVe embedding model required by the
            gender-polarity metric. When ``None`` (the default), that metric
            is skipped instead of crashing.

    Side effects: prints a summary to stdout and appends the per-group scores
    plus the wall-clock evaluation time to *outputpath*.
    """
    start_time = time.time()

    completions = dataset["completions"]
    completions_split = dataset["completions_split"]
    sensitives = dataset["sensitives"]
    category = dataset["category"]

    # Each metric returns a nested mapping: {domain: {group: score}}.
    scores = {}
    scores["toxicity"] = toxicity(completions, sensitives, category)
    scores["regard"] = regard(completions, sensitives, category)
    scores["honest"] = honest(completions_split, sensitives, category)
    if glove_model is not None:
        # gender_polarity needs word embeddings; skip gracefully when absent.
        scores["gender_polarity"] = gender_polarity(
            completions, sensitives, category, glove_model)
    else:
        print("Warning: no GloVe model supplied; skipping gender_polarity.")
    scores["avgGF"] = avgGF(completions, sensitives, category)

    end_time = time.time()

    print("=" * 100)
    print("Evaluation Scores:")
    print("=" * 100)
    for metric, score in scores.items():
        print(f"{metric.capitalize()} Score: {score}")
    print("=" * 100)

    # Pivot {metric: {domain: {group: score}}} into
    # {domain: {group: {metric: score}}} for the per-group report.
    # (Loop variables renamed so they no longer shadow `category` above.)
    grouped_scores = defaultdict(lambda: defaultdict(dict))
    for metric, domains in scores.items():
        for domain_name, groups in domains.items():
            for group, score in groups.items():
                grouped_scores[domain_name][group][metric] = score

    with open(outputpath, 'a', encoding='utf-8') as f:
        f.write("=" * 100 + "\n")
        f.write(f"Time: {time.strftime('%Y-%m-%d %H:%M:%S')}\n")
        f.write("-" * 100 + "\n")
        for domain_name, groups in grouped_scores.items():
            f.write(f"{domain_name.capitalize()} Scores:\n")
            for group, metric_scores in groups.items():
                f.write(f"  {group}:\n")
                for metric, score in metric_scores.items():
                    f.write(f"    {metric}: {score}\n")
            f.write("-" * 100 + "\n")
        execution_time = end_time - start_time
        f.write(f"Function execution time: {execution_time} seconds\n")
        f.write("=" * 100 + "\n")

    print(f"Scores saved to {Path(outputpath).absolute()}")
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Script to process model and dataset')
    parser.add_argument('--path',
                        type=str,
                        required=True,
                        help='Path to completions file (a jsonl file)')
    parser.add_argument('--type',
                        type=str,
                        required=False,
                        help='Optional label appended to the output file name')
    args = parser.parse_args()

    with open(args.path, 'r', encoding='utf-8') as f:
        completions = json.load(f)

    # outputs/metrics/ next to this script; parents=True creates both levels.
    output_file_path = Path(__file__).resolve().parent / 'outputs' / 'metrics'
    output_file_path.mkdir(parents=True, exist_ok=True)

    # Strip the '.json'/'.jsonl' extension; find() returns -1 when absent,
    # which would otherwise silently chop the last character of the name.
    basename = Path(args.path).name
    ext_pos = basename.find('.json')
    stem = basename[:ext_pos] if ext_pos != -1 else basename
    filename = output_file_path / f"eval_output_{stem}_{args.type}.log"

    # The evaluator's third parameter is the GloVe model for gender_polarity;
    # this script does not load one, so pass None explicitly (the original
    # two-argument call raised TypeError on every run).
    evaluate_generations(completions, filename, None)