-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathsummarize_sa_stats.py
executable file
·141 lines (123 loc) · 4.61 KB
/
summarize_sa_stats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
#!/usr/bin/env python3
import os
import re
import sys
from collections import defaultdict
from enum import Enum
from math import floor, log10
from typing import Sequence
def dice_coefficient(a: Sequence, b: Sequence) -> float:
    """Return the Sørensen–Dice similarity of *a* and *b* over their bigrams.

    Both sequences are cut into overlapping windows of two elements; the
    result is twice the number of windows the two share (counted with
    multiplicity) divided by the total number of windows.

    Returns 0.0 when either input is empty, 1.0 when the inputs are equal,
    and 0.0 when they differ and one of them has a single element.
    """
    # Nothing to compare against.
    if not a or not b:
        return 0.0

    # Identical inputs need no bigram work.
    if a == b:
        return 1.0

    # A one-element sequence has no bigram, so unequal inputs cannot overlap.
    if 1 in (len(a), len(b)):
        return 0.0

    left = sorted(a[k:k + 2] for k in range(len(a) - 1))
    right = sorted(b[k:k + 2] for k in range(len(b) - 1))
    left_count = len(left)
    right_count = len(right)

    # Walk both sorted lists in lockstep, counting the pairs that line up.
    shared = 0
    li = ri = 0
    while li < left_count and ri < right_count:
        if left[li] == right[ri]:
            shared += 1
            li += 1
            ri += 1
        elif left[li] < right[ri]:
            li += 1
        else:
            ri += 1

    return 2 * shared / (left_count + right_count)
class StatType(Enum):
    """Kinds of statistics, each valued by the token that identifies it.

    The value is the word expected right after "The" in a statistic's
    description, and is what the statistic-line pattern alternates over.
    """

    # Plain counter.
    NUM = '#'
    # Percentage.
    PER = '%'
    # Maximum value.
    MAX = 'maximum'
def summ_stats(path: str, verbose: bool = True) -> dict:
    """Collect and optionally print the statistics found under *path*.

    Args:
        path: A statistics file, or a directory whose regular files are all
            treated as statistics files. Any other path yields an empty
            result.
        verbose: When true, print one line per statistic to stdout, ordered
            by the group (the text preceding " - " on the statistic's line)
            and formatted as ``<value> - <description>`` with the values
            padded to a common column width.

    Returns:
        A mapping from statistic description to its aggregated value. It may
        also hold non-numeric entries added by ``summ_stats_on_file`` (the
        ``"TU times"`` list); those are ignored when printing.
    """
    stat_map = defaultdict(int)
    per_helper = defaultdict(int)
    group = {}

    if os.path.isdir(path):
        # Sorted so that accumulation order (and thus the result) does not
        # depend on the file system; sub-directories and other non-regular
        # entries are skipped because they cannot be read as stat files.
        for entry in sorted(os.listdir(path)):
            full_path = os.path.join(path, entry)
            if os.path.isfile(full_path):
                summ_stats_on_file(full_path, stat_map, per_helper, group)
    elif os.path.isfile(path):
        summ_stats_on_file(path, stat_map, per_helper, group)
    else:
        return stat_map

    if verbose:
        # Column width: digit count of the largest numeric value plus one
        # separating space. Non-numeric entries must not take part in max(),
        # and an empty map falls back to a single digit.
        numeric = [v for v in stat_map.values()
                   if isinstance(v, (int, float))]
        width = len(str(int(max(numeric, default=0)))) + 1

        for key in sorted(group, key=lambda name: group[name]):
            # .get() avoids inserting placeholder zeros into the result for
            # statistics that were seen but never assigned a value.
            val = stat_map.get(key, 0)
            if isinstance(val, float):
                text = "{0:.3f}".format(val)
            else:
                text = str(val)
            # ljust never truncates, so over-long values simply lose their
            # padding instead of raising.
            print(text.ljust(width) + '- ' + key)
    return stat_map
def summ_stats_on_file(filename: str, stat_map: dict, per_helper: dict,
                       group: dict) -> None:
    """Parse one statistics file and fold its numbers into the accumulators.

    A statistic line has the form ``<number> <group> - The <type> ...`` where
    ``<type>`` is one of the ``StatType`` values:

    * ``#`` values are summed into ``stat_map``.
    * ``maximum`` values are reduced with ``max`` into ``stat_map``.
    * ``%`` values are held back until the current run of consecutive
      statistic lines (a block) ends. Each is then weighted by the ``#``
      statistic of the same block whose description is most similar
      according to ``dice_coefficient``, and ``stat_map`` receives the
      weighted sum divided by the accumulated ``#`` total.

    Lines matching the "analyzer total time" timer pattern append a float to
    the list stored under ``stat_map["TU times"]``.

    Args:
        filename: Path of the file to read.
        stat_map: In/out accumulator from statistic description to value.
            Must accept ``+=`` on unseen keys (e.g. ``defaultdict(int)``).
        per_helper: In/out accumulator of weighted percentage sums, keyed by
            the description of the matched ``#`` statistic.
        group: In/out mapping from statistic description to the group text
            that precedes " - " on its line.

    Raises:
        OSError: If the file cannot be opened.
        AssertionError: If a ``%`` statistic is matched to different ``#``
            statistics in different blocks of the same file.
    """
    type_pattern = '|'.join(t.value for t in StatType)
    stat_pattern = re.compile(
        r"([0-9]+(?:\.[0-9]+)?) (.+) - (The (" + type_pattern + r") .+)")
    timer_pattern = re.compile(
        r".+\(.+\).+\(.+\).+\(.+\)(.+)\(.+\).+analyzer total time",
        re.IGNORECASE)

    act_nums = {}        # '#' values of the block currently being read.
    per_to_num_map = {}  # '%' description -> matched '#' description.
    per_to_update = {}   # '%' values of the current block, still unresolved.

    def flush_block() -> None:
        # Resolve the pending '%' stats of the block that just ended.
        # Without any '#' stat in the block there is nothing to weight by,
        # so the percentages of such a block are dropped.
        if act_nums:
            for key, val in per_to_update.items():
                # Find the most similar # stat.
                num_data = max(
                    act_nums,
                    key=lambda name: dice_coefficient(name, key))
                # NOTE(review): the accumulator is keyed by the '#' stat, so
                # two '%' stats matching the same '#' stat share one sum.
                per_helper[num_data] += int(act_nums[num_data] * float(val))
                # Check for consistency.
                assert not (
                    key in per_to_num_map
                    and per_to_num_map[key] != num_data), (
                    "inconsistent # statistic matched for: " + key)
                per_to_num_map[key] = num_data
                total = stat_map[num_data]
                # A zero total means every weight so far was zero.
                stat_map[key] = (
                    floor(per_helper[num_data]) / total if total else 0.0)
        # Both are per-block state; stale percentages must not leak into the
        # next block.
        per_to_update.clear()
        act_nums.clear()

    is_in_stat_block = False
    with open(filename) as stat_file:
        for line in stat_file:
            m = timer_pattern.search(line)
            if m:
                val = float(m.group(1).strip())
                stat_map.setdefault("TU times", []).append(val)

            m = stat_pattern.search(line)
            if m:
                is_in_stat_block = True
                stat_type = StatType(m.group(4))
                stat_name = m.group(3)
                stat_val = m.group(1)
                group[stat_name] = m.group(2)
                if stat_type == StatType.NUM:
                    stat_map[stat_name] += int(stat_val)
                    act_nums[stat_name] = int(stat_val)
                elif stat_type == StatType.MAX:
                    stat_map[stat_name] = max(
                        stat_map[stat_name], int(stat_val))
                elif stat_type == StatType.PER:
                    per_to_update[stat_name] = stat_val
            # The first non-stat line after a block closes it.
            elif is_in_stat_block:
                is_in_stat_block = False
                flush_block()

    # A block that runs up to the end of the file has no closing line.
    if is_in_stat_block:
        flush_block()
def main(argv):
    """Command-line entry point: summarize the statistics under ``argv[1]``.

    Args:
        argv: Full argument vector with the program name first, as in
            ``sys.argv``.

    Raises:
        SystemExit: With a usage message when no path argument was given.
    """
    if len(argv) < 2:
        program = argv[0] if argv else "summarize_sa_stats.py"
        # A string argument is printed to stderr and yields exit status 1.
        sys.exit("usage: {} <stats file or directory>".format(program))
    summ_stats(argv[1])
# Run the summary only when executed as a script, not when imported.
if __name__ == "__main__":
    main(sys.argv)