-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathregsize.py
157 lines (124 loc) · 5.08 KB
/
regsize.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
#!/usr/bin/env python
import argparse
import csv
import glob
import math
import os
import sys
from collections import namedtuple
from operator import attrgetter
from Registry import Registry
class RegSize:
    """
    Main functionality of script. Parses the hive, finding the largest values based on the size of their data.

    Results accumulate in ``self.tops`` as ``KeyValue(key, value, size)`` tuples; the list never
    holds more than ``max_sizes`` entries.
    """

    def __init__(self, hive_file, max_sizes, do_ent, as_csv):
        """
        :param hive_file: path of the registry hive to open
        :param max_sizes: maximum number of values to keep and report
        :param do_ent: when true, also report the Shannon entropy of each value's data
        :param as_csv: when true, report as CSV on stdout instead of plain text
        """
        self.KeyValue = namedtuple('KeyValue', 'key value size')
        self.hive_file = hive_file
        self.max_sizes = max_sizes
        self.do_ent = do_ent
        self.as_csv = as_csv
        self.reg = Registry.Registry(self.hive_file)
        self.tops = []

    def check_key(self, key):
        """
        Recursively walk ``key`` and its subkeys, recording the largest values seen so far.

        A ParseException while reading the values (or the subkeys) of a key is printed together
        with the key path; the rest of that loop is abandoned but the walk itself carries on.

        :param key: registry key to inspect
        """
        try:
            for value in key.values():
                # Read the data once; it is the only thing needed to rank the value.
                raw_len = len(value.raw_data())
                if not self._is_candidate(raw_len):
                    continue
                if len(self.tops) >= self.max_sizes:
                    # List is full: evict exactly one of the smallest entries to make room.
                    self.drop_smallest(min(kv.size for kv in self.tops))
                self.tops.append(self.KeyValue(key.path(), value.name(), raw_len))
        except Registry.RegistryParse.ParseException as parseEx:
            print("ParseException: {}".format(parseEx))
            print(key.path())
        try:
            for subkey in key.subkeys():
                self.check_key(subkey)
        except Registry.RegistryParse.ParseException as parseEx:
            print("ParseException: {}".format(parseEx))
            print(key.path())

    def analyse(self):
        """Walk the whole hive from its root and print the report in the configured format."""
        if not self.as_csv:
            print('[{}]'.format(self.hive_file))
        self.check_key(self.reg.root())
        if self.as_csv:
            self.to_csv()
        else:
            self.to_text()

    def is_tops(self, value):
        """
        Tell whether ``value`` is large enough to enter the list of top values.

        :param value: registry value whose ``raw_data()`` length is ranked
        :return: True if the value belongs in ``self.tops``, False otherwise
        """
        return self._is_candidate(len(value.raw_data()))

    def _is_candidate(self, size):
        """Return True when data of ``size`` bytes should be added to ``self.tops``."""
        # Values without data are never reported, and a non-positive limit keeps nothing.
        if size <= 0 or self.max_sizes <= 0:
            return False
        if len(self.tops) < self.max_sizes:
            return True
        # Full list: only a strictly larger value displaces the current smallest entry.
        return size > min(kv.size for kv in self.tops)

    def drop_smallest(self, min_size):
        """
        Remove one entry of size ``min_size`` from ``self.tops`` when the list is full.

        Only a single entry is removed, so ties at the minimum size do not shrink the list by
        more than the one slot that is about to be reused.

        :param min_size: size of the entry to evict
        """
        if len(self.tops) < self.max_sizes:
            return
        for index, kv in enumerate(self.tops):
            if kv.size == min_size:
                del self.tops[index]
                return

    def _report_rows(self):
        """
        Yield ``(size, entropy, path, value_name)`` for every recorded value, largest first.

        ``path`` has its first component (the root key name) removed so it can be passed to
        ``self.reg.open``. ``entropy`` is None when entropy reporting is disabled.
        """
        for path, value, size in sorted(self.tops, key=attrgetter('size'), reverse=True):
            path = '\\'.join(path.split('\\')[1:])
            entropy = None
            if self.do_ent:
                key = self.reg.open(path)
                entropy = calc_shannon(key.value(value).raw_data())
            yield size, entropy, path, value

    def to_text(self):
        """Print one line per recorded value: size, optional entropy, then key path and value name."""
        for size, entropy, path, value in self._report_rows():
            if self.do_ent:
                print('{:<9} {:.5f} {}\\{}'.format(size, entropy, path, value))
            else:
                print('{:<9} {}\\{}'.format(size, path, value))

    def to_csv(self):
        """Write a header row and one CSV row per recorded value to stdout."""
        csv_writer = csv.writer(sys.stdout, quotechar='"')
        if self.do_ent:
            csv_writer.writerow(['hivefile', 'size', 'entropy', 'key'])
        else:
            csv_writer.writerow(['hivefile', 'size', 'key'])
        for size, entropy, path, value in self._report_rows():
            if self.do_ent:
                csv_writer.writerow([self.hive_file, size, entropy, path+'\\'+value])
            else:
                csv_writer.writerow([self.hive_file, size, path+'\\'+value])
def calc_shannon(data):
    """
    Calculates the Shannon entropy of data. The closer to 8, the higher the entropy.
    :param data: Calculate the Shannon entropy of this data (a byte string)
    :return: A float between 0 and 8; 0.0 for empty data
    """
    # bytearray yields integer byte values for both Python 2 str and Python 3 bytes.
    byte_array = bytearray(data)
    data_size = len(byte_array)
    if data_size == 0:
        # No bytes means no information; also avoids dividing by zero below.
        return 0.0
    # calculate the frequency of each byte value
    byte_count = [0] * 256
    for b in byte_array:
        byte_count[b] += 1
    # Shannon entropy
    ent = 0.0
    for c in byte_count:
        if c:
            freq = float(c) / data_size
            # Subtract instead of negating at the end so constant data gives 0.0, not -0.0.
            ent -= freq * math.log(freq, 2)
    return ent
def main(argv=None):
    """
    Command-line entry point: resolve the target files and analyse each one in turn.

    Each target is used as-is when it names an existing file; otherwise it is expanded as a
    glob pattern and only the matches that are files are kept.

    :param argv: argument list to parse; when None, argparse reads sys.argv[1:]
    """
    argp = argparse.ArgumentParser()
    argp.add_argument('target', nargs='+', help='file to analyse. supports globbing: folder{0}*'.format(os.sep))
    argp.add_argument('--max', '-m', help='report the top MAX sizes', type=int, default=20)
    argp.add_argument('--no-ent', '-E', help='don\'t calculate the Shannon entropy', action='store_true')
    argp.add_argument('--csv', '-c', help='output in CSV format', action='store_true')
    args = argp.parse_args(argv)

    targets = []
    for t in args.target:
        if os.path.isfile(t):
            targets.append(t)
        else:  # try and glob
            targets.extend(tmp for tmp in glob.glob(t) if os.path.isfile(tmp))

    if not targets:
        print('no valid files found. nothing to do.')
        return

    for t in targets:
        analyser = RegSize(t, args.max, not args.no_ent, args.csv)
        analyser.analyse()


if __name__ == '__main__':
    main()