-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDWARFMap.py
229 lines (193 loc) · 9.71 KB
/
DWARFMap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
import sys
sys.path.insert(1, '../')
import sveCacheSim as sim
import numpy as np
from elftools.elf.elffile import ELFFile #conda install pyelftools
from pprint import pprint as pprint
# HOW TO USE:
# An example is at the bottom of this file, in __main__.
# Basically, you just need to create a DWARFMap object by giving it
# the executable with dwarf info relating to your trace (e.g. compile with -g).
# Then, you can pass a trace into the classify function and you will get a
# dictionary back. This dict has keys that are function names and values that
# are the number of instructions from the trace corresponding to each function.
# A range represents a range of 64 bit addresses. Each one will be named for a
# function. Each can optionally hold a reference, which is the offset of a DIE
# that contains the function name associated with this range. This allows us
# to fill in the name later, if we don't know it right away (as is the case
# with inlined functions)
# Number of placeholder names ('unknownNNNNN') handed out so far by
# range.map_inlined; used to keep the generated names unique.
nunknown = 0
# When true, the __main__ script prunes unresolved ranges before printing.
compress = True


class range:
    """A named, inclusive address interval that can hold nested sub-intervals.

    Ranges form a tree: a child is always completely contained in its parent.
    A range may be created without a name (``name=None``) together with
    ``ref``, a key into the map later passed to :meth:`map_inlined`, so that
    the name can be filled in once it is known.

    NOTE: this class intentionally keeps its historical name even though it
    shadows the builtin ``range`` for the rest of the module.
    """

    # Prefix of the names generated for ranges whose ``ref`` cannot be resolved.
    _PLACEHOLDER_PREFIX = 'unknown'

    def __init__(self, name:str, start:np.uint64, end:np.uint64, ref:np.int64=-1):
        """Create a range covering ``[start, end]``.

        Args:
            name: Function name, or None if it must be resolved through ``ref``.
            start: First address of the interval (inclusive).
            end: Last address of the interval (inclusive).
            ref: Key used by :meth:`map_inlined` to look up a missing name.
        """
        self.name = name
        self.start = start
        self.end = end
        self.ref = ref
        self.child = []

    @classmethod
    def _is_placeholder(cls, name):
        """Return True if ``name`` is unset or a generated ``unknownNNNNN`` name.

        Only the exact generated shape (prefix followed by digits) counts, so
        real function names that merely contain the word "unknown" are treated
        as ordinary, resolved names.
        """
        if name is None:
            return True
        prefix = cls._PLACEHOLDER_PREFIX
        return name.startswith(prefix) and name[len(prefix):].isdigit()

    def contains(self, new:'range'):
        """Return True if ``new`` lies completely inside this range (inclusive)."""
        return new.start >= self.start and new.end <= self.end

    def equals(self, new:'range'):
        """Return True if ``new`` has exactly the same start and end."""
        return new.start == self.start and new.end == self.end

    def has(self, addr:np.uint64):
        """Return True if ``addr`` lies inside this range (inclusive)."""
        return addr >= self.start and addr <= self.end

    def map_inlined(self, map):
        """Fill in the names of all unnamed ranges in this subtree.

        Args:
            map: Mapping from ``ref`` values to function names. A range whose
                ``ref`` is missing from it receives a unique placeholder name
                of the form ``unknownNNNNN``.
        """
        global nunknown
        if self.name is None:
            if self.ref in map:
                self.name = map[self.ref]
            else:
                self.name = 'unknown{0:05d}'.format(nunknown)
                nunknown += 1
        for c in self.child:
            c.map_inlined(map)

    def insert(self, new:'range'):
        """Add ``new`` at the narrowest spot in the tree that contains it.

        If an existing child contains ``new`` the insertion is delegated to
        that child; an exact duplicate of an existing child is dropped.
        Otherwise ``new`` becomes a direct child of this range.
        """
        for c in self.child:
            if c.contains(new):
                if not c.equals(new):
                    c.insert(new)
                return
        self.child.append(new)

    def find(self, addr:np.uint64):
        """Return the name of the narrowest range that contains ``addr``.

        When several children contain the address the last one wins. If the
        best match is unresolved (no name, or a generated placeholder), this
        range's own name is returned instead.
        """
        name = self.name
        for c in self.child:
            if c.has(addr):
                name = c.find(addr)
        if self._is_placeholder(name):
            name = self.name
        return name

    def remove_unknowns(self):
        """Drop every unresolved range (and its subtree) below this one."""
        for c in self.child:
            c.remove_unknowns()
        self.child = [x for x in self.child if not self._is_placeholder(x.name)]

    def _tostring(self, level):
        """Render this subtree, indenting each nesting level; helper for __str__."""
        spaces = '' if level == 0 else '{}↳ '.format(' '*(level)*2)
        lines = ['{}{} [0x{:x} - 0x{:x}]\n'.format(spaces, self.name, self.start, self.end)]
        lines.extend(c._tostring(level+1) for c in self.child)
        return ''.join(lines)

    def __str__(self):
        return self._tostring(0)
class DWARFMap:
    """Attribute instruction addresses to functions using an ELF file's DWARF info.

    Construction walks every DIE of every compile unit and builds a tree of
    address ranges (``self.root``) for subprograms and inlined subroutines.
    ``self.offset_map`` maps the offset of each named subprogram DIE to its
    name so that ranges recorded without a name can be resolved afterwards.
    """

    def __init__(self, file):
        """Parse the DWARF info of ``file`` and build the range tree.

        Args:
            file: Path to an ELF executable built with debug info (e.g. -g).

        Raises:
            SystemExit: If the file carries no DWARF info (after printing an
                error message).
        """
        # The root node of our tree contains all possible addresses and is
        # named 'ALL'; that is what is returned for any address we have no
        # DWARF info for.
        self.root = range('ALL', 0, 0xffffffffffffffff)

        # Offsets of all named subprogram DIEs -> function name. Inlined
        # instances do not store a name at the inlined location; they refer
        # back to one of these DIEs instead.
        self.offset_map = {}

        # The DIEs are walked while the file is still open.
        with open(file, 'rb') as exefile:
            elffile = ELFFile(exefile)

            if not elffile.has_dwarf_info():
                print('Error: {} has no dwarf info'.format(file))
                sys.exit(1)

            dwarfinfo = elffile.get_dwarf_info()

            # Iterate over every compile unit (roughly every input .c/.cpp
            # file) and every Debugging Information Entry inside it.
            for CU in dwarfinfo.iter_CUs():
                for DIE in CU.iter_DIEs():
                    self._record_die(CU, DIE)

        # All named subprograms are known now, so unnamed ranges can be resolved.
        self.root.map_inlined(self.offset_map)

    @staticmethod
    def _origin_offset(CU, DIE):
        """Return the absolute offset of the DIE's abstract origin, or None.

        The CU-relative reference forms store an offset from the start of the
        owning compile unit, whereas ``DIE.offset`` (the key of
        ``offset_map``) is absolute, so the CU offset must be added. Only
        DW_FORM_ref_addr is already absolute.
        """
        attr = DIE.attributes.get('DW_AT_abstract_origin')
        if attr is None:
            return None
        if attr.form == 'DW_FORM_ref_addr':
            return attr.value
        return CU.cu_offset + attr.value

    @staticmethod
    def _pc_bounds(DIE):
        """Return the DIE's (first, last) address, both inclusive, or None.

        None is returned when the DIE has no contiguous low_pc/high_pc pair
        (e.g. a prototype, or a DIE described by DW_AT_ranges) or when the
        range is empty.
        """
        attrs = DIE.attributes
        if 'DW_AT_low_pc' not in attrs or 'DW_AT_high_pc' not in attrs:
            return None
        low = attrs['DW_AT_low_pc'].value
        high_attr = attrs['DW_AT_high_pc']
        # DW_AT_high_pc is either an address or an offset from low_pc,
        # depending on the class of its form.
        # NOTE(review): DW_FORM_addrx* is assumed to be resolved to an address
        # by pyelftools, as it must be for DW_AT_low_pc - confirm for the
        # pyelftools version in use.
        if high_attr.form.startswith('DW_FORM_addr'):
            high = high_attr.value
        else:
            high = low + high_attr.value
        if high <= low:
            return None
        # high is the first address past the end; the tree stores inclusive ends.
        return low, high - 1

    def _record_die(self, CU, DIE):
        """Add the range and/or name described by one DIE, if it has any."""
        if DIE.tag == 'DW_TAG_inlined_subroutine':
            origin = self._origin_offset(CU, DIE)
            bounds = self._pc_bounds(DIE)
            if origin is None or bounds is None:
                return
            # Inlined instances carry no name (None); it is resolved later
            # through the DIE that DW_AT_abstract_origin points at.
            self.root.insert(range(None, bounds[0], bounds[1], origin))
        elif DIE.tag == 'DW_TAG_subprogram':
            name_attr = DIE.attributes.get('DW_AT_name')
            if name_attr is not None:
                # Store the name and offset of this DIE; inlined instances
                # look their name up through this map.
                name = name_attr.value.decode('UTF-8')
                self.offset_map[DIE.offset] = name
                origin = -1
            else:
                # An unnamed subprogram can still be the out-of-line instance
                # of an inlinable function; it names itself via its origin.
                name = None
                origin = self._origin_offset(CU, DIE)
                if origin is None:
                    return
            bounds = self._pc_bounds(DIE)
            if bounds is None:
                # No address range: a prototype or abstract instance.
                return
            self.root.insert(range(name, bounds[0], bounds[1], origin))

    def remove_unknowns(self):
        """Prune every range whose name could not be resolved."""
        self.root.remove_unknowns()

    def classify(self, ips, counts=None):
        """Attribute instruction addresses to function names.

        Args:
            ips: A single address, or an iterable of addresses.
            counts: Optional dict returned by a previous call; new hits are
                added to it so that several traces can be accumulated.

        Returns:
            The function name if ``ips`` is a scalar; otherwise a dict mapping
            every known function name (plus 'ALL' for unattributed addresses)
            to the number of addresses that fell inside it.
        """
        if np.isscalar(ips):
            return self.root.find(ips)

        if counts is None:
            counts = {}

        # Make sure every known function shows up, even with zero hits,
        # without disturbing totals carried over from an earlier call.
        counts.setdefault('ALL', 0)
        for name in self.offset_map.values():
            counts.setdefault(name, 0)

        for ip in ips:
            name = self.root.find(ip)
            counts[name] = counts.get(name, 0) + 1

        return counts
if __name__ == '__main__':
    # Command-line entry point: print the range tree found in <exefile> and,
    # if a trace file is given, the per-function instruction counts for it.
    if len(sys.argv) < 2 or (len(sys.argv) == 2 and sys.argv[1] == '-h'):
        print('Usage:\n python3 {} <exefile> [tracefile]'.format(sys.argv[0]))
        sys.exit()

    # Parse DWARF
    DM = DWARFMap(sys.argv[1])

    # Remove ranges whose names could not be resolved
    if compress:
        DM.remove_unknowns()

    # This prints out the hierarchical ranges found in the DWARF
    print(DM.root, end='')

    if len(sys.argv) > 2:
        try:
            # Use sveCacheSim to load the trace
            trace = sim.traceToInts(sys.argv[2], None).IP
            # This will attribute each IP in the trace to a function
            counts = DM.classify(trace)
        except Exception as err:
            # Report the failure instead of silently printing nothing.
            print('Error: could not classify trace {}: {}'.format(sys.argv[2], err),
                  file=sys.stderr)
            sys.exit(1)
        print(counts)