# Container.py
import subprocess
import sys
from fabric import Connection
import docker
import fnmatch
import random
import time
# Analytics modules
from DataHandler import *
class EmptyContextManager:
    """Context manager that does nothing on entry or exit.

    Used as a stand-in where a real ``cd`` context is not available, so the
    calling code can keep a uniform ``with`` statement.
    """

    def __enter__(self):
        """Enter the context; the ``as`` target is bound to ``None``."""
        return None

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Leave the context; returns ``None`` so exceptions propagate."""
        return None
class Container(object):
    """Spawn a Docker container with sshd listening and measure coverage in it.

    When constructed without an image the instance works in "offline" mode:
    commands are executed on the local machine through ``subprocess`` and no
    container is created.

    Several attributes used below are never assigned in this class (``path``,
    ``source_path``, ``tsuite_path``, ``outputfolder`` and the optional
    ``limit_changes_to`` / ``ignore_coverage_from``); presumably subclasses or
    callers set them -- TODO confirm.
    """

    # Shell filter appended to git pipelines to strip terminal escape
    # sequences. Raw string: the backslashes must reach perl untouched.
    _STRIP_ANSI = r" | perl -pe 's/\e\[?.*?[\@-~]//g'"

    class LineType:
        """Classification of one source line as returned by ``is_covered``."""
        NotCovered = 1
        NotExecutable = 2
        Covered = 3

    def __init__(self, _image, _user, _pwd, _repeats=1):
        """Store the settings and reset all per-revision result fields.

        Args:
            _image: Docker image name; a falsy value selects offline mode.
            _user: user name for the SSH connection.
            _pwd: password, stored as ``self.pwd``.
            _repeats: stored as ``self.repeats`` and reported by ``collect``.
        """
        self.container = None
        self.client = docker.DockerClient(base_url='unix://var/run/docker.sock')
        self.image = _image
        self.user = _user
        # NOTE(review): the password is stored but never handed to the fabric
        # Connection in this class -- confirm how authentication is expected
        # to work.
        self.pwd = _pwd
        self.repeats = _repeats
        self.offline = not _image
        # no errors yet :)
        self.compileError = False
        self.maketestError = False
        self.emptyCommit = False
        self.exit_status_list = []
        self.changed_files = []
        self.echanged_files = []
        self.uncovered_lines_list = []
        # how many lines from previous patches are covered now
        self.prev_covered = []
        self.hunkheads = []
        self.ehunkheads = []
        self.hunkheads3 = []
        self.ehunkheads3 = []
        self.tsize = 0
        # split the test suite into directories and files
        self.tsuite_dir = []
        self.tsuite_file = []
        self.changed_test_files = []
        self.merge = False
        self.total_eloc = 0
        self.covered_eloc = 0
        self.total_branches = 0
        self.covered_branches = 0
        self._gcovNameCache = set()
        self._gcovNoNameCache = set()
        # connection_attempts no longer supported
        if self.offline:
            self.initialpath = self.local("realpath .").stdout.strip()
            self.difflinessh = "%s/deps/measure-cov.sh" % self.initialpath
        else:
            self.difflinessh = "/root/measure-cov.sh"

    @staticmethod
    def _backoff_sleep(tries):
        """Sleep for the exponential backoff delay used by the retry loops."""
        time.sleep((1.5 ** tries) / 3)

    # The following are methods used to spawn a new container
    #
    def sshd_up(self):
        """Create and start a container running sshd on a random host port.

        Picks a random host port between 60001 and 60999 for container port
        22 and retries (up to 20 times, with a short random pause) when
        creating or starting the container fails. Exits the process with
        status 1 when every attempt fails.
        """
        print(self.image)
        attempts, max_retries = 0, 20
        random_port = -1
        while attempts < max_retries:
            random_port = random.randint(60001, 60999)
            container = None
            try:
                container = self.client.containers.create(self.image,
                                                          command='/usr/sbin/sshd -D',
                                                          ports={22: random_port})
                # start() is the call that fails if the port is already allocated
                container.start()
            except Exception as e:
                print(f"Docker threw error: {e}")
                print(f"Most likely port {random_port} already allocated, retrying...")
                if container is not None:
                    # Do not leave the created-but-unstartable container behind.
                    try:
                        container.remove(force=True)
                    except Exception as cleanup_error:
                        print(f"Could not remove failed container: {cleanup_error}")
                time.sleep(random.uniform(0.1, 1))
                attempts += 1
            else:
                self.container = container
                self.cnt_id = container.id
                break
        if attempts == max_retries:
            print("Could not create container, giving up")
            sys.exit(1)
        print(f"Started container {self.cnt_id[:8]} on port {random_port}")

    def set_ip(self):
        """Read the container's IP address from Docker and store it in ``self.ip``."""
        state = self.client.containers.get(self.cnt_id).attrs
        self.ip = state['NetworkSettings']['IPAddress']
        print(f"Assigned container {self.cnt_id[:8]} IP {self.ip}")

    def fabric_setup(self):
        """Open the SSH connection to the container (see ``try_to_connect``)."""
        self.try_to_connect()

    def try_to_connect(self, max_connection_attempts=10):
        """Create ``self.conn`` and open it, retrying with exponential backoff.

        The image label ``ubuntu_version`` selects the SSH options: images
        older than 16 (or without the label) get the ``rsa-sha2-*`` public
        key algorithms disabled.

        Args:
            max_connection_attempts: number of attempts before giving up.
                When all attempts fail only an error message is printed.
        """
        labels = self.client.images.get(self.image).attrs['Config']['Labels']
        if labels is None or 'ubuntu_version' not in labels:
            image_version = 14.04
            print('LABEL "ubuntu_version" not found in image, assuming <= 14.04')
        else:
            image_version = float(labels['ubuntu_version'])
        tries = 0
        while tries < max_connection_attempts:
            try:
                if image_version < 16:
                    self.conn = Connection(host=self.ip, port=22, user=self.user,
                                           connect_kwargs={
                                               "disabled_algorithms": {"pubkeys": ["rsa-sha2-256", "rsa-sha2-512"]}
                                           })
                else:
                    self.conn = Connection(host=self.ip, port=22, user=self.user)
                # Connection objects are lazy; open explicitly so that a
                # not-yet-ready sshd is detected (and retried) here.
                self.conn.open()
                return
            except Exception as e:
                print(f'Failed to connect to container: {e}')
                tries += 1
                if tries < max_connection_attempts:
                    print(f'Retrying... ({tries})')
                    self._backoff_sleep(tries)  # exponential backoff scale to 12s
        print('Error: Maximum number of connection attempts exceeded.')

    def try_to_run(self, cmd, max_connection_attempts=10, **kwargs):
        """Run ``cmd`` over ``self.conn``, retrying on any exception.

        Args:
            cmd: shell command to run in the container.
            max_connection_attempts: number of attempts before giving up.
            **kwargs: forwarded to ``self.conn.run``.

        Returns:
            The result of ``self.conn.run``, or ``None`` when every attempt
            raised.
        """
        tries = 0
        while tries < max_connection_attempts:
            try:
                return self.conn.run(cmd, **kwargs)
            except Exception as e:
                print(f'Failed to run on container: {e}')
                tries += 1
                if tries < max_connection_attempts:
                    print(f'Retrying... ({tries})')
                    self._backoff_sleep(tries)  # exponential backoff scale to 12s
        print('Error: Maximum number of run attempts exceeded.')
        return None

    def run_test(self):
        """Run ``uname -on`` in the container to check everything works."""
        self.conn.run('uname -on')

    def local(self, cmd: str, text=True, **kwargs):
        """Run ``cmd`` through the local shell and capture its output.

        Args:
            cmd: shell command line.
            text: passed to ``subprocess.run``; ``False`` yields bytes output.
            **kwargs: forwarded to ``subprocess.run`` (e.g. ``cwd``).

        Returns:
            The ``subprocess.CompletedProcess`` of the command.
        """
        return subprocess.run(cmd, shell=True, capture_output=True, text=text, **kwargs)

    def omnicd(self, path):
        """Return a context manager that changes into ``path`` for remote runs.

        Offline there is no ``cd`` context, so a no-op manager is returned and
        callers pass ``cwd=`` to ``omnirun`` instead.
        """
        if self.offline:
            return EmptyContextManager()
        return self.conn.cd(path)

    def omnirun(self, cmd, **kwargs):
        """Run ``cmd`` locally (offline) or in the container (online).

        Keyword arguments that only make sense for the other mode are
        dropped: ``warn`` offline; ``cwd`` and ``text`` online.

        Returns:
            ``subprocess.CompletedProcess`` offline; online whatever
            ``try_to_run`` returns (``None`` if it gave up).
        """
        print("running %s" % cmd)
        if self.offline:
            kwargs.pop('warn', None)
            text = kwargs.pop('text', True)  # default to text mode
            return self.local(cmd, text=text, **kwargs)
        kwargs.pop('cwd', None)  # fabric takes the directory from cd()
        kwargs.pop('text', None)
        return self.try_to_run(cmd, **kwargs)

    def spawn(self):
        """Call all the methods needed to spawn a container (no-op offline)."""
        if not self.offline:
            self.sshd_up()
            self.set_ip()
            self.fabric_setup()

    def halt(self, max_connection_attempts=10):
        """Shut down and remove the current container (no-op offline).

        Retries with exponential backoff; prints an error when every attempt
        fails.
        """
        if self.offline:
            return
        print('\n\nHalting the current container...\n\n')
        tries = 0
        while tries < max_connection_attempts:
            try:
                # The connection may never have been created; the container
                # must still be stopped and removed in that case.
                conn = getattr(self, 'conn', None)
                if conn is not None:
                    conn.close()
                self.container.stop()
                self.container.remove(force=True)
                return
            except Exception as e:
                print(f'Failed to stop container: {e}')
                tries += 1
                if tries < max_connection_attempts:
                    print(f'Retrying... ({tries})')
                    self._backoff_sleep(tries)
        print('Error: Maximum number of stop attempts exceeded.')

    def get_commits(self, n, ending_at=''):
        """List the last ``n`` first-parent commits with timestamp and author.

        Args:
            n: number of commits to list.
            ending_at: revision to end at; empty string passes no revision.

        Returns:
            A list of ``<abbrev-hash>__<commit-time>__<author>`` strings.
        """
        # Drop only the "commit <sha>" header lines; a plain `grep -v commit`
        # would also drop commits whose author name contains "commit".
        commitlist = self.omnirun(
            "cd %s && git rev-list -n %d --first-parent --format=%%h__%%ct__%%an %s|grep -v '^commit '" % (
                self.path, n, ending_at))
        return commitlist.stdout.splitlines()

    def count_sloc(self, path):
        """Sum the lines of code of every entry in ``path`` using cloc.

        Falls back to ``wc -l`` on the entry's children when the cloc output
        cannot be parsed as an integer.

        Args:
            path: iterable of file or directory paths.

        Returns:
            The total as a string.
        """
        lines = 0
        for p in path:
            try:
                lines += int(self.omnirun("cloc " + p + " | tail -2 | awk '{print $5}'").stdout.strip())
            except ValueError:
                lines += int(self.omnirun("wc -l " + p + "/*|tail -1|awk '{ print $1 }'").stdout.strip())
        return str(lines)

    def _hunk_heads(self, diff_cmd, cwd):
        """Run a git diff and return its ``@@`` hunk header lines.

        Returns ``None`` when the command result is falsy (failed or missing).
        Output is requested in binary mode offline because diffs may contain
        bytes that are not valid UTF-8.
        """
        result = self.omnirun(diff_cmd + self._STRIP_ANSI, cwd=cwd, text=False)
        if not result:
            return None
        heads = []
        for line in result.stdout.splitlines():
            marker = b'@@' if isinstance(line, bytes) else '@@'
            if line.startswith(marker):
                heads.append(line)
        return heads

    def count_hunks(self, prev_revision):
        """Record the hunk headers between ``prev_revision`` and ``self.revision``.

        ``self.hunkheads`` uses zero lines of context (``-U0``);
        ``self.hunkheads3`` uses git's default context.
        """
        cwd = self.path if self.offline else None
        with self.omnicd(self.path):
            revisions = prev_revision + " " + self.revision
            heads = self._hunk_heads("git diff -b -U0 " + revisions, cwd)
            if heads is not None:
                self.hunkheads = heads
            heads3 = self._hunk_heads("git diff -b " + revisions, cwd)
            if heads3 is not None:
                self.hunkheads3 = heads3

    def _changed_files_cmd(self, prev_revision):
        """Build the shell command listing files changed since ``prev_revision``."""
        diffcmd = "git diff -b --pretty='format:' --name-only " + prev_revision + " " + self.revision + " -- "
        if hasattr(self, 'limit_changes_to'):
            for path in self.limit_changes_to:
                diffcmd += path + " "
        return diffcmd + self._STRIP_ANSI

    def checkout(self, prev_revision, revision):
        """Check out ``revision`` and flag the commit as empty if nothing changed.

        Sets ``self.revision``, updates ``self.merge`` via ``is_merge`` and
        sets ``self.emptyCommit`` when the diff against ``prev_revision``
        lists no files.
        """
        # set the revision for current execution (commit sha)
        self.revision = revision
        cwd = self.path if self.offline else None
        with self.omnicd(self.path):
            print('path is ' + self.path)
            self.omnirun('git checkout ' + revision, cwd=cwd)
            self.is_merge(revision)
            result = self.omnirun(self._changed_files_cmd(prev_revision), cwd=cwd)
            if result.stdout.strip() == "":
                self.emptyCommit = True

    def tsize_compute(self):
        """Compute the test suite size as SLOCs and store it in ``self.tsize``.

        Rebuilds ``self.tsuite_path`` with only the entries that exist in the
        current revision and sorts them into ``self.tsuite_dir`` and
        ``self.tsuite_file``.
        """
        actual_tsuite = []
        for item in self.tsuite_path:
            item = "%s/%s" % (self.path, item)
            fileExists = self.omnirun('ls -U ' + item + ' >/dev/null 2>&1 && echo y || echo n')
            if fileExists.stdout.strip() == 'y':
                actual_tsuite.append(item)
                print('Added ' + item + ' to the test suite\n')
                isdir = self.omnirun('[ -d "' + item + '" ] && echo y || echo n')
                if isdir.stdout.strip() == 'y':
                    self.tsuite_dir.append(item)
                else:
                    self.tsuite_file.append(item)
        self.tsuite_path = actual_tsuite
        # XXX count_sloc will fail if a wildcard path contains no files recognized by cloc
        self.tsize = self.count_sloc(self.tsuite_path)

    def backup(self, commit):
        """Create a tar.bz2 with .gcov and lcov .info files and save it to localhost.

        Online only. Does nothing after a compile error or for an empty
        commit. The archive is downloaded to
        ``data/<outputfolder>/coverage-<commit>.tar.bz2``.
        """
        assert not self.offline
        if self.compileError or self.emptyCommit:
            return
        with self.omnicd(self.source_path):
            # save gcov/gcc/g++ info
            self.conn.run('gcov -v | head -1 > build_info.txt', warn=True)
            self.conn.run('echo >> build_info.txt', warn=True)
            self.conn.run('gcc -v &>> build_info.txt', warn=True)
        with self.omnicd(self.source_path):
            # bzip all the coverage files
            self.conn.run("find . -name '*.gcov' -or -name '*.info' > backuplist")
            self.conn.run("echo ./build_info.txt >> backuplist")
            self.conn.run('tar -cjf coverage-' + commit + '.tar.bz2 -T backuplist')
            # scp to localhost/data
            self.conn.get(self.source_path + '/coverage-' + commit + '.tar.bz2',
                          local='data/' + self.outputfolder + '/' + 'coverage-' + commit + '.tar.bz2')

    def rec_initial_coverage(self):
        """Capture the initial (zero) lcov coverage into ``base.info``. Online only."""
        assert not self.offline
        with self.omnicd(self.source_path):
            # Don't warn=True - we need this to work
            self.conn.run('lcov --rc lcov_branch_coverage=1 -c -i -d . -o base.info')

    def make_test(self):
        """Record the baseline coverage unless the build failed or the commit is empty."""
        assert not self.offline
        if self.compileError or self.emptyCommit:
            return
        self.rec_initial_coverage()

    def _summary_numbers(self, cmd, cwd):
        """Run an lcov summary pipeline and return its output lines ([] if it could not run)."""
        result = self.omnirun(cmd, warn=True, cwd=cwd)
        if result is None:
            return []
        return result.stdout.splitlines()

    def overall_coverage(self):
        """Collect overall line and branch coverage into the ``*_eloc`` / ``*_branches`` fields.

        Offline, the coverage archive of ``self.revision`` is unpacked into a
        ``tmp`` directory; online, lcov and gcov are run in ``source_path``.
        Test-suite paths (and ``ignore_coverage_from``, if set) are removed
        from ``total.info`` before the summary is read.
        """
        if self.compileError or self.emptyCommit:
            return
        if self.offline:
            covdatadir = '%s/data/%s' % (self.initialpath, self.outputfolder)
            with self.omnicd(covdatadir):
                res = self.local('rm -rf tmp && mkdir tmp && tar xjf coverage-%s.tar.bz2 -C tmp' % self.revision,
                                 cwd=covdatadir)
                if res.returncode != 0:
                    return
            covdatadir += "/tmp"
        else:
            covdatadir = self.source_path
            with self.omnicd(covdatadir):
                res = self.omnirun('lcov --rc lcov_branch_coverage=1 -c -d . -o test.info', warn=True)
                # res is None when the command could not be run at all
                if res is None or res.failed:
                    return
                self.conn.run("lcov --rc lcov_branch_coverage=1 -a base.info -a test.info -o total.info", warn=True)
                self.conn.run('find -name "*.gcda"|xargs gcov', hide=True, warn=True)  # From quiet to hide
        cwd = covdatadir if self.offline else None
        with self.omnicd(covdatadir):
            ignore = ' '.join(["'%s'" % p for p in self.tsuite_path])
            self.omnirun('lcov --rc lcov_branch_coverage=1 -r total.info %s -o total.info' % ignore, warn=True, cwd=cwd)
            if hasattr(self, 'ignore_coverage_from'):
                ignore = ' '.join(["'%s'" % p for p in self.ignore_coverage_from])
                self.omnirun('lcov --rc lcov_branch_coverage=1 -r total.info %s -o total.info' % ignore, warn=True,
                             cwd=cwd)
            # NOTE(review): the values stored below are strings, so the
            # `covered_eloc == 0` guards elsewhere only fire while the
            # initial integer 0 is still in place -- confirm this is intended.
            lines = self._summary_numbers(
                "lcov --rc lcov_branch_coverage=1 --summary total.info 2>&1|tail -3|head -1|sed 's/.*(//' |egrep -o '[0-9]+'",
                cwd)
            if len(lines) == 2:
                self.covered_eloc = lines[0]
                self.total_eloc = lines[1]
            branches = self._summary_numbers(
                "lcov --rc lcov_branch_coverage=1 --summary total.info 2>&1|tail -1|sed 's/.*(//' |egrep -o '[0-9]+'",
                cwd)
            if len(branches) == 2:
                self.covered_branches = branches[0]
                self.total_branches = branches[1]

    def _coverage_dir(self):
        """Return ``(directory holding total.info, cwd argument for omnirun)``."""
        if self.offline:
            covdatadir = '%s/data/%s/tmp' % (self.initialpath, self.outputfolder)
            return covdatadir, covdatadir
        return self.source_path, None

    def has_coverage_information(self, filepath):
        """Return True if ``total.info`` contains a record for ``filepath``.

        ``filepath`` is inserted into a sed pattern unescaped, so regex
        metacharacters in it are interpreted by sed.
        """
        covdatadir, cwd = self._coverage_dir()
        print(covdatadir)
        with self.omnicd(covdatadir):
            result = self.omnirun(r"sed -n '\|SF:.*/%s|,/end_of_record/p' total.info" % filepath, cwd=cwd)
            return bool(result.stdout.strip())

    def is_covered(self, filepath, line):
        """Classify ``line`` of ``filepath`` using the ``DA:`` entries of ``total.info``.

        Returns:
            ``LineType.NotExecutable`` when no entry for the line is found,
            ``LineType.NotCovered`` when the output ends with a zero hit
            count, ``LineType.Covered`` otherwise.
        """
        covdatadir, cwd = self._coverage_dir()
        with self.omnicd(covdatadir):
            result = self.omnirun(
                r"sed -n '\|SF:.*/%s|,/end_of_record/p' total.info |grep '^DA:%d,'" % (filepath, line),
                warn=True, cwd=cwd)
            # fabric results expose `ok`, subprocess results `returncode`
            if (hasattr(result, 'ok') and not result.ok) or (hasattr(result, 'returncode') and result.returncode != 0):
                return self.LineType.NotExecutable
            if result.stdout.strip().endswith(",0"):
                return self.LineType.NotCovered
            return self.LineType.Covered

    def is_merge(self, commit):
        """Set and return ``self.merge``: whether ``git show`` reports ``commit`` as a merge."""
        cwd = self.path if self.offline else None
        with self.omnicd(self.path):
            mergestatus = self.omnirun("git show " + commit + "|head -2|tail -1", cwd=cwd)
            self.merge = mergestatus.stdout.strip().startswith("Merge:")
            return self.merge

    def _file_patch_coverage(self, prev_revision, f, cwd):
        """Update the patch-coverage counters for one changed file ``f``."""
        file_exists = self.omnirun('[ -f ' + f + ' ] && echo y || echo n', cwd=cwd).stdout.strip() == 'y'
        # check whether it's a test file
        if file_exists:
            realp = self.omnirun('realpath ' + f, cwd=cwd).stdout.strip()
            for tf in self.tsuite_file:
                if fnmatch.fnmatch(realp, tf):
                    self.changed_test_files.append(f)
            for td in self.tsuite_dir:
                if realp.startswith(td + "/"):
                    self.changed_test_files.append(f)
        self.changed_test_files = list(set(self.changed_test_files))
        lines_cmd = "%s %s %s %s" % (self.difflinessh, prev_revision, self.revision, f)
        if self.has_coverage_information(f):
            print('Coverage information found\n')
            self.echanged_files.append(f)
            # get the changed lines numbers
            revisions = prev_revision + " " + self.revision
            file_diff = self.omnirun("git diff -b -U0 " + revisions + " -- " + f + self._STRIP_ANSI, cwd=cwd)
            self.ehunkheads += [i for i in file_diff.stdout.splitlines() if i.startswith('@@')]
            file_diff3 = self.omnirun("git diff -b " + revisions + " -- " + f + self._STRIP_ANSI, cwd=cwd)
            self.ehunkheads3 += [i for i in file_diff3.stdout.splitlines() if i.startswith('@@')]
            line_numbers = self.omnirun(lines_cmd, cwd=cwd)
            # for every changed line
            for l in line_numbers.stdout.splitlines():
                self.added_lines += 1
                covstatus = self.is_covered(f, int(l))
                if covstatus == self.LineType.NotCovered:
                    self.uncovered_lines += 1
                    self.uncovered_lines_list[-1].append(int(l))
                elif covstatus == self.LineType.Covered:
                    self.covered_lines += 1
        else:
            # no coverage information found
            # most likely the file was not compiled into any of the programs
            # executed by the test suite. Alternatives include: file was
            # removed, program crashed or has no permissions
            print('No coverage information found for ' + f + '\n')
            if file_exists:
                self.added_lines += len(self.omnirun(lines_cmd, cwd=cwd).stdout.splitlines())
            else:
                print('No file found ' + f + '\n')

    def patch_coverage(self, prev_revision):
        """Compute the coverage of the lines changed by the current commit.

        Resets and fills ``added_lines``, ``covered_lines``,
        ``uncovered_lines`` and ``average`` (percentage of executable changed
        lines that are covered), appends one list per changed file to
        ``uncovered_lines_list`` and finally records the hunk headers via
        ``count_hunks``. Returns early, leaving the counters at zero, after a
        compile error, for an empty commit, or while ``covered_eloc`` is 0.
        """
        self.added_lines = 0
        self.covered_lines = 0
        self.uncovered_lines = 0
        self.average = 0
        if self.compileError or self.emptyCommit or self.covered_eloc == 0:
            return
        cwd = self.path if self.offline else None
        # Online, relative file names are resolved through this cd context.
        with self.omnicd(self.path):
            # get a list of the changed files for the current commit
            changed_files = self.omnirun(self._changed_files_cmd(prev_revision), cwd=cwd)
            if changed_files.stdout:
                self.changed_files = [i for i in changed_files.stdout.splitlines() if i]
            print(self.changed_files)
            for f in self.changed_files:
                self.uncovered_lines_list.append([])
                self._file_patch_coverage(prev_revision, f, cwd)
        # save results
        if self.covered_lines > 0:
            executable = self.covered_lines + self.uncovered_lines
            self.average = round((self.covered_lines / executable) * 100, 2)
        self.count_hunks(prev_revision)

    def prev_patch_coverage(self, backcnt, prev_files, prev_lines):
        """Check which uncovered lines of an earlier patch are covered now.

        Files modified by the current revision are dropped. For the remaining
        files the line lists are pruned in place to the lines that are still
        not covered, and the number of newly covered lines is added to
        ``self.prev_covered[backcnt]``.

        Args:
            backcnt: index into ``self.prev_covered``; must be at most its
                current length.
            prev_files: file names of the earlier patch.
            prev_lines: per-file lists of line numbers, parallel to
                ``prev_files``.

        Returns:
            The filtered ``(prev_files, prev_lines)`` pair.
        """
        assert len(prev_files) == len(prev_lines)
        # ignore files that are modified
        kept = [(f, lines) for f, lines in zip(prev_files, prev_lines) if f not in self.changed_files]
        prev_files = [f for f, _ in kept]
        prev_lines = [lines for _, lines in kept]
        if self.compileError or self.emptyCommit or self.covered_eloc == 0:
            return prev_files, prev_lines
        covered = 0
        for f, lines in zip(prev_files, prev_lines):
            before = len(lines)
            lines[:] = [l for l in lines if self.is_covered(f, l) != self.LineType.Covered]
            covered += before - len(lines)
        assert (len(self.prev_covered) >= backcnt)
        if len(self.prev_covered) == backcnt:
            self.prev_covered.append(covered)
        else:
            self.prev_covered[backcnt] += covered
        return prev_files, prev_lines

    def get_non_determinism_flag(self):
        """Return True if the recorded test runs ended with different exit statuses.

        Returns False after a compile error, for an empty commit or while
        ``covered_eloc`` is 0. Exit status 124 (timeout) is removed from
        ``self.exit_status_list`` before comparing.
        """
        if self.compileError or self.emptyCommit or self.covered_eloc == 0:
            return False
        self.exit_status_list = [x for x in self.exit_status_list if x != 124]
        return len(set(self.exit_status_list)) > 1

    def collect(self, author_name, timestamp, outputfolder, outputfile):
        """Copy the results into a Collector and store them through DataHandler.

        Args:
            author_name: commit author, stored on the collector.
            timestamp: commit timestamp, stored on the collector.
            outputfolder: stored as the collector's ``outputfolder``.
            outputfile: stored as the collector's ``outputfile``.
        """
        c = Collector()
        # the class name which is actually running this method, as a string
        c.name = self.__class__.__name__
        c.outputfile = outputfile
        c.outputfolder = outputfolder
        # fill in some info about the test
        c.revision = self.revision
        c.author_name = author_name
        c.timestamp = timestamp
        c.tsuite_size = self.tsize
        c.merge = self.merge
        c.repeats = self.repeats
        c.non_det = self.get_non_determinism_flag()
        if self.compileError:
            # compilation failed: nothing else to report
            c.compileError = True
        elif self.emptyCommit:
            c.emptyCommit = True
        else:
            # fill patch coverage results
            c.added_lines = self.added_lines
            c.covered_lines = self.covered_lines
            c.uncovered_lines = self.uncovered_lines
            c.average = self.average
            # fill overall coverage results and exit status
            c.covered_eloc = self.covered_eloc
            c.total_eloc = self.total_eloc
            c.covered_branches = self.covered_branches
            c.total_branches = self.total_branches
            c.compileError = self.compileError
            c.maketestError = self.maketestError
            if self.covered_eloc:
                c.prev_covered = self.prev_covered
            c.hunks = len(self.hunkheads)
            c.ehunks = len(self.ehunkheads)
            c.hunks3 = len(self.hunkheads3)
            c.ehunks3 = len(self.ehunkheads3)
            c.changed_files = len(self.changed_files)
            c.echanged_files = len(self.echanged_files)
            c.changed_test_files = len(self.changed_test_files)
        # pass the Collector() obj to the Data Handler to store results in CSV format
        x = DataHandler(c)
        x.extractData()
        x.dumpCSV()
        print("Files modified in the revision: " + str(self.changed_files) + '\n')
        print("Lines modified and uncovered in the revision: " + str(self.uncovered_lines_list) + '\n')