Skip to content

Commit

Permalink
Add artifact evaluation script
Browse files Browse the repository at this point in the history
  • Loading branch information
nadia-polikarpova committed Oct 16, 2018
1 parent 848b6a0 commit 0cd6905
Show file tree
Hide file tree
Showing 2 changed files with 228 additions and 0 deletions.
2 changes: 2 additions & 0 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ initialCommands in console :=

mainClass in assembly := Some("org.tygus.suslik.synthesis.SynthesisRunner")

test in assembly := {}

assemblyJarName in assembly := "suslik.jar"

assemblyMergeStrategy in assembly := {
Expand Down
226 changes: 226 additions & 0 deletions evaluation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,226 @@
import sys
from subprocess import call
import os, os.path
import platform
import shutil
import csv

JAVA8 = 'java' # Path to Java8
SUSLIK_JAR = 'suslik.jar' # Path to suslik.jar
TIMEOUT = '-t=120000' # Timeout option for suslik
TEST_DIR = 'src/test/resources/synthesis/paper-benchmarks/' # Root directory for the tests
VARIANTS = ['phased', 'invert', 'fail', 'commute', 'none'] # Configurations
CSV_IN = 'stats.csv' # Intermediate CSV file produced by suslik

STATS = 'all_stats.csv' # Output file with all the stats
RESULTS = 'all_results' # Output file with synthesis results

class Benchmark:
def __init__(self, name, description):
self.name = name # Test file name
self.description = description # Description (in the table)

def str(self):
return self.name + ': ' + self.description

class BenchmarkGroup:
def __init__(self, name, benchmarks):
self.name = name # Id
self.benchmarks = benchmarks # List of benchmarks in this group

ALL_BENCHMARKS = [
BenchmarkGroup("Integers", [
Benchmark('ints/swap', 'swap two'),
Benchmark('ints/min2', 'min of two'),
]),
BenchmarkGroup("Linked List", [
Benchmark('sll-bounds/sll-len', 'length'),
Benchmark('sll-bounds/sll-max', 'max'),
Benchmark('sll-bounds/sll-min', 'min'),
Benchmark('sll/sll-singleton', 'singleton'),
Benchmark('sll/sll-free', 'dispose'),
Benchmark('sll/sll-init', 'initialize'),
Benchmark('sll/sll-copy', 'copy'),
Benchmark('sll/sll-append', 'append'),
Benchmark('sll/sll-delete-all', 'delete'),
]),
BenchmarkGroup("Sorted list", [
Benchmark('srtl/srtl-prepend', 'prepend'),
Benchmark('srtl/srtl-insert', 'insert'),
Benchmark('srtl/insertion-sort', 'insertion sort'),
]),
BenchmarkGroup("Tree", [
Benchmark('tree/tree-size', 'size'),
Benchmark('tree/tree-free', 'dispose'),
Benchmark('tree/tree-copy', 'copy'),
Benchmark('tree/tree-flatten', 'flatten w/append'),
Benchmark('tree/tree-flatten-acc', 'flatten w/acc'),
]),
BenchmarkGroup("BST", [
Benchmark('bst/bst-insert', 'insert'),
Benchmark('bst/bst-left-rotate', 'rotate left'),
Benchmark('bst/bst-right-rotate', 'rotate right'),
]),
]

class SynthesisResult:
def __init__(self, name, time, spec_size, code_size):
self.name = name # Benchmark name
self.time = time # Synthesis time (seconds)
self.spec_size = spec_size # Cumulative specification size (in AST nodes)
self.code_size = code_size # Cumulative synthesized code size (in AST nodes)
self.variant_times = {var : -3.0 for var in VARIANTS} # Synthesis times for SuSLik variants:


def str(self):
return self.name + ', ' + '{0:0.2f}'.format(self.time) + ', ' + self.spec_size + ', ' + self.code_size + ', ' + str(self.variant_times)

def run_benchmark(file):
'''Run single benchmark'''
with open(RESULTS, "a") as outfile:
print 'Running', file
call([JAVA8, '-jar', SUSLIK_JAR, file, TIMEOUT], stdout=outfile)
read_csv()

def var_option(var):
if var == 'none':
return ' '.join([var_option(v) for v in VARIANTS[:-1]])
else:
return ' '.join(['--' + v + ' false' for v in var.split('-')])

def test_variants():
'''Test all enabled configurations of each benchmark'''

for group in groups:
for b in group.benchmarks:
test = TEST_DIR + b.name
testFileName = test + '.syn'
if not os.path.isfile(testFileName):
print "Test file not found:", testFileName
else:
run_benchmark(testFileName) # Run default configuration

for var in variants:
varFileName = test + '-' + var + '.syn'
shutil.copy(testFileName, varFileName)
with open(varFileName, 'r+') as f:
content = f.read()
f.seek(0, 0)
if content.startswith('#'):
# already has a config line
lines = content.split('\n', 1)
f.write(lines[0].rstrip() + ' ' + var_option(var) + '\n' + lines[1])
else:
# no config line, create one
f.write('#. this ' + var_option(var) + '\n' + content)

run_benchmark(testFileName) # Run variant

def clean_variants():
'''Remove previously generated benchmark variants'''

for group in groups:
for b in group.benchmarks:
test = TEST_DIR + b.name
for var in VARIANTS:
varFileName = test + '-' + var + '.syn'
if os.path.isfile(varFileName):
os.remove(varFileName)

def read_csv():
'''Read stats file into the results dictionary'''
with open(CSV_IN, 'rb') as csvfile:
d = csv.excel
d.skipinitialspace = True
statsReader = csv.DictReader(csvfile, dialect = d)
for row in statsReader:
name = row['Name']
time = float(row['Time'])/1000
spec_size = row['Spec Size']
code_size = row['Code Size']

is_var = False
for var in VARIANTS:
if name.endswith(var):
# This is a test for a variant
is_var = True
suffix_len = len(var) + 1
store_result(name[:-suffix_len], time, spec_size, code_size, var)
if not is_var:
store_result(name, time, spec_size, code_size)

def format_time(t):
if t < 0:
return '-'
if t < 0.1:
return '<0.1'
else:
return '{0:0.1f}'.format(t)

def write_stats():
'''Write stats from dictionary into a file'''
with open(STATS, 'w') as stats:
stats.write('Name,Code,Spec,Time,T-phase,T-inv,T-fail,T-com,T-all\n')

for group in groups:
for b in group.benchmarks:
result = results[b.name]
row = \
b.name +\
',' + result.code_size + \
',' + result.spec_size + \
',' + format_time(result.time) + \
',' + format_time(result.variant_times['phased']) + \
',' + format_time(result.variant_times['invert']) + \
',' + format_time(result.variant_times['fail']) + \
',' + format_time(result.variant_times['commute']) + \
',' + format_time(result.variant_times['none']) + '\n'
stats.write(row)


def store_result(name, time, spec_size, code_size, variant = 'none'):
timeOrTO = -1.0 if code_size == 'FAIL' else time

if not(name in results):
results[name] = SynthesisResult(name, timeOrTO, spec_size, code_size)

if variant == 'none':
results[name].time = timeOrTO
results[name].code_size = code_size
else:
results[name].variant_times[variant] = timeOrTO

def cmdline():
import argparse
a = argparse.ArgumentParser()
a.add_argument('--unopt', action='store_true')
a.add_argument('--tiny', action='store_true')
return a.parse_args()

if __name__ == '__main__':
cl_opts = cmdline()

if os.path.isfile(RESULTS):
os.remove(RESULTS)

if cl_opts.unopt:
variants = [VARIANTS]
else:
variants = []

if cl_opts.tiny:
groups = ALL_BENCHMARKS[0:2]
else:
groups = ALL_BENCHMARKS

results = dict()
test_variants()

# print 'STATS'
# for res in results:
# print results[res].str()

write_stats()
clean_variants()


0 comments on commit 0cd6905

Please sign in to comment.