Add artifact evaluation script

TyGuS · Oct 16, 2018 · 0cd6905 · 0cd6905
1 parent 848b6a0
commit 0cd6905
Show file tree

Hide file tree

Showing 2 changed files with 228 additions and 0 deletions.
diff --git a/build.sbt b/build.sbt
@@ -41,6 +41,8 @@ initialCommands in console :=
 
 mainClass in assembly := Some("org.tygus.suslik.synthesis.SynthesisRunner")
 
+test in assembly := {}
+
 assemblyJarName in assembly := "suslik.jar"
 
 assemblyMergeStrategy in assembly := {

diff --git a/evaluation.py b/evaluation.py
@@ -0,0 +1,226 @@
+import sys
+from subprocess import call
+import os, os.path
+import platform
+import shutil
+import csv
+
+JAVA8        = 'java'                                             # Path to Java8
+SUSLIK_JAR   = 'suslik.jar'                                       # Path to suslik.jar
+TIMEOUT      = '-t=120000'                                        # Timeout option for suslik
+TEST_DIR     = 'src/test/resources/synthesis/paper-benchmarks/'   # Root directory for the tests   
+VARIANTS     = ['phased', 'invert', 'fail', 'commute', 'none']    # Configurations
+CSV_IN       = 'stats.csv'                                        # Intermediate CSV file produced by suslik
+
+STATS        = 'all_stats.csv'                                    # Output file with all the stats
+RESULTS      = 'all_results'                                      # Output file with synthesis results
+
+class Benchmark:
+  def __init__(self, name, description):
+    self.name = name                # Test file name
+    self.description = description  # Description (in the table)
+
+  def str(self):
+    return self.name + ': ' + self.description
+
+class BenchmarkGroup:
+  def __init__(self, name, benchmarks):
+    self.name = name              # Id
+    self.benchmarks = benchmarks  # List of benchmarks in this group
+
+ALL_BENCHMARKS = [
+  BenchmarkGroup("Integers",  [
+    Benchmark('ints/swap', 'swap two'),
+    Benchmark('ints/min2', 'min of two'),
+    ]),    
+  BenchmarkGroup("Linked List", [
+    Benchmark('sll-bounds/sll-len', 'length'),
+    Benchmark('sll-bounds/sll-max', 'max'),
+    Benchmark('sll-bounds/sll-min', 'min'),
+    Benchmark('sll/sll-singleton', 'singleton'),
+    Benchmark('sll/sll-free', 'dispose'),
+    Benchmark('sll/sll-init', 'initialize'),
+    Benchmark('sll/sll-copy', 'copy'),
+    Benchmark('sll/sll-append', 'append'),
+    Benchmark('sll/sll-delete-all', 'delete'),
+    ]),
+  BenchmarkGroup("Sorted list", [
+    Benchmark('srtl/srtl-prepend', 'prepend'),
+    Benchmark('srtl/srtl-insert', 'insert'),
+    Benchmark('srtl/insertion-sort', 'insertion sort'),
+    ]),
+  BenchmarkGroup("Tree", [
+    Benchmark('tree/tree-size', 'size'),
+    Benchmark('tree/tree-free', 'dispose'),
+    Benchmark('tree/tree-copy', 'copy'),
+    Benchmark('tree/tree-flatten', 'flatten w/append'),
+    Benchmark('tree/tree-flatten-acc', 'flatten w/acc'),
+    ]),
+  BenchmarkGroup("BST", [
+    Benchmark('bst/bst-insert', 'insert'),
+    Benchmark('bst/bst-left-rotate', 'rotate left'),
+    Benchmark('bst/bst-right-rotate', 'rotate right'),
+    ]),
+]
+
+class SynthesisResult:
+  def __init__(self, name, time, spec_size, code_size):
+    self.name = name                                      # Benchmark name
+    self.time = time                                      # Synthesis time (seconds)
+    self.spec_size = spec_size                            # Cumulative specification size (in AST nodes)
+    self.code_size = code_size                            # Cumulative synthesized code size (in AST nodes)
+    self.variant_times = {var : -3.0 for var in VARIANTS} # Synthesis times for SuSLik variants:
+
+
+  def str(self):
+    return self.name + ', ' + '{0:0.2f}'.format(self.time) + ', ' + self.spec_size + ', ' + self.code_size + ', ' + str(self.variant_times)
+
+def run_benchmark(file):
+  '''Run single benchmark'''
+  with open(RESULTS, "a") as outfile:
+    print 'Running', file
+    call([JAVA8, '-jar', SUSLIK_JAR, file, TIMEOUT], stdout=outfile)
+    read_csv()
+
+def var_option(var):
+  if var == 'none':
+    return ' '.join([var_option(v) for v in VARIANTS[:-1]])
+  else:
+    return ' '.join(['--' + v + ' false' for v in var.split('-')])    
+
+def test_variants():
+  '''Test all enabled configurations of each benchmark'''
+
+  for group in groups:
+    for b in group.benchmarks:
+      test = TEST_DIR + b.name
+      testFileName = test + '.syn'
+      if not os.path.isfile(testFileName):
+        print "Test file not found:", testFileName
+      else:
+        run_benchmark(testFileName) # Run default configuration
+
+        for var in variants:
+          varFileName = test + '-' + var + '.syn'     
+          shutil.copy(testFileName, varFileName)
+          with open(varFileName, 'r+') as f:
+            content = f.read()
+            f.seek(0, 0)
+            if content.startswith('#'):
+              # already has a config line
+              lines = content.split('\n', 1)
+              f.write(lines[0].rstrip() + ' ' + var_option(var) + '\n' + lines[1])
+            else:
+              # no config line, create one
+              f.write('#. this ' + var_option(var) + '\n' + content)
+
+          run_benchmark(testFileName) # Run variant
+
+def clean_variants():
+  '''Remove previously generated benchmark variants'''
+
+  for group in groups:
+    for b in group.benchmarks:
+      test = TEST_DIR + b.name
+      for var in VARIANTS:
+        varFileName = test + '-' + var + '.syn'
+        if os.path.isfile(varFileName):        
+          os.remove(varFileName)
+
+def read_csv():
+  '''Read stats file into the results dictionary'''
+  with open(CSV_IN, 'rb') as csvfile:
+    d = csv.excel
+    d.skipinitialspace = True
+    statsReader = csv.DictReader(csvfile, dialect = d)
+    for row in statsReader:
+      name = row['Name']
+      time = float(row['Time'])/1000
+      spec_size = row['Spec Size']
+      code_size = row['Code Size']
+
+      is_var = False
+      for var in VARIANTS:
+        if name.endswith(var):
+          # This is a test for a variant
+          is_var = True
+          suffix_len = len(var) + 1
+          store_result(name[:-suffix_len], time, spec_size, code_size, var)
+      if not is_var:
+        store_result(name, time, spec_size, code_size)
+
+def format_time(t):
+  if t < 0:
+    return '-'
+  if t < 0.1:
+    return '<0.1'
+  else:
+    return '{0:0.1f}'.format(t)        
+
+def write_stats():
+  '''Write stats from dictionary into a file'''
+  with open(STATS, 'w') as stats:
+    stats.write('Name,Code,Spec,Time,T-phase,T-inv,T-fail,T-com,T-all\n')
+
+    for group in groups:
+      for b in group.benchmarks:
+        result = results[b.name]
+        row = \
+            b.name +\
+            ',' + result.code_size + \
+            ',' + result.spec_size + \
+            ',' + format_time(result.time) + \
+            ',' + format_time(result.variant_times['phased']) + \
+            ',' + format_time(result.variant_times['invert']) + \
+            ',' + format_time(result.variant_times['fail']) + \
+            ',' + format_time(result.variant_times['commute']) + \
+            ',' + format_time(result.variant_times['none']) + '\n'
+        stats.write(row)
+
+
+def store_result(name, time, spec_size, code_size, variant = 'none'):
+  timeOrTO = -1.0 if code_size == 'FAIL' else time
+
+  if not(name in results):
+    results[name] = SynthesisResult(name, timeOrTO, spec_size, code_size)
+
+  if variant == 'none':
+    results[name].time = timeOrTO
+    results[name].code_size = code_size
+  else:
+    results[name].variant_times[variant] = timeOrTO
+
+def cmdline():
+  import argparse
+  a = argparse.ArgumentParser()
+  a.add_argument('--unopt', action='store_true')
+  a.add_argument('--tiny', action='store_true')
+  return a.parse_args()          
+
+if __name__ == '__main__':
+  cl_opts = cmdline()
+
+  if os.path.isfile(RESULTS):        
+    os.remove(RESULTS)
+
+  if cl_opts.unopt:
+    variants = [VARIANTS]
+  else:
+    variants = []
+
+  if cl_opts.tiny:
+    groups = ALL_BENCHMARKS[0:2]
+  else:
+    groups = ALL_BENCHMARKS
+
+  results = dict()  
+  test_variants()
+
+  # print 'STATS'
+  # for res in results:
+    # print results[res].str()  
+
+  write_stats()
+  clean_variants()
+
+