lambda-feedback
diff --git a/‎eval_tests_old.json‎
Lines changed: 0 additions & 76 deletions b/‎eval_tests_old.json‎
Lines changed: 0 additions & 76 deletions
diff --git a/‎evaluation_function/auto_tests.py‎
Lines changed: 72 additions & 81 deletions b/‎evaluation_function/auto_tests.py‎
Lines changed: 72 additions & 81 deletions
@@ -1,48 +1,18 @@
-import json
 import yaml
 from typing import Union
+from dataclasses import dataclass
 
 class TestFile:
-    """An abstraction over a test file, which may be in one of several different formats.
-    Currently, JSON and YAML are supported.
+    """An abstraction over a test file.
+    Currently, only YAML files are supported.
     """
 
-    def __init__(self, path: str) -> None:
+    def __init__(self, file_content: str, file_name: str) -> None:
         self.groups = []
 
-        # Attempt to open the given file. Exit with an error if this
-        # is not possible.
-        file_content = ""
-        try:
-            with open(path, "r") as test_file:
-                file_content = test_file.read()
-        except IOError as e:
-            raise Exception(f'Failed to open test file: "{e}"')
-
         # Get the file extension to determine which format should be used.
-        extension = path.split(".")[-1]
-        if extension == "json":
-            try:
-                questions = json.loads(file_content)
-
-                for question in questions:
-                    out = []
-                    title = question["title"]
-                    for part in question["parts"]:
-                        for response_area in part["responseAreas"]:
-                            params = response_area["params"]
-                            answer = response_area["answer"]
-                            for test in response_area["tests"]:
-                                test.update({"answer": answer})
-                                test.update({"params": params})
-                                out.append(SingleTest(test))
-                    self.groups.append({"title": title, "tests": out})
-
-            except KeyError as e:
-                raise Exception(f'The key "{e.args[0]}" doesn\'t exist, or is in the wrong place.')
-            except json.JSONDecodeError as e:
-                raise Exception(f'Error parsing JSON: "{e}"')
-        elif extension == "yaml":
+        extension = file_name.split(".")[-1]
+        if extension == "yaml":
             try:
                 # Tests are organised in groups of separate YAML documents (separated by "---")
                 docs = yaml.safe_load_all(file_content)
@@ -53,66 +23,85 @@ def __init__(self, path: str) -> None:
                         # Add an empty params field if none was provided.
                         if test.get("params") == None:
                             test["params"] = {}
-
-                        # Does this test have sub-tests?
-                        sub_tests = test.get("sub_tests")
-                        if sub_tests != None:
-                            params = test["params"]
-                            answer = test["answer"]
-
-                            for sub_test in sub_tests:
-                                sub_test["params"] = params
-                                sub_test["answer"] = answer
-                                tests.append(SingleTest(sub_test))
-                        else:
-                            tests.append(SingleTest(test))
+                        
+                        tests.append(SingleTest(test))
 
                     self.groups.append({"title": title, "tests": tests})
             except yaml.YAMLError as e:
                 raise Exception(f'Error parsing YAML: {e}')
         else:
             raise Exception(f'"{extension}" files are not supported as a test format.')
 
+
 class SingleTest:
     def __init__(self, test_dict: dict):
-        self.response = test_dict.get("response", "")
         self.answer = test_dict.get("answer", "")
         self.params = test_dict.get("params", {})
-        expected_result = test_dict.get("expected_result")
-        if not expected_result:
-            raise Exception("No expected result given for test")
-        self.is_correct = expected_result.get("is_correct")
-        self.results = expected_result
         self.desc = test_dict.get("description", "")
 
-    def evaluate(self, func) -> dict:
-        return func(self.response, self.answer, self.params)
+        self.sub_tests = []
+        if "sub_tests" in test_dict:
+            for sub_test in test_dict["sub_tests"]:
+                expected_result = sub_test.get("expected_result")
+                if not expected_result:
+                    raise Exception("No expected result given for test")
+
+                self.sub_tests.append(SubTest(
+                    sub_test.get("description", ""),
+                    sub_test.get("response", ""),
+                    expected_result.get("is_correct"),
+                    expected_result,
+                ))
+        else:
+            expected_result = test_dict.get("expected_result")
+            if not expected_result:
+                raise Exception("No expected result given for test")
+
+            self.sub_tests.append(SubTest(
+                "",
+                test_dict.get("response", ""),
+                expected_result.get("is_correct"),
+                expected_result,
+            ))
+
+    def evaluate_all(self, func) -> list[dict]:
+        return [func(test.response, self.answer, self.params) for test in self.sub_tests]
 
-    def compare(self, eval_result: dict) -> tuple[bool, str]:
-        eval_correct = eval_result["is_correct"]
-            
-        if eval_correct != self.is_correct:
-            return (
-                False,
-                f"response \"{self.response}\" with answer \"{self.answer}\" was {'' if eval_correct else 'in'}correct: {eval_result['feedback']}\nTest description: {self.desc}"
-            )
-        
-        # Are there any other fields in the eval function result that need to be checked?
-        if self.results != None:
-            # Check each one in turn
-            for key, value in self.results.items():
-                actual_result_val = eval_result.get(key)
-                if actual_result_val == None:
-                    return (False, f"No value returned for \"{key}\"")
+    def compare_all(self, eval_results: list[dict]) -> tuple[bool, str]:
+        for i, eval_result in enumerate(eval_results):
+            eval_correct = eval_result["is_correct"]
 
-                if actual_result_val != value:
-                    return (
-                        False,
-                        f"expected {key} = \"{value}\", got {key} = \"{actual_result_val}\"\nTest description: {self.desc}"
-                    )
+            if eval_correct != self.sub_tests[i].is_correct:
+                return (
+                    False,
+                    (f"response \"{self.sub_tests[i].response}\" with answer "
+                     f"\"{self.answer}\" was {'' if eval_correct else 'in'}correct: "
+                     f"{eval_result['feedback']}\nTest description: {self.sub_tests[i].desc}")
+                )
+            
+            # Are there any other fields in the eval function result that need to be checked?
+            if self.sub_tests[i].expected_result != None:
+                # Check each one in turn
+                for key, value in self.sub_tests[i].expected_result.items():
+                    actual_result_val = eval_result.get(key)
+                    if actual_result_val == None:
+                        return (False, f"No value returned for \"{key}\"")
+                    
+                    if actual_result_val != value:
+                        return (
+                            False,
+                            f"expected {key} = \"{value}\", got {key} = \"{actual_result_val}\"\nTest description: {self.desc}"
+                        )
 
         return (True, "")
 
+@dataclass
+class SubTest:
+    desc: str
+    response: str
+    is_correct: bool
+    expected_result: dict
+
 
 def auto_test(path, func):
     """A decorator that adds the necessary infrastructure to run tests defined
@@ -124,16 +113,18 @@ def _auto_test(orig_class):
         def test_auto(self):
             # Creating a TestFile can fail for several reasons.
             # If so, an exception is raised with a suitable error message
+            tests = {}
             try:
-                tests = TestFile(path)
+                with open(path, "r") as f:
+                    tests = TestFile(f.read(), path)
             except Exception as e:
                 self.fail(e)
 
             # Successfully loaded 
             for group in tests.groups:
                 for test in group["tests"]:
-                    results = test.evaluate(func)
-                    self.assertTrue(*test.compare(results.to_dict()))
+                    results = test.evaluate_all(func)
+                    self.assertTrue(*test.compare_all(map(lambda r: r.to_dict(), results)))
 
         orig_class.test_auto = test_auto # Add the test_auto function to the class
         return orig_class