Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JSON evaluator bug fixes #3

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 37 additions & 25 deletions src/evaluators/json_evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def __init__(
self.result = {}

def __call__(self, ground_truth, actual, eval_schema={}):
self.compare_dicts(ground_truth, actual, eval_schema)
self.compare_values(ground_truth, actual, eval_schema, None)
for wrapper in self.eval_wrappers:
self.result[f"{wrapper.name}.ratio"] = (
wrapper.calculate_ratio()
Expand All @@ -38,42 +38,54 @@ def __call__(self, ground_truth, actual, eval_schema={}):
return self.result

def compare_values(self, ground_truth, actual, eval_schema, curr_key):
if isinstance(ground_truth, dict) and isinstance(actual, dict):
if isinstance(ground_truth, dict):
return self.compare_dicts(ground_truth, actual, eval_schema, curr_key)
elif isinstance(ground_truth, list) and isinstance(actual, list):
elif isinstance(ground_truth, list):
return self.compare_lists(ground_truth, actual, eval_schema, curr_key)
else:
for wrapper in self.eval_wrappers:
score = wrapper.instance(
ground_truth,
actual,
eval_schema.get(wrapper.name, None),
)
if actual is None:
score = 0
else:
score = wrapper.instance(
ground_truth,
actual,
eval_schema.get(wrapper.name, None),
)
wrapper.total_strings_compared += 1
self.result[f"{wrapper.name}.{curr_key}"] = score
wrapper.total_score += score

def compare_dicts(self, ground_truth_dict, actual_dict, eval_schema, curr_key=None):
for key in ground_truth_dict:
if key not in actual_dict:
for string_evaluator in self.eval_wrappers:
string_evaluator.total_strings_compared += 1
else:
next_key = f"{curr_key}.{key}" if curr_key is not None else key
self.compare_values(
ground_truth_dict[key],
actual_dict[key],
eval_schema.get(key, {}),
next_key,
)
# fall back to defaults (None / {}) when actual_dict or eval_schema is None
next_key = f"{curr_key}.{key}" if curr_key is not None else key
actual = actual_dict.get(key, None) if actual_dict is not None else None
curr_eval_schema = eval_schema.get(key, {}) if eval_schema is not None else {}

self.compare_values(
ground_truth_dict[key],
actual,
curr_eval_schema,
next_key,
)

def compare_lists(self, ground_truth_list, actual_list, eval_schema, curr_key):
if not eval_schema:
eval_schema = [{}] * len(ground_truth_list)
for i in range(len(ground_truth_list)):
# fall back to defaults (None / {}) when actual_list or eval_schema cannot be indexed at i
next_key = f"{curr_key}[{i}]" if curr_key is not None else f"[{i}]"
try:
actual = actual_list[i]
except Exception:
actual = None
try:
curr_eval_schema = eval_schema[i]
except Exception:
curr_eval_schema = {}

for i, (ground_truth_item, actual_item, schema) in enumerate(
zip(ground_truth_list, actual_list, eval_schema)
):
self.compare_values(
ground_truth_item, actual_item, schema, f"{curr_key}[{i}]"
ground_truth_list[i],
actual,
curr_eval_schema,
next_key,
)
83 changes: 82 additions & 1 deletion src/evaluators/tests/test_json_evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ def test_json_evaluator_no_eval_schema(self):
},
# key5 is missing
},
# key3 is missing
"key4": "value10", # correct 6
# key2 is missing
}
# Total correct = 6
# ratio = 6/10 = 0.6
Expand Down Expand Up @@ -167,3 +167,84 @@ def test_json_evaluator_no_eval_schema_with_default_config(self):
result = json_evaluator(ground_truth_data, actual_data)
assert result["CustomStringEvaluator.ratio"] == 0.5
assert result['FuzzStringEvaluator.ratio'] == 0.764

def test_json_evaluator_different_array_length_in_actual(self):
    """List items missing from the actual data are scored as 0."""
    # Four scored values in total: key1 plus the three items of key2.
    expected = {
        "key1": "value1",
        "key2": ["test1", "test2", "test3"],
    }
    # key1 and key2[0] match; key2[1] and key2[2] are absent.
    observed = {
        "key1": "value1",
        "key2": ["test1"],
    }

    json_evaluator = JsonEvaluator([CustomStringEvaluator()])
    result = json_evaluator(expected, observed)

    # 2 correct out of 4 values -> ratio 0.5
    assert result["CustomStringEvaluator.ratio"] == 0.5
    per_key_scores = {
        "CustomStringEvaluator.key1": 1,
        "CustomStringEvaluator.key2[0]": 1,
        "CustomStringEvaluator.key2[1]": 0,
        "CustomStringEvaluator.key2[2]": 0,
    }
    for result_key, score in per_key_scores.items():
        assert result[result_key] == score

def test_json_evaluator_handles_array_first_value(self):
    """A list at the top level is evaluated, with keys prefixed by [index]."""
    # Five scored values: key1, the three items of key2, and the bare string.
    expected = [
        {"key1": "value1"},
        {"key2": ["1", "2", "3"]},
        "array_value_3",
    ]
    # Only key2[1] differs from the ground truth.
    observed = [
        {"key1": "value1"},
        {"key2": ["1", "wrong", "3"]},
        "array_value_3",
    ]

    json_evaluator = JsonEvaluator([CustomStringEvaluator()])
    result = json_evaluator(expected, observed)

    # 4 correct out of 5 values -> ratio 0.8
    assert result["CustomStringEvaluator.ratio"] == 0.8
    per_key_scores = {
        "CustomStringEvaluator.[0].key1": 1,
        "CustomStringEvaluator.[1].key2[0]": 1,
        "CustomStringEvaluator.[1].key2[1]": 0,
        "CustomStringEvaluator.[1].key2[2]": 1,
        "CustomStringEvaluator.[2]": 1,
    }
    for result_key, score in per_key_scores.items():
        assert result[result_key] == score

def test_json_evaluator_handles_array_dict_mismatch(self):
    """A dict supplied where the ground truth is a list scores every value as 0."""
    # Five scored values: key1, the three items of key2, and the bare string.
    expected = [
        {"key1": "value1"},
        {"key2": ["1", "2", "3"]},
        "array_value_3",
    ]
    # The actual data is a dict rather than a list, so nothing should match.
    observed = {
        "key1": "value1",
        "key2": ["1", "wrong", "3"],
    }

    json_evaluator = JsonEvaluator([CustomStringEvaluator()])
    result = json_evaluator(expected, observed)

    # 0 correct out of 5 values -> ratio 0
    assert result["CustomStringEvaluator.ratio"] == 0
    zero_scored_keys = (
        "CustomStringEvaluator.[0].key1",
        "CustomStringEvaluator.[1].key2[0]",
        "CustomStringEvaluator.[1].key2[1]",
        "CustomStringEvaluator.[1].key2[2]",
        "CustomStringEvaluator.[2]",
    )
    for result_key in zero_scored_keys:
        assert result[result_key] == 0
Loading