From b624ace3ce42ae6ae02509e244165efc9931a0ab Mon Sep 17 00:00:00 2001 From: Kevin Lin Date: Fri, 27 Dec 2024 12:06:01 -0800 Subject: [PATCH] evaluate script --- evaluate_gsm8k.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/evaluate_gsm8k.py b/evaluate_gsm8k.py index 9709b2af05..ca8a6ba0ec 100644 --- a/evaluate_gsm8k.py +++ b/evaluate_gsm8k.py @@ -26,9 +26,9 @@ def evaluate(input_file: str): final_answer = "" for message in obj['response']['messages']: - if message['message_type'] == "function_call": - if message['function_call']['name'] == "send_message": - arguments = json.loads(message['function_call']['arguments']) + if message['message_type'] == "tool_call_message": + if message['tool_call']['name'] == "send_message": + arguments = json.loads(message['tool_call']['arguments']) final_answer = arguments['message'] # TODO: make sure this matches lm-evaluation-harness