From 001195a3ea1d78696c37e4754fbd8db939c4944c Mon Sep 17 00:00:00 2001 From: Xingyao Wang Date: Fri, 2 Aug 2024 10:12:34 +0800 Subject: [PATCH] reduce the duplication in run_controller (#3217) --- evaluation/EDA/run_infer.py | 3 +-- evaluation/agent_bench/run_infer.py | 3 +-- evaluation/biocoder/run_infer.py | 3 +-- evaluation/bird/run_infer.py | 4 ++-- evaluation/browsing_delegation/run_infer.py | 3 +-- evaluation/gaia/run_infer.py | 3 +-- evaluation/gorilla/run_infer.py | 3 +-- evaluation/gpqa/run_infer.py | 3 +-- evaluation/humanevalfix/run_infer.py | 3 +-- evaluation/logic_reasoning/run_infer.py | 3 +-- evaluation/miniwob/run_infer.py | 3 +-- evaluation/mint/run_infer.py | 3 +-- evaluation/ml_bench/run_infer.py | 3 +-- evaluation/swe_bench/run_infer.py | 3 +-- evaluation/toolqa/run_infer.py | 3 +-- evaluation/webarena/run_infer.py | 3 +-- opendevin/core/main.py | 20 ++++++-------------- 17 files changed, 23 insertions(+), 46 deletions(-) diff --git a/evaluation/EDA/run_infer.py b/evaluation/EDA/run_infer.py index 40e30da3de..98460c14e0 100644 --- a/evaluation/EDA/run_infer.py +++ b/evaluation/EDA/run_infer.py @@ -118,13 +118,12 @@ def process_instance( instruction += AGENT_CLS_TO_INST_SUFFIX[agent.__class__.__name__] # Here's how you can run the agent (similar to the `main` function) and get the final task state + config.max_iterations = metadata.max_iterations state: State | None = asyncio.run( run_controller( config=config, task_str=instruction, - max_iterations=metadata.max_iterations, - max_budget_per_task=config.max_budget_per_task, fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN[ agent.__class__.__name__ ], diff --git a/evaluation/agent_bench/run_infer.py b/evaluation/agent_bench/run_infer.py index 51f2d9095a..756744e63f 100644 --- a/evaluation/agent_bench/run_infer.py +++ b/evaluation/agent_bench/run_infer.py @@ -120,12 +120,11 @@ def process_instance( logger.info(f'Init script result: {init_res}') # Here's how you can run the agent (similar to the `main` function) and get the final task state + config.max_iterations = metadata.max_iterations state: State | None = asyncio.run( run_controller( config=config, task_str=instruction, - max_iterations=metadata.max_iterations, - max_budget_per_task=config.max_budget_per_task, fake_user_response_fn=FAKE_RESPONSES[agent.__class__.__name__], agent=agent, sandbox=sandbox, diff --git a/evaluation/biocoder/run_infer.py b/evaluation/biocoder/run_infer.py index 7de4509d5a..b743325744 100644 --- a/evaluation/biocoder/run_infer.py +++ b/evaluation/biocoder/run_infer.py @@ -166,12 +166,11 @@ def process_instance( sid = instance.test_case_id.replace('/', '__') # Here's how you can run the agent (similar to the `main` function) and get the final task state + config.max_iterations = metadata.max_iterations state: State | None = asyncio.run( run_controller( config=config, task_str=instruction, - max_iterations=metadata.max_iterations, - max_budget_per_task=config.max_budget_per_task, fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN[ agent.__class__.__name__ ], diff --git a/evaluation/bird/run_infer.py b/evaluation/bird/run_infer.py index 6b5a882438..59c45f53b2 100644 --- a/evaluation/bird/run_infer.py +++ b/evaluation/bird/run_infer.py @@ -209,13 +209,13 @@ def execute_sql(db_path, sql): ) # NOTE: You can actually set slightly different instruction for different agents instruction += AGENT_CLS_TO_INST_SUFFIX[agent.__class__.__name__] + # Here's how you can run the agent (similar to the `main` function) and get the final task state + config.max_iterations = metadata.max_iterations state: State | None = asyncio.run( run_controller( config=config, task_str=instruction, - max_iterations=metadata.max_iterations, - max_budget_per_task=config.max_budget_per_task, fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN[ agent.__class__.__name__ ], diff --git a/evaluation/browsing_delegation/run_infer.py b/evaluation/browsing_delegation/run_infer.py index f66338e211..6b128eeeea 100644 --- a/evaluation/browsing_delegation/run_infer.py +++ b/evaluation/browsing_delegation/run_infer.py @@ -67,12 +67,11 @@ def process_instance( f'NOTE: You should copy the "query" as is into the tag. DO NOT change ANYTHING in the query.' ) + config.max_iterations = metadata.max_iterations state: State | None = asyncio.run( run_controller( config=config, task_str=instruction, - max_iterations=metadata.max_iterations, - max_budget_per_task=config.max_budget_per_task, agent=agent, sid=env_id, ) diff --git a/evaluation/gaia/run_infer.py b/evaluation/gaia/run_infer.py index 2241667286..162730c83f 100644 --- a/evaluation/gaia/run_infer.py +++ b/evaluation/gaia/run_infer.py @@ -116,12 +116,11 @@ def process_instance( logger.info(f'Instruction:\n{instruction}', extra={'msg_type': 'OBSERVATION'}) # Here's how you can run the agent (similar to the `main` function) and get the final task state + config.max_iterations = metadata.max_iterations state: State | None = asyncio.run( run_controller( config=config, task_str=instruction, - max_iterations=metadata.max_iterations, - max_budget_per_task=config.max_budget_per_task, fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN[ agent.__class__.__name__ ], diff --git a/evaluation/gorilla/run_infer.py b/evaluation/gorilla/run_infer.py index f77f75ef07..ad818d307f 100644 --- a/evaluation/gorilla/run_infer.py +++ b/evaluation/gorilla/run_infer.py @@ -110,12 +110,11 @@ def process_instance(agent, question_id, question, metadata, reset_logger: bool # logger.info(f'Instruction:\n{instruction}', extra={'msg_type': 'OBSERVATION'}) # Here's how you can run the agent (similar to the `main` function) and get the final task state + config.max_iterations = metadata.max_iterations state: State | None = asyncio.run( run_controller( config=config, task_str=instruction, - max_iterations=metadata.max_iterations, - max_budget_per_task=config.max_budget_per_task, fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN.get( agent.__class__.__name__ ), diff --git a/evaluation/gpqa/run_infer.py b/evaluation/gpqa/run_infer.py index 97490c84b2..33baeefe36 100644 --- a/evaluation/gpqa/run_infer.py +++ b/evaluation/gpqa/run_infer.py @@ -226,12 +226,11 @@ def process_instance( """ # Here's how you can run the agent (similar to the `main` function) and get the final task state + config.max_iterations = metadata.max_iterations state: State | None = asyncio.run( run_controller( config=config, task_str=instruction, - max_iterations=metadata.max_iterations, - max_budget_per_task=config.max_budget_per_task, fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN.get( agent.__class__.__name__ ), diff --git a/evaluation/humanevalfix/run_infer.py b/evaluation/humanevalfix/run_infer.py index 593758dac4..ceaa5f8399 100644 --- a/evaluation/humanevalfix/run_infer.py +++ b/evaluation/humanevalfix/run_infer.py @@ -179,12 +179,11 @@ def process_instance( instruction += AGENT_CLS_TO_INST_SUFFIX[agent.__class__.__name__] # Here's how you can run the agent (similar to the `main` function) and get the final task state + config.max_iterations = metadata.max_iterations state: State | None = asyncio.run( run_controller( config=config, task_str=instruction, - max_iterations=metadata.max_iterations, - max_budget_per_task=config.max_budget_per_task, fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN.get( agent.__class__.__name__ ), diff --git a/evaluation/logic_reasoning/run_infer.py b/evaluation/logic_reasoning/run_infer.py index c6a6231777..6c0c19c038 100644 --- a/evaluation/logic_reasoning/run_infer.py +++ b/evaluation/logic_reasoning/run_infer.py @@ -185,12 +185,11 @@ def process_instance( exit_code, command_output = sandbox.execute('pip install scitools-pyke') # Here's how you can run the agent (similar to the `main` function) and get the final task state + config.max_iterations = metadata.max_iterations state: State | None = asyncio.run( run_controller( config=config, task_str=instruction, - max_iterations=metadata.max_iterations, - max_budget_per_task=config.max_budget_per_task, fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN.get( agent.__class__.__name__ ), diff --git a/evaluation/miniwob/run_infer.py b/evaluation/miniwob/run_infer.py index 3241a85e03..d0a732743d 100644 --- a/evaluation/miniwob/run_infer.py +++ b/evaluation/miniwob/run_infer.py @@ -85,12 +85,11 @@ def process_instance( } } + config.max_iterations = metadata.max_iterations state: State | None = asyncio.run( run_controller( config=config, task_str='PLACEHOLDER_GOAL', - max_iterations=metadata.max_iterations, - max_budget_per_task=config.max_budget_per_task, runtime_tools_config=runtime_tools_config, agent=agent, sandbox=get_sandbox(), diff --git a/evaluation/mint/run_infer.py b/evaluation/mint/run_infer.py index 27d4255733..775f894f81 100644 --- a/evaluation/mint/run_infer.py +++ b/evaluation/mint/run_infer.py @@ -148,12 +148,11 @@ def process_instance( }, ) + config.max_iterations = metadata.max_iterations state: State | None = asyncio.run( run_controller( config=config, task_str=instruction, - max_iterations=metadata.max_iterations, - max_budget_per_task=config.max_budget_per_task, fake_user_response_fn=fake_user_response_fn, agent=agent, sandbox=sandbox, diff --git a/evaluation/ml_bench/run_infer.py b/evaluation/ml_bench/run_infer.py index 70e3ba6e90..3bd383d352 100644 --- a/evaluation/ml_bench/run_infer.py +++ b/evaluation/ml_bench/run_infer.py @@ -155,12 +155,11 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool = instruction += AGENT_CLS_TO_INST_SUFFIX[agent.__class__.__name__] # Run the agent + config.max_iterations = metadata.max_iterations state: State | None = asyncio.run( run_controller( config=config, task_str=instruction, - max_iterations=metadata.max_iterations, - max_budget_per_task=config.max_budget_per_task, fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN.get( agent.__class__.__name__ ), diff --git a/evaluation/swe_bench/run_infer.py b/evaluation/swe_bench/run_infer.py index 7ed17c2317..52f643859a 100644 --- a/evaluation/swe_bench/run_infer.py +++ b/evaluation/swe_bench/run_infer.py @@ -280,12 +280,11 @@ def process_instance( instruction += AGENT_CLS_TO_INST_SUFFIX[agent.__class__.__name__] # Here's how you can run the agent (similar to the `main` function) and get the final task state + config.max_iterations = metadata.max_iterations state: State | None = asyncio.run( run_controller( config=config, task_str=instruction, - max_iterations=metadata.max_iterations, - max_budget_per_task=config.max_budget_per_task, fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN[ agent.__class__.__name__ ], diff --git a/evaluation/toolqa/run_infer.py b/evaluation/toolqa/run_infer.py index a92b1ea8b1..0fa3e64c1f 100644 --- a/evaluation/toolqa/run_infer.py +++ b/evaluation/toolqa/run_infer.py @@ -76,12 +76,11 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool = # logger.info(f'Instruction:\n{instruction}', extra={'msg_type': 'OBSERVATION'}) # Here's how you can run the agent (similar to the `main` function) and get the final task state + config.max_iterations = metadata.max_iterations state: State | None = asyncio.run( run_controller( config=config, task_str=instruction, - max_iterations=metadata.max_iterations, - max_budget_per_task=config.max_budget_per_task, fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN[ agent.__class__.__name__ ], diff --git a/evaluation/webarena/run_infer.py b/evaluation/webarena/run_infer.py index 6dd5f7af60..c09da459a7 100644 --- a/evaluation/webarena/run_infer.py +++ b/evaluation/webarena/run_infer.py @@ -86,12 +86,11 @@ def process_instance( } } + config.max_iterations = metadata.max_iterations state: State | None = asyncio.run( run_controller( config=config, task_str='PLACEHOLDER_GOAL', - max_iterations=metadata.max_iterations, - max_budget_per_task=config.max_budget_per_task, runtime_tools_config=runtime_tools_config, agent=agent, sandbox=get_sandbox(), diff --git a/opendevin/core/main.py b/opendevin/core/main.py index a8d9498691..91e1ab5915 100644 --- a/opendevin/core/main.py +++ b/opendevin/core/main.py @@ -40,8 +40,6 @@ def read_task_from_stdin() -> str: async def run_controller( config: AppConfig, task_str: str, - max_iterations: int | None = None, - max_budget_per_task: float | None = None, exit_on_message: bool = False, fake_user_response_fn: Callable[[State | None], str] | None = None, sandbox: Sandbox | None = None, @@ -56,8 +54,6 @@ async def run_controller( Args: config: The app config. task_str: The task to run. - max_iterations: The maximum number of iterations to run. - max_budget_per_task: The maximum budget per task. exit_on_message: quit if agent asks for a message from user (optional) fake_user_response_fn: An optional function that receives the current state (could be None) and returns a fake user response. sandbox: (will be deprecated) An optional sandbox to run the agent in. @@ -72,7 +68,6 @@ async def run_controller( agent = agent_cls( llm=LLM(config=config.get_llm_config_from_agent(config.default_agent)) ) - max_iterations = max_iterations or config.max_iterations # Logging logger.info( @@ -96,8 +91,8 @@ async def run_controller( # init controller with this initial state controller = AgentController( agent=agent, - max_iterations=max_iterations, - max_budget_per_task=max_budget_per_task, + max_iterations=config.max_iterations, + max_budget_per_task=config.max_budget_per_task, agent_to_llm_config=config.get_agent_to_llm_config_map(), event_stream=event_stream, initial_state=initial_state, @@ -212,17 +207,14 @@ async def on_event(event: Event): config.default_agent = args.agent_cls # if max budget per task is not sent on the command line, use the config value - max_budget_per_task = ( - args.max_budget_per_task - if args.max_budget_per_task - else config.max_budget_per_task - ) + if args.max_budget_per_task is not None: + config.max_budget_per_task = args.max_budget_per_task + if args.max_iterations is not None: + config.max_iterations = args.max_iterations asyncio.run( run_controller( config=config, task_str=task_str, - max_iterations=args.max_iterations, - max_budget_per_task=args.max_budget_per_task, ) )