All-Hands-AI · ryanhoangt · Sep 5, 2024 · Sep 5, 2024 · Sep 6, 2024 · Sep 6, 2024
diff --git a/agenthub/__init__.py b/agenthub/__init__.py
@@ -9,6 +9,7 @@
 
 from agenthub import (  # noqa: E402
     browsing_agent,
+    coact_agent,
     codeact_agent,
     codeact_swe_agent,
     delegator_agent,
@@ -23,6 +24,7 @@
     'delegator_agent',
     'dummy_agent',
     'browsing_agent',
+    'coact_agent',
 ]
 
 for agent in all_microagents.values():

diff --git a/agenthub/coact_agent/README.md b/agenthub/coact_agent/README.md
@@ -0,0 +1,36 @@
+# CoAct Multi-Agent Framework
+
+This folder implements a multi-agent workflow inspired by the CoAct framework ([paper](https://arxiv.org/abs/2406.13381)), which provides a robust structure for defining, planning, and executing tasks using multiple agents.
+
+## Agents
+
+1. `CoActPlannerAgent`:
+    - is responsible for exploring and creating a global plan. It can replan if there are issues with the previous one.
+    - has full capabilities of [CodeActAgent](https://github.com/All-Hands-AI/OpenHands/tree/main/agenthub/codeact_agent).
+2. `CoActExecutorAgent`:
+    - is responsible for executing the proposed plan. If it runs into issues with the plan, it can request a new one.
+    - also has full capabilities of [CodeActAgent](https://github.com/All-Hands-AI/OpenHands/tree/main/agenthub/codeact_agent).
+
+
+## Plan structure
+```markdown
+The user message is: <<Full user's message here.>>
+# Phases
+## Phase 1
+- reason: <<Assistant's thorough thoughts on why this phase is necessary, with tips/code to help the executor finish the task more easily.>>
+- description: <<Describe what needs to be done in this phase.>>
+- expected_state: <<Describe the expected state after this phase is completed. If the task involves code editing, provide the expectation of the code after the edit.>>
+<file_path> <<The file path to edit. In one phase only 1 file is edited.>> </file_path>
+<expected_content>
+<<The partial expected content here WITH LINE NUMBERS and a vertical bar before the actual code e.g., 1|, 11|.>>
+</expected_content>
+## Phase 2
+- reason: ...
+- description: ...
+- expected_state: ...
+<file_path> ... </file_path>
+<expected_content>
+...|...
+</expected_content>
+## Phase ...
+```
diff --git a/agenthub/coact_agent/__init__.py b/agenthub/coact_agent/__init__.py
@@ -0,0 +1,10 @@
+from agenthub.coact_agent.executor.executor_agent import (
+    LocalExecutorAgent as CoActExecutorAgent,
+)
+from agenthub.coact_agent.planner.planner_agent import (
+    GlobalPlannerAgent as CoActPlannerAgent,
+)
+from openhands.controller.agent import Agent
+
+Agent.register('CoActPlannerAgent', CoActPlannerAgent)  # GlobalPlannerAgent, registered under its CoAct name
+Agent.register('CoActExecutorAgent', CoActExecutorAgent)  # LocalExecutorAgent, registered under its CoAct name
diff --git a/agenthub/coact_agent/executor/action_parser.py b/agenthub/coact_agent/executor/action_parser.py
@@ -0,0 +1,71 @@
+import re
+
+from agenthub.codeact_agent.action_parser import (
+    CodeActActionParserAgentDelegate,
+    CodeActActionParserCmdRun,
+    CodeActActionParserFinish,
+    CodeActActionParserIPythonRunCell,
+    CodeActActionParserMessage,
+    CodeActResponseParser,
+)
+from openhands.controller.action_parser import ActionParser
+from openhands.events.action import (
+    Action,
+    AgentFinishAction,
+)
+
+
+class ExecutorResponseParser(CodeActResponseParser):
+    """Parses an executor LLM response into one of the following actions:
+    - CmdRunAction(command) - bash command to run
+    - IPythonRunCellAction(code) - IPython code to run
+    - AgentDelegateAction(agent, inputs) - delegate action for (sub)task
+    - MessageAction(content) - Message action to run (e.g. ask for clarification)
+    - AgentFinishAction() - end the interaction; also returned for an <execute_request> replan request
+    """
+
+    def __init__(self):
+        # Pay attention to the item order in self.action_parsers
+        super().__init__()
+        self.action_parsers = [
+            CodeActActionParserFinish(),
+            CodeActActionParserCmdRun(),
+            CodeActActionParserIPythonRunCell(),
+            CodeActActionParserAgentDelegate(),
+            CoActActionParserRequest(),
+        ]
+        self.default_parser = CodeActActionParserMessage()
+
+    def parse_response(self, response) -> str:
+        action = response.choices[0].message.content  # text of the first choice only
+        if action is None:
+            return ''
+        for action_suffix in ['bash', 'ipython', 'browse', 'request']:
+            if (
+                f'<execute_{action_suffix}>' in action
+                and f'</execute_{action_suffix}>' not in action
+            ):
+                action += f'</execute_{action_suffix}>'  # re-close a tag that was opened but never closed
+        return action
+
+
+class CoActActionParserRequest(ActionParser):  # handles <execute_request>...</execute_request> blocks
+    def __init__(self):
+        self.request = None  # regex match from the latest check_condition call, or None
+
+    def check_condition(self, action_str: str) -> bool:
+        self.request = re.search(  # match is stored so that parse() can read it afterwards
+            r'<execute_request>(.*)</execute_request>', action_str, re.DOTALL
+        )
+        return self.request is not None
+
+    def parse(self, action_str: str) -> Action:
+        assert (
+            self.request is not None
+        ), 'self.request should not be None when parse is called'
+
+        replan_request = self.request.group(1).strip()  # text between the tags, surrounding whitespace removed
+        return AgentFinishAction(  # the request text is passed as both the thought and outputs['content']
+            thought=replan_request,
+            outputs={'content': replan_request},
+        )
diff --git a/agenthub/coact_agent/executor/executor_agent.py b/agenthub/coact_agent/executor/executor_agent.py
@@ -0,0 +1,23 @@
+import os
+
+from agenthub.coact_agent.executor.action_parser import ExecutorResponseParser
+from agenthub.codeact_agent.codeact_agent import CodeActAgent
+from openhands.core.config import AgentConfig
+from openhands.llm.llm import LLM
+from openhands.runtime.plugins.agent_skills import AgentSkillsRequirement
+from openhands.utils.prompt import PromptManager
+
+
+class LocalExecutorAgent(CodeActAgent):  # CodeActAgent variant with its own parser, prompt dir and stop sequence
+    VERSION = '1.0'
+
+    def __init__(self, llm: LLM, config: AgentConfig) -> None:
+        super().__init__(llm, config)
+
+        self.action_parser = ExecutorResponseParser()  # parser that also recognizes <execute_request>
+        self.prompt_manager = PromptManager(
+            prompt_dir=os.path.join(os.path.dirname(__file__)),  # the directory containing this module
+            agent_skills_docs=AgentSkillsRequirement.documentation,
+            micro_agent=self.micro_agent,
+        )
+        self.stop_sequences.append('</execute_request>')  # NOTE(review): presumably halts generation at the closing tag - confirm
diff --git a/agenthub/coact_agent/executor/system_prompt.j2 b/agenthub/coact_agent/executor/system_prompt.j2
@@ -0,0 +1,62 @@
+{% set MINIMAL_SYSTEM_PREFIX %}
+You are an autonomous intelligent programming agent playing the role of a subordinate employee responsible for local planning and execution of specific tasks in a multi-tier task execution structure, tasked with software development. You will be given coding-based tasks. The global agent has set a global plan for the tasks, divided into multiple phases. These phase plans will be given to you as a whole. Your responsibility is to perform them and return the results to the global agent. When you face some issues that require a new global plan, you can request a new global plan from the global planner agent.
+
+Here's the information you'll have:
+* The user message and the broken-down phase list: These are the tasks you're trying to complete now. Because your plan may be part of a larger plan, you must only focus on the phases given to you.
+* The current codebase: This is what you need to navigate through and make the changes to complete the tasks given by the global agent.
+
+The agent can use a Python environment with <execute_ipython>, e.g.:
+<execute_ipython>
+print("Hello World!")
+</execute_ipython>
+
+The agent can execute bash commands wrapped with <execute_bash>, e.g. <execute_bash> ls </execute_bash>.
+If a bash command returns exit code `-1`, this means the process is not yet finished.
+The assistant must then send a second <execute_bash>. The second <execute_bash> can be empty
+(which will retrieve any additional logs), or it can contain text to be sent to STDIN of the running process,
+or it can contain the text `ctrl+c` to interrupt the process.
+
+For commands that may run indefinitely, the output should be redirected to a file and the command run
+in the background, e.g. <execute_bash> python3 app.py > server.log 2>&1 & </execute_bash>
+If a command execution result says "Command timed out. Sending SIGINT to the process", the assistant should retry running the command in the background.
+
+As a local executor agent, there are some additional actions that you can use to communicate back to the global planner agent:
+- `<execute_request>`: You have encountered an exception in the execution process. You suspect problems with the global planner's plan and trigger a request for replanning. Explain why you decide to request a new global plan using this action.
+
+{% endset %}
+{% set BROWSING_PREFIX %}
+The agent can browse the Internet with <execute_browse> and </execute_browse>.
+For example, <execute_browse> Tell me the usa's president using google search </execute_browse>.
+Or <execute_browse> Tell me what is in http://example.com </execute_browse>.
+{% endset %}
+{% set PIP_INSTALL_PREFIX %}
+The agent can install Python packages using the %pip magic command in an IPython environment by using the following syntax: <execute_ipython> %pip install [package needed] </execute_ipython> and should always import packages and define variables before starting to use them.
+{% endset %}
+{% set SYSTEM_PREFIX = MINIMAL_SYSTEM_PREFIX + BROWSING_PREFIX + PIP_INSTALL_PREFIX %}
+{% set COMMAND_DOCS %}
+Apart from the standard Python library, the agent can also use the following functions (already imported) in <execute_ipython> environment:
+{{ agent_skills_docs }}
+IMPORTANT:
+- `open_file` only returns the first 100 lines of the file by default! The agent MUST use `scroll_down` repeatedly to read the full file BEFORE making edits!
+- The agent shall adhere to THE `edit_file_by_replace`, `append_file` and `insert_content_at_line` FUNCTIONS REQUIRING PROPER INDENTATION. If the agent would like to add the line '        print(x)', it must fully write the line out, with all leading spaces before the code!
+- Indentation is important and code that is not indented correctly will fail and require fixing before it can be run.
+- Any code issued should be less than 50 lines to avoid context being cut off!
+- After EVERY `create_file` the method `append_file` shall be used to write the FIRST content!
+- For `edit_file_by_replace` NEVER provide empty parameters!
+- For `edit_file_by_replace` the file must be read fully before any replacements!
+{% endset %}
+{% set SYSTEM_SUFFIX %}
+Responses should be concise.
+The agent should attempt fewer things at a time instead of putting too many commands OR too much code in one "execute" block.
+Include ONLY ONE <execute_ipython>, <execute_bash>, or <execute_browse> per response, unless the agent is finished with the task or needs more input or action from the user in order to proceed.
+If the agent is finished with the task you MUST include <finish></finish> in your response.
+IMPORTANT: Execute code using <execute_ipython>, <execute_bash>, or <execute_browse> whenever possible.
+The agent should utilize full file paths and the `pwd` command to prevent path-related errors.
+The agent must avoid apologies and thanks in its responses.
+Remember to execute ALL the phases of the global plan and only return the summary of the whole process to the global agent. Strictly follow it and do NOT do anything beyond the scope of the global plan.
+
+{% endset %}
+{# Combine all parts without newlines between them #}
+{{ SYSTEM_PREFIX -}}
+{{- COMMAND_DOCS -}}
+{{- SYSTEM_SUFFIX }}