Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(experimental) test_runtime_client.py to test _execute_bash() [DO NOT MERGE!] #3040

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -210,10 +210,13 @@ cache

# configuration
config.toml
config.toml_
config.toml.bak

containers/agnostic_sandbox

# swe-bench-eval
image_build_logs
run_instance_logs

od_runtime_*.tar
118 changes: 84 additions & 34 deletions opendevin/runtime/client/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import asyncio
import os
import re
import uuid
from pathlib import Path

import pexpect
Expand Down Expand Up @@ -61,55 +62,102 @@ def __init__(self, plugins_to_load: list[Plugin], work_dir: str) -> None:

def _init_bash_shell(self, work_dir: str) -> None:
self.shell = pexpect.spawn('/bin/bash', encoding='utf-8', echo=False)
self.__bash_PS1 = r'[PEXPECT_BEGIN] \u@\h:\w [PEXPECT_END]'
Copy link
Collaborator Author

@tobitege tobitege Jul 21, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@xingyaoww why set the prompt up so complex to begin with? 🤔
Doesn't this make detection harder than it should be?

Hmm... just to avoid PS1= ... :D

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just to avoid PS1= ... :D
It is just the agent could possibly run env to check the env var, then found itself dead in the water because of the PS1 pattern 😓

why set the prompt up so complex to begin with?

Do you mean the user@host:/workdir thing, or the [PEXPECT_BEGIN]? For the former, i think it will be useful for the agent to know whoami and where am i - so i keep those user/host/workdir info. For [PEXPECT_BEGIN] - i spend sometime and i think that's probably the easiest way to make the \u@\h:\w regex work.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, user@host. But I see the advantage in extra information for the agent there, which seems the obvious way to do now that you mention it! 👍


# This should NOT match "PS1=\u@\h:\w [PEXPECT]$" when `env` is executed
self.__bash_expect_regex = (
r'\[PEXPECT_BEGIN\] ([a-z0-9_-]*)@([a-zA-Z0-9.-]*):(.+) \[PEXPECT_END\]'
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There was a reason why i use [a-z0-9_-]*)@([a-zA-Z0-9.-]*):(.+) - because other wise, when agent runs env, it will be stopped by the PS1= env var printed by the env command, causing the parsing to be broken.

Copy link
Collaborator

@xingyaoww xingyaoww Jul 20, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes! I think this got caught in the test cases - i added that one to make sure it is not broken :D

image

self.prompt_uuid = str(uuid.uuid4())
self.__bash_PS1 = (
f'[PEXPECT_BEGIN_{self.prompt_uuid}] $ [PEXPECT_END_{self.prompt_uuid}]'
)
print(self.__bash_PS1)
self.__bash_expect_regex = f'\\[PEXPECT_BEGIN_{self.prompt_uuid}\\] \\$ \\[PEXPECT_END_{self.prompt_uuid}\\]'

self.shell.sendline(f'export PS1="{self.__bash_PS1}"')
self.shell.expect(self.__bash_expect_regex)
self._expect_prompt()

# Disable bracketed paste mode
self.shell.sendline("bind 'set enable-bracketed-paste off'")
self._expect_prompt()

self.shell.sendline(f'cd {work_dir}')
self.shell.expect(self.__bash_expect_regex)
self._expect_prompt()

def _get_bash_prompt(self):
ps1 = self.shell.after
# parse the ps1 to get username, hostname, and working directory
matched = re.match(self.__bash_expect_regex, ps1)
assert (
matched is not None
), f'Failed to parse bash prompt: {ps1}. This should not happen.'
username, hostname, working_dir = matched.groups()

# re-assemble the prompt
prompt = f'{username}@{hostname}:{working_dir} '
if username == 'root':
prompt += '#'
else:
prompt += '$'
return prompt + ' '

def _execute_bash(self, command, keep_prompt: bool = True) -> tuple[str, int]:
logger.debug(f'Executing command: {command}')
prompt = self.shell.after
# Extract the actual prompt content between the PEXPECT markers
match = re.search(r'\[PEXPECT_BEGIN_.*?\] \$ \[PEXPECT_END_.*?\]', prompt)
if match:
return match.group(0)
return prompt

def _execute_bash(
self, command, keep_prompt: bool = True, timeout=30
) -> tuple[str, int]:
logger.info(f'Executing command: {command}')
self.shell.sendline(command)
self.shell.expect(self.__bash_expect_regex)

output = self.shell.before
output = ''
while True:
try:
index = self.shell.expect(
[self.__bash_expect_regex, r'\(y/n\)|\[y/N\]'], timeout=timeout
)
output += self.shell.before
if index == 0: # Matched our prompt
break
elif index == 1: # Matched an interactive prompt
self.shell.sendline('n') # Default to 'no' for safety
output += "(Automatically responded 'n' to prompt)\n"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm thinking if we can just return an CmdRunOutputObservation directly to the agent with this prefix (with the (y/n) added there of course): something like

CmdRunOutputObservation(content="XXXX (y/n)", exit_code=None)
# We can't get exit code here bc the shell is abnormal

And then we can let agent to decide by either running

<execute_bash> y </execute_bash>
OR
<execute_bash> n </execute_bash>

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That'd be awesome, if that would work!

except pexpect.TIMEOUT:
raise RuntimeError(f'Timeout executing command: {command}')

if keep_prompt:
output += '\r\n' + self._get_bash_prompt()
logger.debug(f'Command output: {output}')
logger.info(f'Command output: {output}')

# Get exit code
self.shell.sendline('echo $?')
logger.debug(f'Executing command for exit code: {command}')
self.shell.expect(self.__bash_expect_regex)
_exit_code_output = self.shell.before
logger.debug(f'Exit code Output: {_exit_code_output}')
exit_code = int(_exit_code_output.strip())
logger.info(f'Sent "echo $?" to get exit code for command: {command}')
self._expect_prompt(
timeout=5
) # Use a shorter timeout since we're expecting immediate output

# Capture the raw output for debugging
raw_exit_code_output = self.shell.before.strip()
logger.info(f'Raw exit code output before cleaning:\n{raw_exit_code_output}')

# Extract the last non-empty line as the exit code
exit_code_lines = raw_exit_code_output.split('\n')
exit_code_str = next(
(line.strip() for line in reversed(exit_code_lines) if line.strip()), None
)
if exit_code_str is None:
logger.error(
f'Failed to find a valid exit code in output: {raw_exit_code_output}'
)
exit_code = 1 # Default to non-zero exit code for safety
else:
try:
exit_code = int(exit_code_str)
except ValueError:
logger.error(f'Failed to convert exit code to integer: {exit_code_str}')
exit_code = 1

# Log the final command output and exit code
logger.info(f'Final command output: {output}')
logger.info(f'Final parsed exit code: {exit_code}')

return output, exit_code

def capture_full_session_output(self):
self.shell.sendline('echo "Capturing full session output"')
self._expect_prompt()
full_output = self.shell.before.strip()
logger.info(f'Full session output:\n{full_output}')

def _expect_prompt(self, timeout=10):
try:
self.shell.expect(self.__bash_expect_regex, timeout=timeout)
except pexpect.TIMEOUT:
raise RuntimeError(f'Timeout waiting for prompt after {timeout} seconds')

async def run_action(self, action) -> Observation:
action_type = action.action
observation = await getattr(self, action_type)(action)
Expand Down Expand Up @@ -214,8 +262,10 @@ async def browse_interactive(self, action: BrowseInteractiveAction) -> Observati
return await browse(action, self.browser)

def close(self):
self.shell.sendline('exit')
self.shell.close()
self.browser.close()
if hasattr(self, 'browser') and self.browser:
self.browser.close()


# def test_run_commond():
Expand Down
29 changes: 23 additions & 6 deletions opendevin/runtime/client/runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,14 +81,15 @@ async def ainit(self, env_vars: dict[str, str] | None = None):
# NOTE: You can need set DEBUG=true to update the source code
# inside the container. This is useful when you want to test/debug the
# latest code in the runtime docker container.
update_source_code=config.debug,
update_source_code=False, # config.debug,
)
self.container = await self._init_container(
self.sandbox_workspace_dir,
mount_dir=config.workspace_mount_path,
plugins=self.plugins,
)
# Initialize the env vars
# MUST call super().ainit() to initialize both default env vars
# AND the ones in env vars!
await super().ainit(env_vars)

@staticmethod
Expand Down Expand Up @@ -129,21 +130,37 @@ async def _init_container(
else:
port_mapping = {f'{self._port}/tcp': self._port}

container = self.docker_client.containers.run(
self.container_image,
command=(
env_vars = {'PYTHONUNBUFFERED': '1'}
if config.debug:
env_vars['DEBUG'] = 'true'

# tobitege: for images without mamba, this should work:
# cmd = (
# f'poetry run python -u -m opendevin.runtime.client.client {self._port} '
# f'--working-dir {sandbox_workspace_dir} '
# f'--plugins {plugin_names}'
# )

# ORIGINAL:
cmd = (
(
f'/opendevin/miniforge3/bin/mamba run --no-capture-output -n base '
'PYTHONUNBUFFERED=1 poetry run '
f'python -u -m opendevin.runtime.client.client {self._port} '
f'--working-dir {sandbox_workspace_dir} '
f'--plugins {plugin_names}'
),
)

container = self.docker_client.containers.run(
self.container_image,
command=cmd,
network_mode=network_mode,
ports=port_mapping,
working_dir='/opendevin/code/',
name=self.container_name,
detach=True,
environment={'DEBUG': 'true'} if config.debug else None,
environment=env_vars,
volumes={mount_dir: {'bind': sandbox_workspace_dir, 'mode': 'rw'}},
)
logger.info(f'Container started. Server url: {self.api_url}')
Expand Down
5 changes: 3 additions & 2 deletions opendevin/runtime/runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,9 @@ async def ainit(self, env_vars: dict[str, str] | None = None) -> None:

This method should be called after the runtime's constructor.
"""
logger.debug(f'Adding default env vars: {self.DEFAULT_ENV_VARS}')
await self.add_env_vars(self.DEFAULT_ENV_VARS)
if self.DEFAULT_ENV_VARS:
logger.debug(f'Adding default env vars: {self.DEFAULT_ENV_VARS}')
await self.add_env_vars(self.DEFAULT_ENV_VARS)
if env_vars is not None:
logger.debug(f'Adding provided env vars: {env_vars}')
await self.add_env_vars(env_vars)
Expand Down
Loading