Skip to content

feat(agent): remind the agent that it can use timeout to increase the amount of time the command is running #8932

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jun 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 3 additions & 6 deletions openhands/runtime/utils/bash.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
CmdOutputMetadata,
CmdOutputObservation,
)
from openhands.runtime.utils.bash_constants import TIMEOUT_MESSAGE_TEMPLATE
from openhands.utils.shutdown_listener import should_continue


Expand Down Expand Up @@ -379,9 +380,7 @@ def _handle_nochange_timeout_command(
metadata = CmdOutputMetadata() # No metadata available
metadata.suffix = (
f'\n[The command has no new output after {self.NO_CHANGE_TIMEOUT_SECONDS} seconds. '
"You may wait longer to see additional output by sending empty command '', "
'send other commands to interact with the current process, '
'or send keys to interrupt/kill the command.]'
f'{TIMEOUT_MESSAGE_TEMPLATE}]'
)
command_output = self._get_command_output(
command,
Expand Down Expand Up @@ -414,9 +413,7 @@ def _handle_hard_timeout_command(
metadata = CmdOutputMetadata() # No metadata available
metadata.suffix = (
f'\n[The command timed out after {timeout} seconds. '
"You may wait longer to see additional output by sending empty command '', "
'send other commands to interact with the current process, '
'or send keys to interrupt/kill the command.]'
f'{TIMEOUT_MESSAGE_TEMPLATE}]'
)
command_output = self._get_command_output(
command,
Expand Down
7 changes: 7 additions & 0 deletions openhands/runtime/utils/bash_constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Shared guidance text for timeout notices, reusable across the different
# timeout scenarios. It lists, in order, the follow-up actions available once
# a command has stopped producing output or has exceeded its time limit.
# The options are joined with ', ' so the final text reads as one sentence.
TIMEOUT_MESSAGE_TEMPLATE = ', '.join(
    (
        "You may wait longer to see additional output by sending empty command ''",
        'send other commands to interact with the current process',
        'send keys to interrupt/kill the command',
        'or use the timeout parameter in execute_bash for future commands.',
    )
)
9 changes: 3 additions & 6 deletions openhands/runtime/utils/windows_bash.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
CmdOutputMetadata,
CmdOutputObservation,
)
from openhands.runtime.utils.bash_constants import TIMEOUT_MESSAGE_TEMPLATE
from openhands.utils.shutdown_listener import should_continue

pythonnet.load('coreclr')
Expand Down Expand Up @@ -559,9 +560,7 @@ def _check_active_job(
else:
metadata.suffix = (
f'\n[The command timed out after {timeout_seconds} seconds. '
"You may wait longer to see additional output by sending empty command '', "
'send other commands to interact with the current process, '
'or send keys to interrupt/kill the command.]'
f'{TIMEOUT_MESSAGE_TEMPLATE}]'
)

return CmdOutputObservation(
Expand Down Expand Up @@ -1331,9 +1330,7 @@ def execute(self, action: CmdRunAction) -> CmdOutputObservation | ErrorObservati
# Align suffix with bash.py timeout message
suffix = (
f'\n[The command timed out after {timeout_seconds} seconds. '
"You may wait longer to see additional output by sending empty command '', "
'send other commands to interact with the current process, '
'or send keys to interrupt/kill the command.]'
f'{TIMEOUT_MESSAGE_TEMPLATE}]'
)
elif shutdown_requested:
# Align suffix with bash.py equivalent (though bash.py might not have specific shutdown message)
Expand Down
15 changes: 11 additions & 4 deletions tests/runtime/test_bash.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,16 @@
from openhands.events.observation import CmdOutputObservation, ErrorObservation
from openhands.runtime.impl.cli.cli_runtime import CLIRuntime
from openhands.runtime.impl.local.local_runtime import LocalRuntime
from openhands.runtime.utils.bash_constants import TIMEOUT_MESSAGE_TEMPLATE


def get_timeout_suffix(timeout_seconds):
    """Return the expected hard-timeout notice for ``timeout_seconds``.

    The result is the bracketed sentence announcing the timeout followed by
    the shared ``TIMEOUT_MESSAGE_TEMPLATE`` guidance. It carries no leading
    newline, so it suits substring (``in``) checks against a suffix.
    """
    headline = f'The command timed out after {timeout_seconds} seconds.'
    return f'[{headline} {TIMEOUT_MESSAGE_TEMPLATE}]'


# ============================================================================================================================
# Bash-specific tests
Expand Down Expand Up @@ -56,10 +66,7 @@ def test_bash_server(temp_dir, runtime_cls, run_as_openhands):
if runtime_cls == CLIRuntime:
assert '[The command timed out after 1.0 seconds.]' in obs.metadata.suffix
else:
assert (
"[The command timed out after 1.0 seconds. You may wait longer to see additional output by sending empty command '', send other commands to interact with the current process, or send keys to interrupt/kill the command.]"
in obs.metadata.suffix
)
assert get_timeout_suffix(1.0) in obs.metadata.suffix

action = CmdRunAction(command='C-c', is_input=True)
action.set_hard_timeout(30)
Expand Down
2 changes: 1 addition & 1 deletion tests/runtime/trajs/basic_gui_mode.json
Original file line number Diff line number Diff line change
Expand Up @@ -589,7 +589,7 @@
"working_dir": null,
"py_interpreter_path": null,
"prefix": "",
"suffix": "\n[The command has no new output after 30 seconds. You may wait longer to see additional output by sending empty command '', send other commands to interact with the current process, or send keys to interrupt/kill the command.]"
"suffix": "\n[The command has no new output after 30 seconds. You may wait longer to see additional output by sending empty command '', send other commands to interact with the current process, send keys to interrupt/kill the command, or use the timeout parameter in execute_bash for future commands.]"
},
"hidden": false
},
Expand Down
58 changes: 16 additions & 42 deletions tests/unit/test_bash_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,15 @@
from openhands.core.logger import openhands_logger as logger
from openhands.events.action import CmdRunAction
from openhands.runtime.utils.bash import BashCommandStatus, BashSession
from openhands.runtime.utils.bash_constants import TIMEOUT_MESSAGE_TEMPLATE


def get_no_change_timeout_suffix(timeout_seconds):
    """Return the expected no-new-output notice for ``timeout_seconds``.

    The result starts with a newline, then the bracketed sentence reporting
    that the command produced no new output, followed by the shared
    ``TIMEOUT_MESSAGE_TEMPLATE`` guidance.
    """
    headline = (
        f'The command has no new output after {timeout_seconds} seconds.'
    )
    return f'\n[{headline} {TIMEOUT_MESSAGE_TEMPLATE}]'


def test_session_initialization():
Expand Down Expand Up @@ -83,25 +92,15 @@ def test_long_running_command_follow_by_execute():
assert '1' in obs.content # First number should appear before timeout
assert obs.metadata.exit_code == -1 # -1 indicates command is still running
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
assert obs.metadata.suffix == (
'\n[The command has no new output after 2 seconds. '
"You may wait longer to see additional output by sending empty command '', "
'send other commands to interact with the current process, '
'or send keys to interrupt/kill the command.]'
)
assert obs.metadata.suffix == get_no_change_timeout_suffix(2)
assert obs.metadata.prefix == ''

# Continue watching output
obs = session.execute(CmdRunAction('', is_input=True))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert '2' in obs.content
assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'
assert obs.metadata.suffix == (
'\n[The command has no new output after 2 seconds. '
"You may wait longer to see additional output by sending empty command '', "
'send other commands to interact with the current process, '
'or send keys to interrupt/kill the command.]'
)
assert obs.metadata.suffix == get_no_change_timeout_suffix(2)
assert obs.metadata.exit_code == -1 # -1 indicates command is still running
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT

Expand Down Expand Up @@ -142,12 +141,7 @@ def test_interactive_command():
assert 'Enter name:' in obs.content
assert obs.metadata.exit_code == -1 # -1 indicates command is still running
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
assert obs.metadata.suffix == (
'\n[The command has no new output after 3 seconds. '
"You may wait longer to see additional output by sending empty command '', "
'send other commands to interact with the current process, '
'or send keys to interrupt/kill the command.]'
)
assert obs.metadata.suffix == get_no_change_timeout_suffix(3)
assert obs.metadata.prefix == ''

# Send input
Expand All @@ -164,36 +158,21 @@ def test_interactive_command():
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.metadata.exit_code == -1
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
assert obs.metadata.suffix == (
'\n[The command has no new output after 3 seconds. '
"You may wait longer to see additional output by sending empty command '', "
'send other commands to interact with the current process, '
'or send keys to interrupt/kill the command.]'
)
assert obs.metadata.suffix == get_no_change_timeout_suffix(3)
assert obs.metadata.prefix == ''

obs = session.execute(CmdRunAction('line 1', is_input=True))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.metadata.exit_code == -1
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
assert obs.metadata.suffix == (
'\n[The command has no new output after 3 seconds. '
"You may wait longer to see additional output by sending empty command '', "
'send other commands to interact with the current process, '
'or send keys to interrupt/kill the command.]'
)
assert obs.metadata.suffix == get_no_change_timeout_suffix(3)
assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'

obs = session.execute(CmdRunAction('line 2', is_input=True))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.metadata.exit_code == -1
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
assert obs.metadata.suffix == (
'\n[The command has no new output after 3 seconds. '
"You may wait longer to see additional output by sending empty command '', "
'send other commands to interact with the current process, '
'or send keys to interrupt/kill the command.]'
)
assert obs.metadata.suffix == get_no_change_timeout_suffix(3)
assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'

obs = session.execute(CmdRunAction('EOF', is_input=True))
Expand All @@ -216,12 +195,7 @@ def test_ctrl_c():
)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert 'looping' in obs.content
assert obs.metadata.suffix == (
'\n[The command has no new output after 2 seconds. '
"You may wait longer to see additional output by sending empty command '', "
'send other commands to interact with the current process, '
'or send keys to interrupt/kill the command.]'
)
assert obs.metadata.suffix == get_no_change_timeout_suffix(2)
assert obs.metadata.prefix == ''
assert obs.metadata.exit_code == -1 # -1 indicates command is still running
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
Expand Down
15 changes: 11 additions & 4 deletions tests/unit/test_windows_bash.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,16 @@
from openhands.events.observation.commands import (
CmdOutputObservation,
)
from openhands.runtime.utils.bash_constants import TIMEOUT_MESSAGE_TEMPLATE


def get_timeout_suffix(timeout_seconds):
    """Return the expected hard-timeout notice for ``timeout_seconds``.

    Combines the bracketed "timed out" sentence with the shared
    ``TIMEOUT_MESSAGE_TEMPLATE`` guidance. There is no leading newline, so
    the value is meant for substring (``in``) checks against a suffix.
    """
    parts = (
        f'[The command timed out after {timeout_seconds} seconds.',
        f'{TIMEOUT_MESSAGE_TEMPLATE}]',
    )
    return ' '.join(parts)


# Skip all tests in this module if not running on Windows
pytestmark = pytest.mark.skipif(
Expand Down Expand Up @@ -168,10 +178,7 @@ def test_long_running_command(windows_bash_session):
# Verify the initial output was captured
assert 'Serving HTTP on' in result.content
# Check for timeout specific metadata
assert (
"[The command timed out after 1.0 seconds. You may wait longer to see additional output by sending empty command '', send other commands to interact with the current process, or send keys to interrupt/kill the command.]"
in result.metadata.suffix
)
assert get_timeout_suffix(1.0) in result.metadata.suffix
assert result.exit_code == -1

# The action timed out, but the command should be still running
Expand Down
Loading