Skip to content

Commit 6b7d114

Browse files
committed
CodeActAgent: Delegate to BrowsingAgent for browsing tasks
1 parent 9b371b1 commit 6b7d114

File tree

25 files changed

+1081
-37
lines changed

25 files changed

+1081
-37
lines changed

agenthub/browsing_agent/browsing_agent.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,8 @@ def step(self, state: State) -> Action:
8888
- AgentFinishAction() - end the interaction
8989
"""
9090
goal = state.get_current_user_intent()
91+
if goal is None:
92+
goal = state.inputs['task']
9193
messages = []
9294
prev_actions = ''
9395
cur_axtree_txt = ''
@@ -101,7 +103,7 @@ def step(self, state: State) -> Action:
101103
isinstance(prev_action, MessageAction) and prev_action.source != 'user'
102104
):
103105
# the agent has responded, so the task is finished.
104-
return AgentFinishAction()
106+
return AgentFinishAction(outputs={'content': prev_action.content})
105107

106108
if isinstance(last_obs, BrowserOutputObservation):
107109
if last_obs.error:

agenthub/codeact_agent/codeact_agent.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,15 @@
1111
from opendevin.controller.state.state import State
1212
from opendevin.events.action import (
1313
Action,
14+
AgentDelegateAction,
1415
AgentFinishAction,
1516
BrowseInteractiveAction,
1617
CmdRunAction,
1718
IPythonRunCellAction,
1819
MessageAction,
1920
)
2021
from opendevin.events.observation import (
22+
AgentDelegateObservation,
2123
BrowserOutputObservation,
2224
CmdOutputObservation,
2325
IPythonRunCellObservation,
@@ -88,6 +90,9 @@ def get_observation_message(obs) -> dict[str, str] | None:
8890
elif isinstance(obs, BrowserOutputObservation):
8991
content = 'OBSERVATION:\n' + truncate_observation(obs.content)
9092
return {'role': 'user', 'content': content}
93+
elif isinstance(obs, AgentDelegateObservation):
94+
content = 'OBSERVATION:\n' + truncate_observation(str(obs.outputs))
95+
return {'role': 'user', 'content': content}
9196
return None
9297

9398

@@ -106,7 +111,7 @@ def truncate_observation(observation: str, max_chars: int = 10_000) -> str:
106111

107112

108113
class CodeActAgent(Agent):
109-
VERSION = '1.5'
114+
VERSION = '1.6'
110115
"""
111116
The Code Act Agent is a minimalist agent.
112117
The agent works by passing the model a list of action-observation pairs and prompting the model to take the next step.
@@ -188,7 +193,7 @@ def step(self, state: State) -> Action:
188193
Returns:
189194
- CmdRunAction(command) - bash command to run
190195
- IPythonRunCellAction(code) - IPython code to run
191-
- BrowseInteractiveAction(browsergym_command) - BrowserGym commands to run
196+
- AgentDelegateAction(agent, inputs) - delegate action for (sub)task
192197
- MessageAction(content) - Message action to run (e.g. ask for clarification)
193198
- AgentFinishAction() - end the interaction
194199
"""
@@ -260,12 +265,10 @@ def step(self, state: State) -> Action:
260265
elif browse_command := re.search(
261266
r'<execute_browse>(.*)</execute_browse>', action_str, re.DOTALL
262267
):
263-
# BrowserGym actions was found
264-
browse_actions = browse_command.group(1).strip()
265268
thought = action_str.replace(browse_command.group(0), '').strip()
266-
return BrowseInteractiveAction(
267-
browser_actions=browse_actions, thought=thought
268-
)
269+
browse_actions = browse_command.group(1).strip()
270+
task = f'{thought}. I should start with: {browse_actions}'
271+
return AgentDelegateAction(agent='BrowsingAgent', inputs={'task': task})
269272
else:
270273
# We assume the LLM is GOOD enough that when it returns pure natural language
271274
# it wants to talk to the user

agenthub/codeact_agent/prompt.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
1717
For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
1818
The assistant can browse the Internet with commands on behalf of the user by wrapping them with <execute_browse> and </execute_browse>.
19-
For example, you can browse a given URL by <execute_browse> goto("<URL>") </execute_browse>.
19+
For example, you can browse a given URL by <execute_browse> Tell me the usa's president using google search </execute_browse>.
2020
The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block.
2121
The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: <execute_ipython> %pip install [package needed] </execute_ipython> and should always import packages and define variables before starting to use them."""
2222

@@ -155,7 +155,7 @@ def index():
155155
ASSISTANT:
156156
Sure! Let me browse the server's homepage at http://127.0.0.1:5000:
157157
<execute_browse>
158-
goto("http://127.0.0.1:5000")
158+
Get the content on "http://127.0.0.1:5000"
159159
</execute_browse>
160160
161161
USER:

0 commit comments

Comments
 (0)