Skip to content

refactor browsing agent code #2442

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 14, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 62 additions & 61 deletions agenthub/browsing_agent/browsing_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,56 @@
EVAL_MODE = False


def get_error_prefix(last_browser_action: str) -> str:
    """Build the error-recovery prefix placed at the top of the next prompt.

    Args:
        last_browser_action: Text of the browser action that was reported as
            erroneous; it is echoed back verbatim on its own line.

    Returns:
        A three-line string (terminated by a newline) that flags the last
        action as incorrect and asks for a new attempt.
    """
    return f'IMPORTANT! Last action is incorrect:\n{last_browser_action}\nThink again with the current observation of the page.\n'


def get_system_message(goal: str, action_space: str) -> str:
    """Render the system message sent ahead of every browsing prompt.

    Args:
        goal: The task the agent is trying to accomplish; inserted under
            the ``# Goal:`` heading.
        action_space: Textual description of the available actions;
            inserted under the ``# Action Space`` heading.

    Returns:
        The formatted system message, ending with a newline.
    """
    # The template lines stay at column 0 on purpose: any indentation here
    # would become part of the string.
    return f"""\
# Instructions
Review the current state of the page and all other information to find the best
possible next action to accomplish your goal. Your answer will be interpreted
and executed by a program, make sure to follow the formatting instructions.

# Goal:
{goal}

# Action Space
{action_space}
"""


# Extra worked example that is appended to the user prompt when concise
# answers are enabled. It starts with a newline so that it begins on a fresh
# line after the text it is appended to.
CONCISE_INSTRUCTION = (
    '\n'
    'Here is another example with chain of thought of a valid action when providing a concise answer to user:\n'
    '"\n'
    'In order to accomplish my goal I need to send the information asked back to the user. '
    'This page list the information of HP Inkjet Fax Machine, which is the product identified in the objective. '
    'Its price is $279.49. '
    'I will send a message back to user with the answer.\n'
    '```send_msg_to_user("$279.49")```\n'
    '"\n'
)


def get_prompt(error_prefix: str, cur_axtree_txt: str, prev_action_str: str) -> str:
    """Assemble the user-role prompt for one browsing step.

    Args:
        error_prefix: Error-recovery text to put first, or an empty string
            when there is nothing to report.
        cur_axtree_txt: Flattened accessibility tree of the current page.
        prev_action_str: Text listing the previously taken actions.

    Returns:
        The prompt text. Leading and trailing whitespace of the assembled
        template is stripped, so an empty ``error_prefix`` leaves no blank
        lines at the top. When the module-level ``USE_CONCISE_ANSWER`` flag
        is truthy, ``CONCISE_INSTRUCTION`` is appended after the stripped
        text.
    """
    # Template lines are kept at column 0 so no indentation leaks into the
    # prompt itself.
    prompt = f"""\
{error_prefix}

# Current Accessibility Tree:
{cur_axtree_txt}

# Previous Actions
{prev_action_str}

Here is an example with chain of thought of a valid action when clicking on a button:
"
In order to accomplish my goal I need to click on the button with bid 12
```click("12")```
"
""".strip()
    if USE_CONCISE_ANSWER:
        prompt += CONCISE_INSTRUCTION
    return prompt


class BrowsingAgent(Agent):
VERSION = '1.0'
"""
Expand Down Expand Up @@ -89,9 +139,6 @@ def step(self, state: State) -> Action:
- MessageAction(content) - Message action to run (e.g. ask for clarification)
- AgentFinishAction() - end the interaction
"""
goal = state.get_current_user_intent()
if goal is None:
goal = state.inputs['task']
messages = []
prev_actions = []
cur_axtree_txt = ''
Expand Down Expand Up @@ -132,7 +179,10 @@ def step(self, state: State) -> Action:
if isinstance(last_obs, BrowserOutputObservation):
if last_obs.error:
# add error recovery prompt prefix
error_prefix = f'IMPORTANT! Last action is incorrect:\n{last_obs.last_browser_action}\nThink again with the current observation of the page.\n'
error_prefix = get_error_prefix(last_obs.last_browser_action)
self.error_accumulator += 1
if self.error_accumulator > 5:
return MessageAction('Too many errors encountered. Task failed.')
try:
cur_axtree_txt = flatten_axtree_to_str(
last_obs.axtree_object,
Expand All @@ -146,73 +196,24 @@ def step(self, state: State) -> Action:
)
return MessageAction('Error encountered when browsing.')

if error_prefix:
self.error_accumulator += 1
if self.error_accumulator > 5:
return MessageAction('Too many errors encountered. Task failed.')
system_msg = f"""\
# Instructions
Review the current state of the page and all other information to find the best
possible next action to accomplish your goal. Your answer will be interpreted
and executed by a program, make sure to follow the formatting instructions.

# Goal:
{goal}

# Action Space
{self.action_space.describe(with_long_description=False, with_examples=True)}
"""
if (goal := state.get_current_user_intent()) is None:
goal = state.inputs['task']
system_msg = get_system_message(
goal,
self.action_space.describe(with_long_description=False, with_examples=True),
)

messages.append({'role': 'system', 'content': system_msg})

prompt = f"""\
{error_prefix}

# Current Accessibility Tree:
{cur_axtree_txt}

# Previous Actions
{prev_action_str}

Here is an example with chain of thought of a valid action when clicking on a button:
"
In order to accomplish my goal I need to click on the button with bid 12
```click("12")```
"
""".strip()

if USE_CONCISE_ANSWER:
concise_instruction = """\

Here is another example with chain of thought of a valid action when providing a concise answer to user:
"
In order to accomplish my goal I need to send the information asked back to the user. This page list the information of HP Inkjet Fax Machine, which is the product identified in the objective. Its price is $279.49. I will send a message back to user with the answer.
```send_msg_to_user("$279.49")```
"
"""
prompt += concise_instruction
prompt = get_prompt(error_prefix, cur_axtree_txt, prev_action_str)
messages.append({'role': 'user', 'content': prompt})
logger.info(prompt)
response = self.llm.completion(
response = self.llm.do_completion(
messages=messages,
temperature=0.0,
stop=[')```', ')\n```'],
)
self.log_cost(response)
return self.response_parser.parse(response)

def search_memory(self, query: str) -> list[str]:
    """Memory search hook; not implemented here.

    Args:
        query: The search text (unused).

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError('Implement this abstract method')

def log_cost(self, response):
    """Add the cost of one LLM response to the running total and log it.

    Args:
        response: The completion response passed on to
            ``self.llm.completion_cost``.

    Side effects:
        Increments ``self.cost_accumulator`` by the computed cost and emits
        one info-level log line with the per-call and accumulated cost.
    """
    # TODO: refactor to unified cost tracking
    try:
        cur_cost = self.llm.completion_cost(response)
    except Exception:
        # Best effort: if the cost cannot be computed, count this call as 0
        # instead of failing the agent step.
        cur_cost = 0
    self.cost_accumulator += cur_cost
    logger.info(
        'Cost: %.2f USD | Accumulated Cost: %.2f USD',
        cur_cost,
        self.cost_accumulator,
    )