Skip to content

Allow markdown or custom formatting for code blocks #1493

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jun 30, 2025
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions examples/rag_using_chromadb.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ def forward(self, query: str) -> str:
model=model,
max_steps=4,
verbosity_level=2,
stream_outputs=True,
)

agent_output = agent.run("How can I push a model to the Hub?")
Expand Down
36 changes: 28 additions & 8 deletions src/smolagents/agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -1502,6 +1502,7 @@ class CodeAgent(MultiStepAgent):
<Deprecated version="1.17.0">
Parameter `grammar` is deprecated and will be removed in version 1.20.
</Deprecated>
code_block_tags (`tuple[str, str]` | `str`, *optional*): Opening and closing tags for code blocks (regex strings). Pass a custom tuple, or pass 'markdown' to use ("```(?:python|py)", "\\n```"), leave empty to use ("<code>", "</code>").
**kwargs: Additional keyword arguments.
"""

Expand All @@ -1518,6 +1519,7 @@ def __init__(
stream_outputs: bool = False,
use_structured_outputs_internally: bool = False,
grammar: dict[str, str] | None = None,
code_block_tags: str | tuple[str, str] | None = None,
**kwargs,
):
self.additional_authorized_imports = additional_authorized_imports if additional_authorized_imports else []
Expand All @@ -1534,6 +1536,17 @@ def __init__(
)
if grammar and use_structured_outputs_internally:
raise ValueError("You cannot use 'grammar' and 'use_structured_outputs_internally' at the same time.")

if isinstance(code_block_tags, str) and not code_block_tags == "markdown":
raise ValueError("Only 'markdown' is supported for a string argument to `code_block_tags`.")
self.code_block_tags = (
code_block_tags
if isinstance(code_block_tags, tuple)
else ("```python", "```")
if code_block_tags == "markdown"
else ("<code>", "</code>")
)

super().__init__(
tools=tools,
model=model,
Expand Down Expand Up @@ -1596,6 +1609,8 @@ def initialize_system_prompt(self) -> str:
else str(self.authorized_imports)
),
"custom_instructions": self.instructions,
"code_block_opening_tag": self.code_block_tags[0],
"code_block_closing_tag": self.code_block_tags[1],
},
)
return system_prompt
Expand All @@ -1611,6 +1626,10 @@ def _step_stream(self, memory_step: ActionStep) -> Generator[ChatMessageStreamDe
input_messages = memory_messages.copy()
### Generate model output ###
memory_step.model_input_messages = input_messages
stop_sequences = ["Observation:", "Calling tools:"]
if self.code_block_tags[1] not in self.code_block_tags[0]:
# Only use the closing tag as a stop sequence when it is not contained in the opening tag; otherwise generation would be cut short as soon as the opening tag is emitted
stop_sequences.append(self.code_block_tags[1])
try:
additional_args: dict[str, Any] = {}
if self.grammar:
Expand All @@ -1620,7 +1639,7 @@ def _step_stream(self, memory_step: ActionStep) -> Generator[ChatMessageStreamDe
if self.stream_outputs:
output_stream = self.model.generate_stream(
input_messages,
stop_sequences=["<end_code>", "Observation:", "Calling tools:"],
stop_sequences=stop_sequences,
**additional_args,
)
chat_message_stream_deltas: list[ChatMessageStreamDelta] = []
Expand All @@ -1637,7 +1656,7 @@ def _step_stream(self, memory_step: ActionStep) -> Generator[ChatMessageStreamDe
else:
chat_message: ChatMessage = self.model.generate(
input_messages,
stop_sequences=["<end_code>", "Observation:", "Calling tools:"],
stop_sequences=stop_sequences,
**additional_args,
)
memory_step.model_output_message = chat_message
Expand All @@ -1648,10 +1667,10 @@ def _step_stream(self, memory_step: ActionStep) -> Generator[ChatMessageStreamDe
level=LogLevel.DEBUG,
)

# This adds <end_code> sequence to the history.
# This will nudge ulterior LLM calls to finish with <end_code>, thus efficiently stopping generation.
if output_text and output_text.strip().endswith("```"):
output_text += "<end_code>"
# Append the closing code tag to the output stored in history when it is missing.
# This nudges subsequent LLM calls to finish with the closing tag, thus efficiently stopping generation.
if output_text and not output_text.strip().endswith(self.code_block_tags[1]):
output_text += self.code_block_tags[1]
memory_step.model_output_message.content = output_text

memory_step.token_usage = chat_message.token_usage
Expand All @@ -1663,9 +1682,9 @@ def _step_stream(self, memory_step: ActionStep) -> Generator[ChatMessageStreamDe
try:
if self._use_structured_outputs_internally:
code_action = json.loads(output_text)["code"]
code_action = extract_code_from_text(code_action) or code_action
code_action = extract_code_from_text(code_action, self.code_block_tags) or code_action
else:
code_action = parse_code_blobs(output_text)
code_action = parse_code_blobs(output_text, self.code_block_tags)
code_action = fix_final_answer_code(code_action)
memory_step.code_action = code_action
except Exception as e:
Expand Down Expand Up @@ -1754,6 +1773,7 @@ def from_dict(cls, agent_dict: dict[str, Any], **kwargs) -> "CodeAgent":
"executor_type": agent_dict.get("executor_type"),
"executor_kwargs": agent_dict.get("executor_kwargs"),
"max_print_outputs_length": agent_dict.get("max_print_outputs_length"),
"code_block_tags": agent_dict.get("code_block_tags"),
}
# Filter out None values
code_agent_kwargs = {k: v for k, v in code_agent_kwargs.items() if v is not None}
Expand Down
62 changes: 31 additions & 31 deletions src/smolagents/prompts/code_agent.yaml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
system_prompt: |-
You are an expert assistant who can solve any task using code blobs. You will be given a task to solve as best you can.
To do so, you have been given access to a list of tools: these tools are basically Python functions which you can call with code.
To solve the task, you must plan forward to proceed in a series of steps, in a cycle of 'Thought:', '<code>', and 'Observation:' sequences.
To solve the task, you must plan forward to proceed in a series of steps, in a cycle of Thought, Code, and Observation sequences.

At each step, in the 'Thought:' sequence, you should first explain your reasoning towards solving the task and the tools that you want to use.
Then in the '<code>' sequence, you should write the code in simple Python. The code sequence must end with '</code>' sequence.
Then in the Code sequence you should write the code in simple Python. The code sequence must be opened with '{{code_block_opening_tag}}', and closed with '{{code_block_closing_tag}}'.
During each intermediate step, you can use 'print()' to save whatever important information you will then need.
These print outputs will then appear in the 'Observation:' field, which will be available as input for the next step.
In the end you have to return a final answer using the `final_answer` tool.
Expand All @@ -14,26 +14,26 @@ system_prompt: |-
Task: "Generate an image of the oldest person in this document."

Thought: I will proceed step by step and use the following tools: `document_qa` to find the oldest person in the document, then `image_generator` to generate an image according to the answer.
<code>
{{code_block_opening_tag}}
answer = document_qa(document=document, question="Who is the oldest person mentioned?")
print(answer)
</code>
{{code_block_closing_tag}}
Observation: "The oldest person in the document is John Doe, a 55 year old lumberjack living in Newfoundland."

Thought: I will now generate an image showcasing the oldest person.
<code>
{{code_block_opening_tag}}
image = image_generator("A portrait of John Doe, a 55-year-old man living in Canada.")
final_answer(image)
</code>
{{code_block_closing_tag}}

---
Task: "What is the result of the following operation: 5 + 3 + 1294.678?"

Thought: I will use python code to compute the result of the operation and then return the final answer using the `final_answer` tool
<code>
{{code_block_opening_tag}}
result = 5 + 3 + 1294.678
final_answer(result)
</code>
{{code_block_closing_tag}}

---
Task:
Expand All @@ -42,31 +42,31 @@ system_prompt: |-
{'question': 'Quel est l'animal sur l'image?', 'image': 'path/to/image.jpg'}"

Thought: I will use the following tools: `translator` to translate the question into English and then `image_qa` to answer the question on the input image.
<code>
{{code_block_opening_tag}}
translated_question = translator(question=question, src_lang="French", tgt_lang="English")
print(f"The translated question is {translated_question}.")
answer = image_qa(image=image, question=translated_question)
final_answer(f"The answer is {answer}")
</code>
{{code_block_closing_tag}}

---
Task:
In a 1979 interview, Stanislaus Ulam discusses with Martin Sherwin about other great physicists of his time, including Oppenheimer.
What does he say was the consequence of Einstein learning too much math on his creativity, in one word?

Thought: I need to find and read the 1979 interview of Stanislaus Ulam with Martin Sherwin.
<code>
{{code_block_opening_tag}}
pages = web_search(query="1979 interview Stanislaus Ulam Martin Sherwin physicists Einstein")
print(pages)
</code>
{{code_block_closing_tag}}
Observation:
No result found for query "1979 interview Stanislaus Ulam Martin Sherwin physicists Einstein".

Thought: The query was maybe too restrictive and did not find any results. Let's try again with a broader query.
<code>
{{code_block_opening_tag}}
pages = web_search(query="1979 interview Stanislaus Ulam")
print(pages)
</code>
{{code_block_closing_tag}}
Observation:
Found 6 pages:
[Stanislaus Ulam 1979 interview](https://ahf.nuclearmuseum.org/voices/oral-histories/stanislaus-ulams-interview-1979/)
Expand All @@ -76,61 +76,61 @@ system_prompt: |-
(truncated)

Thought: I will read the first 2 pages to know more.
<code>
{{code_block_opening_tag}}
for url in ["https://ahf.nuclearmuseum.org/voices/oral-histories/stanislaus-ulams-interview-1979/", "https://ahf.nuclearmuseum.org/manhattan-project/ulam-manhattan-project/"]:
whole_page = visit_webpage(url)
print(whole_page)
print("\n" + "="*80 + "\n") # Print separator between pages
</code>
{{code_block_closing_tag}}
Observation:
Manhattan Project Locations:
Los Alamos, NM
Stanislaus Ulam was a Polish-American mathematician. He worked on the Manhattan Project at Los Alamos and later helped design the hydrogen bomb. In this interview, he discusses his work at
(truncated)

Thought: I now have the final answer: from the webpages visited, Stanislaus Ulam says of Einstein: "He learned too much mathematics and sort of diminished, it seems to me personally, it seems to me his purely physics creativity." Let's answer in one word.
<code>
{{code_block_opening_tag}}
final_answer("diminished")
</code>
{{code_block_closing_tag}}

---
Task: "Which city has the highest population: Guangzhou or Shanghai?"

Thought: I need to get the populations for both cities and compare them: I will use the tool `web_search` to get the population of both cities.
<code>
{{code_block_opening_tag}}
for city in ["Guangzhou", "Shanghai"]:
print(f"Population {city}:", web_search(f"{city} population")
</code>
{{code_block_closing_tag}}
Observation:
Population Guangzhou: ['Guangzhou has a population of 15 million inhabitants as of 2021.']
Population Shanghai: '26 million (2019)'

Thought: Now I know that Shanghai has the highest population.
<code>
{{code_block_opening_tag}}
final_answer("Shanghai")
</code>
{{code_block_closing_tag}}

---
Task: "What is the current age of the pope, raised to the power 0.36?"

Thought: I will use the tool `wikipedia_search` to get the age of the pope, and confirm that with a web search.
<code>
{{code_block_opening_tag}}
pope_age_wiki = wikipedia_search(query="current pope age")
print("Pope age as per wikipedia:", pope_age_wiki)
pope_age_search = web_search(query="current pope age")
print("Pope age as per google search:", pope_age_search)
</code>
{{code_block_closing_tag}}
Observation:
Pope age: "The pope Francis is currently 88 years old."

Thought: I know that the pope is 88 years old. Let's compute the result using python code.
<code>
{{code_block_opening_tag}}
pope_current_age = 88 ** 0.36
final_answer(pope_current_age)
</code>
{{code_block_closing_tag}}

Above example were using notional tools that might not exist for you. On top of performing computations in the Python code snippets that you create, you only have access to these tools, behaving like regular python functions:
```python
{{code_block_opening_tag}}
{%- for tool in tools.values() %}
def {{ tool.name }}({% for arg_name, arg_info in tool.inputs.items() %}{{ arg_name }}: {{ arg_info.type }}{% if not loop.last %}, {% endif %}{% endfor %}) -> {{tool.output_type}}:
"""{{ tool.description }}
Expand All @@ -141,14 +141,14 @@ system_prompt: |-
{%- endfor %}
"""
{% endfor %}
```
{{code_block_closing_tag}}

{%- if managed_agents and managed_agents.values() | list %}
You can also give tasks to team members.
Calling a team member works similarly to calling a tool: provide the task description as the 'task' argument. Since this team member is a real human, be as detailed and verbose as necessary in your task description.
You can also include any relevant variables or context using the 'additional_args' argument.
Here is a list of the team members that you can call:
```python
{{code_block_opening_tag}}
{%- for agent in managed_agents.values() %}
def {{ agent.name }}(task: str, additional_args: dict[str, Any]) -> str:
"""{{ agent.description }}
Expand All @@ -158,11 +158,11 @@ system_prompt: |-
additional_args: Dictionary of extra inputs to pass to the managed agent, e.g. images, dataframes, or any other contextual data it may need.
"""
{% endfor %}
```
{{code_block_closing_tag}}
{%- endif %}

Here are the rules you should always follow to solve your task:
1. Always provide a 'Thought:' sequence, and a '<code>' sequence ending with '</code>', else you will fail.
1. Always provide a 'Thought:' sequence, and a '{{code_block_opening_tag}}' sequence ending with '{{code_block_closing_tag}}', else you will fail.
2. Use only variables that you have defined!
3. Always use the right arguments for the tools. DO NOT pass the arguments as a dict as in 'answer = wikipedia_search({'query': "What is the place where James Bond lives?"})', but use the arguments directly as in 'answer = wikipedia_search(query="What is the place where James Bond lives?")'.
4. Take care to not chain too many sequential tool calls in the same code block, especially when the output format is unpredictable. For instance, a call to wikipedia_search has an unpredictable return format, so do not have another tool call that depends on its output in the same block: rather output results with print() to use them in the next block.
Expand Down
28 changes: 15 additions & 13 deletions src/smolagents/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,8 @@ def parse_json_blob(json_blob: str) -> tuple[dict[str, str], str]:
try:
first_accolade_index = json_blob.find("{")
last_accolade_index = [a.start() for a in list(re.finditer("}", json_blob))][-1]
json_data = json_blob[first_accolade_index : last_accolade_index + 1]
json_data = json.loads(json_data, strict=False)
json_str = json_blob[first_accolade_index : last_accolade_index + 1]
json_data = json.loads(json_str, strict=False)
return json_data, json_blob[:first_accolade_index]
except IndexError:
raise ValueError("The model output does not contain any JSON blob.")
Expand All @@ -172,16 +172,16 @@ def parse_json_blob(json_blob: str) -> tuple[dict[str, str], str]:
)


def extract_code_from_text(text: str) -> str | None:
def extract_code_from_text(text: str, code_block_tags: tuple[str, str]) -> str | None:
"""Extract code from the LLM's output."""
pattern = r"<code>(.*?)</code>"
matches = re.findall(pattern, text, re.DOTALL)
initial_pattern = rf"{code_block_tags[0]}(.*?){code_block_tags[1]}"
matches = re.findall(initial_pattern, text, re.DOTALL)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why did you rename it to `initial_pattern` instead of keeping the original `pattern`?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a leftover from an earlier implementation change; I'm reverting this.

if matches:
return "\n\n".join(match.strip() for match in matches)
return None


def parse_code_blobs(text: str) -> str:
def parse_code_blobs(text: str, code_block_tags: tuple[str, str]) -> str:
"""Extract code blocs from the LLM's output.

If a valid code block is passed, it returns it directly.
Expand All @@ -195,7 +195,9 @@ def parse_code_blobs(text: str) -> str:
Raises:
ValueError: If no valid code block is found in the text.
"""
matches = extract_code_from_text(text)
matches = extract_code_from_text(text, code_block_tags)
if not matches: # Fallback to markdown pattern
matches = extract_code_from_text(text, ("```(?:python|py)", "\n```"))
if matches:
return matches
# Maybe the LLM outputted a code blob directly
Expand All @@ -209,27 +211,27 @@ def parse_code_blobs(text: str) -> str:
raise ValueError(
dedent(
f"""
Your code snippet is invalid, because the regex pattern <code>(.*?)</code> was not found in it.
Your code snippet is invalid, because the regex pattern {code_block_tags[0]}(.*?){code_block_tags[1]} was not found in it.
Here is your code snippet:
{text}
It seems like you're trying to return the final answer, you can do it as follows:
<code>
{code_block_tags[0]}
final_answer("YOUR FINAL ANSWER HERE")
</code>
{code_block_tags[1]}
"""
).strip()
)
raise ValueError(
dedent(
f"""
Your code snippet is invalid, because the regex pattern <code>(.*?)</code> was not found in it.
Your code snippet is invalid, because the regex pattern {code_block_tags[0]}(.*?){code_block_tags[1]} was not found in it.
Here is your code snippet:
{text}
Make sure to include code with the correct pattern, for instance:
Thoughts: Your thoughts
<code>
{code_block_tags[0]}
# Your python code here
</code>
{code_block_tags[1]}
"""
).strip()
)
Expand Down
4 changes: 2 additions & 2 deletions tests/test_agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -1674,11 +1674,11 @@ def test_end_code_appending(self):

outputs = [s.model_output for s in actions_steps if s.model_output]
assert outputs
assert all(o.endswith("<end_code>") for o in outputs)
assert all(o.endswith("</code>") for o in outputs)

messages = [s.model_output_message for s in actions_steps if s.model_output_message]
assert messages
assert all(m.content.endswith("<end_code>") for m in messages)
assert all(m.content.endswith("</code>") for m in messages)

def test_change_tools_after_init(self):
from smolagents import tool
Expand Down
Loading