huggingface · aymeric-roucher · Jun 30, 2025 · Jun 27, 2025 · Jun 27, 2025 · Jun 27, 2025
diff --git a/examples/rag_using_chromadb.py b/examples/rag_using_chromadb.py
@@ -121,6 +121,7 @@ def forward(self, query: str) -> str:
     model=model,
     max_steps=4,
     verbosity_level=2,
+    stream_outputs=True,
 )
 
 agent_output = agent.run("How can I push a model to the Hub?")

diff --git a/src/smolagents/agents.py b/src/smolagents/agents.py
@@ -1502,6 +1502,7 @@ class CodeAgent(MultiStepAgent):
             <Deprecated version="1.17.0">
             Parameter `grammar` is deprecated and will be removed in version 1.20.
             </Deprecated>
+        code_block_tags (`tuple[str, str]` | `str`, *optional*): Opening and closing tags for code blocks (regex strings). Pass a custom tuple, pass 'markdown' to use ("```python", "```"), or leave unset to use ("<code>", "</code>").
         **kwargs: Additional keyword arguments.
     """
 
@@ -1518,6 +1519,7 @@ def __init__(
         stream_outputs: bool = False,
         use_structured_outputs_internally: bool = False,
         grammar: dict[str, str] | None = None,
+        code_block_tags: str | tuple[str, str] | None = None,
         **kwargs,
     ):
         self.additional_authorized_imports = additional_authorized_imports if additional_authorized_imports else []
@@ -1534,6 +1536,17 @@ def __init__(
             )
         if grammar and use_structured_outputs_internally:
             raise ValueError("You cannot use 'grammar' and 'use_structured_outputs_internally' at the same time.")
+
+        if isinstance(code_block_tags, str) and not code_block_tags == "markdown":
+            raise ValueError("Only 'markdown' is supported for a string argument to `code_block_tags`.")
+        self.code_block_tags = (
+            code_block_tags
+            if isinstance(code_block_tags, tuple)
+            else ("```python", "```")
+            if code_block_tags == "markdown"
+            else ("<code>", "</code>")
+        )
+
         super().__init__(
             tools=tools,
             model=model,
@@ -1596,6 +1609,8 @@ def initialize_system_prompt(self) -> str:
                     else str(self.authorized_imports)
                 ),
                 "custom_instructions": self.instructions,
+                "code_block_opening_tag": self.code_block_tags[0],
+                "code_block_closing_tag": self.code_block_tags[1],
             },
         )
         return system_prompt
@@ -1611,6 +1626,10 @@ def _step_stream(self, memory_step: ActionStep) -> Generator[ChatMessageStreamDe
         input_messages = memory_messages.copy()
         ### Generate model output ###
         memory_step.model_input_messages = input_messages
+        stop_sequences = ["Observation:", "Calling tools:"]
+        if self.code_block_tags[1] not in self.code_block_tags[0]:
+            # Only use the closing tag as a stop sequence when it is not a substring of the opening tag: otherwise generation would stop right at the opening tag, cutting short any code generation.
+            stop_sequences.append(self.code_block_tags[1])
         try:
             additional_args: dict[str, Any] = {}
             if self.grammar:
@@ -1620,7 +1639,7 @@ def _step_stream(self, memory_step: ActionStep) -> Generator[ChatMessageStreamDe
             if self.stream_outputs:
                 output_stream = self.model.generate_stream(
                     input_messages,
-                    stop_sequences=["<end_code>", "Observation:", "Calling tools:"],
+                    stop_sequences=stop_sequences,
                     **additional_args,
                 )
                 chat_message_stream_deltas: list[ChatMessageStreamDelta] = []
@@ -1637,7 +1656,7 @@ def _step_stream(self, memory_step: ActionStep) -> Generator[ChatMessageStreamDe
             else:
                 chat_message: ChatMessage = self.model.generate(
                     input_messages,
-                    stop_sequences=["<end_code>", "Observation:", "Calling tools:"],
+                    stop_sequences=stop_sequences,
                     **additional_args,
                 )
                 memory_step.model_output_message = chat_message
@@ -1648,10 +1667,10 @@ def _step_stream(self, memory_step: ActionStep) -> Generator[ChatMessageStreamDe
                     level=LogLevel.DEBUG,
                 )
 
-            # This adds <end_code> sequence to the history.
-            # This will nudge ulterior LLM calls to finish with <end_code>, thus efficiently stopping generation.
-            if output_text and output_text.strip().endswith("```"):
-                output_text += "<end_code>"
+            # Append the closing code tag to the output kept in history whenever it is missing.
+            # This nudges subsequent LLM calls to finish with the closing tag, thus efficiently stopping generation.
+            if output_text and not output_text.strip().endswith(self.code_block_tags[1]):
+                output_text += self.code_block_tags[1]
                 memory_step.model_output_message.content = output_text
 
             memory_step.token_usage = chat_message.token_usage
@@ -1663,9 +1682,9 @@ def _step_stream(self, memory_step: ActionStep) -> Generator[ChatMessageStreamDe
         try:
             if self._use_structured_outputs_internally:
                 code_action = json.loads(output_text)["code"]
-                code_action = extract_code_from_text(code_action) or code_action
+                code_action = extract_code_from_text(code_action, self.code_block_tags) or code_action
             else:
-                code_action = parse_code_blobs(output_text)
+                code_action = parse_code_blobs(output_text, self.code_block_tags)
             code_action = fix_final_answer_code(code_action)
             memory_step.code_action = code_action
         except Exception as e:
@@ -1754,6 +1773,7 @@ def from_dict(cls, agent_dict: dict[str, Any], **kwargs) -> "CodeAgent":
             "executor_type": agent_dict.get("executor_type"),
             "executor_kwargs": agent_dict.get("executor_kwargs"),
             "max_print_outputs_length": agent_dict.get("max_print_outputs_length"),
+            "code_block_tags": agent_dict.get("code_block_tags"),
         }
         # Filter out None values
         code_agent_kwargs = {k: v for k, v in code_agent_kwargs.items() if v is not None}

diff --git a/src/smolagents/prompts/code_agent.yaml b/src/smolagents/prompts/code_agent.yaml
@@ -1,10 +1,10 @@
 system_prompt: |-
   You are an expert assistant who can solve any task using code blobs. You will be given a task to solve as best you can.
   To do so, you have been given access to a list of tools: these tools are basically Python functions which you can call with code.
-  To solve the task, you must plan forward to proceed in a series of steps, in a cycle of 'Thought:', '<code>', and 'Observation:' sequences.
+  To solve the task, you must plan forward to proceed in a series of steps, in a cycle of Thought, Code, and Observation sequences.
 
   At each step, in the 'Thought:' sequence, you should first explain your reasoning towards solving the task and the tools that you want to use.
-  Then in the '<code>' sequence, you should write the code in simple Python. The code sequence must end with '</code>' sequence.
+  Then in the Code sequence you should write the code in simple Python. The code sequence must be opened with '{{code_block_opening_tag}}', and closed with '{{code_block_closing_tag}}'.
   During each intermediate step, you can use 'print()' to save whatever important information you will then need.
   These print outputs will then appear in the 'Observation:' field, which will be available as input for the next step.
   In the end you have to return a final answer using the `final_answer` tool.
@@ -14,26 +14,26 @@ system_prompt: |-
   Task: "Generate an image of the oldest person in this document."
 
   Thought: I will proceed step by step and use the following tools: `document_qa` to find the oldest person in the document, then `image_generator` to generate an image according to the answer.
-  <code>
+  {{code_block_opening_tag}}
   answer = document_qa(document=document, question="Who is the oldest person mentioned?")
   print(answer)
-  </code>
+  {{code_block_closing_tag}}
   Observation: "The oldest person in the document is John Doe, a 55 year old lumberjack living in Newfoundland."
 
   Thought: I will now generate an image showcasing the oldest person.
-  <code>
+  {{code_block_opening_tag}}
   image = image_generator("A portrait of John Doe, a 55-year-old man living in Canada.")
   final_answer(image)
-  </code>
+  {{code_block_closing_tag}}
 
   ---
   Task: "What is the result of the following operation: 5 + 3 + 1294.678?"
 
   Thought: I will use python code to compute the result of the operation and then return the final answer using the `final_answer` tool
-  <code>
+  {{code_block_opening_tag}}
   result = 5 + 3 + 1294.678
   final_answer(result)
-  </code>
+  {{code_block_closing_tag}}
 
   ---
   Task:
@@ -42,31 +42,31 @@ system_prompt: |-
   {'question': 'Quel est l'animal sur l'image?', 'image': 'path/to/image.jpg'}"
 
   Thought: I will use the following tools: `translator` to translate the question into English and then `image_qa` to answer the question on the input image.
-  <code>
+  {{code_block_opening_tag}}
   translated_question = translator(question=question, src_lang="French", tgt_lang="English")
   print(f"The translated question is {translated_question}.")
   answer = image_qa(image=image, question=translated_question)
   final_answer(f"The answer is {answer}")
-  </code>
+  {{code_block_closing_tag}}
 
   ---
   Task:
   In a 1979 interview, Stanislaus Ulam discusses with Martin Sherwin about other great physicists of his time, including Oppenheimer.
   What does he say was the consequence of Einstein learning too much math on his creativity, in one word?
 
   Thought: I need to find and read the 1979 interview of Stanislaus Ulam with Martin Sherwin.
-  <code>
+  {{code_block_opening_tag}}
   pages = web_search(query="1979 interview Stanislaus Ulam Martin Sherwin physicists Einstein")
   print(pages)
-  </code>
+  {{code_block_closing_tag}}
   Observation:
   No result found for query "1979 interview Stanislaus Ulam Martin Sherwin physicists Einstein".
 
   Thought: The query was maybe too restrictive and did not find any results. Let's try again with a broader query.
-  <code>
+  {{code_block_opening_tag}}
   pages = web_search(query="1979 interview Stanislaus Ulam")
   print(pages)
-  </code>
+  {{code_block_closing_tag}}
   Observation:
   Found 6 pages:
   [Stanislaus Ulam 1979 interview](https://ahf.nuclearmuseum.org/voices/oral-histories/stanislaus-ulams-interview-1979/)
@@ -76,61 +76,61 @@ system_prompt: |-
   (truncated)
 
   Thought: I will read the first 2 pages to know more.
-  <code>
+  {{code_block_opening_tag}}
   for url in ["https://ahf.nuclearmuseum.org/voices/oral-histories/stanislaus-ulams-interview-1979/", "https://ahf.nuclearmuseum.org/manhattan-project/ulam-manhattan-project/"]:
       whole_page = visit_webpage(url)
       print(whole_page)
       print("\n" + "="*80 + "\n")  # Print separator between pages
-  </code>
+  {{code_block_closing_tag}}
   Observation:
   Manhattan Project Locations:
   Los Alamos, NM
   Stanislaus Ulam was a Polish-American mathematician. He worked on the Manhattan Project at Los Alamos and later helped design the hydrogen bomb. In this interview, he discusses his work at
   (truncated)
 
   Thought: I now have the final answer: from the webpages visited, Stanislaus Ulam says of Einstein: "He learned too much mathematics and sort of diminished, it seems to me personally, it seems to me his purely physics creativity." Let's answer in one word.
-  <code>
+  {{code_block_opening_tag}}
   final_answer("diminished")
-  </code>
+  {{code_block_closing_tag}}
 
   ---
   Task: "Which city has the highest population: Guangzhou or Shanghai?"
 
   Thought: I need to get the populations for both cities and compare them: I will use the tool `web_search` to get the population of both cities.
-  <code>
+  {{code_block_opening_tag}}
   for city in ["Guangzhou", "Shanghai"]:
       print(f"Population {city}:", web_search(f"{city} population")
-  </code>
+  {{code_block_closing_tag}}
   Observation:
   Population Guangzhou: ['Guangzhou has a population of 15 million inhabitants as of 2021.']
   Population Shanghai: '26 million (2019)'
 
   Thought: Now I know that Shanghai has the highest population.
-  <code>
+  {{code_block_opening_tag}}
   final_answer("Shanghai")
-  </code>
+  {{code_block_closing_tag}}
 
   ---
   Task: "What is the current age of the pope, raised to the power 0.36?"
 
   Thought: I will use the tool `wikipedia_search` to get the age of the pope, and confirm that with a web search.
-  <code>
+  {{code_block_opening_tag}}
   pope_age_wiki = wikipedia_search(query="current pope age")
   print("Pope age as per wikipedia:", pope_age_wiki)
   pope_age_search = web_search(query="current pope age")
   print("Pope age as per google search:", pope_age_search)
-  </code>
+  {{code_block_closing_tag}}
   Observation:
   Pope age: "The pope Francis is currently 88 years old."
 
   Thought: I know that the pope is 88 years old. Let's compute the result using python code.
-  <code>
+  {{code_block_opening_tag}}
   pope_current_age = 88 ** 0.36
   final_answer(pope_current_age)
-  </code>
+  {{code_block_closing_tag}}
 
   Above example were using notional tools that might not exist for you. On top of performing computations in the Python code snippets that you create, you only have access to these tools, behaving like regular python functions:
-  ```python
+  {{code_block_opening_tag}}
   {%- for tool in tools.values() %}
   def {{ tool.name }}({% for arg_name, arg_info in tool.inputs.items() %}{{ arg_name }}: {{ arg_info.type }}{% if not loop.last %}, {% endif %}{% endfor %}) -> {{tool.output_type}}:
       """{{ tool.description }}
@@ -141,14 +141,14 @@ system_prompt: |-
       {%- endfor %}
       """
   {% endfor %}
-  ```
+  {{code_block_closing_tag}}
 
   {%- if managed_agents and managed_agents.values() | list %}
   You can also give tasks to team members.
   Calling a team member works similarly to calling a tool: provide the task description as the 'task' argument. Since this team member is a real human, be as detailed and verbose as necessary in your task description.
   You can also include any relevant variables or context using the 'additional_args' argument.
   Here is a list of the team members that you can call:
-  ```python
+  {{code_block_opening_tag}}
   {%- for agent in managed_agents.values() %}
   def {{ agent.name }}(task: str, additional_args: dict[str, Any]) -> str:
       """{{ agent.description }}
@@ -158,11 +158,11 @@ system_prompt: |-
           additional_args: Dictionary of extra inputs to pass to the managed agent, e.g. images, dataframes, or any other contextual data it may need.
       """
   {% endfor %}
-  ```
+  {{code_block_closing_tag}}
   {%- endif %}
 
   Here are the rules you should always follow to solve your task:
-  1. Always provide a 'Thought:' sequence, and a '<code>' sequence ending with '</code>', else you will fail.
+  1. Always provide a 'Thought:' sequence, and a '{{code_block_opening_tag}}' sequence ending with '{{code_block_closing_tag}}', else you will fail.
   2. Use only variables that you have defined!
   3. Always use the right arguments for the tools. DO NOT pass the arguments as a dict as in 'answer = wikipedia_search({'query': "What is the place where James Bond lives?"})', but use the arguments directly as in 'answer = wikipedia_search(query="What is the place where James Bond lives?")'.
   4. Take care to not chain too many sequential tool calls in the same code block, especially when the output format is unpredictable. For instance, a call to wikipedia_search has an unpredictable return format, so do not have another tool call that depends on its output in the same block: rather output results with print() to use them in the next block.

diff --git a/src/smolagents/utils.py b/src/smolagents/utils.py
@@ -154,8 +154,8 @@ def parse_json_blob(json_blob: str) -> tuple[dict[str, str], str]:
     try:
         first_accolade_index = json_blob.find("{")
         last_accolade_index = [a.start() for a in list(re.finditer("}", json_blob))][-1]
-        json_data = json_blob[first_accolade_index : last_accolade_index + 1]
-        json_data = json.loads(json_data, strict=False)
+        json_str = json_blob[first_accolade_index : last_accolade_index + 1]
+        json_data = json.loads(json_str, strict=False)
         return json_data, json_blob[:first_accolade_index]
     except IndexError:
         raise ValueError("The model output does not contain any JSON blob.")
@@ -172,16 +172,16 @@ def parse_json_blob(json_blob: str) -> tuple[dict[str, str], str]:
         )
 
 
-def extract_code_from_text(text: str) -> str | None:
+def extract_code_from_text(text: str, code_block_tags: tuple[str, str]) -> str | None:
     """Extract code from the LLM's output."""
-    pattern = r"<code>(.*?)</code>"
-    matches = re.findall(pattern, text, re.DOTALL)
+    initial_pattern = rf"{code_block_tags[0]}(.*?){code_block_tags[1]}"
+    matches = re.findall(initial_pattern, text, re.DOTALL)
     if matches:
         return "\n\n".join(match.strip() for match in matches)
     return None
 
 
-def parse_code_blobs(text: str) -> str:
+def parse_code_blobs(text: str, code_block_tags: tuple[str, str]) -> str:
     """Extract code blocs from the LLM's output.
 
     If a valid code block is passed, it returns it directly.
@@ -195,7 +195,9 @@ def parse_code_blobs(text: str) -> str:
     Raises:
         ValueError: If no valid code block is found in the text.
     """
-    matches = extract_code_from_text(text)
+    matches = extract_code_from_text(text, code_block_tags)
+    if not matches:  # Fallback to markdown pattern
+        matches = extract_code_from_text(text, ("```(?:python|py)", "\n```"))
     if matches:
         return matches
     # Maybe the LLM outputted a code blob directly
@@ -209,27 +211,27 @@ def parse_code_blobs(text: str) -> str:
         raise ValueError(
             dedent(
                 f"""
-                Your code snippet is invalid, because the regex pattern <code>(.*?)</code> was not found in it.
+                Your code snippet is invalid, because the regex pattern {code_block_tags[0]}(.*?){code_block_tags[1]} was not found in it.
                 Here is your code snippet:
                 {text}
                 It seems like you're trying to return the final answer, you can do it as follows:
-                <code>
+                {code_block_tags[0]}
                 final_answer("YOUR FINAL ANSWER HERE")
-                </code>
+                {code_block_tags[1]}
                 """
             ).strip()
         )
     raise ValueError(
         dedent(
             f"""
-            Your code snippet is invalid, because the regex pattern <code>(.*?)</code> was not found in it.
+            Your code snippet is invalid, because the regex pattern {code_block_tags[0]}(.*?){code_block_tags[1]} was not found in it.
             Here is your code snippet:
             {text}
             Make sure to include code with the correct pattern, for instance:
             Thoughts: Your thoughts
-            <code>
+            {code_block_tags[0]}
             # Your python code here
-            </code>
+            {code_block_tags[1]}
             """
         ).strip()
     )

diff --git a/tests/test_agents.py b/tests/test_agents.py
@@ -1674,11 +1674,11 @@ def test_end_code_appending(self):
 
         outputs = [s.model_output for s in actions_steps if s.model_output]
         assert outputs
-        assert all(o.endswith("<end_code>") for o in outputs)
+        assert all(o.endswith("</code>") for o in outputs)
 
         messages = [s.model_output_message for s in actions_steps if s.model_output_message]
         assert messages
-        assert all(m.content.endswith("<end_code>") for m in messages)
+        assert all(m.content.endswith("</code>") for m in messages)
 
     def test_change_tools_after_init(self):
         from smolagents import tool