Commit f08c264

feat: add response_format param to OllamaChatGenerator (#1326)
* Add response_format param to Ollama integration
* Add related tests
1 parent 232c537 commit f08c264
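For context, here is a minimal usage sketch of the new parameter. It is not part of the commit; the model name and schema are illustrative, and it assumes a local Ollama server with the model already pulled:

# Illustrative sketch (not from the commit): constrain the reply to a JSON object.
import json

from haystack.dataclasses import ChatMessage
from haystack_integrations.components.generators.ollama import OllamaChatGenerator

generator = OllamaChatGenerator(
    model="llama3.2:3b",  # illustrative; any pulled model that supports structured outputs
    response_format={
        "type": "object",
        "properties": {"capital": {"type": "string"}, "population": {"type": "number"}},
    },
)

result = generator.run([ChatMessage.from_user("What's the capital of France and its population?")])
print(json.loads(result["replies"][0].text))  # e.g. {"capital": "Paris", "population": 2102650}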

3 files changed (+92 −4 lines)
integrations/ollama/pyproject.toml (+2 −2)
@@ -26,7 +26,7 @@ classifiers = [
   "Programming Language :: Python :: Implementation :: CPython",
   "Programming Language :: Python :: Implementation :: PyPy",
 ]
-dependencies = ["haystack-ai", "ollama>=0.4.0"]
+dependencies = ["haystack-ai", "ollama>=0.4.0", "pydantic"]

 [project.urls]
 Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/ollama#readme"
@@ -165,5 +165,5 @@ markers = [
 addopts = ["--import-mode=importlib"]

 [[tool.mypy.overrides]]
-module = ["haystack.*", "haystack_integrations.*", "pytest.*", "ollama.*"]
+module = ["haystack.*", "haystack_integrations.*", "pytest.*", "ollama.*", "pydantic.*"]
 ignore_missing_imports = true
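The new runtime dependency and the mypy override exist only to type the new parameter: in pydantic v2, JsonSchemaValue is defined as an alias for Dict[str, Any]. The sketch below (illustrative, not from the commit) shows the extent of the dependency's use:

# The integration imports a single type alias from pydantic to annotate
# response_format; JsonSchemaValue is an alias for Dict[str, Any] in pydantic v2.
from pydantic.json_schema import JsonSchemaValue

schema: JsonSchemaValue = {
    "type": "object",
    "properties": {"name": {"type": "string"}, "age": {"type": "number"}},
}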

integrations/ollama/src/haystack_integrations/components/generators/ollama/chat/chat_generator.py (+19 −2)
@@ -1,9 +1,10 @@
-from typing import Any, Callable, Dict, List, Optional, Union
+from typing import Any, Callable, Dict, List, Literal, Optional, Union

 from haystack import component, default_from_dict, default_to_dict
 from haystack.dataclasses import ChatMessage, StreamingChunk, ToolCall
 from haystack.tools import Tool, _check_duplicate_tool_names, deserialize_tools_inplace
 from haystack.utils.callable_serialization import deserialize_callable, serialize_callable
+from pydantic.json_schema import JsonSchemaValue

 from ollama import ChatResponse, Client
@@ -97,6 +98,7 @@ def __init__(
         keep_alive: Optional[Union[float, str]] = None,
         streaming_callback: Optional[Callable[[StreamingChunk], None]] = None,
         tools: Optional[List[Tool]] = None,
+        response_format: Optional[Union[None, Literal["json"], JsonSchemaValue]] = None,
     ):
         """
         :param model:
@@ -124,6 +126,11 @@ def __init__(
             A list of tools for which the model can prepare calls.
             Not all models support tools. For a list of models compatible with tools, see the
             [models page](https://ollama.com/search?c=tools).
+        :param response_format:
+            The format for structured model outputs. The value can be:
+            - None: No specific structure or format is applied to the response. The response is returned as-is.
+            - "json": The response is formatted as a JSON object.
+            - JSON Schema: The response is formatted as a JSON object that adheres to the specified JSON Schema.
         """

         _check_duplicate_tool_names(tools)
@@ -135,7 +142,7 @@ def __init__(
         self.keep_alive = keep_alive
         self.streaming_callback = streaming_callback
         self.tools = tools
-
+        self.response_format = response_format
         self._client = Client(host=self.url, timeout=self.timeout)

     def to_dict(self) -> Dict[str, Any]:
@@ -156,6 +163,7 @@ def to_dict(self) -> Dict[str, Any]:
             timeout=self.timeout,
             streaming_callback=callback_name,
             tools=serialized_tools,
+            response_format=self.response_format,
         )

     @classmethod
@@ -237,6 +245,14 @@ def run(
             msg = "Ollama does not support tools and streaming at the same time. Please choose one."
             raise ValueError(msg)

+        if self.response_format and tools:
+            msg = "Ollama does not support tools and response_format at the same time. Please choose one."
+            raise ValueError(msg)
+
+        if self.response_format and stream:
+            msg = "Ollama does not support streaming and response_format at the same time. Please choose one."
+            raise ValueError(msg)
+
         ollama_tools = [{"type": "function", "function": {**t.tool_spec}} for t in tools] if tools else None

         ollama_messages = [_convert_chatmessage_to_ollama_format(msg) for msg in messages]
@@ -247,6 +263,7 @@ def run(
             stream=stream,
             keep_alive=self.keep_alive,
             options=generation_kwargs,
+            format=self.response_format,
         )

         if stream:
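Taken together, the guards above make response_format mutually exclusive with tools and with streaming. A hedged sketch of the caller-facing behavior (illustrative, not from the commit; model name is an assumption):

# Combining streaming with response_format now raises ValueError before any request is sent.
from haystack.components.generators.utils import print_streaming_chunk
from haystack.dataclasses import ChatMessage
from haystack_integrations.components.generators.ollama import OllamaChatGenerator

generator = OllamaChatGenerator(
    model="llama3.2:3b",  # illustrative model name
    streaming_callback=print_streaming_chunk,
    response_format="json",  # plain JSON mode, no schema
)

try:
    generator.run([ChatMessage.from_user("Name one EU capital.")])
except ValueError as err:
    print(err)  # "Ollama does not support streaming and response_format at the same time. ..."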

integrations/ollama/tests/test_chat_generator.py (+71 −0)
@@ -165,6 +165,7 @@ def test_init_default(self):
         assert component.streaming_callback is None
         assert component.tools is None
         assert component.keep_alive is None
+        assert component.response_format is None

     def test_init(self, tools):
         component = OllamaChatGenerator(
@@ -175,6 +176,7 @@ def test_init(self, tools):
             keep_alive="10m",
             streaming_callback=print_streaming_chunk,
             tools=tools,
+            response_format={"type": "object", "properties": {"name": {"type": "string"}, "age": {"type": "number"}}},
         )

         assert component.model == "llama2"
@@ -184,6 +186,10 @@ def test_init(self, tools):
         assert component.keep_alive == "10m"
         assert component.streaming_callback is print_streaming_chunk
         assert component.tools == tools
+        assert component.response_format == {
+            "type": "object",
+            "properties": {"name": {"type": "string"}, "age": {"type": "number"}},
+        }

     def test_init_fail_with_duplicate_tool_names(self, tools):
@@ -206,6 +212,7 @@ def test_to_dict(self):
             generation_kwargs={"max_tokens": 10, "some_test_param": "test-params"},
             tools=[tool],
             keep_alive="5m",
+            response_format={"type": "object", "properties": {"name": {"type": "string"}, "age": {"type": "number"}}},
         )
         data = component.to_dict()
         assert data == {
@@ -235,6 +242,10 @@ def test_to_dict(self):
                     },
                 },
             ],
+            "response_format": {
+                "type": "object",
+                "properties": {"name": {"type": "string"}, "age": {"type": "number"}},
+            },
         },
     }
@@ -273,6 +284,10 @@ def test_from_dict(self):
                     },
                 },
             ],
+            "response_format": {
+                "type": "object",
+                "properties": {"name": {"type": "string"}, "age": {"type": "number"}},
+            },
         },
     }
     component = OllamaChatGenerator.from_dict(data)
@@ -286,6 +301,10 @@ def test_from_dict(self):
         }
         assert component.timeout == 120
         assert component.tools == [tool]
+        assert component.response_format == {
+            "type": "object",
+            "properties": {"name": {"type": "string"}, "age": {"type": "number"}},
+        }

     @patch("haystack_integrations.components.generators.ollama.chat.chat_generator.Client")
     def test_run(self, mock_client):
@@ -319,6 +338,7 @@ def test_run(self, mock_client):
             tools=None,
             options={},
             keep_alive=None,
+            format=None,
         )

         assert "replies" in result
@@ -456,3 +476,54 @@ def test_run_with_tools(self, tools):
         assert isinstance(tool_call, ToolCall)
         assert tool_call.tool_name == "weather"
         assert tool_call.arguments == {"city": "Paris"}
+
+    @pytest.mark.integration
+    def test_run_with_response_format(self):
+        response_format = {
+            "type": "object",
+            "properties": {"capital": {"type": "string"}, "population": {"type": "number"}},
+        }
+        chat_generator = OllamaChatGenerator(model="llama3.2:3b", response_format=response_format)
+
+        message = ChatMessage.from_user("What's the capital of France and its population?")
+        response = chat_generator.run([message])
+
+        assert isinstance(response, dict)
+        assert isinstance(response["replies"], list)
+
+        # Parse the response text as JSON and verify its structure
+        response_data = json.loads(response["replies"][0].text)
+        assert isinstance(response_data, dict)
+        assert "capital" in response_data
+        assert isinstance(response_data["capital"], str)
+        assert "population" in response_data
+        assert isinstance(response_data["population"], (int, float))
+        assert response_data["capital"] == "Paris"
+
+    def test_run_with_streaming_and_format(self):
+        response_format = {
+            "type": "object",
+            "properties": {"answer": {"type": "string"}},
+        }
+        streaming_callback = Mock()
+        chat_generator = OllamaChatGenerator(
+            model="llama3.2:3b", streaming_callback=streaming_callback, response_format=response_format
+        )
+
+        chat_messages = [
+            ChatMessage.from_user("What is the largest city in the United Kingdom by population?"),
+            ChatMessage.from_assistant("London is the largest city in the United Kingdom by population"),
+            ChatMessage.from_user("And what is the second largest?"),
+        ]
+        with pytest.raises(ValueError):
+            chat_generator.run([chat_messages])
+
+    def test_run_with_tools_and_format(self, tools):
+        response_format = {
+            "type": "object",
+            "properties": {"capital": {"type": "string"}, "population": {"type": "number"}},
+        }
+        chat_generator = OllamaChatGenerator(model="llama3.2:3b", tools=tools, response_format=response_format)
+        message = ChatMessage.from_user("What's the weather in Paris?")
+        with pytest.raises(ValueError):
+            chat_generator.run([message])
