
Commit 0e36bc6

Adding streaming responses (#3)
* adding streaming responses
* Bump version to 0.5.0
1 parent 2e6f27b commit 0e36bc6

8 files changed (+142, -45 lines changed)

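At its core, this commit swaps the blocking ollama.chat() calls in client.py for streamed calls (stream=True) whose chunks are rendered live by a new StreamingManager. A minimal, hedged sketch of that streaming pattern with the ollama Python client follows; the model name and prompt are placeholders and are not taken from this commit:

```python
import asyncio
from ollama import AsyncClient

async def main():
    # Placeholder model/prompt; any locally pulled Ollama model works.
    messages = [{"role": "user", "content": "Why is the sky blue?"}]
    stream = await AsyncClient().chat(model="llama3.2", messages=messages, stream=True)
    async for chunk in stream:
        # Each chunk carries a partial assistant message; chunk.done marks the last one.
        print(chunk.message.content or "", end="", flush=True)
    print()

asyncio.run(main())
```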

README.md

Lines changed: 2 additions & 1 deletion
@@ -22,6 +22,7 @@ This implementation was adapted from the [Model Context Protocol quickstart guid
 - 🌐 **Multi-Server Support**: Connect to multiple MCP servers simultaneously
 - 🚀 **Multiple Transport Types**: Supports STDIO, SSE, and Streamable HTTP server connections
 - 🎨 **Rich Terminal Interface**: Interactive console UI
+- 🖥️ **Streaming Responses**: View model outputs in real-time as they're generated
 - 🛠️ **Tool Management**: Enable/disable specific tools or entire servers during chat sessions
 - 🧠 **Context Management**: Control conversation memory with configurable retention settings
 - 🔄 **Cross-Language Support**: Seamlessly work with both Python and JavaScript MCP servers
@@ -57,7 +58,7 @@ git clone https://github.com/jonigl/mcp-client-for-ollama.git
 cd mcp-client-for-ollama
 uv venv && source .venv/bin/activate
 uv pip install .
-uv run -m mcp_client_for_ollama.client
+uv run -m mcp_client_for_ollama
 ```

 ## Usage

cli-package/pyproject.toml

Lines changed: 2 additions & 2 deletions
@@ -1,6 +1,6 @@
 [project]
 name = "ollmcp"
-version = "0.4.0"
+version = "0.5.0"
 description = "CLI for MCP Client for Ollama - An easy-to-use command for interacting with Ollama through MCP"
 readme = "README.md"
 requires-python = ">=3.10"
@@ -9,7 +9,7 @@ authors = [
     {name = "Jonathan Löwenstern"}
 ]
 dependencies = [
-    "mcp-client-for-ollama==0.4.0"
+    "mcp-client-for-ollama==0.5.0"
 ]

 [project.scripts]

mcp_client_for_ollama/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -1,3 +1,3 @@
 """MCP Client for Ollama package."""

-__version__ = "0.4.0"
+__version__ = "0.5.0"

mcp_client_for_ollama/__main__.py

Lines changed: 14 additions & 0 deletions
@@ -0,0 +1,14 @@
+"""
+Main entry point for the MCP Client for Ollama when run as a module.
+
+This allows you to run the client using:
+    python -m mcp_client_for_ollama
+
+It simply imports and runs the main function from cli.py.
+"""
+
+import asyncio
+from .cli import main
+
+if __name__ == "__main__":
+    asyncio.run(main())

mcp_client_for_ollama/client.py

Lines changed: 30 additions & 39 deletions
@@ -18,6 +18,7 @@
 from .server.connector import ServerConnector
 from .models.manager import ModelManager
 from .tools.manager import ToolManager
+from .utils.streaming import StreamingManager

 class MCPClient:
     def __init__(self, model: str = DEFAULT_MODEL):
@@ -32,6 +33,8 @@ def __init__(self, model: str = DEFAULT_MODEL):
         self.model_manager = ModelManager(console=self.console, default_model=model)
         # Initialize the tool manager with server connector reference
         self.tool_manager = ToolManager(console=self.console, server_connector=self.server_connector)
+        # Initialize the streaming manager
+        self.streaming_manager = StreamingManager(console=self.console)
         # Store server and tool data
         self.sessions = {} # Dict to store multiple sessions
         # UI components
@@ -177,24 +180,20 @@ async def process_query(self, query: str) -> str:

         # Get current model from the model manager
         model = self.model_manager.get_current_model()
-
-        # Initial Ollama API call
-        with self.console.status("[cyan]Thinking...[/cyan]"):
-            response: ChatResponse = await self.ollama.chat(
-                model=model,
-                messages=messages,
-                tools=available_tools,
-                options={"num_predict": 1000}
-            )
-
-        # Process response and handle tool calls
-        final_text = []
-
-        if hasattr(response.message, 'content') and response.message.content:
-            final_text.append(response.message.content)
-
-        elif response.message.tool_calls:
-            for tool in response.message.tool_calls:
+        # Initial Ollama API call with the query and available tools
+        stream = await self.ollama.chat(
+            model=model,
+            messages=messages,
+            stream=True,
+            tools=available_tools
+        )
+        # Process the streaming response
+        response_text = ""
+        tool_calls = []
+        response_text, tool_calls = await self.streaming_manager.process_streaming_response(stream)
+        # Check if there are any tool calls in the response
+        if len(tool_calls) > 0:
+            for tool in tool_calls:
                 tool_name = tool.function.name
                 tool_args = tool.function.arguments

@@ -209,33 +208,29 @@ async def process_query(self, query: str) -> str:
                 self.console.print(Panel(f"[bold]Calling tool[/bold]: [blue]{tool_name}[/blue]",
                                          subtitle=f"[dim]{tool_args}[/dim]",
                                          expand=True))
-                self.console.print()

                 with self.console.status(f"[cyan]Running {tool_name}...[/cyan]"):
                     result = await self.sessions[server_name]["session"].call_tool(actual_tool_name, tool_args)

-                self.console.print()
-
                 messages.append({
                     "role": "tool",
                     "content": result.content[0].text,
                     "name": tool_name
                 })

-                # Get next response from Ollama with the tool results
-                with self.console.status("[cyan]Processing results...[/cyan]"):
-                    response = await self.ollama.chat(
-                        model=model,
-                        messages=messages,
-                        tools=available_tools,
-                    )
+                # Get stream response from Ollama with the tool results
+                stream = await self.ollama.chat(
+                    model=model,
+                    messages=messages,
+                    stream=True,
+                )
+                # Process the streaming response
+                response_text, _ = await self.streaming_manager.process_streaming_response(stream)

-                self.console.print()
-                final_text.append(response.message.content)
+        if not response_text:
+            self.console.print("[red]No response received.[/red]")
+            response_text = ""

-        # Create the final response text
-        response_text = "\n".join(final_text)
-
         # Append query and response to chat history
         self.chat_history.append({"query": query, "response": response_text})

@@ -279,7 +274,7 @@ async def display_check_for_updates(self):

     async def chat_loop(self):
         """Run an interactive chat loop"""
-        self.clear_console()
+        self.clear_console()
         self.console.print(Panel(Text.from_markup("[bold green]Welcome to the MCP Client for Ollama[/bold green]", justify="center"), expand=True, border_style="green"))
         self.display_available_tools()
         self.display_current_model()
@@ -357,11 +352,7 @@ async def chat_loop(self):
                 continue

             try:
-                response = await self.process_query(query)
-                if response:
-                    self.console.print(Markdown(response))
-                else:
-                    self.console.print("[red]No response received.[/red]")
+                await self.process_query(query)
             except ollama.ResponseError as e:
                 # Extract error message without the traceback
                 error_msg = str(e)
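Condensed, the new control flow in process_query looks roughly like the sketch below. It is an illustration only: run_tool is a hypothetical stand-in for the client's server-session lookup and session.call_tool() handling, not a function that exists in this repository.

```python
async def answer(ollama, streaming_manager, model, messages, available_tools, run_tool):
    # First streamed call: the model may answer directly and/or request tools.
    stream = await ollama.chat(model=model, messages=messages, stream=True, tools=available_tools)
    response_text, tool_calls = await streaming_manager.process_streaming_response(stream)

    for tool in tool_calls:
        # run_tool is a hypothetical helper that executes the MCP tool
        # and returns its text output.
        result_text = await run_tool(tool.function.name, tool.function.arguments)
        messages.append({"role": "tool", "content": result_text, "name": tool.function.name})

        # Follow-up streamed call: the model answers with the tool result in context.
        stream = await ollama.chat(model=model, messages=messages, stream=True)
        response_text, _ = await streaming_manager.process_streaming_response(stream)

    return response_text
```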
mcp_client_for_ollama/utils/streaming.py

Lines changed: 91 additions & 0 deletions
@@ -0,0 +1,91 @@
+"""
+This file implements streaming functionality for the MCP client for Ollama.
+
+Classes:
+    StreamingManager: Handles streaming responses from Ollama.
+"""
+from rich.markdown import Markdown
+from rich.live import Live
+from rich.spinner import Spinner
+from rich.table import Table
+from rich.text import Text
+
+class StreamingManager:
+    """Manages streaming responses for Ollama API calls"""
+
+    def __init__(self, console):
+        """Initialize the streaming manager
+
+        Args:
+            console: Rich console for output
+        """
+        self.console = console
+
+    def get_table(self, header=True):
+        """Create a table for displaying streaming responses
+
+        Returns:
+            Table: Rich table object
+        """
+        # Create a table with spinner in first row and content in second
+        table = Table.grid(expand=True)
+        spinner = Spinner("dots")
+        spinner.style = "cyan" # Make the spinner cyan
+        thinking_text = Text("Thinking...", style="cyan")
+        if not header:
+            spinner = ""
+            thinking_text = ""
+        # Create inner grid for spinner and text with minimal padding
+        header = Table.grid(padding=(0, 1))
+        header.add_row(spinner, thinking_text)
+        # Add header and content to main table
+        table.add_row(header)
+        return table
+
+
+    async def process_streaming_response(self, stream, print_response=True):
+        """Process a streaming response from Ollama with status spinner and content updates
+
+        Args:
+            stream: Async iterator of response chunks
+            print_response: Flag to control live updating of response text
+
+        Returns:
+            str: Accumulated response text
+            list: Tool calls if any
+        """
+
+        accumulated_text = ""
+        tool_calls = []
+
+        # Process the streaming response chunks with live updating markdown
+        if print_response:
+            with Live(console=self.console, refresh_per_second=10) as live:
+                table = self.get_table()
+                live.update(table)
+                async for chunk in stream:
+                    if hasattr(chunk, 'message') and hasattr(chunk.message, 'content'):
+                        content = chunk.message.content
+                        accumulated_text += content
+                        table = self.get_table(header=not chunk.done)
+                        if len(accumulated_text) > 0:
+                            table.add_row(Markdown(accumulated_text))
+                        live.update(table)
+                    if hasattr(chunk, 'message') and hasattr(chunk.message, 'tool_calls') and chunk.message.tool_calls:
+                        # return messages with tool calls
+                        for tool in chunk.message.tool_calls:
+                            tool_calls.append(tool)
+            if len(accumulated_text) > 0:
+                self.console.print()
+        else:
+            async for chunk in stream:
+                if hasattr(chunk, 'message') and hasattr(chunk.message, 'content') and chunk.message.content:
+                    content = chunk.message.content
+                    if content:
+                        accumulated_text += content
+                elif hasattr(chunk, 'message') and hasattr(chunk.message, 'tool_calls') and chunk.message.tool_calls:
+                    for tool in chunk.message.tool_calls:
+                        tool_calls.append(tool)
+
+        # Return the accumulated text and tool calls
+        return accumulated_text, tool_calls
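Because process_streaming_response only relies on an async iterator of chunks exposing message.content, optional message.tool_calls, and done, it can be exercised without a live Ollama server. The sketch below uses stubbed chunks; the stub objects and the import path mcp_client_for_ollama.utils.streaming are assumptions based on this diff, not code from the commit.

```python
import asyncio
from types import SimpleNamespace

from rich.console import Console
# Assumed import path, matching `from .utils.streaming import StreamingManager` in client.py
from mcp_client_for_ollama.utils.streaming import StreamingManager

async def fake_stream():
    # Yield a few partial chunks, then mark the last one with done=True.
    words = ["Streaming ", "responses ", "work."]
    for i, word in enumerate(words):
        yield SimpleNamespace(
            message=SimpleNamespace(content=word, tool_calls=None),
            done=(i == len(words) - 1),
        )

async def main():
    manager = StreamingManager(console=Console())
    text, tool_calls = await manager.process_streaming_response(fake_stream())
    assert text == "Streaming responses work."
    assert tool_calls == []

asyncio.run(main())
```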

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 [project]
 name = "mcp-client-for-ollama"
-version = "0.4.0"
+version = "0.5.0"
 description = "MCP Client for Ollama - A client for connecting to Model Context Protocol servers using Ollama"
 readme = "README.md"
 requires-python = ">=3.10"

uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default.
