
Commit 0e36bc6

Adding streaming responses (#3)
* adding streaming responses
* Bump version to 0.5.0
1 parent 2e6f27b commit 0e36bc6

8 files changed (+142, -45 lines changed)

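At its core, this commit swaps the blocking ollama.chat() calls in client.py for streamed calls (stream=True) whose chunks are rendered live by a new StreamingManager. A minimal, hedged sketch of that streaming pattern with the ollama Python client follows; the model name and prompt are placeholders and are not taken from this commit:

```python
import asyncio
from ollama import AsyncClient

async def main():
    # Placeholder model/prompt; any locally pulled Ollama model works.
    messages = [{"role": "user", "content": "Why is the sky blue?"}]
    stream = await AsyncClient().chat(model="llama3.2", messages=messages, stream=True)
    async for chunk in stream:
        # Each chunk carries a partial assistant message; chunk.done marks the last one.
        print(chunk.message.content or "", end="", flush=True)
    print()

asyncio.run(main())
```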

README.md

Lines changed: 2 additions & 1 deletion
@@ -22,6 +22,7 @@ This implementation was adapted from the [Model Context Protocol quickstart guid
 - 🌐 **Multi-Server Support**: Connect to multiple MCP servers simultaneously
 - 🚀 **Multiple Transport Types**: Supports STDIO, SSE, and Streamable HTTP server connections
 - 🎨 **Rich Terminal Interface**: Interactive console UI
+- 🖥️ **Streaming Responses**: View model outputs in real-time as they're generated
 - 🛠️ **Tool Management**: Enable/disable specific tools or entire servers during chat sessions
 - 🧠 **Context Management**: Control conversation memory with configurable retention settings
 - 🔄 **Cross-Language Support**: Seamlessly work with both Python and JavaScript MCP servers
@@ -57,7 +58,7 @@ git clone https://github.com/jonigl/mcp-client-for-ollama.git
 cd mcp-client-for-ollama
 uv venv && source .venv/bin/activate
 uv pip install .
-uv run -m mcp_client_for_ollama.client
+uv run -m mcp_client_for_ollama
 ```

 ## Usage

cli-package/pyproject.toml

Lines changed: 2 additions & 2 deletions
@@ -1,6 +1,6 @@
 [project]
 name = "ollmcp"
-version = "0.4.0"
+version = "0.5.0"
 description = "CLI for MCP Client for Ollama - An easy-to-use command for interacting with Ollama through MCP"
 readme = "README.md"
 requires-python = ">=3.10"
@@ -9,7 +9,7 @@ authors = [
     {name = "Jonathan Löwenstern"}
 ]
 dependencies = [
-    "mcp-client-for-ollama==0.4.0"
+    "mcp-client-for-ollama==0.5.0"
 ]

 [project.scripts]

mcp_client_for_ollama/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -1,3 +1,3 @@
 """MCP Client for Ollama package."""

-__version__ = "0.4.0"
+__version__ = "0.5.0"

mcp_client_for_ollama/__main__.py

Lines changed: 14 additions & 0 deletions
@@ -0,0 +1,14 @@
+"""
+Main entry point for the MCP Client for Ollama when run as a module.
+
+This allows you to run the client using:
+    python -m mcp_client_for_ollama
+
+It simply imports and runs the main function from cli.py.
+"""
+
+import asyncio
+from .cli import main
+
+if __name__ == "__main__":
+    asyncio.run(main())

mcp_client_for_ollama/client.py

Lines changed: 30 additions & 39 deletions
@@ -18,6 +18,7 @@
 from .server.connector import ServerConnector
 from .models.manager import ModelManager
 from .tools.manager import ToolManager
+from .utils.streaming import StreamingManager

 class MCPClient:
     def __init__(self, model: str = DEFAULT_MODEL):
@@ -32,6 +33,8 @@ def __init__(self, model: str = DEFAULT_MODEL):
         self.model_manager = ModelManager(console=self.console, default_model=model)
         # Initialize the tool manager with server connector reference
         self.tool_manager = ToolManager(console=self.console, server_connector=self.server_connector)
+        # Initialize the streaming manager
+        self.streaming_manager = StreamingManager(console=self.console)
         # Store server and tool data
         self.sessions = {} # Dict to store multiple sessions
         # UI components
@@ -177,24 +180,20 @@ async def process_query(self, query: str) -> str:

         # Get current model from the model manager
         model = self.model_manager.get_current_model()
-
-        # Initial Ollama API call
-        with self.console.status("[cyan]Thinking...[/cyan]"):
-            response: ChatResponse = await self.ollama.chat(
-                model=model,
-                messages=messages,
-                tools=available_tools,
-                options={"num_predict": 1000}
-            )
-
-        # Process response and handle tool calls
-        final_text = []
-
-        if hasattr(response.message, 'content') and response.message.content:
-            final_text.append(response.message.content)
-
-        elif response.message.tool_calls:
-            for tool in response.message.tool_calls:
+        # Initial Ollama API call with the query and available tools
+        stream = await self.ollama.chat(
+            model=model,
+            messages=messages,
+            stream=True,
+            tools=available_tools
+        )
+        # Process the streaming response
+        response_text = ""
+        tool_calls = []
+        response_text, tool_calls = await self.streaming_manager.process_streaming_response(stream)
+        # Check if there are any tool calls in the response
+        if len(tool_calls) > 0:
+            for tool in tool_calls:
                 tool_name = tool.function.name
                 tool_args = tool.function.arguments

@@ -209,33 +208,29 @@ async def process_query(self, query: str) -> str:
                 self.console.print(Panel(f"[bold]Calling tool[/bold]: [blue]{tool_name}[/blue]",
                                          subtitle=f"[dim]{tool_args}[/dim]",
                                          expand=True))
-                self.console.print()

                 with self.console.status(f"[cyan]Running {tool_name}...[/cyan]"):
                     result = await self.sessions[server_name]["session"].call_tool(actual_tool_name, tool_args)

-                self.console.print()
-
                 messages.append({
                     "role": "tool",
                     "content": result.content[0].text,
                     "name": tool_name
                 })

-                # Get next response from Ollama with the tool results
-                with self.console.status("[cyan]Processing results...[/cyan]"):
-                    response = await self.ollama.chat(
-                        model=model,
-                        messages=messages,
-                        tools=available_tools,
-                    )
+                # Get stream response from Ollama with the tool results
+                stream = await self.ollama.chat(
+                    model=model,
+                    messages=messages,
+                    stream=True,
+                )
+                # Process the streaming response
+                response_text, _ = await self.streaming_manager.process_streaming_response(stream)

-                self.console.print()
-                final_text.append(response.message.content)
+        if not response_text:
+            self.console.print("[red]No response received.[/red]")
+            response_text = ""

-        # Create the final response text
-        response_text = "\n".join(final_text)
-
         # Append query and response to chat history
         self.chat_history.append({"query": query, "response": response_text})

@@ -279,7 +274,7 @@ async def display_check_for_updates(self):

     async def chat_loop(self):
         """Run an interactive chat loop"""
-        self.clear_console()
+        self.clear_console()
         self.console.print(Panel(Text.from_markup("[bold green]Welcome to the MCP Client for Ollama[/bold green]", justify="center"), expand=True, border_style="green"))
         self.display_available_tools()
         self.display_current_model()
@@ -357,11 +352,7 @@ async def chat_loop(self):
                 continue

             try:
-                response = await self.process_query(query)
-                if response:
-                    self.console.print(Markdown(response))
-                else:
-                    self.console.print("[red]No response received.[/red]")
+                await self.process_query(query)
             except ollama.ResponseError as e:
                 # Extract error message without the traceback
                 error_msg = str(e)
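Condensed, the new control flow in process_query looks roughly like the sketch below. It is an illustration only: run_tool is a hypothetical stand-in for the client's server-session lookup and session.call_tool() handling, not a function that exists in this repository.

```python
async def answer(ollama, streaming_manager, model, messages, available_tools, run_tool):
    # First streamed call: the model may answer directly and/or request tools.
    stream = await ollama.chat(model=model, messages=messages, stream=True, tools=available_tools)
    response_text, tool_calls = await streaming_manager.process_streaming_response(stream)

    for tool in tool_calls:
        # run_tool is a hypothetical helper that executes the MCP tool
        # and returns its text output.
        result_text = await run_tool(tool.function.name, tool.function.arguments)
        messages.append({"role": "tool", "content": result_text, "name": tool.function.name})

        # Follow-up streamed call: the model answers with the tool result in context.
        stream = await ollama.chat(model=model, messages=messages, stream=True)
        response_text, _ = await streaming_manager.process_streaming_response(stream)

    return response_text
```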
mcp_client_for_ollama/utils/streaming.py

Lines changed: 91 additions & 0 deletions
@@ -0,0 +1,91 @@
+"""
+This file implements streaming functionality for the MCP client for Ollama.
+
+Classes:
+    StreamingManager: Handles streaming responses from Ollama.
+"""
+from rich.markdown import Markdown
+from rich.live import Live
+from rich.spinner import Spinner
+from rich.table import Table
+from rich.text import Text
+
+class StreamingManager:
+    """Manages streaming responses for Ollama API calls"""
+
+    def __init__(self, console):
+        """Initialize the streaming manager
+
+        Args:
+            console: Rich console for output
+        """
+        self.console = console
+
+    def get_table(self, header=True):
+        """Create a table for displaying streaming responses
+
+        Returns:
+            Table: Rich table object
+        """
+        # Create a table with spinner in first row and content in second
+        table = Table.grid(expand=True)
+        spinner = Spinner("dots")
+        spinner.style = "cyan" # Make the spinner cyan
+        thinking_text = Text("Thinking...", style="cyan")
+        if not header:
+            spinner = ""
+            thinking_text = ""
+        # Create inner grid for spinner and text with minimal padding
+        header = Table.grid(padding=(0, 1))
+        header.add_row(spinner, thinking_text)
+        # Add header and content to main table
+        table.add_row(header)
+        return table
+
+
+    async def process_streaming_response(self, stream, print_response=True):
+        """Process a streaming response from Ollama with status spinner and content updates
+
+        Args:
+            stream: Async iterator of response chunks
+            print_response: Flag to control live updating of response text
+
+        Returns:
+            str: Accumulated response text
+            list: Tool calls if any
+        """
+
+        accumulated_text = ""
+        tool_calls = []
+
+        # Process the streaming response chunks with live updating markdown
+        if print_response:
+            with Live(console=self.console, refresh_per_second=10) as live:
+                table = self.get_table()
+                live.update(table)
+                async for chunk in stream:
+                    if hasattr(chunk, 'message') and hasattr(chunk.message, 'content'):
+                        content = chunk.message.content
+                        accumulated_text += content
+                        table = self.get_table(header=not chunk.done)
+                        if len(accumulated_text) > 0:
+                            table.add_row(Markdown(accumulated_text))
+                        live.update(table)
+                    if hasattr(chunk, 'message') and hasattr(chunk.message, 'tool_calls') and chunk.message.tool_calls:
+                        # return messages with tool calls
+                        for tool in chunk.message.tool_calls:
+                            tool_calls.append(tool)
+            if len(accumulated_text) > 0:
+                self.console.print()
+        else:
+            async for chunk in stream:
+                if hasattr(chunk, 'message') and hasattr(chunk.message, 'content') and chunk.message.content:
+                    content = chunk.message.content
+                    if content:
+                        accumulated_text += content
+                elif hasattr(chunk, 'message') and hasattr(chunk.message, 'tool_calls') and chunk.message.tool_calls:
+                    for tool in chunk.message.tool_calls:
+                        tool_calls.append(tool)
+
+        # Return the accumulated text and tool calls
+        return accumulated_text, tool_calls
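Because process_streaming_response only relies on an async iterator of chunks exposing message.content, optional message.tool_calls, and done, it can be exercised without a live Ollama server. The sketch below uses stubbed chunks; the stub objects and the import path mcp_client_for_ollama.utils.streaming are assumptions based on this diff, not code from the commit.

```python
import asyncio
from types import SimpleNamespace

from rich.console import Console
# Assumed import path, matching `from .utils.streaming import StreamingManager` in client.py
from mcp_client_for_ollama.utils.streaming import StreamingManager

async def fake_stream():
    # Yield a few partial chunks, then mark the last one with done=True.
    words = ["Streaming ", "responses ", "work."]
    for i, word in enumerate(words):
        yield SimpleNamespace(
            message=SimpleNamespace(content=word, tool_calls=None),
            done=(i == len(words) - 1),
        )

async def main():
    manager = StreamingManager(console=Console())
    text, tool_calls = await manager.process_streaming_response(fake_stream())
    assert text == "Streaming responses work."
    assert tool_calls == []

asyncio.run(main())
```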

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 [project]
 name = "mcp-client-for-ollama"
-version = "0.4.0"
+version = "0.5.0"
 description = "MCP Client for Ollama - A client for connecting to Model Context Protocol servers using Ollama"
 readme = "README.md"
 requires-python = ">=3.10"

uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default.
